From 1a882fd2ee84ae28a06cf07902dc4645d8ad4a6d Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Mon, 18 Jan 2016 10:47:13 +0200
Subject: nir: move shader_enums.[ch] to compiler

This way one can reuse it in glsl, nir or other infrastructure without
pulling in nir as a dependency.

Signed-off-by: Emil Velikov
Acked-by: Matt Turner
Acked-by: Jose Fonseca
---
 src/mesa/drivers/x11/SConscript | 1 +
 src/mesa/main/mtypes.h          | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/mesa')

diff --git a/src/mesa/drivers/x11/SConscript b/src/mesa/drivers/x11/SConscript
index d29f9874f44..45419973d39 100644
--- a/src/mesa/drivers/x11/SConscript
+++ b/src/mesa/drivers/x11/SConscript
@@ -18,6 +18,7 @@ env.Prepend(LIBPATH = env['X11_LIBPATH'])
 env.Prepend(LIBS = [
     glapi,
     mesautil,
+    compiler,
     glsl,
     mesa,
 ])
diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
index 15dd1ca7cdf..f3102dc21e3 100644
--- a/src/mesa/main/mtypes.h
+++ b/src/mesa/main/mtypes.h
@@ -42,7 +42,7 @@
 #include "main/config.h"
 #include "glapi/glapi.h"
 #include "math/m_matrix.h"	/* GLmatrix */
-#include "glsl/nir/shader_enums.h"
+#include "compiler/shader_enums.h"
 
 #include "main/formats.h"	/* MESA_FORMAT_COUNT */
-- 
cgit v1.2.3


From 24f984f64ae58c274f79eaf9148aea37df67131c Mon Sep 17 00:00:00 2001
From: Emil Velikov
Date: Mon, 18 Jan 2016 11:35:29 +0200
Subject: nir: move glsl_types.{cpp,h} to compiler

Allows us to remove the SCons workaround :-)

Signed-off-by: Emil Velikov
Acked-by: Matt Turner
Acked-by: Jose Fonseca
---
 src/compiler/Makefile.sources                    |    2 +
 src/compiler/glsl_types.cpp                      | 1758 ++++++++++++++++++++
 src/compiler/glsl_types.h                        |  887 ++++++++++
 src/glsl/Makefile.sources                        |    2 -
 src/glsl/SConscript                              |    7 -
 src/glsl/ast_array_index.cpp                     |    2 +-
 src/glsl/ast_function.cpp                        |    2 +-
 src/glsl/ast_to_hir.cpp                          |    2 +-
 src/glsl/builtin_types.cpp                       |    2 +-
 src/glsl/glsl_parser.yy                          |    2 +-
 src/glsl/hir_field_selection.cpp                 |    2 +-
 src/glsl/ir.cpp                                  |    2 +-
 src/glsl/ir.h                                    |    2 +-
 src/glsl/ir_clone.cpp                            |    2 +-
 src/glsl/ir_constant_expression.cpp              |    2 +-
 src/glsl/ir_function.cpp                         |    2 +-
 src/glsl/ir_print_visitor.cpp                    |    2 +-
 src/glsl/ir_reader.cpp                           |    2 +-
 src/glsl/ir_rvalue_visitor.cpp                   |    2 +-
 src/glsl/ir_set_program_inouts.cpp               |    2 +-
 src/glsl/ir_validate.cpp                         |    2 +-
 src/glsl/ir_variable_refcount.cpp                |    2 +-
 src/glsl/ir_variable_refcount.h                  |    2 +-
 src/glsl/loop_analysis.cpp                       |    2 +-
 src/glsl/loop_controls.cpp                       |    2 +-
 src/glsl/loop_unroll.cpp                         |    2 +-
 src/glsl/lower_const_arrays_to_uniforms.cpp      |    2 +-
 src/glsl/lower_discard.cpp                       |    2 +-
 src/glsl/lower_discard_flow.cpp                  |    2 +-
 src/glsl/lower_if_to_cond_assign.cpp             |    2 +-
 src/glsl/lower_instructions.cpp                  |    2 +-
 src/glsl/lower_jumps.cpp                         |    2 +-
 src/glsl/lower_mat_op_to_vec.cpp                 |    2 +-
 src/glsl/lower_offset_array.cpp                  |    2 +-
 src/glsl/lower_subroutine.cpp                    |    2 +-
 src/glsl/lower_variable_index_to_cond_assign.cpp |    2 +-
 src/glsl/lower_vec_index_to_cond_assign.cpp      |    2 +-
 src/glsl/lower_vec_index_to_swizzle.cpp          |    2 +-
 src/glsl/nir/glsl_types.cpp                      | 1758 --------------------
 src/glsl/nir/glsl_types.h                        |  887 ----------
 src/glsl/nir/nir_types.h                         |    4 +-
 src/glsl/opt_algebraic.cpp                       |    2 +-
 src/glsl/opt_array_splitting.cpp                 |    2 +-
 src/glsl/opt_conditional_discard.cpp             |    2 +-
 src/glsl/opt_constant_folding.cpp                |    2 +-
 src/glsl/opt_constant_propagation.cpp            |    2 +-
 src/glsl/opt_constant_variable.cpp               |    2 +-
 src/glsl/opt_copy_propagation.cpp                |    2 +-
 src/glsl/opt_copy_propagation_elements.cpp       |    2 +-
 src/glsl/opt_dead_builtin_varyings.cpp           |    2 +-
 src/glsl/opt_dead_code.cpp                       |    2 +-
 src/glsl/opt_dead_code_local.cpp                 |    2 +-
 src/glsl/opt_dead_functions.cpp                  |    2 +-
 src/glsl/opt_function_inlining.cpp               |    2 +-
 src/glsl/opt_minmax.cpp                          |    2 +-
 src/glsl/opt_noop_swizzle.cpp                    |    2 +-
 src/glsl/opt_structure_splitting.cpp             |    2 +-
 src/glsl/opt_swizzle_swizzle.cpp                 |    2 +-
 src/glsl/opt_tree_grafting.cpp                   |    2 +-
 src/glsl/opt_vectorize.cpp                       |    2 +-
 src/mesa/SConscript                              |    1 -
 .../drivers/dri/i965/brw_cubemap_normalize.cpp   |    2 +-
 src/mesa/drivers/dri/i965/brw_fs.cpp             |    2 +-
 .../dri/i965/brw_fs_channel_expressions.cpp      |    2 +-
 .../drivers/dri/i965/brw_fs_vector_splitting.cpp |    2 +-
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp     |    2 +-
 .../dri/i965/brw_lower_unnormalized_offset.cpp   |    2 +-
 src/mesa/main/ff_fragment_shader.cpp             |    2 +-
 src/mesa/main/uniforms.c                         |    2 +-
 src/mesa/main/uniforms.h                         |    2 +-
 src/mesa/program/ir_to_mesa.cpp                  |    2 +-
 src/mesa/program/sampler.cpp                     |    2 +-
 72 files changed, 2712 insertions(+), 2720 deletions(-)
 create mode 100644 src/compiler/glsl_types.cpp
 create mode 100644 src/compiler/glsl_types.h
 delete mode 100644 src/glsl/nir/glsl_types.cpp
 delete mode 100644 src/glsl/nir/glsl_types.h

(limited to 'src/mesa')

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 73e1771c8a7..38e75cfd8c6 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -1,4 +1,6 @@
 LIBCOMPILER_FILES = \
 	builtin_type_macros.h \
+	glsl_types.cpp \
+	glsl_types.h \
 	shader_enums.c \
 	shader_enums.h
diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
new file mode 100644
index 00000000000..7e60e7c1098
--- /dev/null
+++ b/src/compiler/glsl_types.cpp
@@ -0,0 +1,1758 @@
+/*
+ * Copyright © 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include "main/macros.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl_types.h"
+#include "util/hash_table.h"
+
+
+mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP;
+hash_table *glsl_type::array_types = NULL;
+hash_table *glsl_type::record_types = NULL;
+hash_table *glsl_type::interface_types = NULL;
+hash_table *glsl_type::subroutine_types = NULL;
+void *glsl_type::mem_ctx = NULL;
+
+void
+glsl_type::init_ralloc_type_ctx(void)
+{
+   if (glsl_type::mem_ctx == NULL) {
+      glsl_type::mem_ctx = ralloc_autofree_context();
+      assert(glsl_type::mem_ctx != NULL);
+   }
+}
+
+glsl_type::glsl_type(GLenum gl_type,
+                     glsl_base_type base_type, unsigned vector_elements,
+                     unsigned matrix_columns, const char *name) :
+   gl_type(gl_type),
+   base_type(base_type),
+   sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+   sampler_type(0), interface_packing(0),
+   vector_elements(vector_elements), matrix_columns(matrix_columns),
+   length(0)
+{
+   mtx_lock(&glsl_type::mutex);
+
+   init_ralloc_type_ctx();
+   assert(name != NULL);
+   this->name = ralloc_strdup(this->mem_ctx, name);
+
+   mtx_unlock(&glsl_type::mutex);
+
+   /* Neither dimension is zero or both dimensions are zero.
+    */
+   assert((vector_elements == 0) == (matrix_columns == 0));
+   memset(& fields, 0, sizeof(fields));
+}
+
+glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
+                     enum glsl_sampler_dim dim, bool shadow, bool array,
+                     unsigned type, const char *name) :
+   gl_type(gl_type),
+   base_type(base_type),
+   sampler_dimensionality(dim), sampler_shadow(shadow),
+   sampler_array(array), sampler_type(type), interface_packing(0),
+   length(0)
+{
+   mtx_lock(&glsl_type::mutex);
+
+   init_ralloc_type_ctx();
+   assert(name != NULL);
+   this->name = ralloc_strdup(this->mem_ctx, name);
+
+   mtx_unlock(&glsl_type::mutex);
+
+   memset(& fields, 0, sizeof(fields));
+
+   if (base_type == GLSL_TYPE_SAMPLER) {
+      /* Samplers take no storage whatsoever.
*/ + matrix_columns = vector_elements = 0; + } else { + matrix_columns = vector_elements = 1; + } +} + +glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, + const char *name) : + gl_type(0), + base_type(GLSL_TYPE_STRUCT), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(num_fields) +{ + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(name != NULL); + this->name = ralloc_strdup(this->mem_ctx, name); + this->fields.structure = ralloc_array(this->mem_ctx, + glsl_struct_field, length); + + for (i = 0; i < length; i++) { + this->fields.structure[i].type = fields[i].type; + this->fields.structure[i].name = ralloc_strdup(this->fields.structure, + fields[i].name); + this->fields.structure[i].location = fields[i].location; + this->fields.structure[i].interpolation = fields[i].interpolation; + this->fields.structure[i].centroid = fields[i].centroid; + this->fields.structure[i].sample = fields[i].sample; + this->fields.structure[i].matrix_layout = fields[i].matrix_layout; + this->fields.structure[i].patch = fields[i].patch; + this->fields.structure[i].image_read_only = fields[i].image_read_only; + this->fields.structure[i].image_write_only = fields[i].image_write_only; + this->fields.structure[i].image_coherent = fields[i].image_coherent; + this->fields.structure[i].image_volatile = fields[i].image_volatile; + this->fields.structure[i].image_restrict = fields[i].image_restrict; + this->fields.structure[i].precision = fields[i].precision; + } + + mtx_unlock(&glsl_type::mutex); +} + +glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, + enum glsl_interface_packing packing, const char *name) : + gl_type(0), + base_type(GLSL_TYPE_INTERFACE), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing((unsigned) packing), + vector_elements(0), matrix_columns(0), + length(num_fields) +{ + unsigned int i; + + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(name != NULL); + this->name = ralloc_strdup(this->mem_ctx, name); + this->fields.structure = ralloc_array(this->mem_ctx, + glsl_struct_field, length); + for (i = 0; i < length; i++) { + this->fields.structure[i].type = fields[i].type; + this->fields.structure[i].name = ralloc_strdup(this->fields.structure, + fields[i].name); + this->fields.structure[i].location = fields[i].location; + this->fields.structure[i].interpolation = fields[i].interpolation; + this->fields.structure[i].centroid = fields[i].centroid; + this->fields.structure[i].sample = fields[i].sample; + this->fields.structure[i].matrix_layout = fields[i].matrix_layout; + this->fields.structure[i].patch = fields[i].patch; + this->fields.structure[i].precision = fields[i].precision; + } + + mtx_unlock(&glsl_type::mutex); +} + +glsl_type::glsl_type(const char *subroutine_name) : + gl_type(0), + base_type(GLSL_TYPE_SUBROUTINE), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(1), matrix_columns(1), + length(0) +{ + mtx_lock(&glsl_type::mutex); + + init_ralloc_type_ctx(); + assert(subroutine_name != NULL); + this->name = ralloc_strdup(this->mem_ctx, subroutine_name); + mtx_unlock(&glsl_type::mutex); +} + +bool +glsl_type::contains_sampler() const +{ + if (this->is_array()) { + return this->fields.array->contains_sampler(); + } else if (this->is_record()) { + for (unsigned int 
i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_sampler()) + return true; + } + return false; + } else { + return this->is_sampler(); + } +} + + +bool +glsl_type::contains_integer() const +{ + if (this->is_array()) { + return this->fields.array->contains_integer(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_integer()) + return true; + } + return false; + } else { + return this->is_integer(); + } +} + +bool +glsl_type::contains_double() const +{ + if (this->is_array()) { + return this->fields.array->contains_double(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_double()) + return true; + } + return false; + } else { + return this->is_double(); + } +} + +bool +glsl_type::contains_opaque() const { + switch (base_type) { + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + return true; + case GLSL_TYPE_ARRAY: + return fields.array->contains_opaque(); + case GLSL_TYPE_STRUCT: + for (unsigned int i = 0; i < length; i++) { + if (fields.structure[i].type->contains_opaque()) + return true; + } + return false; + default: + return false; + } +} + +bool +glsl_type::contains_subroutine() const +{ + if (this->is_array()) { + return this->fields.array->contains_subroutine(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_subroutine()) + return true; + } + return false; + } else { + return this->is_subroutine(); + } +} + +gl_texture_index +glsl_type::sampler_index() const +{ + const glsl_type *const t = (this->is_array()) ? this->fields.array : this; + + assert(t->is_sampler()); + + switch (t->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + return (t->sampler_array) ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; + case GLSL_SAMPLER_DIM_2D: + return (t->sampler_array) ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; + case GLSL_SAMPLER_DIM_3D: + return TEXTURE_3D_INDEX; + case GLSL_SAMPLER_DIM_CUBE: + return (t->sampler_array) ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; + case GLSL_SAMPLER_DIM_RECT: + return TEXTURE_RECT_INDEX; + case GLSL_SAMPLER_DIM_BUF: + return TEXTURE_BUFFER_INDEX; + case GLSL_SAMPLER_DIM_EXTERNAL: + return TEXTURE_EXTERNAL_INDEX; + case GLSL_SAMPLER_DIM_MS: + return (t->sampler_array) ? 
TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; + default: + assert(!"Should not get here."); + return TEXTURE_BUFFER_INDEX; + } +} + +bool +glsl_type::contains_image() const +{ + if (this->is_array()) { + return this->fields.array->contains_image(); + } else if (this->is_record()) { + for (unsigned int i = 0; i < this->length; i++) { + if (this->fields.structure[i].type->contains_image()) + return true; + } + return false; + } else { + return this->is_image(); + } +} + +const glsl_type *glsl_type::get_base_type() const +{ + switch (base_type) { + case GLSL_TYPE_UINT: + return uint_type; + case GLSL_TYPE_INT: + return int_type; + case GLSL_TYPE_FLOAT: + return float_type; + case GLSL_TYPE_DOUBLE: + return double_type; + case GLSL_TYPE_BOOL: + return bool_type; + default: + return error_type; + } +} + + +const glsl_type *glsl_type::get_scalar_type() const +{ + const glsl_type *type = this; + + /* Handle arrays */ + while (type->base_type == GLSL_TYPE_ARRAY) + type = type->fields.array; + + /* Handle vectors and matrices */ + switch (type->base_type) { + case GLSL_TYPE_UINT: + return uint_type; + case GLSL_TYPE_INT: + return int_type; + case GLSL_TYPE_FLOAT: + return float_type; + case GLSL_TYPE_DOUBLE: + return double_type; + case GLSL_TYPE_BOOL: + return bool_type; + default: + /* Handle everything else */ + return type; + } +} + + +void +_mesa_glsl_release_types(void) +{ + /* Should only be called during atexit (either when unloading shared + * object, or if process terminates), so no mutex-locking should be + * necessary. + */ + if (glsl_type::array_types != NULL) { + _mesa_hash_table_destroy(glsl_type::array_types, NULL); + glsl_type::array_types = NULL; + } + + if (glsl_type::record_types != NULL) { + _mesa_hash_table_destroy(glsl_type::record_types, NULL); + glsl_type::record_types = NULL; + } + + if (glsl_type::interface_types != NULL) { + _mesa_hash_table_destroy(glsl_type::interface_types, NULL); + glsl_type::interface_types = NULL; + } +} + + +glsl_type::glsl_type(const glsl_type *array, unsigned length) : + base_type(GLSL_TYPE_ARRAY), + sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), + sampler_type(0), interface_packing(0), + vector_elements(0), matrix_columns(0), + length(length), name(NULL) +{ + this->fields.array = array; + /* Inherit the gl type of the base. The GL type is used for + * uniform/statevar handling in Mesa and the arrayness of the type + * is represented by the size rather than the type. + */ + this->gl_type = array->gl_type; + + /* Allow a maximum of 10 characters for the array size. This is enough + * for 32-bits of ~0. The extra 3 are for the '[', ']', and terminating + * NUL. 
+ */ + const unsigned name_length = strlen(array->name) + 10 + 3; + + mtx_lock(&glsl_type::mutex); + char *const n = (char *) ralloc_size(this->mem_ctx, name_length); + mtx_unlock(&glsl_type::mutex); + + if (length == 0) + snprintf(n, name_length, "%s[]", array->name); + else { + /* insert outermost dimensions in the correct spot + * otherwise the dimension order will be backwards + */ + const char *pos = strchr(array->name, '['); + if (pos) { + int idx = pos - array->name; + snprintf(n, idx+1, "%s", array->name); + snprintf(n + idx, name_length - idx, "[%u]%s", + length, array->name + idx); + } else { + snprintf(n, name_length, "%s[%u]", array->name, length); + } + } + + this->name = n; +} + + +const glsl_type * +glsl_type::vec(unsigned components) +{ + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + float_type, vec2_type, vec3_type, vec4_type + }; + return ts[components - 1]; +} + +const glsl_type * +glsl_type::dvec(unsigned components) +{ + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + double_type, dvec2_type, dvec3_type, dvec4_type + }; + return ts[components - 1]; +} + +const glsl_type * +glsl_type::ivec(unsigned components) +{ + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + int_type, ivec2_type, ivec3_type, ivec4_type + }; + return ts[components - 1]; +} + + +const glsl_type * +glsl_type::uvec(unsigned components) +{ + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + uint_type, uvec2_type, uvec3_type, uvec4_type + }; + return ts[components - 1]; +} + + +const glsl_type * +glsl_type::bvec(unsigned components) +{ + if (components == 0 || components > 4) + return error_type; + + static const glsl_type *const ts[] = { + bool_type, bvec2_type, bvec3_type, bvec4_type + }; + return ts[components - 1]; +} + + +const glsl_type * +glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns) +{ + if (base_type == GLSL_TYPE_VOID) + return void_type; + + if ((rows < 1) || (rows > 4) || (columns < 1) || (columns > 4)) + return error_type; + + /* Treat GLSL vectors as Nx1 matrices. + */ + if (columns == 1) { + switch (base_type) { + case GLSL_TYPE_UINT: + return uvec(rows); + case GLSL_TYPE_INT: + return ivec(rows); + case GLSL_TYPE_FLOAT: + return vec(rows); + case GLSL_TYPE_DOUBLE: + return dvec(rows); + case GLSL_TYPE_BOOL: + return bvec(rows); + default: + return error_type; + } + } else { + if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1)) + return error_type; + + /* GLSL matrix types are named mat{COLUMNS}x{ROWS}. 
Only the following + * combinations are valid: + * + * 1 2 3 4 + * 1 + * 2 x x x + * 3 x x x + * 4 x x x + */ +#define IDX(c,r) (((c-1)*3) + (r-1)) + + if (base_type == GLSL_TYPE_DOUBLE) { + switch (IDX(columns, rows)) { + case IDX(2,2): return dmat2_type; + case IDX(2,3): return dmat2x3_type; + case IDX(2,4): return dmat2x4_type; + case IDX(3,2): return dmat3x2_type; + case IDX(3,3): return dmat3_type; + case IDX(3,4): return dmat3x4_type; + case IDX(4,2): return dmat4x2_type; + case IDX(4,3): return dmat4x3_type; + case IDX(4,4): return dmat4_type; + default: return error_type; + } + } else { + switch (IDX(columns, rows)) { + case IDX(2,2): return mat2_type; + case IDX(2,3): return mat2x3_type; + case IDX(2,4): return mat2x4_type; + case IDX(3,2): return mat3x2_type; + case IDX(3,3): return mat3_type; + case IDX(3,4): return mat3x4_type; + case IDX(4,2): return mat4x2_type; + case IDX(4,3): return mat4x3_type; + case IDX(4,4): return mat4_type; + default: return error_type; + } + } + } + + assert(!"Should not get here."); + return error_type; +} + +const glsl_type * +glsl_type::get_sampler_instance(enum glsl_sampler_dim dim, + bool shadow, + bool array, + glsl_base_type type) +{ + switch (type) { + case GLSL_TYPE_FLOAT: + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + if (shadow) + return (array ? sampler1DArrayShadow_type : sampler1DShadow_type); + else + return (array ? sampler1DArray_type : sampler1D_type); + case GLSL_SAMPLER_DIM_2D: + if (shadow) + return (array ? sampler2DArrayShadow_type : sampler2DShadow_type); + else + return (array ? sampler2DArray_type : sampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (shadow || array) + return error_type; + else + return sampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + if (shadow) + return (array ? samplerCubeArrayShadow_type : samplerCubeShadow_type); + else + return (array ? samplerCubeArray_type : samplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + if (shadow) + return sampler2DRectShadow_type; + else + return sampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (shadow || array) + return error_type; + else + return samplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + if (shadow) + return error_type; + return (array ? sampler2DMSArray_type : sampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + if (shadow || array) + return error_type; + else + return samplerExternalOES_type; + } + case GLSL_TYPE_INT: + if (shadow) + return error_type; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? isampler1DArray_type : isampler1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? isampler2DArray_type : isampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return isampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? isamplerCubeArray_type : isamplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return isampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return isamplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? isampler2DMSArray_type : isampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + case GLSL_TYPE_UINT: + if (shadow) + return error_type; + switch (dim) { + case GLSL_SAMPLER_DIM_1D: + return (array ? usampler1DArray_type : usampler1D_type); + case GLSL_SAMPLER_DIM_2D: + return (array ? usampler2DArray_type : usampler2D_type); + case GLSL_SAMPLER_DIM_3D: + if (array) + return error_type; + return usampler3D_type; + case GLSL_SAMPLER_DIM_CUBE: + return (array ? 
usamplerCubeArray_type : usamplerCube_type); + case GLSL_SAMPLER_DIM_RECT: + if (array) + return error_type; + return usampler2DRect_type; + case GLSL_SAMPLER_DIM_BUF: + if (array) + return error_type; + return usamplerBuffer_type; + case GLSL_SAMPLER_DIM_MS: + return (array ? usampler2DMSArray_type : usampler2DMS_type); + case GLSL_SAMPLER_DIM_EXTERNAL: + return error_type; + } + default: + return error_type; + } + + unreachable("switch statement above should be complete"); +} + +const glsl_type * +glsl_type::get_array_instance(const glsl_type *base, unsigned array_size) +{ + /* Generate a name using the base type pointer in the key. This is + * done because the name of the base type may not be unique across + * shaders. For example, two shaders may have different record types + * named 'foo'. + */ + char key[128]; + snprintf(key, sizeof(key), "%p[%u]", (void *) base, array_size); + + mtx_lock(&glsl_type::mutex); + + if (array_types == NULL) { + array_types = _mesa_hash_table_create(NULL, _mesa_key_hash_string, + _mesa_key_string_equal); + } + + const struct hash_entry *entry = _mesa_hash_table_search(array_types, key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(base, array_size); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(array_types, + ralloc_strdup(mem_ctx, key), + (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_ARRAY); + assert(((glsl_type *) entry->data)->length == array_size); + assert(((glsl_type *) entry->data)->fields.array == base); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; +} + + +bool +glsl_type::record_compare(const glsl_type *b) const +{ + if (this->length != b->length) + return false; + + if (this->interface_packing != b->interface_packing) + return false; + + /* From the GLSL 4.20 specification (Sec 4.2): + * + * "Structures must have the same name, sequence of type names, and + * type definitions, and field names to be considered the same type." + * + * GLSL ES behaves the same (Ver 1.00 Sec 4.2.4, Ver 3.00 Sec 4.2.5). + * + * Note that we cannot force type name check when comparing unnamed + * structure types, these have a unique name assigned during parsing. 
+ */ + if (!this->is_anonymous() && !b->is_anonymous()) + if (strcmp(this->name, b->name) != 0) + return false; + + for (unsigned i = 0; i < this->length; i++) { + if (this->fields.structure[i].type != b->fields.structure[i].type) + return false; + if (strcmp(this->fields.structure[i].name, + b->fields.structure[i].name) != 0) + return false; + if (this->fields.structure[i].matrix_layout + != b->fields.structure[i].matrix_layout) + return false; + if (this->fields.structure[i].location + != b->fields.structure[i].location) + return false; + if (this->fields.structure[i].interpolation + != b->fields.structure[i].interpolation) + return false; + if (this->fields.structure[i].centroid + != b->fields.structure[i].centroid) + return false; + if (this->fields.structure[i].sample + != b->fields.structure[i].sample) + return false; + if (this->fields.structure[i].patch + != b->fields.structure[i].patch) + return false; + if (this->fields.structure[i].image_read_only + != b->fields.structure[i].image_read_only) + return false; + if (this->fields.structure[i].image_write_only + != b->fields.structure[i].image_write_only) + return false; + if (this->fields.structure[i].image_coherent + != b->fields.structure[i].image_coherent) + return false; + if (this->fields.structure[i].image_volatile + != b->fields.structure[i].image_volatile) + return false; + if (this->fields.structure[i].image_restrict + != b->fields.structure[i].image_restrict) + return false; + if (this->fields.structure[i].precision + != b->fields.structure[i].precision) + return false; + } + + return true; +} + + +bool +glsl_type::record_key_compare(const void *a, const void *b) +{ + const glsl_type *const key1 = (glsl_type *) a; + const glsl_type *const key2 = (glsl_type *) b; + + return strcmp(key1->name, key2->name) == 0 && key1->record_compare(key2); +} + + +/** + * Generate an integer hash value for a glsl_type structure type. 
+ */ +unsigned +glsl_type::record_key_hash(const void *a) +{ + const glsl_type *const key = (glsl_type *) a; + uintptr_t hash = key->length; + unsigned retval; + + for (unsigned i = 0; i < key->length; i++) { + /* casting pointer to uintptr_t */ + hash = (hash * 13 ) + (uintptr_t) key->fields.structure[i].type; + } + + if (sizeof(hash) == 8) + retval = (hash & 0xffffffff) ^ ((uint64_t) hash >> 32); + else + retval = hash; + + return retval; +} + + +const glsl_type * +glsl_type::get_record_instance(const glsl_struct_field *fields, + unsigned num_fields, + const char *name) +{ + const glsl_type key(fields, num_fields, name); + + mtx_lock(&glsl_type::mutex); + + if (record_types == NULL) { + record_types = _mesa_hash_table_create(NULL, record_key_hash, + record_key_compare); + } + + const struct hash_entry *entry = _mesa_hash_table_search(record_types, + &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(fields, num_fields, name); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(record_types, t, (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_STRUCT); + assert(((glsl_type *) entry->data)->length == num_fields); + assert(strcmp(((glsl_type *) entry->data)->name, name) == 0); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; +} + + +const glsl_type * +glsl_type::get_interface_instance(const glsl_struct_field *fields, + unsigned num_fields, + enum glsl_interface_packing packing, + const char *block_name) +{ + const glsl_type key(fields, num_fields, packing, block_name); + + mtx_lock(&glsl_type::mutex); + + if (interface_types == NULL) { + interface_types = _mesa_hash_table_create(NULL, record_key_hash, + record_key_compare); + } + + const struct hash_entry *entry = _mesa_hash_table_search(interface_types, + &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(fields, num_fields, + packing, block_name); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(interface_types, t, (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_INTERFACE); + assert(((glsl_type *) entry->data)->length == num_fields); + assert(strcmp(((glsl_type *) entry->data)->name, block_name) == 0); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; +} + +const glsl_type * +glsl_type::get_subroutine_instance(const char *subroutine_name) +{ + const glsl_type key(subroutine_name); + + mtx_lock(&glsl_type::mutex); + + if (subroutine_types == NULL) { + subroutine_types = _mesa_hash_table_create(NULL, record_key_hash, + record_key_compare); + } + + const struct hash_entry *entry = _mesa_hash_table_search(subroutine_types, + &key); + if (entry == NULL) { + mtx_unlock(&glsl_type::mutex); + const glsl_type *t = new glsl_type(subroutine_name); + mtx_lock(&glsl_type::mutex); + + entry = _mesa_hash_table_insert(subroutine_types, t, (void *) t); + } + + assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_SUBROUTINE); + assert(strcmp(((glsl_type *) entry->data)->name, subroutine_name) == 0); + + mtx_unlock(&glsl_type::mutex); + + return (glsl_type *) entry->data; +} + + +const glsl_type * +glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b) +{ + if (type_a == type_b) { + return type_a; + } else if (type_a->is_matrix() && type_b->is_matrix()) { + /* Matrix multiply. The columns of A must match the rows of B. 
Given + * the other previously tested constraints, this means the vector type + * of a row from A must be the same as the vector type of a column from + * B. + */ + if (type_a->row_type() == type_b->column_type()) { + /* The resulting matrix has the number of columns of matrix B and + * the number of rows of matrix A. We get the row count of A by + * looking at the size of a vector that makes up a column. The + * transpose (size of a row) is done for B. + */ + const glsl_type *const type = + get_instance(type_a->base_type, + type_a->column_type()->vector_elements, + type_b->row_type()->vector_elements); + assert(type != error_type); + + return type; + } + } else if (type_a->is_matrix()) { + /* A is a matrix and B is a column vector. Columns of A must match + * rows of B. Given the other previously tested constraints, this + * means the vector type of a row from A must be the same as the + * vector the type of B. + */ + if (type_a->row_type() == type_b) { + /* The resulting vector has a number of elements equal to + * the number of rows of matrix A. */ + const glsl_type *const type = + get_instance(type_a->base_type, + type_a->column_type()->vector_elements, + 1); + assert(type != error_type); + + return type; + } + } else { + assert(type_b->is_matrix()); + + /* A is a row vector and B is a matrix. Columns of A must match rows + * of B. Given the other previously tested constraints, this means + * the type of A must be the same as the vector type of a column from + * B. + */ + if (type_a == type_b->column_type()) { + /* The resulting vector has a number of elements equal to + * the number of columns of matrix B. */ + const glsl_type *const type = + get_instance(type_a->base_type, + type_b->row_type()->vector_elements, + 1); + assert(type != error_type); + + return type; + } + } + + return error_type; +} + + +const glsl_type * +glsl_type::field_type(const char *name) const +{ + if (this->base_type != GLSL_TYPE_STRUCT + && this->base_type != GLSL_TYPE_INTERFACE) + return error_type; + + for (unsigned i = 0; i < this->length; i++) { + if (strcmp(name, this->fields.structure[i].name) == 0) + return this->fields.structure[i].type; + } + + return error_type; +} + + +int +glsl_type::field_index(const char *name) const +{ + if (this->base_type != GLSL_TYPE_STRUCT + && this->base_type != GLSL_TYPE_INTERFACE) + return -1; + + for (unsigned i = 0; i < this->length; i++) { + if (strcmp(name, this->fields.structure[i].name) == 0) + return i; + } + + return -1; +} + + +unsigned +glsl_type::component_slots() const +{ + switch (this->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + return this->components(); + + case GLSL_TYPE_DOUBLE: + return 2 * this->components(); + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + + for (unsigned i = 0; i < this->length; i++) + size += this->fields.structure[i].type->component_slots(); + + return size; + } + + case GLSL_TYPE_ARRAY: + return this->length * this->fields.array->component_slots(); + + case GLSL_TYPE_IMAGE: + return 1; + case GLSL_TYPE_SUBROUTINE: + return 1; + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + break; + } + + return 0; +} + +unsigned +glsl_type::record_location_offset(unsigned length) const +{ + unsigned offset = 0; + const glsl_type *t = this->without_array(); + if (t->is_record()) { + assert(length <= t->length); + + for (unsigned i = 0; i < length; i++) { + const glsl_type *st = 
t->fields.structure[i].type;
+         const glsl_type *wa = st->without_array();
+         if (wa->is_record()) {
+            unsigned r_offset = wa->record_location_offset(wa->length);
+            offset += st->is_array() ?
+               st->arrays_of_arrays_size() * r_offset : r_offset;
+         } else if (st->is_array() && st->fields.array->is_array()) {
+            unsigned outer_array_size = st->length;
+            const glsl_type *base_type = st->fields.array;
+
+            /* For arrays of arrays the outer arrays take up a uniform
+             * slot for each element. The innermost array elements share a
+             * single slot so we ignore the innermost array when calculating
+             * the offset.
+             */
+            while (base_type->fields.array->is_array()) {
+               outer_array_size = outer_array_size * base_type->length;
+               base_type = base_type->fields.array;
+            }
+            offset += outer_array_size;
+         } else {
+            /* We don't worry about arrays here because unless the array
+             * contains a structure or another array it only takes up a single
+             * uniform slot.
+             */
+            offset += 1;
+         }
+      }
+   }
+   return offset;
+}
+
+unsigned
+glsl_type::uniform_locations() const
+{
+   unsigned size = 0;
+
+   switch (this->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_SUBROUTINE:
+      return 1;
+
+   case GLSL_TYPE_STRUCT:
+   case GLSL_TYPE_INTERFACE:
+      for (unsigned i = 0; i < this->length; i++)
+         size += this->fields.structure[i].type->uniform_locations();
+      return size;
+   case GLSL_TYPE_ARRAY:
+      return this->length * this->fields.array->uniform_locations();
+   default:
+      return 0;
+   }
+}
+
+bool
+glsl_type::can_implicitly_convert_to(const glsl_type *desired,
+                                     _mesa_glsl_parse_state *state) const
+{
+   if (this == desired)
+      return true;
+
+   /* There is no conversion among matrix types. */
+   if (this->matrix_columns > 1 || desired->matrix_columns > 1)
+      return false;
+
+   /* Vector size must match. */
+   if (this->vector_elements != desired->vector_elements)
+      return false;
+
+   /* int and uint can be converted to float. */
+   if (desired->is_float() && this->is_integer())
+      return true;
+
+   /* With GLSL 4.0 / ARB_gpu_shader5, int can be converted to uint.
+    * Note that state may be NULL here, when resolving function calls in the
+    * linker. By this time, all the state-dependent checks have already
+    * happened though, so allow anything that's allowed in any shader version.
+    */
+   if ((!state || state->is_version(400, 0) || state->ARB_gpu_shader5_enable) &&
+       desired->base_type == GLSL_TYPE_UINT && this->base_type == GLSL_TYPE_INT)
+      return true;
+
+   /* No implicit conversions from double. */
+   if ((!state || state->has_double()) && this->is_double())
+      return false;
+
+   /* Conversions from different types to double. */
+   if ((!state || state->has_double()) && desired->is_double()) {
+      if (this->is_float())
+         return true;
+      if (this->is_integer())
+         return true;
+   }
+
+   return false;
+}
+
+unsigned
+glsl_type::std140_base_alignment(bool row_major) const
+{
+   unsigned N = is_double() ? 8 : 4;
+
+   /* (1) If the member is a scalar consuming <N> basic machine units, the
+    *     base alignment is <N>.
+    *
+    * (2) If the member is a two- or four-component vector with components
+    *     consuming <N> basic machine units, the base alignment is 2<N> or
+    *     4<N>, respectively.
+    *
+    * (3) If the member is a three-component vector with components consuming
+    *     <N> basic machine units, the base alignment is 4<N>.
+    */
+   if (this->is_scalar() || this->is_vector()) {
+      switch (this->vector_elements) {
+      case 1:
+         return N;
+      case 2:
+         return 2 * N;
+      case 3:
+      case 4:
+         return 4 * N;
+      }
+   }
+
+   /* (4) If the member is an array of scalars or vectors, the base alignment
+    *     and array stride are set to match the base alignment of a single
+    *     array element, according to rules (1), (2), and (3), and rounded up
+    *     to the base alignment of a vec4. The array may have padding at the
+    *     end; the base offset of the member following the array is rounded up
+    *     to the next multiple of the base alignment.
+    *
+    * (6) If the member is an array of <S> column-major matrices with <C>
+    *     columns and <R> rows, the matrix is stored identically to a row of
+    *     <S>*<C> column vectors with <R> components each, according to rule
+    *     (4).
+    *
+    * (8) If the member is an array of <S> row-major matrices with <C> columns
+    *     and <R> rows, the matrix is stored identically to a row of <S>*<R>
+    *     row vectors with <C> components each, according to rule (4).
+    *
+    * (10) If the member is an array of <S> structures, the <S> elements of
+    *      the array are laid out in order, according to rule (9).
+    */
+   if (this->is_array()) {
+      if (this->fields.array->is_scalar() ||
+          this->fields.array->is_vector() ||
+          this->fields.array->is_matrix()) {
+         return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
+      } else {
+         assert(this->fields.array->is_record() ||
+                this->fields.array->is_array());
+         return this->fields.array->std140_base_alignment(row_major);
+      }
+   }
+
+   /* (5) If the member is a column-major matrix with <C> columns and
+    *     <R> rows, the matrix is stored identically to an array of
+    *     <C> column vectors with <R> components each, according to
+    *     rule (4).
+    *
+    * (7) If the member is a row-major matrix with <C> columns and <R>
+    *     rows, the matrix is stored identically to an array of <R>
+    *     row vectors with <C> components each, according to rule (4).
+    */
+   if (this->is_matrix()) {
+      const struct glsl_type *vec_type, *array_type;
+      int c = this->matrix_columns;
+      int r = this->vector_elements;
+
+      if (row_major) {
+         vec_type = get_instance(base_type, c, 1);
+         array_type = glsl_type::get_array_instance(vec_type, r);
+      } else {
+         vec_type = get_instance(base_type, r, 1);
+         array_type = glsl_type::get_array_instance(vec_type, c);
+      }
+
+      return array_type->std140_base_alignment(false);
+   }
+
+   /* (9) If the member is a structure, the base alignment of the
+    *     structure is <N>, where <N> is the largest base alignment
+    *     value of any of its members, and rounded up to the base
+    *     alignment of a vec4. The individual members of this
+    *     sub-structure are then assigned offsets by applying this set
+    *     of rules recursively, where the base offset of the first
+    *     member of the sub-structure is equal to the aligned offset
+    *     of the structure. The structure may have padding at the end;
+    *     the base offset of the member following the sub-structure is
+    *     rounded up to the next multiple of the base alignment of the
+    *     structure.
+    */
+   if (this->is_record()) {
+      unsigned base_alignment = 16;
+      for (unsigned i = 0; i < this->length; i++) {
+         bool field_row_major = row_major;
+         const enum glsl_matrix_layout matrix_layout =
+            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+            field_row_major = true;
+         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+            field_row_major = false;
+         }
+
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         base_alignment = MAX2(base_alignment,
+                               field_type->std140_base_alignment(field_row_major));
+      }
+      return base_alignment;
+   }
+
+   assert(!"not reached");
+   return -1;
+}
+
+unsigned
+glsl_type::std140_size(bool row_major) const
+{
+   unsigned N = is_double() ? 8 : 4;
+
+   /* (1) If the member is a scalar consuming <N> basic machine units, the
+    *     base alignment is <N>.
+    *
+    * (2) If the member is a two- or four-component vector with components
+    *     consuming <N> basic machine units, the base alignment is 2<N> or
+    *     4<N>, respectively.
+    *
+    * (3) If the member is a three-component vector with components consuming
+    *     <N> basic machine units, the base alignment is 4<N>.
+    */
+   if (this->is_scalar() || this->is_vector()) {
+      return this->vector_elements * N;
+   }
+
+   /* (5) If the member is a column-major matrix with <C> columns and
+    *     <R> rows, the matrix is stored identically to an array of
+    *     <C> column vectors with <R> components each, according to
+    *     rule (4).
+    *
+    * (6) If the member is an array of <S> column-major matrices with <C>
+    *     columns and <R> rows, the matrix is stored identically to a row of
+    *     <S>*<C> column vectors with <R> components each, according to rule
+    *     (4).
+    *
+    * (7) If the member is a row-major matrix with <C> columns and <R>
+    *     rows, the matrix is stored identically to an array of <R>
+    *     row vectors with <C> components each, according to rule (4).
+    *
+    * (8) If the member is an array of <S> row-major matrices with <C> columns
+    *     and <R> rows, the matrix is stored identically to a row of <S>*<R>
+    *     row vectors with <C> components each, according to rule (4).
+    */
+   if (this->without_array()->is_matrix()) {
+      const struct glsl_type *element_type;
+      const struct glsl_type *vec_type;
+      unsigned int array_len;
+
+      if (this->is_array()) {
+         element_type = this->without_array();
+         array_len = this->arrays_of_arrays_size();
+      } else {
+         element_type = this;
+         array_len = 1;
+      }
+
+      if (row_major) {
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->matrix_columns, 1);
+
+         array_len *= element_type->vector_elements;
+      } else {
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->vector_elements, 1);
+         array_len *= element_type->matrix_columns;
+      }
+      const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+                                                                  array_len);
+
+      return array_type->std140_size(false);
+   }
+
+   /* (4) If the member is an array of scalars or vectors, the base alignment
+    *     and array stride are set to match the base alignment of a single
+    *     array element, according to rules (1), (2), and (3), and rounded up
+    *     to the base alignment of a vec4. The array may have padding at the
+    *     end; the base offset of the member following the array is rounded up
+    *     to the next multiple of the base alignment.
+    *
+    * (10) If the member is an array of <S> structures, the <S> elements of
+    *      the array are laid out in order, according to rule (9).
+    */
+   if (this->is_array()) {
+      if (this->without_array()->is_record()) {
+         return this->arrays_of_arrays_size() *
+            this->without_array()->std140_size(row_major);
+      } else {
+         unsigned element_base_align =
+            this->without_array()->std140_base_alignment(row_major);
+         return this->arrays_of_arrays_size() * MAX2(element_base_align, 16);
+      }
+   }
+
+   /* (9) If the member is a structure, the base alignment of the
+    *     structure is <N>, where <N> is the largest base alignment
+    *     value of any of its members, and rounded up to the base
+    *     alignment of a vec4. The individual members of this
+    *     sub-structure are then assigned offsets by applying this set
+    *     of rules recursively, where the base offset of the first
+    *     member of the sub-structure is equal to the aligned offset
+    *     of the structure. The structure may have padding at the end;
+    *     the base offset of the member following the sub-structure is
+    *     rounded up to the next multiple of the base alignment of the
+    *     structure.
+    */
+   if (this->is_record() || this->is_interface()) {
+      unsigned size = 0;
+      unsigned max_align = 0;
+
+      for (unsigned i = 0; i < this->length; i++) {
+         bool field_row_major = row_major;
+         const enum glsl_matrix_layout matrix_layout =
+            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+            field_row_major = true;
+         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+            field_row_major = false;
+         }
+
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         unsigned align = field_type->std140_base_alignment(field_row_major);
+
+         /* Ignore unsized arrays when calculating size */
+         if (field_type->is_unsized_array())
+            continue;
+
+         size = glsl_align(size, align);
+         size += field_type->std140_size(field_row_major);
+
+         max_align = MAX2(align, max_align);
+
+         if (field_type->is_record() && (i + 1 < this->length))
+            size = glsl_align(size, 16);
+      }
+      size = glsl_align(size, MAX2(max_align, 16));
+      return size;
+   }
+
+   assert(!"not reached");
+   return -1;
+}
+
+unsigned
+glsl_type::std430_base_alignment(bool row_major) const
+{
+
+   unsigned N = is_double() ? 8 : 4;
+
+   /* (1) If the member is a scalar consuming <N> basic machine units, the
+    *     base alignment is <N>.
+    *
+    * (2) If the member is a two- or four-component vector with components
+    *     consuming <N> basic machine units, the base alignment is 2<N> or
+    *     4<N>, respectively.
+    *
+    * (3) If the member is a three-component vector with components consuming
+    *     <N> basic machine units, the base alignment is 4<N>.
+    */
+   if (this->is_scalar() || this->is_vector()) {
+      switch (this->vector_elements) {
+      case 1:
+         return N;
+      case 2:
+         return 2 * N;
+      case 3:
+      case 4:
+         return 4 * N;
+      }
+   }
+
+   /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+    *
+    * "When using the std430 storage layout, shader storage blocks will be
+    * laid out in buffer storage identically to uniform and shader storage
+    * blocks using the std140 layout, except that the base alignment and
+    * stride of arrays of scalars and vectors in rule 4 and of structures
+    * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+    */
+
+   /* (1) If the member is a scalar consuming <N> basic machine units, the
+    *     base alignment is <N>.
+    *
+    * (2) If the member is a two- or four-component vector with components
+    *     consuming <N> basic machine units, the base alignment is 2<N> or
+    *     4<N>, respectively.
+    *
+    * (3) If the member is a three-component vector with components consuming
+    *     <N> basic machine units, the base alignment is 4<N>.
+    */
+   if (this->is_array())
+      return this->fields.array->std430_base_alignment(row_major);
+
+   /* (5) If the member is a column-major matrix with <C> columns and
+    *     <R> rows, the matrix is stored identically to an array of
+    *     <C> column vectors with <R> components each, according to
+    *     rule (4).
+    *
+    * (7) If the member is a row-major matrix with <C> columns and <R>
+    *     rows, the matrix is stored identically to an array of <R>
+    *     row vectors with <C> components each, according to rule (4).
+    */
+   if (this->is_matrix()) {
+      const struct glsl_type *vec_type, *array_type;
+      int c = this->matrix_columns;
+      int r = this->vector_elements;
+
+      if (row_major) {
+         vec_type = get_instance(base_type, c, 1);
+         array_type = glsl_type::get_array_instance(vec_type, r);
+      } else {
+         vec_type = get_instance(base_type, r, 1);
+         array_type = glsl_type::get_array_instance(vec_type, c);
+      }
+
+      return array_type->std430_base_alignment(false);
+   }
+
+   /* (9) If the member is a structure, the base alignment of the
+    *     structure is <N>, where <N> is the largest base alignment
+    *     value of any of its members, and rounded up to the base
+    *     alignment of a vec4. The individual members of this
+    *     sub-structure are then assigned offsets by applying this set
+    *     of rules recursively, where the base offset of the first
+    *     member of the sub-structure is equal to the aligned offset
+    *     of the structure. The structure may have padding at the end;
+    *     the base offset of the member following the sub-structure is
+    *     rounded up to the next multiple of the base alignment of the
+    *     structure.
+    */
+   if (this->is_record()) {
+      unsigned base_alignment = 0;
+      for (unsigned i = 0; i < this->length; i++) {
+         bool field_row_major = row_major;
+         const enum glsl_matrix_layout matrix_layout =
+            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+            field_row_major = true;
+         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+            field_row_major = false;
+         }
+
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         base_alignment = MAX2(base_alignment,
+                               field_type->std430_base_alignment(field_row_major));
+      }
+      assert(base_alignment > 0);
+      return base_alignment;
+   }
+   assert(!"not reached");
+   return -1;
+}
+
+unsigned
+glsl_type::std430_array_stride(bool row_major) const
+{
+   unsigned N = is_double() ? 8 : 4;
+
+   /* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
+    * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
+    *
+    * (3) If the member is a three-component vector with components consuming
+    *     <N> basic machine units, the base alignment is 4<N>.
+    */
+   if (this->is_vector() && this->vector_elements == 3)
+      return 4 * N;
+
+   /* By default use std430_size(row_major) */
+   return this->std430_size(row_major);
+}
+
+unsigned
+glsl_type::std430_size(bool row_major) const
+{
+   unsigned N = is_double() ? 8 : 4;
+
+   /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
+    *
+    * "When using the std430 storage layout, shader storage blocks will be
+    * laid out in buffer storage identically to uniform and shader storage
+    * blocks using the std140 layout, except that the base alignment and
+    * stride of arrays of scalars and vectors in rule 4 and of structures
+    * in rule 9 are not rounded up a multiple of the base alignment of a vec4.
+    */
+   if (this->is_scalar() || this->is_vector())
+      return this->vector_elements * N;
+
+   if (this->without_array()->is_matrix()) {
+      const struct glsl_type *element_type;
+      const struct glsl_type *vec_type;
+      unsigned int array_len;
+
+      if (this->is_array()) {
+         element_type = this->without_array();
+         array_len = this->arrays_of_arrays_size();
+      } else {
+         element_type = this;
+         array_len = 1;
+      }
+
+      if (row_major) {
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->matrix_columns, 1);
+
+         array_len *= element_type->vector_elements;
+      } else {
+         vec_type = get_instance(element_type->base_type,
+                                 element_type->vector_elements, 1);
+         array_len *= element_type->matrix_columns;
+      }
+      const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
+                                                                  array_len);
+
+      return array_type->std430_size(false);
+   }
+
+   if (this->is_array()) {
+      if (this->without_array()->is_record())
+         return this->arrays_of_arrays_size() *
+            this->without_array()->std430_size(row_major);
+      else
+         return this->arrays_of_arrays_size() *
+            this->without_array()->std430_base_alignment(row_major);
+   }
+
+   if (this->is_record() || this->is_interface()) {
+      unsigned size = 0;
+      unsigned max_align = 0;
+
+      for (unsigned i = 0; i < this->length; i++) {
+         bool field_row_major = row_major;
+         const enum glsl_matrix_layout matrix_layout =
+            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
+         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
+            field_row_major = true;
+         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
+            field_row_major = false;
+         }
+
+         const struct glsl_type *field_type = this->fields.structure[i].type;
+         unsigned align = field_type->std430_base_alignment(field_row_major);
+         size = glsl_align(size, align);
+         size += field_type->std430_size(field_row_major);
+
+         max_align = MAX2(align, max_align);
+      }
+      size = glsl_align(size, max_align);
+      return size;
+   }
+
+   assert(!"not reached");
+   return -1;
+}
+
+unsigned
+glsl_type::count_attribute_slots(bool vertex_input_slots) const
+{
+   /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec:
+    *
+    *     "A scalar input counts the same amount against this limit as a vec4,
+    *     so applications may want to consider packing groups of four
+    *     unrelated float inputs together into a vector to better utilize the
+    *     capabilities of the underlying hardware. A matrix input will use up
+    *     multiple locations. The number of locations used will equal the
+    *     number of columns in the matrix."
+    *
+    * The spec does not explicitly say how arrays are counted. However, it
+    * should be safe to assume the total number of slots consumed by an array
+    * is the number of entries in the array multiplied by the number of slots
+    * consumed by a single element of the array.
+    *
+    * The spec says nothing about how structs are counted, because vertex
+    * attributes are not allowed to be (or contain) structs. However, Mesa
+    * allows varying structs; the number of varying slots taken up by a
+    * varying struct is simply equal to the sum of the number of slots taken
+    * up by each element.
+    *
+    * Doubles are counted differently depending on whether they are vertex
+    * inputs or everything else. Vertex inputs from ARB_vertex_attrib_64bit
+    * take one location no matter what size they are, otherwise dvec3/4
+    * take two locations.
+ */ + switch (this->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + return this->matrix_columns; + case GLSL_TYPE_DOUBLE: + if (this->vector_elements > 2 && !vertex_input_slots) + return this->matrix_columns * 2; + else + return this->matrix_columns; + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + + for (unsigned i = 0; i < this->length; i++) + size += this->fields.structure[i].type->count_attribute_slots(vertex_input_slots); + + return size; + } + + case GLSL_TYPE_ARRAY: + return this->length * this->fields.array->count_attribute_slots(vertex_input_slots); + + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_VOID: + case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_ERROR: + break; + } + + assert(!"Unexpected type in count_attribute_slots()"); + + return 0; +} + +int +glsl_type::coordinate_components() const +{ + int size; + + switch (sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + size = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_MS: + case GLSL_SAMPLER_DIM_EXTERNAL: + size = 2; + break; + case GLSL_SAMPLER_DIM_3D: + case GLSL_SAMPLER_DIM_CUBE: + size = 3; + break; + default: + assert(!"Should not get here."); + size = 1; + break; + } + + /* Array textures need an additional component for the array index, except + * for cubemap array images that behave like a 2D array of interleaved + * cubemap faces. + */ + if (sampler_array && + !(base_type == GLSL_TYPE_IMAGE && + sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE)) + size += 1; + + return size; +} + +/** + * Declarations of type flyweights (glsl_type::_foo_type) and + * convenience pointers (glsl_type::foo_type). + * @{ + */ +#define DECL_TYPE(NAME, ...) \ + const glsl_type glsl_type::_##NAME##_type = glsl_type(__VA_ARGS__, #NAME); \ + const glsl_type *const glsl_type::NAME##_type = &glsl_type::_##NAME##_type; + +#define STRUCT_TYPE(NAME) + +#include "compiler/builtin_type_macros.h" +/** @} */ diff --git a/src/compiler/glsl_types.h b/src/compiler/glsl_types.h new file mode 100644 index 00000000000..e63d7945c9f --- /dev/null +++ b/src/compiler/glsl_types.h @@ -0,0 +1,887 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#pragma once
+#ifndef GLSL_TYPES_H
+#define GLSL_TYPES_H
+
+#include <string.h>
+#include <assert.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct _mesa_glsl_parse_state;
+struct glsl_symbol_table;
+
+extern void
+_mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state);
+
+extern void
+_mesa_glsl_release_types(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+enum glsl_base_type {
+   GLSL_TYPE_UINT = 0,
+   GLSL_TYPE_INT,
+   GLSL_TYPE_FLOAT,
+   GLSL_TYPE_DOUBLE,
+   GLSL_TYPE_BOOL,
+   GLSL_TYPE_SAMPLER,
+   GLSL_TYPE_IMAGE,
+   GLSL_TYPE_ATOMIC_UINT,
+   GLSL_TYPE_STRUCT,
+   GLSL_TYPE_INTERFACE,
+   GLSL_TYPE_ARRAY,
+   GLSL_TYPE_VOID,
+   GLSL_TYPE_SUBROUTINE,
+   GLSL_TYPE_ERROR
+};
+
+enum glsl_sampler_dim {
+   GLSL_SAMPLER_DIM_1D = 0,
+   GLSL_SAMPLER_DIM_2D,
+   GLSL_SAMPLER_DIM_3D,
+   GLSL_SAMPLER_DIM_CUBE,
+   GLSL_SAMPLER_DIM_RECT,
+   GLSL_SAMPLER_DIM_BUF,
+   GLSL_SAMPLER_DIM_EXTERNAL,
+   GLSL_SAMPLER_DIM_MS
+};
+
+enum glsl_interface_packing {
+   GLSL_INTERFACE_PACKING_STD140,
+   GLSL_INTERFACE_PACKING_SHARED,
+   GLSL_INTERFACE_PACKING_PACKED,
+   GLSL_INTERFACE_PACKING_STD430
+};
+
+enum glsl_matrix_layout {
+   /**
+    * The layout of the matrix is inherited from the object containing the
+    * matrix (the top level structure or the uniform block).
+    */
+   GLSL_MATRIX_LAYOUT_INHERITED,
+
+   /**
+    * Explicit column-major layout
+    *
+    * If a uniform block doesn't have an explicit layout set, it will default
+    * to this layout.
+    */
+   GLSL_MATRIX_LAYOUT_COLUMN_MAJOR,
+
+   /**
+    * Row-major layout
+    */
+   GLSL_MATRIX_LAYOUT_ROW_MAJOR
+};
+
+enum {
+   GLSL_PRECISION_NONE = 0,
+   GLSL_PRECISION_HIGH,
+   GLSL_PRECISION_MEDIUM,
+   GLSL_PRECISION_LOW
+};
+
+#ifdef __cplusplus
+#include "GL/gl.h"
+#include "util/ralloc.h"
+#include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */
+
+struct glsl_type {
+   GLenum gl_type;
+   glsl_base_type base_type;
+
+   unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */
+   unsigned sampler_shadow:1;
+   unsigned sampler_array:1;
+   unsigned sampler_type:2;    /**< Type of data returned using this
+                                * sampler or image.  Only \c
+                                * GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT,
+                                * and \c GLSL_TYPE_UINT are valid.
+                                */
+   unsigned interface_packing:2;
+
+   /* Callers of this ralloc-based new need not call delete. It's
+    * easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */
+   static void* operator new(size_t size)
+   {
+      mtx_lock(&glsl_type::mutex);
+
+      /* mem_ctx should have been created by the static members */
+      assert(glsl_type::mem_ctx != NULL);
+
+      void *type;
+
+      type = ralloc_size(glsl_type::mem_ctx, size);
+      assert(type != NULL);
+
+      mtx_unlock(&glsl_type::mutex);
+
+      return type;
+   }
+
+   /* If the user *does* call delete, that's OK, we will just
+    * ralloc_free in that case. */
+   static void operator delete(void *type)
+   {
+      mtx_lock(&glsl_type::mutex);
+      ralloc_free(type);
+      mtx_unlock(&glsl_type::mutex);
+   }
+
+   /**
+    * \name Vector and matrix element counts
+    *
+    * For scalars, each of these values will be 1.  For non-numeric types
+    * these will be 0.
+    */
+   /*@{*/
+   uint8_t vector_elements;    /**< 1, 2, 3, or 4 vector elements. */
+   uint8_t matrix_columns;     /**< 1, 2, 3, or 4 matrix columns. */
+   /*@}*/
+
+   /**
+    * For \c GLSL_TYPE_ARRAY, this is the length of the array.  For
+    * \c GLSL_TYPE_STRUCT or \c GLSL_TYPE_INTERFACE, it is the number of
+    * elements in the structure and the number of values pointed to by
+    * \c fields.structure (below).
+    */
+   unsigned length;
+
+   /**
+    * Name of the data type
+    *
+    * Will never be \c NULL.
+ */
+   const char *name;
+
+   /**
+    * Subtype of composite data types.
+    */
+   union {
+      const struct glsl_type *array;            /**< Type of array elements. */
+      const struct glsl_type *parameters;       /**< Parameters to function. */
+      struct glsl_struct_field *structure;      /**< List of struct fields. */
+   } fields;
+
+   /**
+    * \name Pointers to various public type singletons
+    */
+   /*@{*/
+#undef  DECL_TYPE
+#define DECL_TYPE(NAME, ...) \
+   static const glsl_type *const NAME##_type;
+#undef  STRUCT_TYPE
+#define STRUCT_TYPE(NAME) \
+   static const glsl_type *const struct_##NAME##_type;
+#include "compiler/builtin_type_macros.h"
+   /*@}*/
+
+   /**
+    * Convenience accessors for vector types (shorter than get_instance()).
+    * @{
+    */
+   static const glsl_type *vec(unsigned components);
+   static const glsl_type *dvec(unsigned components);
+   static const glsl_type *ivec(unsigned components);
+   static const glsl_type *uvec(unsigned components);
+   static const glsl_type *bvec(unsigned components);
+   /**@}*/
+
+   /**
+    * For numeric and boolean derived types, returns the basic scalar type
+    *
+    * If the type is a numeric or boolean scalar, vector, or matrix type,
+    * this function gets the scalar type of the individual components.  For
+    * all other types, including arrays of numeric or boolean types, the
+    * error type is returned.
+    */
+   const glsl_type *get_base_type() const;
+
+   /**
+    * Get the basic scalar type which this type aggregates.
+    *
+    * If the type is a numeric or boolean scalar, vector, or matrix, or an
+    * array of any of those, this function gets the scalar type of the
+    * individual components.  For structs and arrays of structs, this function
+    * returns the struct type.  For samplers and arrays of samplers, this
+    * function returns the sampler type.
+    */
+   const glsl_type *get_scalar_type() const;
+
+   /**
+    * Get the instance of a built-in scalar, vector, or matrix type
+    */
+   static const glsl_type *get_instance(unsigned base_type, unsigned rows,
+                                        unsigned columns);
+
+   /**
+    * Get the instance of a sampler type
+    */
+   static const glsl_type *get_sampler_instance(enum glsl_sampler_dim dim,
+                                                bool shadow,
+                                                bool array,
+                                                glsl_base_type type);
+
+
+   /**
+    * Get the instance of an array type
+    */
+   static const glsl_type *get_array_instance(const glsl_type *base,
+                                              unsigned elements);
+
+   /**
+    * Get the instance of a record type
+    */
+   static const glsl_type *get_record_instance(const glsl_struct_field *fields,
+                                               unsigned num_fields,
+                                               const char *name);
+
+   /**
+    * Get the instance of an interface block type
+    */
+   static const glsl_type *get_interface_instance(const glsl_struct_field *fields,
+                                                  unsigned num_fields,
+                                                  enum glsl_interface_packing packing,
+                                                  const char *block_name);
+
+   /**
+    * Get the instance of a subroutine type
+    */
+   static const glsl_type *get_subroutine_instance(const char *subroutine_name);
+
+   /**
+    * Get the type resulting from a multiplication of \p type_a * \p type_b
+    */
+   static const glsl_type *get_mul_type(const glsl_type *type_a,
+                                        const glsl_type *type_b);
+
+   /**
+    * Query the total number of scalars that make up a scalar, vector or matrix
+    */
+   unsigned components() const
+   {
+      return vector_elements * matrix_columns;
+   }
+
+   /**
+    * Calculate the number of component slots required to hold this type
+    *
+    * This is used to determine how many uniform or varying locations a type
+    * might occupy.
+    */
+   unsigned component_slots() const;
+
+   /**
+    * Calculate offset between the base location of the struct in
+    * uniform storage and a struct member.
+    * For the initial call, length is the index of the member to find the
+    * offset for.
+    */
+   unsigned record_location_offset(unsigned length) const;
+
+   /**
+    * Calculate the number of unique values from glGetUniformLocation for the
+    * elements of the type.
+    *
+    * This is used to allocate slots in the UniformRemapTable; the number of
+    * locations may not match the actual storage space used by the driver.
+    */
+   unsigned uniform_locations() const;
+
+   /**
+    * Calculate the number of attribute slots required to hold this type
+    *
+    * This implements the language rules of GLSL 1.50 for counting the number
+    * of slots used by a vertex attribute.  It also determines the number of
+    * varying slots the type will use up in the absence of varying packing
+    * (and thus, it can be used to measure the number of varying slots used by
+    * the varyings that are generated by lower_packed_varyings).
+    *
+    * For vertex shader attributes, doubles only take one slot.  For
+    * inter-shader varyings, dvec3/dvec4 take two slots.
+    */
+   unsigned count_attribute_slots(bool vertex_input_slots) const;
+
+   /**
+    * Alignment in bytes of the start of this type in a std140 uniform
+    * block.
+    */
+   unsigned std140_base_alignment(bool row_major) const;
+
+   /** Size in bytes of this type in a std140 uniform block.
+    *
+    * Note that this is not GL_UNIFORM_SIZE (which is the number of
+    * elements in the array)
+    */
+   unsigned std140_size(bool row_major) const;
+
+   /**
+    * Alignment in bytes of the start of this type in a std430 shader
+    * storage block.
+    */
+   unsigned std430_base_alignment(bool row_major) const;
+
+   /**
+    * Calculate array stride in bytes of this type in a std430 shader storage
+    * block.
+    */
+   unsigned std430_array_stride(bool row_major) const;
+
+   /**
+    * Size in bytes of this type in a std430 shader storage block.
+    *
+    * Note that this is not GL_BUFFER_SIZE
+    */
+   unsigned std430_size(bool row_major) const;
+
+   /**
+    * \brief Can this type be implicitly converted to another?
+    *
+    * \return True if the types are identical or if this type can be converted
+    *         to \c desired according to Section 4.1.10 of the GLSL spec.
+    *
+    * \verbatim
+    * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10
+    * Implicit Conversions:
+    *
+    *     In some situations, an expression and its type will be implicitly
+    *     converted to a different type.  The following table shows all allowed
+    *     implicit conversions:
+    *
+    *     Type of expression | Can be implicitly converted to
+    *     --------------------------------------------------
+    *     int                  float
+    *     uint
+    *
+    *     ivec2                vec2
+    *     uvec2
+    *
+    *     ivec3                vec3
+    *     uvec3
+    *
+    *     ivec4                vec4
+    *     uvec4
+    *
+    *     There are no implicit array or structure conversions.  For example,
+    *     an array of int cannot be implicitly converted to an array of float.
+    *     There are no implicit conversions between signed and unsigned
+    *     integers.
+    * \endverbatim
+    */
+   bool can_implicitly_convert_to(const glsl_type *desired,
+                                  _mesa_glsl_parse_state *state) const;
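The practical difference between the std140 and std430 queries declared above shows up most clearly in array strides. Below is a minimal sketch of the two rules for float scalars and vectors, assuming 4-byte floats; it is an illustration of the layout rules themselves, not of the Mesa implementation:

#include <cassert>

/* Base alignment of an N-component float vector (N = 1..4); identical in
 * std140 and std430: scalars align to 4, vec2 to 8, vec3 and vec4 to 16. */
static unsigned
vec_base_alignment(unsigned components)
{
   assert(components >= 1 && components <= 4);
   if (components == 1)
      return 4;
   if (components == 2)
      return 8;
   return 16; /* vec3 rounds up to the vec4 alignment */
}

/* Array stride: std140 rounds the element alignment up to that of a vec4
 * (16 bytes); std430 drops that rule.  The final rounding is the same
 * computation glsl_align() performs. */
static unsigned
array_stride(unsigned components, bool std430)
{
   unsigned align = vec_base_alignment(components);
   if (!std430 && align < 16)
      align = 16;
   const unsigned size = components * 4;
   return (size + align - 1) / align * align;
}

int main()
{
   assert(array_stride(1, false) == 16); /* float[] in std140 */
   assert(array_stride(1, true) == 4);   /* float[] in std430 */
   assert(array_stride(3, false) == 16); /* vec3[] is 16 in either layout */
   assert(array_stride(3, true) == 16);
   return 0;
}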
+
+   /**
+    * Query whether or not a type is a scalar (non-vector and non-matrix).
+    */
+   bool is_scalar() const
+   {
+      return (vector_elements == 1)
+             && (base_type >= GLSL_TYPE_UINT)
+             && (base_type <= GLSL_TYPE_BOOL);
+   }
+
+   /**
+    * Query whether or not a type is a vector
+    */
+   bool is_vector() const
+   {
+      return (vector_elements > 1)
+             && (matrix_columns == 1)
+             && (base_type >= GLSL_TYPE_UINT)
+             && (base_type <= GLSL_TYPE_BOOL);
+   }
+
+   /**
+    * Query whether or not a type is a matrix
+    */
+   bool is_matrix() const
+   {
+      /* GLSL only has float and double matrices. */
+      return (matrix_columns > 1) &&
+             (base_type == GLSL_TYPE_FLOAT || base_type == GLSL_TYPE_DOUBLE);
+   }
+
+   /**
+    * Query whether or not a type is a non-array numeric type
+    */
+   bool is_numeric() const
+   {
+      return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_DOUBLE);
+   }
+
+   /**
+    * Query whether or not a type is an integral type
+    */
+   bool is_integer() const
+   {
+      return (base_type == GLSL_TYPE_UINT) || (base_type == GLSL_TYPE_INT);
+   }
+
+   /**
+    * Query whether or not type is an integral type, or for struct and array
+    * types, contains an integral type.
+    */
+   bool contains_integer() const;
+
+   /**
+    * Query whether or not type is a double type, or for struct and array
+    * types, contains a double type.
+    */
+   bool contains_double() const;
+
+   /**
+    * Query whether or not a type is a float type
+    */
+   bool is_float() const
+   {
+      return base_type == GLSL_TYPE_FLOAT;
+   }
+
+   /**
+    * Query whether or not a type is a double type
+    */
+   bool is_double() const
+   {
+      return base_type == GLSL_TYPE_DOUBLE;
+   }
+
+   /**
+    * Query whether a double takes two slots.
+    */
+   bool is_dual_slot_double() const
+   {
+      return base_type == GLSL_TYPE_DOUBLE && vector_elements > 2;
+   }
+
+   /**
+    * Query whether or not a type is a non-array boolean type
+    */
+   bool is_boolean() const
+   {
+      return base_type == GLSL_TYPE_BOOL;
+   }
+
+   /**
+    * Query whether or not a type is a sampler
+    */
+   bool is_sampler() const
+   {
+      return base_type == GLSL_TYPE_SAMPLER;
+   }
+
+   /**
+    * Query whether or not type is a sampler, or for struct and array
+    * types, contains a sampler.
+    */
+   bool contains_sampler() const;
+
+   /**
+    * Get the Mesa texture target index for a sampler type.
+    */
+   gl_texture_index sampler_index() const;
+
+   /**
+    * Query whether or not type is an image, or for struct and array
+    * types, contains an image.
+    */
+   bool contains_image() const;
+
+   /**
+    * Query whether or not a type is an image
+    */
+   bool is_image() const
+   {
+      return base_type == GLSL_TYPE_IMAGE;
+   }
+
+   /**
+    * Query whether or not a type is an array
+    */
+   bool is_array() const
+   {
+      return base_type == GLSL_TYPE_ARRAY;
+   }
+
+   bool is_array_of_arrays() const
+   {
+      return is_array() && fields.array->is_array();
+   }
+
+   /**
+    * Query whether or not a type is a record
+    */
+   bool is_record() const
+   {
+      return base_type == GLSL_TYPE_STRUCT;
+   }
+
+   /**
+    * Query whether or not a type is an interface
+    */
+   bool is_interface() const
+   {
+      return base_type == GLSL_TYPE_INTERFACE;
+   }
+
+   /**
+    * Query whether or not a type is the void type singleton.
+    */
+   bool is_void() const
+   {
+      return base_type == GLSL_TYPE_VOID;
+   }
+
+   /**
+    * Query whether or not a type is the error type singleton.
+    */
+   bool is_error() const
+   {
+      return base_type == GLSL_TYPE_ERROR;
+   }
+
+   /**
+    * Query whether or not a type is a subroutine
+    */
+   bool is_subroutine() const
+   {
+      return base_type == GLSL_TYPE_SUBROUTINE;
+   }
+   bool contains_subroutine() const;
+
+   /**
+    * Query if a type is unnamed/anonymous (named by the parser)
+    */
+   bool is_anonymous() const
+   {
+      return !strncmp(name, "#anon", 5);
+   }
+
+   /**
+    * Get the type stripped of any arrays
+    *
+    * \return
+    * Pointer to the type of elements of the first non-array type for array
+    * types, or pointer to itself for non-array types.
+    */
+   const glsl_type *without_array() const
+   {
+      const glsl_type *t = this;
+
+      while (t->is_array())
+         t = t->fields.array;
+
+      return t;
+   }
+
+   /**
+    * Return the total number of elements in an array including the elements
+    * in arrays of arrays.
+    */
+   unsigned arrays_of_arrays_size() const
+   {
+      if (!is_array())
+         return 0;
+
+      unsigned size = length;
+      const glsl_type *base_type = fields.array;
+
+      while (base_type->is_array()) {
+         size = size * base_type->length;
+         base_type = base_type->fields.array;
+      }
+      return size;
+   }
+
+   /**
+    * Return the amount of atomic counter storage required for a type.
+    */
+   unsigned atomic_size() const
+   {
+      if (base_type == GLSL_TYPE_ATOMIC_UINT)
+         return ATOMIC_COUNTER_SIZE;
+      else if (is_array())
+         return length * fields.array->atomic_size();
+      else
+         return 0;
+   }
+
+   /**
+    * Return whether a type contains any atomic counters.
+    */
+   bool contains_atomic() const
+   {
+      return atomic_size() > 0;
+   }
+
+   /**
+    * Return whether a type contains any opaque types.
+    */
+   bool contains_opaque() const;
+
+   /**
+    * Query the full type of a matrix row
+    *
+    * \return
+    * If the type is not a matrix, \c glsl_type::error_type is returned.
+    * Otherwise a type matching the rows of the matrix is returned.
+    */
+   const glsl_type *row_type() const
+   {
+      return is_matrix()
+         ? get_instance(base_type, matrix_columns, 1)
+         : error_type;
+   }
+
+   /**
+    * Query the full type of a matrix column
+    *
+    * \return
+    * If the type is not a matrix, \c glsl_type::error_type is returned.
+    * Otherwise a type matching the columns of the matrix is returned.
+    */
+   const glsl_type *column_type() const
+   {
+      return is_matrix()
+         ? get_instance(base_type, vector_elements, 1)
+         : error_type;
+   }
+
+   /**
+    * Get the type of a structure field
+    *
+    * \return
+    * Pointer to the type of the named field.  If the type is not a structure
+    * or the named field does not exist, \c glsl_type::error_type is returned.
+    */
+   const glsl_type *field_type(const char *name) const;
+
+   /**
+    * Get the location of a field within a record type
+    */
+   int field_index(const char *name) const;
+
+   /**
+    * Query the number of elements in an array type
+    *
+    * \return
+    * The number of elements in the array for array types or -1 for non-array
+    * types.  If the number of elements in the array has not yet been declared,
+    * zero is returned.
+    */
+   int array_size() const
+   {
+      return is_array() ? length : -1;
+   }
+
+   /**
+    * Query whether the array size for all dimensions has been declared.
+    */
+   bool is_unsized_array() const
+   {
+      return is_array() && length == 0;
+   }
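arrays_of_arrays_size() above multiplies out every array dimension. The same computation in standalone form; dims holds the dimension lengths outermost-first and is an illustrative stand-in for walking fields.array:

#include <cassert>
#include <vector>

/* Product of every dimension's length; dims is outermost-first, e.g.
 * float[3][4] -> {3, 4}.  An empty dims means "not an array", mirroring
 * the 0 returned by the method above. */
static unsigned
total_array_elements(const std::vector<unsigned> &dims)
{
   if (dims.empty())
      return 0;
   unsigned size = 1;
   for (unsigned len : dims)
      size *= len;
   return size;
}

int main()
{
   assert(total_array_elements({3, 4}) == 12);    /* float[3][4] */
   assert(total_array_elements({2, 3, 4}) == 24); /* float[2][3][4] */
   assert(total_array_elements({}) == 0);
   return 0;
}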
+
+   /**
+    * Return the number of coordinate components needed for this
+    * sampler or image type.
+    *
+    * This is based purely on the sampler's dimensionality.  For example, this
+    * returns 1 for sampler1D, and 3 for sampler2DArray.
+    *
+    * Note that this is often different from the actual coordinate type used
+    * in a texturing built-in function, since those pack additional values
+    * (such as the shadow comparator or projector) into the coordinate type.
+    */
+   int coordinate_components() const;
+
+   /**
+    * Compare a record type against another record type.
+    *
+    * This is useful for matching record types declared across shader stages.
+    */
+   bool record_compare(const glsl_type *b) const;
+
+private:
+
+   static mtx_t mutex;
+
+   /**
+    * ralloc context for all glsl_type allocations
+    *
+    * Set on the first call to \c glsl_type::new.
+    */
+   static void *mem_ctx;
+
+   void init_ralloc_type_ctx(void);
+
+   /** Constructor for vector and matrix types */
+   glsl_type(GLenum gl_type,
+             glsl_base_type base_type, unsigned vector_elements,
+             unsigned matrix_columns, const char *name);
+
+   /** Constructor for sampler or image types */
+   glsl_type(GLenum gl_type, glsl_base_type base_type,
+             enum glsl_sampler_dim dim, bool shadow, bool array,
+             unsigned type, const char *name);
+
+   /** Constructor for record types */
+   glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+             const char *name);
+
+   /** Constructor for interface types */
+   glsl_type(const glsl_struct_field *fields, unsigned num_fields,
+             enum glsl_interface_packing packing, const char *name);
+
+   /** Constructor for array types */
+   glsl_type(const glsl_type *array, unsigned length);
+
+   /** Constructor for subroutine types */
+   glsl_type(const char *name);
+
+   /** Hash table containing the known array types. */
+   static struct hash_table *array_types;
+
+   /** Hash table containing the known record types. */
+   static struct hash_table *record_types;
+
+   /** Hash table containing the known interface types. */
+   static struct hash_table *interface_types;
+
+   /** Hash table containing the known subroutine types. */
+   static struct hash_table *subroutine_types;
+
+   static bool record_key_compare(const void *a, const void *b);
+   static unsigned record_key_hash(const void *key);
+
+   /**
+    * \name Built-in type flyweights
+    */
+   /*@{*/
+#undef  DECL_TYPE
+#define DECL_TYPE(NAME, ...) static const glsl_type _##NAME##_type;
+#undef  STRUCT_TYPE
+#define STRUCT_TYPE(NAME)   static const glsl_type _struct_##NAME##_type;
+#include "compiler/builtin_type_macros.h"
+   /*@}*/
+
+   /**
+    * \name Friend functions.
+    *
+    * These functions are friends because they must have C linkage and they
+    * need to call various private methods or access various private static
+    * data.
+    */
+   /*@{*/
+   friend void _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *);
+   friend void _mesa_glsl_release_types(void);
+   /*@}*/
+};
+
+struct glsl_struct_field {
+   const struct glsl_type *type;
+   const char *name;
+
+   /**
+    * For interface blocks, gl_varying_slot corresponding to the input/output
+    * if this is a built-in input/output (i.e. a member of the built-in
+    * gl_PerVertex interface block); -1 otherwise.
+    *
+    * Ignored for structs.
+    */
+   int location;
+
+   /**
+    * For interface blocks, the interpolation mode (as in
+    * ir_variable::interpolation).  0 otherwise.
+    */
+   unsigned interpolation:2;
+
+   /**
+    * For interface blocks, 1 if this variable uses centroid interpolation (as
+    * in ir_variable::centroid).  0 otherwise.
+    */
+   unsigned centroid:1;
+
+   /**
+    * For interface blocks, 1 if this variable uses sample interpolation (as
+    * in ir_variable::sample).  0 otherwise.
+    */
+   unsigned sample:1;
+
+   /**
+    * Layout of the matrix.  Uses glsl_matrix_layout values.
+ */ + unsigned matrix_layout:2; + + /** + * For interface blocks, 1 if this variable is a per-patch input or output + * (as in ir_variable::patch). 0 otherwise. + */ + unsigned patch:1; + + /** + * Precision qualifier + */ + unsigned precision:2; + + /** + * Image qualifiers, applicable to buffer variables defined in shader + * storage buffer objects (SSBOs) + */ + unsigned image_read_only:1; + unsigned image_write_only:1; + unsigned image_coherent:1; + unsigned image_volatile:1; + unsigned image_restrict:1; + + glsl_struct_field(const struct glsl_type *_type, const char *_name) + : type(_type), name(_name), location(-1), interpolation(0), centroid(0), + sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), + precision(GLSL_PRECISION_NONE) + { + /* empty */ + } + + glsl_struct_field() + { + /* empty */ + } +}; + +static inline unsigned int +glsl_align(unsigned int a, unsigned int align) +{ + return (a + align - 1) / align * align; +} + +#undef DECL_TYPE +#undef STRUCT_TYPE +#endif /* __cplusplus */ + +#endif /* GLSL_TYPES_H */ diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources index 5a576bc3c7e..a3df4c4263d 100644 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@ -18,8 +18,6 @@ NIR_GENERATED_FILES = \ nir/nir_opt_algebraic.c NIR_FILES = \ - nir/glsl_types.cpp \ - nir/glsl_types.h \ nir/nir.c \ nir/nir.h \ nir/nir_array.h \ diff --git a/src/glsl/SConscript b/src/glsl/SConscript index e89d4e0eb01..ef82a9d317a 100644 --- a/src/glsl/SConscript +++ b/src/glsl/SConscript @@ -16,7 +16,6 @@ env.Prepend(CPPPATH = [ '#src/gallium/include', '#src/gallium/auxiliary', '#src/glsl', - '#src/glsl/nir', '#src/glsl/glcpp', ]) @@ -61,12 +60,6 @@ source_lists = env.ParseSourceList('Makefile.sources') for l in ('LIBGLCPP_FILES', 'LIBGLSL_FILES'): glsl_sources += source_lists[l] -# add nir/glsl_types.cpp manually, because SCons still doesn't know about NIR. -# XXX: Remove this once we build NIR and NIR_FILES. -glsl_sources += [ - 'nir/glsl_types.cpp', -] - if env['msvc']: env.Prepend(CPPPATH = ['#/src/getopt']) env.PrependUnique(LIBS = [getopt]) diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp index ca7a9a10c36..f5baeb9ea32 100644 --- a/src/glsl/ast_array_index.cpp +++ b/src/glsl/ast_array_index.cpp @@ -22,7 +22,7 @@ */ #include "ast.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "ir.h" void diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp index e32a588f091..0eb456a2b1f 100644 --- a/src/glsl/ast_function.cpp +++ b/src/glsl/ast_function.cpp @@ -23,7 +23,7 @@ #include "glsl_symbol_table.h" #include "ast.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "ir.h" #include "main/core.h" /* for MIN2 */ #include "main/shaderobj.h" diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index cb57c16f089..dfd31966eb0 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -52,7 +52,7 @@ #include "glsl_symbol_table.h" #include "glsl_parser_extras.h" #include "ast.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "program/hash_table.h" #include "main/shaderobj.h" #include "ir.h" diff --git a/src/glsl/builtin_types.cpp b/src/glsl/builtin_types.cpp index 4402e2898b2..ee24bd5e411 100644 --- a/src/glsl/builtin_types.cpp +++ b/src/glsl/builtin_types.cpp @@ -34,7 +34,7 @@ * version and set of enabled extensions. 
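The coordinate_components() contract documented in the header above reduces to a small table plus an array adjustment. Below is a hedged sketch of the sampler cases; ToyDim is a stand-in enum, and the cubemap-array image carve-out handled by the real implementation is omitted:

#include <cassert>

enum ToyDim { DIM_1D, DIM_2D, DIM_3D, DIM_CUBE, DIM_RECT, DIM_BUF };

/* Base coordinate count from the dimensionality, plus one component for
 * the array index of array textures. */
static int
coordinate_components(ToyDim dim, bool array)
{
   int size;
   switch (dim) {
   case DIM_1D:
   case DIM_BUF:
      size = 1;
      break;
   case DIM_2D:
   case DIM_RECT:
      size = 2;
      break;
   default: /* 3D and cube */
      size = 3;
      break;
   }
   return size + (array ? 1 : 0);
}

int main()
{
   assert(coordinate_components(DIM_1D, false) == 1);  /* sampler1D */
   assert(coordinate_components(DIM_2D, true) == 3);   /* sampler2DArray */
   assert(coordinate_components(DIM_CUBE, true) == 4); /* samplerCubeArray */
   return 0;
}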
  */

-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "glsl_parser_extras.h"
 #include "util/macros.h"

diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy
index 2109fb2eedd..99bd0e61d0e 100644
--- a/src/glsl/glsl_parser.yy
+++ b/src/glsl/glsl_parser.yy
@@ -31,7 +31,7 @@

 #include "ast.h"
 #include "glsl_parser_extras.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "main/context.h"

 #ifdef _MSC_VER
diff --git a/src/glsl/hir_field_selection.cpp b/src/glsl/hir_field_selection.cpp
index 92bb4139194..eab08ad8235 100644
--- a/src/glsl/hir_field_selection.cpp
+++ b/src/glsl/hir_field_selection.cpp
@@ -24,7 +24,7 @@
 #include "ir.h"
 #include "glsl_parser_extras.h"
 #include "ast.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"

 ir_rvalue *
 _mesa_ast_field_selection_to_hir(const ast_expression *expr,
diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp
index b424edd8e96..de9d314bae4 100644
--- a/src/glsl/ir.cpp
+++ b/src/glsl/ir.cpp
@@ -23,7 +23,7 @@
 #include <string.h>
 #include "main/core.h" /* for MAX2 */
 #include "ir.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"

 ir_rvalue::ir_rvalue(enum ir_node_type t)
    : ir_instruction(t)
diff --git a/src/glsl/ir.h b/src/glsl/ir.h
index 5b845c6e856..bd7b5506343 100644
--- a/src/glsl/ir.h
+++ b/src/glsl/ir.h
@@ -30,7 +30,7 @@
 #include <stdlib.h>

 #include "util/ralloc.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "list.h"
 #include "ir_visitor.h"
 #include "ir_hierarchical_visitor.h"
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
index bee60a241e4..0965b0d3719 100644
--- a/src/glsl/ir_clone.cpp
+++ b/src/glsl/ir_clone.cpp
@@ -24,7 +24,7 @@
 #include <string.h>
 #include "main/compiler.h"
 #include "ir.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "program/hash_table.h"

 ir_rvalue *
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index c99a8239cbb..fbbf7794da6 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -38,7 +38,7 @@
 #include "util/rounding.h" /* for _mesa_roundeven */
 #include "util/half_float.h"
 #include "ir.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "program/hash_table.h"

 static float
diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp
index 93034bedb5a..0b4cb4bd30d 100644
--- a/src/glsl/ir_function.cpp
+++ b/src/glsl/ir_function.cpp
@@ -21,7 +21,7 @@
  * DEALINGS IN THE SOFTWARE.
*/ -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "ir.h" #include "glsl_parser_extras.h" #include "main/errors.h" diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp index fd7bc2eea98..960b23fe0ed 100644 --- a/src/glsl/ir_print_visitor.cpp +++ b/src/glsl/ir_print_visitor.cpp @@ -22,7 +22,7 @@ */ #include "ir_print_visitor.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "glsl_parser_extras.h" #include "main/macros.h" #include "util/hash_table.h" diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp index 7c0af1b712f..15315aac522 100644 --- a/src/glsl/ir_reader.cpp +++ b/src/glsl/ir_reader.cpp @@ -23,7 +23,7 @@ #include "ir_reader.h" #include "glsl_parser_extras.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "s_expression.h" static const bool debug = false; diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp index 6486838b8b8..6ab6cf02176 100644 --- a/src/glsl/ir_rvalue_visitor.cpp +++ b/src/glsl/ir_rvalue_visitor.cpp @@ -32,7 +32,7 @@ #include "ir.h" #include "ir_visitor.h" #include "ir_rvalue_visitor.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" ir_visitor_status ir_rvalue_base_visitor::rvalue_visit(ir_expression *ir) diff --git a/src/glsl/ir_set_program_inouts.cpp b/src/glsl/ir_set_program_inouts.cpp index a2dea67c6a9..df06923b870 100644 --- a/src/glsl/ir_set_program_inouts.cpp +++ b/src/glsl/ir_set_program_inouts.cpp @@ -40,7 +40,7 @@ #include "main/core.h" /* for struct gl_program */ #include "ir.h" #include "ir_visitor.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" namespace { diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp index 94814799b9b..cad7069bf98 100644 --- a/src/glsl/ir_validate.cpp +++ b/src/glsl/ir_validate.cpp @@ -37,7 +37,7 @@ #include "ir_hierarchical_visitor.h" #include "util/hash_table.h" #include "util/set.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" namespace { diff --git a/src/glsl/ir_variable_refcount.cpp b/src/glsl/ir_variable_refcount.cpp index 790627bd1e3..8306be10b9c 100644 --- a/src/glsl/ir_variable_refcount.cpp +++ b/src/glsl/ir_variable_refcount.cpp @@ -32,7 +32,7 @@ #include "ir.h" #include "ir_visitor.h" #include "ir_variable_refcount.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "util/hash_table.h" ir_variable_refcount_visitor::ir_variable_refcount_visitor() diff --git a/src/glsl/ir_variable_refcount.h b/src/glsl/ir_variable_refcount.h index 5c74c314781..08a11c01495 100644 --- a/src/glsl/ir_variable_refcount.h +++ b/src/glsl/ir_variable_refcount.h @@ -31,7 +31,7 @@ #include "ir.h" #include "ir_visitor.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" struct assignment_entry { exec_node link; diff --git a/src/glsl/loop_analysis.cpp b/src/glsl/loop_analysis.cpp index 21d46ebce53..096a80abb34 100644 --- a/src/glsl/loop_analysis.cpp +++ b/src/glsl/loop_analysis.cpp @@ -21,7 +21,7 @@ * DEALINGS IN THE SOFTWARE. 
  */

-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "loop_analysis.h"
 #include "ir_hierarchical_visitor.h"

diff --git a/src/glsl/loop_controls.cpp b/src/glsl/loop_controls.cpp
index 51804bb5fe8..c717605ec74 100644
--- a/src/glsl/loop_controls.cpp
+++ b/src/glsl/loop_controls.cpp
@@ -23,7 +23,7 @@

 #include <limits.h>
 #include "main/compiler.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "loop_analysis.h"
 #include "ir_hierarchical_visitor.h"
diff --git a/src/glsl/loop_unroll.cpp b/src/glsl/loop_unroll.cpp
index b9ea3507782..aea2743cdb1 100644
--- a/src/glsl/loop_unroll.cpp
+++ b/src/glsl/loop_unroll.cpp
@@ -21,7 +21,7 @@
  * DEALINGS IN THE SOFTWARE.
  */

-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "loop_analysis.h"
 #include "ir_hierarchical_visitor.h"
diff --git a/src/glsl/lower_const_arrays_to_uniforms.cpp b/src/glsl/lower_const_arrays_to_uniforms.cpp
index 44967dcdb53..2d024d4b78c 100644
--- a/src/glsl/lower_const_arrays_to_uniforms.cpp
+++ b/src/glsl/lower_const_arrays_to_uniforms.cpp
@@ -40,7 +40,7 @@
 #include "ir.h"
 #include "ir_visitor.h"
 #include "ir_rvalue_visitor.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"

 namespace {
 class lower_const_array_visitor : public ir_rvalue_visitor {
diff --git a/src/glsl/lower_discard.cpp b/src/glsl/lower_discard.cpp
index b44d2a6d28d..b62eb20dcb4 100644
--- a/src/glsl/lower_discard.cpp
+++ b/src/glsl/lower_discard.cpp
@@ -105,7 +105,7 @@
  * Unconditional discards are treated as having a condition of "true".
  */

-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "ir.h"

 namespace {
diff --git a/src/glsl/lower_discard_flow.cpp b/src/glsl/lower_discard_flow.cpp
index ee45bf22155..9d0a56b230d 100644
--- a/src/glsl/lower_discard_flow.cpp
+++ b/src/glsl/lower_discard_flow.cpp
@@ -44,7 +44,7 @@
  * interpretation.
  */

-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "ir.h"
 #include "program/hash_table.h"
diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp
index 3232ce92aab..6a7034794b2 100644
--- a/src/glsl/lower_if_to_cond_assign.cpp
+++ b/src/glsl/lower_if_to_cond_assign.cpp
@@ -45,7 +45,7 @@
  * to attempt to flatten any if-statements appearing at depth > N.
  */

-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "ir.h"
 #include "program/hash_table.h"
diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp
index 7c2d4d7ce51..1875149b7a6 100644
--- a/src/glsl/lower_instructions.cpp
+++ b/src/glsl/lower_instructions.cpp
@@ -117,7 +117,7 @@

 #include "c99_math.h"
 #include "program/prog_instruction.h" /* for swizzle */
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "ir.h"
 #include "ir_builder.h"
 #include "ir_optimization.h"
diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp
index ec7a0c537ff..3cfa2e00ae8 100644
--- a/src/glsl/lower_jumps.cpp
+++ b/src/glsl/lower_jumps.cpp
@@ -56,7 +56,7 @@
  * prevents further optimization, and thus is not currently performed.
  */

-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include <string.h>
 #include "ir.h"
diff --git a/src/glsl/lower_mat_op_to_vec.cpp b/src/glsl/lower_mat_op_to_vec.cpp
index e96cda216dd..266fdc6a250 100644
--- a/src/glsl/lower_mat_op_to_vec.cpp
+++ b/src/glsl/lower_mat_op_to_vec.cpp
@@ -33,7 +33,7 @@

 #include "ir.h"
 #include "ir_expression_flattening.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"

 namespace {
diff --git a/src/glsl/lower_offset_array.cpp b/src/glsl/lower_offset_array.cpp
index c30f8014342..96486c3a711 100644
--- a/src/glsl/lower_offset_array.cpp
+++ b/src/glsl/lower_offset_array.cpp
@@ -31,7 +31,7 @@
  * \author Chris Forbes
  */

-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "ir.h"
 #include "ir_builder.h"
 #include "ir_optimization.h"
diff --git a/src/glsl/lower_subroutine.cpp b/src/glsl/lower_subroutine.cpp
index ac8ade13d99..e80c1be768a 100644
--- a/src/glsl/lower_subroutine.cpp
+++ b/src/glsl/lower_subroutine.cpp
@@ -27,7 +27,7 @@
  * lowers subroutines to an if ladder.
  */

-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "glsl_parser_extras.h"
 #include "ir.h"
 #include "ir_builder.h"
diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
index a1ba9345e32..278d5450bfb 100644
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ b/src/glsl/lower_variable_index_to_cond_assign.cpp
@@ -49,7 +49,7 @@
 #include "ir.h"
 #include "ir_rvalue_visitor.h"
 #include "ir_optimization.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "main/macros.h"

 /**
diff --git a/src/glsl/lower_vec_index_to_cond_assign.cpp b/src/glsl/lower_vec_index_to_cond_assign.cpp
index b6238825f8a..784db085924 100644
--- a/src/glsl/lower_vec_index_to_cond_assign.cpp
+++ b/src/glsl/lower_vec_index_to_cond_assign.cpp
@@ -39,7 +39,7 @@
 #include "ir.h"
 #include "ir_visitor.h"
 #include "ir_optimization.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"

 namespace {
diff --git a/src/glsl/lower_vec_index_to_swizzle.cpp b/src/glsl/lower_vec_index_to_swizzle.cpp
index 4d4d2f17ef6..8b18e95509c 100644
--- a/src/glsl/lower_vec_index_to_swizzle.cpp
+++ b/src/glsl/lower_vec_index_to_swizzle.cpp
@@ -32,7 +32,7 @@
 #include "ir.h"
 #include "ir_visitor.h"
 #include "ir_optimization.h"
-#include "glsl_types.h"
+#include "compiler/glsl_types.h"
 #include "main/macros.h"

 /**
diff --git a/src/glsl/nir/glsl_types.cpp b/src/glsl/nir/glsl_types.cpp
deleted file mode 100644
index 655d3b1fe48..00000000000
--- a/src/glsl/nir/glsl_types.cpp
+++ /dev/null
@@ -1,1758 +0,0 @@
-/*
- * Copyright © 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <stdio.h>
-#include "main/macros.h"
-#include "glsl_parser_extras.h"
-#include "glsl_types.h"
-#include "util/hash_table.h"
-
-
-mtx_t glsl_type::mutex = _MTX_INITIALIZER_NP;
-hash_table *glsl_type::array_types = NULL;
-hash_table *glsl_type::record_types = NULL;
-hash_table *glsl_type::interface_types = NULL;
-hash_table *glsl_type::subroutine_types = NULL;
-void *glsl_type::mem_ctx = NULL;
-
-void
-glsl_type::init_ralloc_type_ctx(void)
-{
-   if (glsl_type::mem_ctx == NULL) {
-      glsl_type::mem_ctx = ralloc_autofree_context();
-      assert(glsl_type::mem_ctx != NULL);
-   }
-}
-
-glsl_type::glsl_type(GLenum gl_type,
-                     glsl_base_type base_type, unsigned vector_elements,
-                     unsigned matrix_columns, const char *name) :
-   gl_type(gl_type),
-   base_type(base_type),
-   sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
-   sampler_type(0), interface_packing(0),
-   vector_elements(vector_elements), matrix_columns(matrix_columns),
-   length(0)
-{
-   mtx_lock(&glsl_type::mutex);
-
-   init_ralloc_type_ctx();
-   assert(name != NULL);
-   this->name = ralloc_strdup(this->mem_ctx, name);
-
-   mtx_unlock(&glsl_type::mutex);
-
-   /* Neither dimension is zero or both dimensions are zero.
-    */
-   assert((vector_elements == 0) == (matrix_columns == 0));
-   memset(& fields, 0, sizeof(fields));
-}
-
-glsl_type::glsl_type(GLenum gl_type, glsl_base_type base_type,
-                     enum glsl_sampler_dim dim, bool shadow, bool array,
-                     unsigned type, const char *name) :
-   gl_type(gl_type),
-   base_type(base_type),
-   sampler_dimensionality(dim), sampler_shadow(shadow),
-   sampler_array(array), sampler_type(type), interface_packing(0),
-   length(0)
-{
-   mtx_lock(&glsl_type::mutex);
-
-   init_ralloc_type_ctx();
-   assert(name != NULL);
-   this->name = ralloc_strdup(this->mem_ctx, name);
-
-   mtx_unlock(&glsl_type::mutex);
-
-   memset(& fields, 0, sizeof(fields));
-
-   if (base_type == GLSL_TYPE_SAMPLER) {
-      /* Samplers take no storage whatsoever.
*/ - matrix_columns = vector_elements = 0; - } else { - matrix_columns = vector_elements = 1; - } -} - -glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, - const char *name) : - gl_type(0), - base_type(GLSL_TYPE_STRUCT), - sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), interface_packing(0), - vector_elements(0), matrix_columns(0), - length(num_fields) -{ - unsigned int i; - - mtx_lock(&glsl_type::mutex); - - init_ralloc_type_ctx(); - assert(name != NULL); - this->name = ralloc_strdup(this->mem_ctx, name); - this->fields.structure = ralloc_array(this->mem_ctx, - glsl_struct_field, length); - - for (i = 0; i < length; i++) { - this->fields.structure[i].type = fields[i].type; - this->fields.structure[i].name = ralloc_strdup(this->fields.structure, - fields[i].name); - this->fields.structure[i].location = fields[i].location; - this->fields.structure[i].interpolation = fields[i].interpolation; - this->fields.structure[i].centroid = fields[i].centroid; - this->fields.structure[i].sample = fields[i].sample; - this->fields.structure[i].matrix_layout = fields[i].matrix_layout; - this->fields.structure[i].patch = fields[i].patch; - this->fields.structure[i].image_read_only = fields[i].image_read_only; - this->fields.structure[i].image_write_only = fields[i].image_write_only; - this->fields.structure[i].image_coherent = fields[i].image_coherent; - this->fields.structure[i].image_volatile = fields[i].image_volatile; - this->fields.structure[i].image_restrict = fields[i].image_restrict; - this->fields.structure[i].precision = fields[i].precision; - } - - mtx_unlock(&glsl_type::mutex); -} - -glsl_type::glsl_type(const glsl_struct_field *fields, unsigned num_fields, - enum glsl_interface_packing packing, const char *name) : - gl_type(0), - base_type(GLSL_TYPE_INTERFACE), - sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), interface_packing((unsigned) packing), - vector_elements(0), matrix_columns(0), - length(num_fields) -{ - unsigned int i; - - mtx_lock(&glsl_type::mutex); - - init_ralloc_type_ctx(); - assert(name != NULL); - this->name = ralloc_strdup(this->mem_ctx, name); - this->fields.structure = ralloc_array(this->mem_ctx, - glsl_struct_field, length); - for (i = 0; i < length; i++) { - this->fields.structure[i].type = fields[i].type; - this->fields.structure[i].name = ralloc_strdup(this->fields.structure, - fields[i].name); - this->fields.structure[i].location = fields[i].location; - this->fields.structure[i].interpolation = fields[i].interpolation; - this->fields.structure[i].centroid = fields[i].centroid; - this->fields.structure[i].sample = fields[i].sample; - this->fields.structure[i].matrix_layout = fields[i].matrix_layout; - this->fields.structure[i].patch = fields[i].patch; - this->fields.structure[i].precision = fields[i].precision; - } - - mtx_unlock(&glsl_type::mutex); -} - -glsl_type::glsl_type(const char *subroutine_name) : - gl_type(0), - base_type(GLSL_TYPE_SUBROUTINE), - sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), interface_packing(0), - vector_elements(1), matrix_columns(1), - length(0) -{ - mtx_lock(&glsl_type::mutex); - - init_ralloc_type_ctx(); - assert(subroutine_name != NULL); - this->name = ralloc_strdup(this->mem_ctx, subroutine_name); - mtx_unlock(&glsl_type::mutex); -} - -bool -glsl_type::contains_sampler() const -{ - if (this->is_array()) { - return this->fields.array->contains_sampler(); - } else if (this->is_record()) { - for (unsigned int 
i = 0; i < this->length; i++) { - if (this->fields.structure[i].type->contains_sampler()) - return true; - } - return false; - } else { - return this->is_sampler(); - } -} - - -bool -glsl_type::contains_integer() const -{ - if (this->is_array()) { - return this->fields.array->contains_integer(); - } else if (this->is_record()) { - for (unsigned int i = 0; i < this->length; i++) { - if (this->fields.structure[i].type->contains_integer()) - return true; - } - return false; - } else { - return this->is_integer(); - } -} - -bool -glsl_type::contains_double() const -{ - if (this->is_array()) { - return this->fields.array->contains_double(); - } else if (this->is_record()) { - for (unsigned int i = 0; i < this->length; i++) { - if (this->fields.structure[i].type->contains_double()) - return true; - } - return false; - } else { - return this->is_double(); - } -} - -bool -glsl_type::contains_opaque() const { - switch (base_type) { - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_ATOMIC_UINT: - return true; - case GLSL_TYPE_ARRAY: - return fields.array->contains_opaque(); - case GLSL_TYPE_STRUCT: - for (unsigned int i = 0; i < length; i++) { - if (fields.structure[i].type->contains_opaque()) - return true; - } - return false; - default: - return false; - } -} - -bool -glsl_type::contains_subroutine() const -{ - if (this->is_array()) { - return this->fields.array->contains_subroutine(); - } else if (this->is_record()) { - for (unsigned int i = 0; i < this->length; i++) { - if (this->fields.structure[i].type->contains_subroutine()) - return true; - } - return false; - } else { - return this->is_subroutine(); - } -} - -gl_texture_index -glsl_type::sampler_index() const -{ - const glsl_type *const t = (this->is_array()) ? this->fields.array : this; - - assert(t->is_sampler()); - - switch (t->sampler_dimensionality) { - case GLSL_SAMPLER_DIM_1D: - return (t->sampler_array) ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; - case GLSL_SAMPLER_DIM_2D: - return (t->sampler_array) ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; - case GLSL_SAMPLER_DIM_3D: - return TEXTURE_3D_INDEX; - case GLSL_SAMPLER_DIM_CUBE: - return (t->sampler_array) ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX; - case GLSL_SAMPLER_DIM_RECT: - return TEXTURE_RECT_INDEX; - case GLSL_SAMPLER_DIM_BUF: - return TEXTURE_BUFFER_INDEX; - case GLSL_SAMPLER_DIM_EXTERNAL: - return TEXTURE_EXTERNAL_INDEX; - case GLSL_SAMPLER_DIM_MS: - return (t->sampler_array) ? 
TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX; - default: - assert(!"Should not get here."); - return TEXTURE_BUFFER_INDEX; - } -} - -bool -glsl_type::contains_image() const -{ - if (this->is_array()) { - return this->fields.array->contains_image(); - } else if (this->is_record()) { - for (unsigned int i = 0; i < this->length; i++) { - if (this->fields.structure[i].type->contains_image()) - return true; - } - return false; - } else { - return this->is_image(); - } -} - -const glsl_type *glsl_type::get_base_type() const -{ - switch (base_type) { - case GLSL_TYPE_UINT: - return uint_type; - case GLSL_TYPE_INT: - return int_type; - case GLSL_TYPE_FLOAT: - return float_type; - case GLSL_TYPE_DOUBLE: - return double_type; - case GLSL_TYPE_BOOL: - return bool_type; - default: - return error_type; - } -} - - -const glsl_type *glsl_type::get_scalar_type() const -{ - const glsl_type *type = this; - - /* Handle arrays */ - while (type->base_type == GLSL_TYPE_ARRAY) - type = type->fields.array; - - /* Handle vectors and matrices */ - switch (type->base_type) { - case GLSL_TYPE_UINT: - return uint_type; - case GLSL_TYPE_INT: - return int_type; - case GLSL_TYPE_FLOAT: - return float_type; - case GLSL_TYPE_DOUBLE: - return double_type; - case GLSL_TYPE_BOOL: - return bool_type; - default: - /* Handle everything else */ - return type; - } -} - - -void -_mesa_glsl_release_types(void) -{ - /* Should only be called during atexit (either when unloading shared - * object, or if process terminates), so no mutex-locking should be - * necessary. - */ - if (glsl_type::array_types != NULL) { - _mesa_hash_table_destroy(glsl_type::array_types, NULL); - glsl_type::array_types = NULL; - } - - if (glsl_type::record_types != NULL) { - _mesa_hash_table_destroy(glsl_type::record_types, NULL); - glsl_type::record_types = NULL; - } - - if (glsl_type::interface_types != NULL) { - _mesa_hash_table_destroy(glsl_type::interface_types, NULL); - glsl_type::interface_types = NULL; - } -} - - -glsl_type::glsl_type(const glsl_type *array, unsigned length) : - base_type(GLSL_TYPE_ARRAY), - sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), - sampler_type(0), interface_packing(0), - vector_elements(0), matrix_columns(0), - length(length), name(NULL) -{ - this->fields.array = array; - /* Inherit the gl type of the base. The GL type is used for - * uniform/statevar handling in Mesa and the arrayness of the type - * is represented by the size rather than the type. - */ - this->gl_type = array->gl_type; - - /* Allow a maximum of 10 characters for the array size. This is enough - * for 32-bits of ~0. The extra 3 are for the '[', ']', and terminating - * NUL. 
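Each of the contains_sampler(), contains_integer(), contains_double() and contains_image() bodies above uses the same recursion scheme: arrays recurse into the element type, records iterate their fields, and leaf types answer the predicate directly. A minimal standalone mirror of that walk; ToyType is illustrative, not Mesa's type:

#include <vector>

/* Illustrative stand-in: arrays set array_element, records fill fields,
 * and leaf types just carry the predicate bit. */
struct ToyType {
   bool is_sampler = false;
   const ToyType *array_element = nullptr;
   std::vector<const ToyType *> fields;
};

/* Same recursion scheme as the contains_*() methods above. */
static bool
contains_sampler(const ToyType &t)
{
   if (t.array_element)
      return contains_sampler(*t.array_element);
   for (const ToyType *field : t.fields)
      if (contains_sampler(*field))
         return true;
   return t.is_sampler;
}

int main()
{
   ToyType sampler;
   sampler.is_sampler = true;
   ToyType arr;
   arr.array_element = &sampler; /* sampler2D s[4] */
   ToyType rec;
   rec.fields = {&arr};          /* struct { sampler2D s[4]; } */
   return contains_sampler(rec) ? 0 : 1;
}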
- */ - const unsigned name_length = strlen(array->name) + 10 + 3; - - mtx_lock(&glsl_type::mutex); - char *const n = (char *) ralloc_size(this->mem_ctx, name_length); - mtx_unlock(&glsl_type::mutex); - - if (length == 0) - snprintf(n, name_length, "%s[]", array->name); - else { - /* insert outermost dimensions in the correct spot - * otherwise the dimension order will be backwards - */ - const char *pos = strchr(array->name, '['); - if (pos) { - int idx = pos - array->name; - snprintf(n, idx+1, "%s", array->name); - snprintf(n + idx, name_length - idx, "[%u]%s", - length, array->name + idx); - } else { - snprintf(n, name_length, "%s[%u]", array->name, length); - } - } - - this->name = n; -} - - -const glsl_type * -glsl_type::vec(unsigned components) -{ - if (components == 0 || components > 4) - return error_type; - - static const glsl_type *const ts[] = { - float_type, vec2_type, vec3_type, vec4_type - }; - return ts[components - 1]; -} - -const glsl_type * -glsl_type::dvec(unsigned components) -{ - if (components == 0 || components > 4) - return error_type; - - static const glsl_type *const ts[] = { - double_type, dvec2_type, dvec3_type, dvec4_type - }; - return ts[components - 1]; -} - -const glsl_type * -glsl_type::ivec(unsigned components) -{ - if (components == 0 || components > 4) - return error_type; - - static const glsl_type *const ts[] = { - int_type, ivec2_type, ivec3_type, ivec4_type - }; - return ts[components - 1]; -} - - -const glsl_type * -glsl_type::uvec(unsigned components) -{ - if (components == 0 || components > 4) - return error_type; - - static const glsl_type *const ts[] = { - uint_type, uvec2_type, uvec3_type, uvec4_type - }; - return ts[components - 1]; -} - - -const glsl_type * -glsl_type::bvec(unsigned components) -{ - if (components == 0 || components > 4) - return error_type; - - static const glsl_type *const ts[] = { - bool_type, bvec2_type, bvec3_type, bvec4_type - }; - return ts[components - 1]; -} - - -const glsl_type * -glsl_type::get_instance(unsigned base_type, unsigned rows, unsigned columns) -{ - if (base_type == GLSL_TYPE_VOID) - return void_type; - - if ((rows < 1) || (rows > 4) || (columns < 1) || (columns > 4)) - return error_type; - - /* Treat GLSL vectors as Nx1 matrices. - */ - if (columns == 1) { - switch (base_type) { - case GLSL_TYPE_UINT: - return uvec(rows); - case GLSL_TYPE_INT: - return ivec(rows); - case GLSL_TYPE_FLOAT: - return vec(rows); - case GLSL_TYPE_DOUBLE: - return dvec(rows); - case GLSL_TYPE_BOOL: - return bvec(rows); - default: - return error_type; - } - } else { - if ((base_type != GLSL_TYPE_FLOAT && base_type != GLSL_TYPE_DOUBLE) || (rows == 1)) - return error_type; - - /* GLSL matrix types are named mat{COLUMNS}x{ROWS}. 
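vec(), dvec(), ivec(), uvec() and bvec() above share a single shape: validate the component count, then index a static table of singletons. A sketch of the same pattern; vec_name() is a stand-in that returns names instead of type objects:

#include <cassert>
#include <cstring>

static const char *
vec_name(unsigned components)
{
   static const char *const ts[] = { "float", "vec2", "vec3", "vec4" };
   if (components == 0 || components > 4)
      return "error";
   return ts[components - 1];
}

int main()
{
   assert(strcmp(vec_name(3), "vec3") == 0);
   assert(strcmp(vec_name(7), "error") == 0); /* out of range, like vec(7) */
   return 0;
}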
Only the following - * combinations are valid: - * - * 1 2 3 4 - * 1 - * 2 x x x - * 3 x x x - * 4 x x x - */ -#define IDX(c,r) (((c-1)*3) + (r-1)) - - if (base_type == GLSL_TYPE_DOUBLE) { - switch (IDX(columns, rows)) { - case IDX(2,2): return dmat2_type; - case IDX(2,3): return dmat2x3_type; - case IDX(2,4): return dmat2x4_type; - case IDX(3,2): return dmat3x2_type; - case IDX(3,3): return dmat3_type; - case IDX(3,4): return dmat3x4_type; - case IDX(4,2): return dmat4x2_type; - case IDX(4,3): return dmat4x3_type; - case IDX(4,4): return dmat4_type; - default: return error_type; - } - } else { - switch (IDX(columns, rows)) { - case IDX(2,2): return mat2_type; - case IDX(2,3): return mat2x3_type; - case IDX(2,4): return mat2x4_type; - case IDX(3,2): return mat3x2_type; - case IDX(3,3): return mat3_type; - case IDX(3,4): return mat3x4_type; - case IDX(4,2): return mat4x2_type; - case IDX(4,3): return mat4x3_type; - case IDX(4,4): return mat4_type; - default: return error_type; - } - } - } - - assert(!"Should not get here."); - return error_type; -} - -const glsl_type * -glsl_type::get_sampler_instance(enum glsl_sampler_dim dim, - bool shadow, - bool array, - glsl_base_type type) -{ - switch (type) { - case GLSL_TYPE_FLOAT: - switch (dim) { - case GLSL_SAMPLER_DIM_1D: - if (shadow) - return (array ? sampler1DArrayShadow_type : sampler1DShadow_type); - else - return (array ? sampler1DArray_type : sampler1D_type); - case GLSL_SAMPLER_DIM_2D: - if (shadow) - return (array ? sampler2DArrayShadow_type : sampler2DShadow_type); - else - return (array ? sampler2DArray_type : sampler2D_type); - case GLSL_SAMPLER_DIM_3D: - if (shadow || array) - return error_type; - else - return sampler3D_type; - case GLSL_SAMPLER_DIM_CUBE: - if (shadow) - return (array ? samplerCubeArrayShadow_type : samplerCubeShadow_type); - else - return (array ? samplerCubeArray_type : samplerCube_type); - case GLSL_SAMPLER_DIM_RECT: - if (array) - return error_type; - if (shadow) - return sampler2DRectShadow_type; - else - return sampler2DRect_type; - case GLSL_SAMPLER_DIM_BUF: - if (shadow || array) - return error_type; - else - return samplerBuffer_type; - case GLSL_SAMPLER_DIM_MS: - if (shadow) - return error_type; - return (array ? sampler2DMSArray_type : sampler2DMS_type); - case GLSL_SAMPLER_DIM_EXTERNAL: - if (shadow || array) - return error_type; - else - return samplerExternalOES_type; - } - case GLSL_TYPE_INT: - if (shadow) - return error_type; - switch (dim) { - case GLSL_SAMPLER_DIM_1D: - return (array ? isampler1DArray_type : isampler1D_type); - case GLSL_SAMPLER_DIM_2D: - return (array ? isampler2DArray_type : isampler2D_type); - case GLSL_SAMPLER_DIM_3D: - if (array) - return error_type; - return isampler3D_type; - case GLSL_SAMPLER_DIM_CUBE: - return (array ? isamplerCubeArray_type : isamplerCube_type); - case GLSL_SAMPLER_DIM_RECT: - if (array) - return error_type; - return isampler2DRect_type; - case GLSL_SAMPLER_DIM_BUF: - if (array) - return error_type; - return isamplerBuffer_type; - case GLSL_SAMPLER_DIM_MS: - return (array ? isampler2DMSArray_type : isampler2DMS_type); - case GLSL_SAMPLER_DIM_EXTERNAL: - return error_type; - } - case GLSL_TYPE_UINT: - if (shadow) - return error_type; - switch (dim) { - case GLSL_SAMPLER_DIM_1D: - return (array ? usampler1DArray_type : usampler1D_type); - case GLSL_SAMPLER_DIM_2D: - return (array ? usampler2DArray_type : usampler2D_type); - case GLSL_SAMPLER_DIM_3D: - if (array) - return error_type; - return usampler3D_type; - case GLSL_SAMPLER_DIM_CUBE: - return (array ? 
usamplerCubeArray_type : usamplerCube_type); - case GLSL_SAMPLER_DIM_RECT: - if (array) - return error_type; - return usampler2DRect_type; - case GLSL_SAMPLER_DIM_BUF: - if (array) - return error_type; - return usamplerBuffer_type; - case GLSL_SAMPLER_DIM_MS: - return (array ? usampler2DMSArray_type : usampler2DMS_type); - case GLSL_SAMPLER_DIM_EXTERNAL: - return error_type; - } - default: - return error_type; - } - - unreachable("switch statement above should be complete"); -} - -const glsl_type * -glsl_type::get_array_instance(const glsl_type *base, unsigned array_size) -{ - /* Generate a name using the base type pointer in the key. This is - * done because the name of the base type may not be unique across - * shaders. For example, two shaders may have different record types - * named 'foo'. - */ - char key[128]; - snprintf(key, sizeof(key), "%p[%u]", (void *) base, array_size); - - mtx_lock(&glsl_type::mutex); - - if (array_types == NULL) { - array_types = _mesa_hash_table_create(NULL, _mesa_key_hash_string, - _mesa_key_string_equal); - } - - const struct hash_entry *entry = _mesa_hash_table_search(array_types, key); - if (entry == NULL) { - mtx_unlock(&glsl_type::mutex); - const glsl_type *t = new glsl_type(base, array_size); - mtx_lock(&glsl_type::mutex); - - entry = _mesa_hash_table_insert(array_types, - ralloc_strdup(mem_ctx, key), - (void *) t); - } - - assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_ARRAY); - assert(((glsl_type *) entry->data)->length == array_size); - assert(((glsl_type *) entry->data)->fields.array == base); - - mtx_unlock(&glsl_type::mutex); - - return (glsl_type *) entry->data; -} - - -bool -glsl_type::record_compare(const glsl_type *b) const -{ - if (this->length != b->length) - return false; - - if (this->interface_packing != b->interface_packing) - return false; - - /* From the GLSL 4.20 specification (Sec 4.2): - * - * "Structures must have the same name, sequence of type names, and - * type definitions, and field names to be considered the same type." - * - * GLSL ES behaves the same (Ver 1.00 Sec 4.2.4, Ver 3.00 Sec 4.2.5). - * - * Note that we cannot force type name check when comparing unnamed - * structure types, these have a unique name assigned during parsing. 
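get_array_instance() above is hash-consing: build a lookup key, search the table, and construct and insert only on a miss, so identical requests always return the same object (which is what lets later code compare types by pointer). A compact illustration of the scheme, with std::map standing in for the hash table and the mutex omitted; ToyType and the cache are assumptions of the sketch:

#include <map>
#include <string>
#include <utility>

struct ToyType { std::string name; };

/* Identical (element, length) requests return the same object.  Entries
 * live for the whole process, as in the real type cache. */
static const ToyType *
get_array_instance(const ToyType *element, unsigned length)
{
   static std::map<std::pair<const ToyType *, unsigned>, const ToyType *> cache;
   const auto key = std::make_pair(element, length);
   auto it = cache.find(key);
   if (it == cache.end()) {
      ToyType *t = new ToyType{ element->name + "[" + std::to_string(length) + "]" };
      it = cache.insert({key, t}).first;
   }
   return it->second;
}

int main()
{
   ToyType f{"float"};
   const ToyType *a = get_array_instance(&f, 4);
   const ToyType *b = get_array_instance(&f, 4);
   return (a == b && a->name == "float[4]") ? 0 : 1;
}

Keying on the element type's address works because element types are themselves interned, so equal element types are pointer-equal; this is the same reason the comment above generates the cache key from the base type pointer rather than its name.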
- */ - if (!this->is_anonymous() && !b->is_anonymous()) - if (strcmp(this->name, b->name) != 0) - return false; - - for (unsigned i = 0; i < this->length; i++) { - if (this->fields.structure[i].type != b->fields.structure[i].type) - return false; - if (strcmp(this->fields.structure[i].name, - b->fields.structure[i].name) != 0) - return false; - if (this->fields.structure[i].matrix_layout - != b->fields.structure[i].matrix_layout) - return false; - if (this->fields.structure[i].location - != b->fields.structure[i].location) - return false; - if (this->fields.structure[i].interpolation - != b->fields.structure[i].interpolation) - return false; - if (this->fields.structure[i].centroid - != b->fields.structure[i].centroid) - return false; - if (this->fields.structure[i].sample - != b->fields.structure[i].sample) - return false; - if (this->fields.structure[i].patch - != b->fields.structure[i].patch) - return false; - if (this->fields.structure[i].image_read_only - != b->fields.structure[i].image_read_only) - return false; - if (this->fields.structure[i].image_write_only - != b->fields.structure[i].image_write_only) - return false; - if (this->fields.structure[i].image_coherent - != b->fields.structure[i].image_coherent) - return false; - if (this->fields.structure[i].image_volatile - != b->fields.structure[i].image_volatile) - return false; - if (this->fields.structure[i].image_restrict - != b->fields.structure[i].image_restrict) - return false; - if (this->fields.structure[i].precision - != b->fields.structure[i].precision) - return false; - } - - return true; -} - - -bool -glsl_type::record_key_compare(const void *a, const void *b) -{ - const glsl_type *const key1 = (glsl_type *) a; - const glsl_type *const key2 = (glsl_type *) b; - - return strcmp(key1->name, key2->name) == 0 && key1->record_compare(key2); -} - - -/** - * Generate an integer hash value for a glsl_type structure type. 
- */ -unsigned -glsl_type::record_key_hash(const void *a) -{ - const glsl_type *const key = (glsl_type *) a; - uintptr_t hash = key->length; - unsigned retval; - - for (unsigned i = 0; i < key->length; i++) { - /* casting pointer to uintptr_t */ - hash = (hash * 13 ) + (uintptr_t) key->fields.structure[i].type; - } - - if (sizeof(hash) == 8) - retval = (hash & 0xffffffff) ^ ((uint64_t) hash >> 32); - else - retval = hash; - - return retval; -} - - -const glsl_type * -glsl_type::get_record_instance(const glsl_struct_field *fields, - unsigned num_fields, - const char *name) -{ - const glsl_type key(fields, num_fields, name); - - mtx_lock(&glsl_type::mutex); - - if (record_types == NULL) { - record_types = _mesa_hash_table_create(NULL, record_key_hash, - record_key_compare); - } - - const struct hash_entry *entry = _mesa_hash_table_search(record_types, - &key); - if (entry == NULL) { - mtx_unlock(&glsl_type::mutex); - const glsl_type *t = new glsl_type(fields, num_fields, name); - mtx_lock(&glsl_type::mutex); - - entry = _mesa_hash_table_insert(record_types, t, (void *) t); - } - - assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_STRUCT); - assert(((glsl_type *) entry->data)->length == num_fields); - assert(strcmp(((glsl_type *) entry->data)->name, name) == 0); - - mtx_unlock(&glsl_type::mutex); - - return (glsl_type *) entry->data; -} - - -const glsl_type * -glsl_type::get_interface_instance(const glsl_struct_field *fields, - unsigned num_fields, - enum glsl_interface_packing packing, - const char *block_name) -{ - const glsl_type key(fields, num_fields, packing, block_name); - - mtx_lock(&glsl_type::mutex); - - if (interface_types == NULL) { - interface_types = _mesa_hash_table_create(NULL, record_key_hash, - record_key_compare); - } - - const struct hash_entry *entry = _mesa_hash_table_search(interface_types, - &key); - if (entry == NULL) { - mtx_unlock(&glsl_type::mutex); - const glsl_type *t = new glsl_type(fields, num_fields, - packing, block_name); - mtx_lock(&glsl_type::mutex); - - entry = _mesa_hash_table_insert(interface_types, t, (void *) t); - } - - assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_INTERFACE); - assert(((glsl_type *) entry->data)->length == num_fields); - assert(strcmp(((glsl_type *) entry->data)->name, block_name) == 0); - - mtx_unlock(&glsl_type::mutex); - - return (glsl_type *) entry->data; -} - -const glsl_type * -glsl_type::get_subroutine_instance(const char *subroutine_name) -{ - const glsl_type key(subroutine_name); - - mtx_lock(&glsl_type::mutex); - - if (subroutine_types == NULL) { - subroutine_types = _mesa_hash_table_create(NULL, record_key_hash, - record_key_compare); - } - - const struct hash_entry *entry = _mesa_hash_table_search(subroutine_types, - &key); - if (entry == NULL) { - mtx_unlock(&glsl_type::mutex); - const glsl_type *t = new glsl_type(subroutine_name); - mtx_lock(&glsl_type::mutex); - - entry = _mesa_hash_table_insert(subroutine_types, t, (void *) t); - } - - assert(((glsl_type *) entry->data)->base_type == GLSL_TYPE_SUBROUTINE); - assert(strcmp(((glsl_type *) entry->data)->name, subroutine_name) == 0); - - mtx_unlock(&glsl_type::mutex); - - return (glsl_type *) entry->data; -} - - -const glsl_type * -glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b) -{ - if (type_a == type_b) { - return type_a; - } else if (type_a->is_matrix() && type_b->is_matrix()) { - /* Matrix multiply. The columns of A must match the rows of B. 
Given - * the other previously tested constraints, this means the vector type - * of a row from A must be the same as the vector type of a column from - * B. - */ - if (type_a->row_type() == type_b->column_type()) { - /* The resulting matrix has the number of columns of matrix B and - * the number of rows of matrix A. We get the row count of A by - * looking at the size of a vector that makes up a column. The - * transpose (size of a row) is done for B. - */ - const glsl_type *const type = - get_instance(type_a->base_type, - type_a->column_type()->vector_elements, - type_b->row_type()->vector_elements); - assert(type != error_type); - - return type; - } - } else if (type_a->is_matrix()) { - /* A is a matrix and B is a column vector. Columns of A must match - * rows of B. Given the other previously tested constraints, this - * means the vector type of a row from A must be the same as the - * vector the type of B. - */ - if (type_a->row_type() == type_b) { - /* The resulting vector has a number of elements equal to - * the number of rows of matrix A. */ - const glsl_type *const type = - get_instance(type_a->base_type, - type_a->column_type()->vector_elements, - 1); - assert(type != error_type); - - return type; - } - } else { - assert(type_b->is_matrix()); - - /* A is a row vector and B is a matrix. Columns of A must match rows - * of B. Given the other previously tested constraints, this means - * the type of A must be the same as the vector type of a column from - * B. - */ - if (type_a == type_b->column_type()) { - /* The resulting vector has a number of elements equal to - * the number of columns of matrix B. */ - const glsl_type *const type = - get_instance(type_a->base_type, - type_b->row_type()->vector_elements, - 1); - assert(type != error_type); - - return type; - } - } - - return error_type; -} - - -const glsl_type * -glsl_type::field_type(const char *name) const -{ - if (this->base_type != GLSL_TYPE_STRUCT - && this->base_type != GLSL_TYPE_INTERFACE) - return error_type; - - for (unsigned i = 0; i < this->length; i++) { - if (strcmp(name, this->fields.structure[i].name) == 0) - return this->fields.structure[i].type; - } - - return error_type; -} - - -int -glsl_type::field_index(const char *name) const -{ - if (this->base_type != GLSL_TYPE_STRUCT - && this->base_type != GLSL_TYPE_INTERFACE) - return -1; - - for (unsigned i = 0; i < this->length; i++) { - if (strcmp(name, this->fields.structure[i].name) == 0) - return i; - } - - return -1; -} - - -unsigned -glsl_type::component_slots() const -{ - switch (this->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - return this->components(); - - case GLSL_TYPE_DOUBLE: - return 2 * this->components(); - - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_INTERFACE: { - unsigned size = 0; - - for (unsigned i = 0; i < this->length; i++) - size += this->fields.structure[i].type->component_slots(); - - return size; - } - - case GLSL_TYPE_ARRAY: - return this->length * this->fields.array->component_slots(); - - case GLSL_TYPE_IMAGE: - return 1; - case GLSL_TYPE_SUBROUTINE: - return 1; - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_ATOMIC_UINT: - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - break; - } - - return 0; -} - -unsigned -glsl_type::record_location_offset(unsigned length) const -{ - unsigned offset = 0; - const glsl_type *t = this->without_array(); - if (t->is_record()) { - assert(length <= t->length); - - for (unsigned i = 0; i < length; i++) { - const glsl_type *st = 
t->fields.structure[i].type;
-         const glsl_type *wa = st->without_array();
-         if (wa->is_record()) {
-            unsigned r_offset = wa->record_location_offset(wa->length);
-            offset += st->is_array() ?
-               st->arrays_of_arrays_size() * r_offset : r_offset;
-         } else if (st->is_array() && st->fields.array->is_array()) {
-            unsigned outer_array_size = st->length;
-            const glsl_type *base_type = st->fields.array;
-
-            /* For arrays of arrays the outer arrays take up a uniform
-             * slot for each element. The innermost array elements share a
-             * single slot so we ignore the innermost array when calculating
-             * the offset.
-             */
-            while (base_type->fields.array->is_array()) {
-               outer_array_size = outer_array_size * base_type->length;
-               base_type = base_type->fields.array;
-            }
-            offset += outer_array_size;
-         } else {
-            /* We don't worry about arrays here because unless the array
-             * contains a structure or another array it only takes up a single
-             * uniform slot.
-             */
-            offset += 1;
-         }
-      }
-   }
-   return offset;
-}
-
-unsigned
-glsl_type::uniform_locations() const
-{
-   unsigned size = 0;
-
-   switch (this->base_type) {
-   case GLSL_TYPE_UINT:
-   case GLSL_TYPE_INT:
-   case GLSL_TYPE_FLOAT:
-   case GLSL_TYPE_DOUBLE:
-   case GLSL_TYPE_BOOL:
-   case GLSL_TYPE_SAMPLER:
-   case GLSL_TYPE_IMAGE:
-   case GLSL_TYPE_SUBROUTINE:
-      return 1;
-
-   case GLSL_TYPE_STRUCT:
-   case GLSL_TYPE_INTERFACE:
-      for (unsigned i = 0; i < this->length; i++)
-         size += this->fields.structure[i].type->uniform_locations();
-      return size;
-   case GLSL_TYPE_ARRAY:
-      return this->length * this->fields.array->uniform_locations();
-   default:
-      return 0;
-   }
-}
-
-bool
-glsl_type::can_implicitly_convert_to(const glsl_type *desired,
-                                     _mesa_glsl_parse_state *state) const
-{
-   if (this == desired)
-      return true;
-
-   /* There is no conversion among matrix types. */
-   if (this->matrix_columns > 1 || desired->matrix_columns > 1)
-      return false;
-
-   /* Vector size must match. */
-   if (this->vector_elements != desired->vector_elements)
-      return false;
-
-   /* int and uint can be converted to float. */
-   if (desired->is_float() && this->is_integer())
-      return true;
-
-   /* With GLSL 4.0 / ARB_gpu_shader5, int can be converted to uint.
-    * Note that state may be NULL here, when resolving function calls in the
-    * linker. By this time, all the state-dependent checks have already
-    * happened though, so allow anything that's allowed in any shader version.
-    */
-   if ((!state || state->is_version(400, 0) || state->ARB_gpu_shader5_enable) &&
-       desired->base_type == GLSL_TYPE_UINT && this->base_type == GLSL_TYPE_INT)
-      return true;
-
-   /* No implicit conversions from double. */
-   if ((!state || state->has_double()) && this->is_double())
-      return false;
-
-   /* Conversions from different types to double. */
-   if ((!state || state->has_double()) && desired->is_double()) {
-      if (this->is_float())
-         return true;
-      if (this->is_integer())
-         return true;
-   }
-
-   return false;
-}
-
-unsigned
-glsl_type::std140_base_alignment(bool row_major) const
-{
-   unsigned N = is_double() ? 8 : 4;
-
-   /* (1) If the member is a scalar consuming <N> basic machine units, the
-    *     base alignment is <N>.
-    *
-    * (2) If the member is a two- or four-component vector with components
-    *     consuming <N> basic machine units, the base alignment is 2<N> or
-    *     4<N>, respectively.
-    *
-    * (3) If the member is a three-component vector with components consuming
-    *     <N> basic machine units, the base alignment is 4<N>.
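-    *
-    *     For float components N is 4, so a float scalar has a base
-    *     alignment of 4, a vec2 of 8, and a vec3 or vec4 of 16; for
-    *     double components N is 8 and each of these values doubles.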
-    */
-   if (this->is_scalar() || this->is_vector()) {
-      switch (this->vector_elements) {
-      case 1:
-         return N;
-      case 2:
-         return 2 * N;
-      case 3:
-      case 4:
-         return 4 * N;
-      }
-   }
-
-   /* (4) If the member is an array of scalars or vectors, the base alignment
-    *     and array stride are set to match the base alignment of a single
-    *     array element, according to rules (1), (2), and (3), and rounded up
-    *     to the base alignment of a vec4. The array may have padding at the
-    *     end; the base offset of the member following the array is rounded up
-    *     to the next multiple of the base alignment.
-    *
-    * (6) If the member is an array of <S> column-major matrices with <C>
-    *     columns and <R> rows, the matrix is stored identically to a row of
-    *     <S>*<C> column vectors with <R> components each, according to rule
-    *     (4).
-    *
-    * (8) If the member is an array of <S> row-major matrices with <C> columns
-    *     and <R> rows, the matrix is stored identically to a row of <S>*<R>
-    *     row vectors with <C> components each, according to rule (4).
-    *
-    * (10) If the member is an array of <S> structures, the <S> elements of
-    *      the array are laid out in order, according to rule (9).
-    */
-   if (this->is_array()) {
-      if (this->fields.array->is_scalar() ||
-          this->fields.array->is_vector() ||
-          this->fields.array->is_matrix()) {
-         return MAX2(this->fields.array->std140_base_alignment(row_major), 16);
-      } else {
-         assert(this->fields.array->is_record() ||
-                this->fields.array->is_array());
-         return this->fields.array->std140_base_alignment(row_major);
-      }
-   }
-
-   /* (5) If the member is a column-major matrix with <C> columns and
-    *     <R> rows, the matrix is stored identically to an array of
-    *     <C> column vectors with <R> components each, according to
-    *     rule (4).
-    *
-    * (7) If the member is a row-major matrix with <C> columns and <R>
-    *     rows, the matrix is stored identically to an array of <R>
-    *     row vectors with <C> components each, according to rule (4).
-    */
-   if (this->is_matrix()) {
-      const struct glsl_type *vec_type, *array_type;
-      int c = this->matrix_columns;
-      int r = this->vector_elements;
-
-      if (row_major) {
-         vec_type = get_instance(base_type, c, 1);
-         array_type = glsl_type::get_array_instance(vec_type, r);
-      } else {
-         vec_type = get_instance(base_type, r, 1);
-         array_type = glsl_type::get_array_instance(vec_type, c);
-      }
-
-      return array_type->std140_base_alignment(false);
-   }
-
-   /* (9) If the member is a structure, the base alignment of the
-    *     structure is <N>, where <N> is the largest base alignment
-    *     value of any of its members, and rounded up to the base
-    *     alignment of a vec4. The individual members of this
-    *     sub-structure are then assigned offsets by applying this set
-    *     of rules recursively, where the base offset of the first
-    *     member of the sub-structure is equal to the aligned offset
-    *     of the structure. The structure may have padding at the end;
-    *     the base offset of the member following the sub-structure is
-    *     rounded up to the next multiple of the base alignment of the
-    *     structure.
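-    *
-    *     For example, a block member declared as
-    *
-    *         struct S { float f; vec3 v; };
-    *
-    *     takes the alignment of its most strictly aligned member, here
-    *     the vec3 (16), which is already a multiple of the vec4 alignment.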
-    */
-   if (this->is_record()) {
-      unsigned base_alignment = 16;
-      for (unsigned i = 0; i < this->length; i++) {
-         bool field_row_major = row_major;
-         const enum glsl_matrix_layout matrix_layout =
-            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
-         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
-            field_row_major = true;
-         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
-            field_row_major = false;
-         }
-
-         const struct glsl_type *field_type = this->fields.structure[i].type;
-         base_alignment = MAX2(base_alignment,
-                               field_type->std140_base_alignment(field_row_major));
-      }
-      return base_alignment;
-   }
-
-   assert(!"not reached");
-   return -1;
-}
-
-unsigned
-glsl_type::std140_size(bool row_major) const
-{
-   unsigned N = is_double() ? 8 : 4;
-
-   /* (1) If the member is a scalar consuming <N> basic machine units, the
-    *     base alignment is <N>.
-    *
-    * (2) If the member is a two- or four-component vector with components
-    *     consuming <N> basic machine units, the base alignment is 2<N> or
-    *     4<N>, respectively.
-    *
-    * (3) If the member is a three-component vector with components consuming
-    *     <N> basic machine units, the base alignment is 4<N>.
-    */
-   if (this->is_scalar() || this->is_vector()) {
-      return this->vector_elements * N;
-   }
-
-   /* (5) If the member is a column-major matrix with <C> columns and
-    *     <R> rows, the matrix is stored identically to an array of
-    *     <C> column vectors with <R> components each, according to
-    *     rule (4).
-    *
-    * (6) If the member is an array of <S> column-major matrices with <C>
-    *     columns and <R> rows, the matrix is stored identically to a row of
-    *     <S>*<C> column vectors with <R> components each, according to rule
-    *     (4).
-    *
-    * (7) If the member is a row-major matrix with <C> columns and <R>
-    *     rows, the matrix is stored identically to an array of <R>
-    *     row vectors with <C> components each, according to rule (4).
-    *
-    * (8) If the member is an array of <S> row-major matrices with <C> columns
-    *     and <R> rows, the matrix is stored identically to a row of <S>*<R>
-    *     row vectors with <C> components each, according to rule (4).
-    */
-   if (this->without_array()->is_matrix()) {
-      const struct glsl_type *element_type;
-      const struct glsl_type *vec_type;
-      unsigned int array_len;
-
-      if (this->is_array()) {
-         element_type = this->without_array();
-         array_len = this->arrays_of_arrays_size();
-      } else {
-         element_type = this;
-         array_len = 1;
-      }
-
-      if (row_major) {
-         vec_type = get_instance(element_type->base_type,
-                                 element_type->matrix_columns, 1);
-
-         array_len *= element_type->vector_elements;
-      } else {
-         vec_type = get_instance(element_type->base_type,
-                                 element_type->vector_elements, 1);
-         array_len *= element_type->matrix_columns;
-      }
-      const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
-                                                                  array_len);
-
-      return array_type->std140_size(false);
-   }
-
-   /* (4) If the member is an array of scalars or vectors, the base alignment
-    *     and array stride are set to match the base alignment of a single
-    *     array element, according to rules (1), (2), and (3), and rounded up
-    *     to the base alignment of a vec4. The array may have padding at the
-    *     end; the base offset of the member following the array is rounded up
-    *     to the next multiple of the base alignment.
-    *
-    * (10) If the member is an array of <S> structures, the <S> elements of
-    *      the array are laid out in order, according to rule (9).
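-    *
-    *      Rule (4) is why a std140 float[3] occupies 3 * 16 = 48 bytes:
-    *      the array stride is rounded up to the base alignment of a vec4.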
-    */
-   if (this->is_array()) {
-      if (this->without_array()->is_record()) {
-         return this->arrays_of_arrays_size() *
-            this->without_array()->std140_size(row_major);
-      } else {
-         unsigned element_base_align =
-            this->without_array()->std140_base_alignment(row_major);
-         return this->arrays_of_arrays_size() * MAX2(element_base_align, 16);
-      }
-   }
-
-   /* (9) If the member is a structure, the base alignment of the
-    *     structure is <N>, where <N> is the largest base alignment
-    *     value of any of its members, and rounded up to the base
-    *     alignment of a vec4. The individual members of this
-    *     sub-structure are then assigned offsets by applying this set
-    *     of rules recursively, where the base offset of the first
-    *     member of the sub-structure is equal to the aligned offset
-    *     of the structure. The structure may have padding at the end;
-    *     the base offset of the member following the sub-structure is
-    *     rounded up to the next multiple of the base alignment of the
-    *     structure.
-    */
-   if (this->is_record() || this->is_interface()) {
-      unsigned size = 0;
-      unsigned max_align = 0;
-
-      for (unsigned i = 0; i < this->length; i++) {
-         bool field_row_major = row_major;
-         const enum glsl_matrix_layout matrix_layout =
-            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
-         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
-            field_row_major = true;
-         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
-            field_row_major = false;
-         }
-
-         const struct glsl_type *field_type = this->fields.structure[i].type;
-         unsigned align = field_type->std140_base_alignment(field_row_major);
-
-         /* Ignore unsized arrays when calculating size */
-         if (field_type->is_unsized_array())
-            continue;
-
-         size = glsl_align(size, align);
-         size += field_type->std140_size(field_row_major);
-
-         max_align = MAX2(align, max_align);
-
-         if (field_type->is_record() && (i + 1 < this->length))
-            size = glsl_align(size, 16);
-      }
-      size = glsl_align(size, MAX2(max_align, 16));
-      return size;
-   }
-
-   assert(!"not reached");
-   return -1;
-}
-
-unsigned
-glsl_type::std430_base_alignment(bool row_major) const
-{
-
-   unsigned N = is_double() ? 8 : 4;
-
-   /* (1) If the member is a scalar consuming <N> basic machine units, the
-    *     base alignment is <N>.
-    *
-    * (2) If the member is a two- or four-component vector with components
-    *     consuming <N> basic machine units, the base alignment is 2<N> or
-    *     4<N>, respectively.
-    *
-    * (3) If the member is a three-component vector with components consuming
-    *     <N> basic machine units, the base alignment is 4<N>.
-    */
-   if (this->is_scalar() || this->is_vector()) {
-      switch (this->vector_elements) {
-      case 1:
-         return N;
-      case 2:
-         return 2 * N;
-      case 3:
-      case 4:
-         return 4 * N;
-      }
-   }
-
-   /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
-    *
-    * "When using the std430 storage layout, shader storage blocks will be
-    * laid out in buffer storage identically to uniform and shader storage
-    * blocks using the std140 layout, except that the base alignment and
-    * stride of arrays of scalars and vectors in rule 4 and of structures
-    * in rule 9 are not rounded up to a multiple of the base alignment of a
-    * vec4."
-    */
-
-   /* (1) If the member is a scalar consuming <N> basic machine units, the
-    *     base alignment is <N>.
-    *
-    * (2) If the member is a two- or four-component vector with components
-    *     consuming <N> basic machine units, the base alignment is 2<N> or
-    *     4<N>, respectively.
-    *
-    * (3) If the member is a three-component vector with components consuming
-    *     <N> basic machine units, the base alignment is 4<N>.
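-    *
-    *     Because the rounding in rule (4) is dropped, the float[3] that
-    *     needs 48 bytes under std140 needs only 3 * 4 = 12 bytes here.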
-    */
-   if (this->is_array())
-      return this->fields.array->std430_base_alignment(row_major);
-
-   /* (5) If the member is a column-major matrix with <C> columns and
-    *     <R> rows, the matrix is stored identically to an array of
-    *     <C> column vectors with <R> components each, according to
-    *     rule (4).
-    *
-    * (7) If the member is a row-major matrix with <C> columns and <R>
-    *     rows, the matrix is stored identically to an array of <R>
-    *     row vectors with <C> components each, according to rule (4).
-    */
-   if (this->is_matrix()) {
-      const struct glsl_type *vec_type, *array_type;
-      int c = this->matrix_columns;
-      int r = this->vector_elements;
-
-      if (row_major) {
-         vec_type = get_instance(base_type, c, 1);
-         array_type = glsl_type::get_array_instance(vec_type, r);
-      } else {
-         vec_type = get_instance(base_type, r, 1);
-         array_type = glsl_type::get_array_instance(vec_type, c);
-      }
-
-      return array_type->std430_base_alignment(false);
-   }
-
-   /* (9) If the member is a structure, the base alignment of the
-    *     structure is <N>, where <N> is the largest base alignment
-    *     value of any of its members, and rounded up to the base
-    *     alignment of a vec4. The individual members of this
-    *     sub-structure are then assigned offsets by applying this set
-    *     of rules recursively, where the base offset of the first
-    *     member of the sub-structure is equal to the aligned offset
-    *     of the structure. The structure may have padding at the end;
-    *     the base offset of the member following the sub-structure is
-    *     rounded up to the next multiple of the base alignment of the
-    *     structure.
-    */
-   if (this->is_record()) {
-      unsigned base_alignment = 0;
-      for (unsigned i = 0; i < this->length; i++) {
-         bool field_row_major = row_major;
-         const enum glsl_matrix_layout matrix_layout =
-            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
-         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
-            field_row_major = true;
-         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
-            field_row_major = false;
-         }
-
-         const struct glsl_type *field_type = this->fields.structure[i].type;
-         base_alignment = MAX2(base_alignment,
-                               field_type->std430_base_alignment(field_row_major));
-      }
-      assert(base_alignment > 0);
-      return base_alignment;
-   }
-   assert(!"not reached");
-   return -1;
-}
-
-unsigned
-glsl_type::std430_array_stride(bool row_major) const
-{
-   unsigned N = is_double() ? 8 : 4;
-
-   /* Notice that the array stride of a vec3 is not 3 * N but 4 * N.
-    * See OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout"
-    *
-    * (3) If the member is a three-component vector with components consuming
-    *     <N> basic machine units, the base alignment is 4<N>.
-    */
-   if (this->is_vector() && this->vector_elements == 3)
-      return 4 * N;
-
-   /* By default use std430_size(row_major) */
-   return this->std430_size(row_major);
-}
-
-unsigned
-glsl_type::std430_size(bool row_major) const
-{
-   unsigned N = is_double() ? 8 : 4;
-
-   /* OpenGL 4.30 spec, section 7.6.2.2 "Standard Uniform Block Layout":
-    *
-    * "When using the std430 storage layout, shader storage blocks will be
-    * laid out in buffer storage identically to uniform and shader storage
-    * blocks using the std140 layout, except that the base alignment and
-    * stride of arrays of scalars and vectors in rule 4 and of structures
-    * in rule 9 are not rounded up to a multiple of the base alignment of a
-    * vec4."
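-    *
-    * Rule (3) is not relaxed, though: a vec3 keeps a base alignment of
-    * 4N, so e.g. vec3[2] still has an array stride of 16 (see
-    * std430_array_stride() above).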
-    */
-   if (this->is_scalar() || this->is_vector())
-      return this->vector_elements * N;
-
-   if (this->without_array()->is_matrix()) {
-      const struct glsl_type *element_type;
-      const struct glsl_type *vec_type;
-      unsigned int array_len;
-
-      if (this->is_array()) {
-         element_type = this->without_array();
-         array_len = this->arrays_of_arrays_size();
-      } else {
-         element_type = this;
-         array_len = 1;
-      }
-
-      if (row_major) {
-         vec_type = get_instance(element_type->base_type,
-                                 element_type->matrix_columns, 1);
-
-         array_len *= element_type->vector_elements;
-      } else {
-         vec_type = get_instance(element_type->base_type,
-                                 element_type->vector_elements, 1);
-         array_len *= element_type->matrix_columns;
-      }
-      const glsl_type *array_type = glsl_type::get_array_instance(vec_type,
-                                                                  array_len);
-
-      return array_type->std430_size(false);
-   }
-
-   if (this->is_array()) {
-      if (this->without_array()->is_record())
-         return this->arrays_of_arrays_size() *
-            this->without_array()->std430_size(row_major);
-      else
-         return this->arrays_of_arrays_size() *
-            this->without_array()->std430_base_alignment(row_major);
-   }
-
-   if (this->is_record() || this->is_interface()) {
-      unsigned size = 0;
-      unsigned max_align = 0;
-
-      for (unsigned i = 0; i < this->length; i++) {
-         bool field_row_major = row_major;
-         const enum glsl_matrix_layout matrix_layout =
-            glsl_matrix_layout(this->fields.structure[i].matrix_layout);
-         if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) {
-            field_row_major = true;
-         } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) {
-            field_row_major = false;
-         }
-
-         const struct glsl_type *field_type = this->fields.structure[i].type;
-         unsigned align = field_type->std430_base_alignment(field_row_major);
-         size = glsl_align(size, align);
-         size += field_type->std430_size(field_row_major);
-
-         max_align = MAX2(align, max_align);
-      }
-      size = glsl_align(size, max_align);
-      return size;
-   }
-
-   assert(!"not reached");
-   return -1;
-}
-
-unsigned
-glsl_type::count_attribute_slots(bool vertex_input_slots) const
-{
-   /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec:
-    *
-    *     "A scalar input counts the same amount against this limit as a vec4,
-    *     so applications may want to consider packing groups of four
-    *     unrelated float inputs together into a vector to better utilize the
-    *     capabilities of the underlying hardware. A matrix input will use up
-    *     multiple locations. The number of locations used will equal the
-    *     number of columns in the matrix."
-    *
-    * The spec does not explicitly say how arrays are counted. However, it
-    * should be safe to assume the total number of slots consumed by an array
-    * is the number of entries in the array multiplied by the number of slots
-    * consumed by a single element of the array.
-    *
-    * The spec says nothing about how structs are counted, because vertex
-    * attributes are not allowed to be (or contain) structs. However, Mesa
-    * allows varying structs; the number of varying slots taken up by a
-    * varying struct is simply equal to the sum of the number of slots taken
-    * up by each element.
-    *
-    * Doubles are counted differently depending on whether they are vertex
-    * inputs or everything else. Vertex inputs from ARB_vertex_attrib_64bit
-    * take one location no matter what size they are, otherwise dvec3/4
-    * take two locations.
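-    *
-    * For example, a mat4 input consumes 4 slots and a mat2x3 consumes 2,
-    * while a dvec4 consumes 1 slot as a vertex input but 2 slots as a
-    * varying.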
- */ - switch (this->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - return this->matrix_columns; - case GLSL_TYPE_DOUBLE: - if (this->vector_elements > 2 && !vertex_input_slots) - return this->matrix_columns * 2; - else - return this->matrix_columns; - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_INTERFACE: { - unsigned size = 0; - - for (unsigned i = 0; i < this->length; i++) - size += this->fields.structure[i].type->count_attribute_slots(vertex_input_slots); - - return size; - } - - case GLSL_TYPE_ARRAY: - return this->length * this->fields.array->count_attribute_slots(vertex_input_slots); - - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_ATOMIC_UINT: - case GLSL_TYPE_VOID: - case GLSL_TYPE_SUBROUTINE: - case GLSL_TYPE_ERROR: - break; - } - - assert(!"Unexpected type in count_attribute_slots()"); - - return 0; -} - -int -glsl_type::coordinate_components() const -{ - int size; - - switch (sampler_dimensionality) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - size = 1; - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_MS: - case GLSL_SAMPLER_DIM_EXTERNAL: - size = 2; - break; - case GLSL_SAMPLER_DIM_3D: - case GLSL_SAMPLER_DIM_CUBE: - size = 3; - break; - default: - assert(!"Should not get here."); - size = 1; - break; - } - - /* Array textures need an additional component for the array index, except - * for cubemap array images that behave like a 2D array of interleaved - * cubemap faces. - */ - if (sampler_array && - !(base_type == GLSL_TYPE_IMAGE && - sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE)) - size += 1; - - return size; -} - -/** - * Declarations of type flyweights (glsl_type::_foo_type) and - * convenience pointers (glsl_type::foo_type). - * @{ - */ -#define DECL_TYPE(NAME, ...) \ - const glsl_type glsl_type::_##NAME##_type = glsl_type(__VA_ARGS__, #NAME); \ - const glsl_type *const glsl_type::NAME##_type = &glsl_type::_##NAME##_type; - -#define STRUCT_TYPE(NAME) - -#include "compiler/builtin_type_macros.h" -/** @} */ diff --git a/src/glsl/nir/glsl_types.h b/src/glsl/nir/glsl_types.h deleted file mode 100644 index e63d7945c9f..00000000000 --- a/src/glsl/nir/glsl_types.h +++ /dev/null @@ -1,887 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */
-
-#pragma once
-#ifndef GLSL_TYPES_H
-#define GLSL_TYPES_H
-
-#include <string.h>
-#include <assert.h>
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-struct _mesa_glsl_parse_state;
-struct glsl_symbol_table;
-
-extern void
-_mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state);
-
-extern void
-_mesa_glsl_release_types(void);
-
-#ifdef __cplusplus
-}
-#endif
-
-enum glsl_base_type {
-   GLSL_TYPE_UINT = 0,
-   GLSL_TYPE_INT,
-   GLSL_TYPE_FLOAT,
-   GLSL_TYPE_DOUBLE,
-   GLSL_TYPE_BOOL,
-   GLSL_TYPE_SAMPLER,
-   GLSL_TYPE_IMAGE,
-   GLSL_TYPE_ATOMIC_UINT,
-   GLSL_TYPE_STRUCT,
-   GLSL_TYPE_INTERFACE,
-   GLSL_TYPE_ARRAY,
-   GLSL_TYPE_VOID,
-   GLSL_TYPE_SUBROUTINE,
-   GLSL_TYPE_ERROR
-};
-
-enum glsl_sampler_dim {
-   GLSL_SAMPLER_DIM_1D = 0,
-   GLSL_SAMPLER_DIM_2D,
-   GLSL_SAMPLER_DIM_3D,
-   GLSL_SAMPLER_DIM_CUBE,
-   GLSL_SAMPLER_DIM_RECT,
-   GLSL_SAMPLER_DIM_BUF,
-   GLSL_SAMPLER_DIM_EXTERNAL,
-   GLSL_SAMPLER_DIM_MS
-};
-
-enum glsl_interface_packing {
-   GLSL_INTERFACE_PACKING_STD140,
-   GLSL_INTERFACE_PACKING_SHARED,
-   GLSL_INTERFACE_PACKING_PACKED,
-   GLSL_INTERFACE_PACKING_STD430
-};
-
-enum glsl_matrix_layout {
-   /**
-    * The layout of the matrix is inherited from the object containing the
-    * matrix (the top level structure or the uniform block).
-    */
-   GLSL_MATRIX_LAYOUT_INHERITED,
-
-   /**
-    * Explicit column-major layout
-    *
-    * If a uniform block doesn't have an explicit layout set, it will default
-    * to this layout.
-    */
-   GLSL_MATRIX_LAYOUT_COLUMN_MAJOR,
-
-   /**
-    * Row-major layout
-    */
-   GLSL_MATRIX_LAYOUT_ROW_MAJOR
-};
-
-enum {
-   GLSL_PRECISION_NONE = 0,
-   GLSL_PRECISION_HIGH,
-   GLSL_PRECISION_MEDIUM,
-   GLSL_PRECISION_LOW
-};
-
-#ifdef __cplusplus
-#include "GL/gl.h"
-#include "util/ralloc.h"
-#include "main/mtypes.h" /* for gl_texture_index, C++'s enum rules are broken */
-
-struct glsl_type {
-   GLenum gl_type;
-   glsl_base_type base_type;
-
-   unsigned sampler_dimensionality:3; /**< \see glsl_sampler_dim */
-   unsigned sampler_shadow:1;
-   unsigned sampler_array:1;
-   unsigned sampler_type:2;    /**< Type of data returned using this
-                                * sampler or image.  Only \c
-                                * GLSL_TYPE_FLOAT, \c GLSL_TYPE_INT,
-                                * and \c GLSL_TYPE_UINT are valid.
-                                */
-   unsigned interface_packing:2;
-
-   /* Callers of this ralloc-based new need not call delete. It's
-    * easier to just ralloc_free 'mem_ctx' (or any of its ancestors). */
-   static void* operator new(size_t size)
-   {
-      mtx_lock(&glsl_type::mutex);
-
-      /* mem_ctx should have been created by the static members */
-      assert(glsl_type::mem_ctx != NULL);
-
-      void *type;
-
-      type = ralloc_size(glsl_type::mem_ctx, size);
-      assert(type != NULL);
-
-      mtx_unlock(&glsl_type::mutex);
-
-      return type;
-   }
-
-   /* If the user *does* call delete, that's OK, we will just
-    * ralloc_free in that case. */
-   static void operator delete(void *type)
-   {
-      mtx_lock(&glsl_type::mutex);
-      ralloc_free(type);
-      mtx_unlock(&glsl_type::mutex);
-   }
-
-   /**
-    * \name Vector and matrix element counts
-    *
-    * For scalars, each of these values will be 1.  For non-numeric types
-    * these will be 0.
-    */
-   /*@{*/
-   uint8_t vector_elements;    /**< 1, 2, 3, or 4 vector elements. */
-   uint8_t matrix_columns;     /**< 1, 2, 3, or 4 matrix columns. */
-   /*@}*/
-
-   /**
-    * For \c GLSL_TYPE_ARRAY, this is the length of the array.  For
-    * \c GLSL_TYPE_STRUCT or \c GLSL_TYPE_INTERFACE, it is the number of
-    * elements in the structure and the number of values pointed to by
-    * \c fields.structure (below).
-    */
-   unsigned length;
-
-   /**
-    * Name of the data type
-    *
-    * Will never be \c NULL.
- */ - const char *name; - - /** - * Subtype of composite data types. - */ - union { - const struct glsl_type *array; /**< Type of array elements. */ - const struct glsl_type *parameters; /**< Parameters to function. */ - struct glsl_struct_field *structure; /**< List of struct fields. */ - } fields; - - /** - * \name Pointers to various public type singletons - */ - /*@{*/ -#undef DECL_TYPE -#define DECL_TYPE(NAME, ...) \ - static const glsl_type *const NAME##_type; -#undef STRUCT_TYPE -#define STRUCT_TYPE(NAME) \ - static const glsl_type *const struct_##NAME##_type; -#include "compiler/builtin_type_macros.h" - /*@}*/ - - /** - * Convenience accessors for vector types (shorter than get_instance()). - * @{ - */ - static const glsl_type *vec(unsigned components); - static const glsl_type *dvec(unsigned components); - static const glsl_type *ivec(unsigned components); - static const glsl_type *uvec(unsigned components); - static const glsl_type *bvec(unsigned components); - /**@}*/ - - /** - * For numeric and boolean derived types returns the basic scalar type - * - * If the type is a numeric or boolean scalar, vector, or matrix type, - * this function gets the scalar type of the individual components. For - * all other types, including arrays of numeric or boolean types, the - * error type is returned. - */ - const glsl_type *get_base_type() const; - - /** - * Get the basic scalar type which this type aggregates. - * - * If the type is a numeric or boolean scalar, vector, or matrix, or an - * array of any of those, this function gets the scalar type of the - * individual components. For structs and arrays of structs, this function - * returns the struct type. For samplers and arrays of samplers, this - * function returns the sampler type. - */ - const glsl_type *get_scalar_type() const; - - /** - * Get the instance of a built-in scalar, vector, or matrix type - */ - static const glsl_type *get_instance(unsigned base_type, unsigned rows, - unsigned columns); - - /** - * Get the instance of a sampler type - */ - static const glsl_type *get_sampler_instance(enum glsl_sampler_dim dim, - bool shadow, - bool array, - glsl_base_type type); - - - /** - * Get the instance of an array type - */ - static const glsl_type *get_array_instance(const glsl_type *base, - unsigned elements); - - /** - * Get the instance of a record type - */ - static const glsl_type *get_record_instance(const glsl_struct_field *fields, - unsigned num_fields, - const char *name); - - /** - * Get the instance of an interface block type - */ - static const glsl_type *get_interface_instance(const glsl_struct_field *fields, - unsigned num_fields, - enum glsl_interface_packing packing, - const char *block_name); - - /** - * Get the instance of an subroutine type - */ - static const glsl_type *get_subroutine_instance(const char *subroutine_name); - - /** - * Get the type resulting from a multiplication of \p type_a * \p type_b - */ - static const glsl_type *get_mul_type(const glsl_type *type_a, - const glsl_type *type_b); - - /** - * Query the total number of scalars that make up a scalar, vector or matrix - */ - unsigned components() const - { - return vector_elements * matrix_columns; - } - - /** - * Calculate the number of components slots required to hold this type - * - * This is used to determine how many uniform or varying locations a type - * might occupy. - */ - unsigned component_slots() const; - - /** - * Calculate offset between the base location of the struct in - * uniform storage and a struct member. 
- * For the initial call, length is the index of the member to find the - * offset for. - */ - unsigned record_location_offset(unsigned length) const; - - /** - * Calculate the number of unique values from glGetUniformLocation for the - * elements of the type. - * - * This is used to allocate slots in the UniformRemapTable, the amount of - * locations may not match with actual used storage space by the driver. - */ - unsigned uniform_locations() const; - - /** - * Calculate the number of attribute slots required to hold this type - * - * This implements the language rules of GLSL 1.50 for counting the number - * of slots used by a vertex attribute. It also determines the number of - * varying slots the type will use up in the absence of varying packing - * (and thus, it can be used to measure the number of varying slots used by - * the varyings that are generated by lower_packed_varyings). - * - * For vertex shader attributes - doubles only take one slot. - * For inter-shader varyings - dvec3/dvec4 take two slots. - */ - unsigned count_attribute_slots(bool vertex_input_slots) const; - - /** - * Alignment in bytes of the start of this type in a std140 uniform - * block. - */ - unsigned std140_base_alignment(bool row_major) const; - - /** Size in bytes of this type in a std140 uniform block. - * - * Note that this is not GL_UNIFORM_SIZE (which is the number of - * elements in the array) - */ - unsigned std140_size(bool row_major) const; - - /** - * Alignment in bytes of the start of this type in a std430 shader - * storage block. - */ - unsigned std430_base_alignment(bool row_major) const; - - /** - * Calculate array stride in bytes of this type in a std430 shader storage - * block. - */ - unsigned std430_array_stride(bool row_major) const; - - /** - * Size in bytes of this type in a std430 shader storage block. - * - * Note that this is not GL_BUFFER_SIZE - */ - unsigned std430_size(bool row_major) const; - - /** - * \brief Can this type be implicitly converted to another? - * - * \return True if the types are identical or if this type can be converted - * to \c desired according to Section 4.1.10 of the GLSL spec. - * - * \verbatim - * From page 25 (31 of the pdf) of the GLSL 1.50 spec, Section 4.1.10 - * Implicit Conversions: - * - * In some situations, an expression and its type will be implicitly - * converted to a different type. The following table shows all allowed - * implicit conversions: - * - * Type of expression | Can be implicitly converted to - * -------------------------------------------------- - * int float - * uint - * - * ivec2 vec2 - * uvec2 - * - * ivec3 vec3 - * uvec3 - * - * ivec4 vec4 - * uvec4 - * - * There are no implicit array or structure conversions. For example, - * an array of int cannot be implicitly converted to an array of float. - * There are no implicit conversions between signed and unsigned - * integers. - * \endverbatim - */ - bool can_implicitly_convert_to(const glsl_type *desired, - _mesa_glsl_parse_state *state) const; - - /** - * Query whether or not a type is a scalar (non-vector and non-matrix). 
- */ - bool is_scalar() const - { - return (vector_elements == 1) - && (base_type >= GLSL_TYPE_UINT) - && (base_type <= GLSL_TYPE_BOOL); - } - - /** - * Query whether or not a type is a vector - */ - bool is_vector() const - { - return (vector_elements > 1) - && (matrix_columns == 1) - && (base_type >= GLSL_TYPE_UINT) - && (base_type <= GLSL_TYPE_BOOL); - } - - /** - * Query whether or not a type is a matrix - */ - bool is_matrix() const - { - /* GLSL only has float matrices. */ - return (matrix_columns > 1) && (base_type == GLSL_TYPE_FLOAT || base_type == GLSL_TYPE_DOUBLE); - } - - /** - * Query whether or not a type is a non-array numeric type - */ - bool is_numeric() const - { - return (base_type >= GLSL_TYPE_UINT) && (base_type <= GLSL_TYPE_DOUBLE); - } - - /** - * Query whether or not a type is an integral type - */ - bool is_integer() const - { - return (base_type == GLSL_TYPE_UINT) || (base_type == GLSL_TYPE_INT); - } - - /** - * Query whether or not type is an integral type, or for struct and array - * types, contains an integral type. - */ - bool contains_integer() const; - - /** - * Query whether or not type is a double type, or for struct and array - * types, contains a double type. - */ - bool contains_double() const; - - /** - * Query whether or not a type is a float type - */ - bool is_float() const - { - return base_type == GLSL_TYPE_FLOAT; - } - - /** - * Query whether or not a type is a double type - */ - bool is_double() const - { - return base_type == GLSL_TYPE_DOUBLE; - } - - /** - * Query whether a double takes two slots. - */ - bool is_dual_slot_double() const - { - return base_type == GLSL_TYPE_DOUBLE && vector_elements > 2; - } - - /** - * Query whether or not a type is a non-array boolean type - */ - bool is_boolean() const - { - return base_type == GLSL_TYPE_BOOL; - } - - /** - * Query whether or not a type is a sampler - */ - bool is_sampler() const - { - return base_type == GLSL_TYPE_SAMPLER; - } - - /** - * Query whether or not type is a sampler, or for struct and array - * types, contains a sampler. - */ - bool contains_sampler() const; - - /** - * Get the Mesa texture target index for a sampler type. - */ - gl_texture_index sampler_index() const; - - /** - * Query whether or not type is an image, or for struct and array - * types, contains an image. - */ - bool contains_image() const; - - /** - * Query whether or not a type is an image - */ - bool is_image() const - { - return base_type == GLSL_TYPE_IMAGE; - } - - /** - * Query whether or not a type is an array - */ - bool is_array() const - { - return base_type == GLSL_TYPE_ARRAY; - } - - bool is_array_of_arrays() const - { - return is_array() && fields.array->is_array(); - } - - /** - * Query whether or not a type is a record - */ - bool is_record() const - { - return base_type == GLSL_TYPE_STRUCT; - } - - /** - * Query whether or not a type is an interface - */ - bool is_interface() const - { - return base_type == GLSL_TYPE_INTERFACE; - } - - /** - * Query whether or not a type is the void type singleton. - */ - bool is_void() const - { - return base_type == GLSL_TYPE_VOID; - } - - /** - * Query whether or not a type is the error type singleton. 
- */ - bool is_error() const - { - return base_type == GLSL_TYPE_ERROR; - } - - /** - * Query if a type is unnamed/anonymous (named by the parser) - */ - - bool is_subroutine() const - { - return base_type == GLSL_TYPE_SUBROUTINE; - } - bool contains_subroutine() const; - - bool is_anonymous() const - { - return !strncmp(name, "#anon", 5); - } - - /** - * Get the type stripped of any arrays - * - * \return - * Pointer to the type of elements of the first non-array type for array - * types, or pointer to itself for non-array types. - */ - const glsl_type *without_array() const - { - const glsl_type *t = this; - - while (t->is_array()) - t = t->fields.array; - - return t; - } - - /** - * Return the total number of elements in an array including the elements - * in arrays of arrays. - */ - unsigned arrays_of_arrays_size() const - { - if (!is_array()) - return 0; - - unsigned size = length; - const glsl_type *base_type = fields.array; - - while (base_type->is_array()) { - size = size * base_type->length; - base_type = base_type->fields.array; - } - return size; - } - - /** - * Return the amount of atomic counter storage required for a type. - */ - unsigned atomic_size() const - { - if (base_type == GLSL_TYPE_ATOMIC_UINT) - return ATOMIC_COUNTER_SIZE; - else if (is_array()) - return length * fields.array->atomic_size(); - else - return 0; - } - - /** - * Return whether a type contains any atomic counters. - */ - bool contains_atomic() const - { - return atomic_size() > 0; - } - - /** - * Return whether a type contains any opaque types. - */ - bool contains_opaque() const; - - /** - * Query the full type of a matrix row - * - * \return - * If the type is not a matrix, \c glsl_type::error_type is returned. - * Otherwise a type matching the rows of the matrix is returned. - */ - const glsl_type *row_type() const - { - return is_matrix() - ? get_instance(base_type, matrix_columns, 1) - : error_type; - } - - /** - * Query the full type of a matrix column - * - * \return - * If the type is not a matrix, \c glsl_type::error_type is returned. - * Otherwise a type matching the columns of the matrix is returned. - */ - const glsl_type *column_type() const - { - return is_matrix() - ? get_instance(base_type, vector_elements, 1) - : error_type; - } - - /** - * Get the type of a structure field - * - * \return - * Pointer to the type of the named field. If the type is not a structure - * or the named field does not exist, \c glsl_type::error_type is returned. - */ - const glsl_type *field_type(const char *name) const; - - /** - * Get the location of a field within a record type - */ - int field_index(const char *name) const; - - /** - * Query the number of elements in an array type - * - * \return - * The number of elements in the array for array types or -1 for non-array - * types. If the number of elements in the array has not yet been declared, - * zero is returned. - */ - int array_size() const - { - return is_array() ? length : -1; - } - - /** - * Query whether the array size for all dimensions has been declared. - */ - bool is_unsized_array() const - { - return is_array() && length == 0; - } - - /** - * Return the number of coordinate components needed for this - * sampler or image type. - * - * This is based purely on the sampler's dimensionality. For example, this - * returns 1 for sampler1D, and 3 for sampler2DArray. 
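-    * For sampler2DArray that is two components for the 2D coordinate
-    * plus one for the array layer.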
- * - * Note that this is often different than actual coordinate type used in - * a texturing built-in function, since those pack additional values (such - * as the shadow comparitor or projector) into the coordinate type. - */ - int coordinate_components() const; - - /** - * Compare a record type against another record type. - * - * This is useful for matching record types declared across shader stages. - */ - bool record_compare(const glsl_type *b) const; - -private: - - static mtx_t mutex; - - /** - * ralloc context for all glsl_type allocations - * - * Set on the first call to \c glsl_type::new. - */ - static void *mem_ctx; - - void init_ralloc_type_ctx(void); - - /** Constructor for vector and matrix types */ - glsl_type(GLenum gl_type, - glsl_base_type base_type, unsigned vector_elements, - unsigned matrix_columns, const char *name); - - /** Constructor for sampler or image types */ - glsl_type(GLenum gl_type, glsl_base_type base_type, - enum glsl_sampler_dim dim, bool shadow, bool array, - unsigned type, const char *name); - - /** Constructor for record types */ - glsl_type(const glsl_struct_field *fields, unsigned num_fields, - const char *name); - - /** Constructor for interface types */ - glsl_type(const glsl_struct_field *fields, unsigned num_fields, - enum glsl_interface_packing packing, const char *name); - - /** Constructor for array types */ - glsl_type(const glsl_type *array, unsigned length); - - /** Constructor for subroutine types */ - glsl_type(const char *name); - - /** Hash table containing the known array types. */ - static struct hash_table *array_types; - - /** Hash table containing the known record types. */ - static struct hash_table *record_types; - - /** Hash table containing the known interface types. */ - static struct hash_table *interface_types; - - /** Hash table containing the known subroutine types. */ - static struct hash_table *subroutine_types; - - static bool record_key_compare(const void *a, const void *b); - static unsigned record_key_hash(const void *key); - - /** - * \name Built-in type flyweights - */ - /*@{*/ -#undef DECL_TYPE -#define DECL_TYPE(NAME, ...) static const glsl_type _##NAME##_type; -#undef STRUCT_TYPE -#define STRUCT_TYPE(NAME) static const glsl_type _struct_##NAME##_type; -#include "compiler/builtin_type_macros.h" - /*@}*/ - - /** - * \name Friend functions. - * - * These functions are friends because they must have C linkage and the - * need to call various private methods or access various private static - * data. - */ - /*@{*/ - friend void _mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *); - friend void _mesa_glsl_release_types(void); - /*@}*/ -}; - -struct glsl_struct_field { - const struct glsl_type *type; - const char *name; - - /** - * For interface blocks, gl_varying_slot corresponding to the input/output - * if this is a built-in input/output (i.e. a member of the built-in - * gl_PerVertex interface block); -1 otherwise. - * - * Ignored for structs. - */ - int location; - - /** - * For interface blocks, the interpolation mode (as in - * ir_variable::interpolation). 0 otherwise. - */ - unsigned interpolation:2; - - /** - * For interface blocks, 1 if this variable uses centroid interpolation (as - * in ir_variable::centroid). 0 otherwise. - */ - unsigned centroid:1; - - /** - * For interface blocks, 1 if this variable uses sample interpolation (as - * in ir_variable::sample). 0 otherwise. - */ - unsigned sample:1; - - /** - * Layout of the matrix. Uses glsl_matrix_layout values. 
- */ - unsigned matrix_layout:2; - - /** - * For interface blocks, 1 if this variable is a per-patch input or output - * (as in ir_variable::patch). 0 otherwise. - */ - unsigned patch:1; - - /** - * Precision qualifier - */ - unsigned precision:2; - - /** - * Image qualifiers, applicable to buffer variables defined in shader - * storage buffer objects (SSBOs) - */ - unsigned image_read_only:1; - unsigned image_write_only:1; - unsigned image_coherent:1; - unsigned image_volatile:1; - unsigned image_restrict:1; - - glsl_struct_field(const struct glsl_type *_type, const char *_name) - : type(_type), name(_name), location(-1), interpolation(0), centroid(0), - sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), - precision(GLSL_PRECISION_NONE) - { - /* empty */ - } - - glsl_struct_field() - { - /* empty */ - } -}; - -static inline unsigned int -glsl_align(unsigned int a, unsigned int align) -{ - return (a + align - 1) / align * align; -} - -#undef DECL_TYPE -#undef STRUCT_TYPE -#endif /* __cplusplus */ - -#endif /* GLSL_TYPES_H */ diff --git a/src/glsl/nir/nir_types.h b/src/glsl/nir/nir_types.h index 64a75f607d5..197978804cc 100644 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@ -29,9 +29,9 @@ #include -/* C wrapper around glsl_types.h */ +/* C wrapper around compiler/glsl_types.h */ -#include "glsl_types.h" +#include "compiler/glsl_types.h" #ifdef __cplusplus extern "C" { diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp index c4b87151199..1e58062cb0d 100644 --- a/src/glsl/opt_algebraic.cpp +++ b/src/glsl/opt_algebraic.cpp @@ -33,7 +33,7 @@ #include "ir_rvalue_visitor.h" #include "ir_optimization.h" #include "ir_builder.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" using namespace ir_builder; diff --git a/src/glsl/opt_array_splitting.cpp b/src/glsl/opt_array_splitting.cpp index 89ce76bed2b..cceec6b6431 100644 --- a/src/glsl/opt_array_splitting.cpp +++ b/src/glsl/opt_array_splitting.cpp @@ -36,7 +36,7 @@ #include "ir.h" #include "ir_visitor.h" #include "ir_rvalue_visitor.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" static bool debug = false; diff --git a/src/glsl/opt_conditional_discard.cpp b/src/glsl/opt_conditional_discard.cpp index 8a3ad24873e..1ca8803f643 100644 --- a/src/glsl/opt_conditional_discard.cpp +++ b/src/glsl/opt_conditional_discard.cpp @@ -33,7 +33,7 @@ * (discard ) */ -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "ir.h" namespace { diff --git a/src/glsl/opt_constant_folding.cpp b/src/glsl/opt_constant_folding.cpp index 4aae3f0ddf2..150a17b2af6 100644 --- a/src/glsl/opt_constant_folding.cpp +++ b/src/glsl/opt_constant_folding.cpp @@ -30,7 +30,7 @@ #include "ir_visitor.h" #include "ir_rvalue_visitor.h" #include "ir_optimization.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" namespace { diff --git a/src/glsl/opt_constant_propagation.cpp b/src/glsl/opt_constant_propagation.cpp index fb24a4fad04..416ba16a3c5 100644 --- a/src/glsl/opt_constant_propagation.cpp +++ b/src/glsl/opt_constant_propagation.cpp @@ -39,7 +39,7 @@ #include "ir_rvalue_visitor.h" #include "ir_basic_block.h" #include "ir_optimization.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "util/hash_table.h" namespace { diff --git a/src/glsl/opt_constant_variable.cpp b/src/glsl/opt_constant_variable.cpp index 56f6a819e1e..3ddb12904c7 100644 --- a/src/glsl/opt_constant_variable.cpp +++ b/src/glsl/opt_constant_variable.cpp @@ -35,7 +35,7 @@ #include "ir.h" #include "ir_visitor.h" 
#include "ir_optimization.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "util/hash_table.h" namespace { diff --git a/src/glsl/opt_copy_propagation.cpp b/src/glsl/opt_copy_propagation.cpp index 5d4cb4fe613..310708db868 100644 --- a/src/glsl/opt_copy_propagation.cpp +++ b/src/glsl/opt_copy_propagation.cpp @@ -36,7 +36,7 @@ #include "ir_visitor.h" #include "ir_basic_block.h" #include "ir_optimization.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" namespace { diff --git a/src/glsl/opt_copy_propagation_elements.cpp b/src/glsl/opt_copy_propagation_elements.cpp index 353a5c66841..a6791801943 100644 --- a/src/glsl/opt_copy_propagation_elements.cpp +++ b/src/glsl/opt_copy_propagation_elements.cpp @@ -45,7 +45,7 @@ #include "ir_rvalue_visitor.h" #include "ir_basic_block.h" #include "ir_optimization.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" static bool debug = false; diff --git a/src/glsl/opt_dead_builtin_varyings.cpp b/src/glsl/opt_dead_builtin_varyings.cpp index 53871130e12..37bcbccf0c5 100644 --- a/src/glsl/opt_dead_builtin_varyings.cpp +++ b/src/glsl/opt_dead_builtin_varyings.cpp @@ -51,7 +51,7 @@ #include "ir_rvalue_visitor.h" #include "ir_optimization.h" #include "ir_print_visitor.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "link_varyings.h" namespace { diff --git a/src/glsl/opt_dead_code.cpp b/src/glsl/opt_dead_code.cpp index c2ce0b94ece..dbdb7de8bb8 100644 --- a/src/glsl/opt_dead_code.cpp +++ b/src/glsl/opt_dead_code.cpp @@ -30,7 +30,7 @@ #include "ir.h" #include "ir_visitor.h" #include "ir_variable_refcount.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "util/hash_table.h" static bool debug = false; diff --git a/src/glsl/opt_dead_code_local.cpp b/src/glsl/opt_dead_code_local.cpp index ee9f22c0373..d38fd2bf638 100644 --- a/src/glsl/opt_dead_code_local.cpp +++ b/src/glsl/opt_dead_code_local.cpp @@ -36,7 +36,7 @@ #include "ir.h" #include "ir_basic_block.h" #include "ir_optimization.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" static bool debug = false; diff --git a/src/glsl/opt_dead_functions.cpp b/src/glsl/opt_dead_functions.cpp index 5dff165215a..2e90b650fa8 100644 --- a/src/glsl/opt_dead_functions.cpp +++ b/src/glsl/opt_dead_functions.cpp @@ -30,7 +30,7 @@ #include "ir.h" #include "ir_visitor.h" #include "ir_expression_flattening.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" namespace { diff --git a/src/glsl/opt_function_inlining.cpp b/src/glsl/opt_function_inlining.cpp index 84a9e4fa093..19f5fae0a17 100644 --- a/src/glsl/opt_function_inlining.cpp +++ b/src/glsl/opt_function_inlining.cpp @@ -31,7 +31,7 @@ #include "ir_visitor.h" #include "ir_function_inlining.h" #include "ir_expression_flattening.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "program/hash_table.h" static void diff --git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp index 23d0b109d8b..29482ee69de 100644 --- a/src/glsl/opt_minmax.cpp +++ b/src/glsl/opt_minmax.cpp @@ -36,7 +36,7 @@ #include "ir_optimization.h" #include "ir_builder.h" #include "program/prog_instruction.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "main/macros.h" using namespace ir_builder; diff --git a/src/glsl/opt_noop_swizzle.cpp b/src/glsl/opt_noop_swizzle.cpp index 586ad5e6107..41890ab2b15 100644 --- a/src/glsl/opt_noop_swizzle.cpp +++ b/src/glsl/opt_noop_swizzle.cpp @@ -32,7 +32,7 @@ #include "ir.h" #include "ir_visitor.h" #include "ir_rvalue_visitor.h" 
-#include "glsl_types.h" +#include "compiler/glsl_types.h" namespace { diff --git a/src/glsl/opt_structure_splitting.cpp b/src/glsl/opt_structure_splitting.cpp index abf4310feb3..0d18a2f7584 100644 --- a/src/glsl/opt_structure_splitting.cpp +++ b/src/glsl/opt_structure_splitting.cpp @@ -35,7 +35,7 @@ #include "ir.h" #include "ir_visitor.h" #include "ir_rvalue_visitor.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" namespace { diff --git a/src/glsl/opt_swizzle_swizzle.cpp b/src/glsl/opt_swizzle_swizzle.cpp index 7564c6b3b10..7285474b089 100644 --- a/src/glsl/opt_swizzle_swizzle.cpp +++ b/src/glsl/opt_swizzle_swizzle.cpp @@ -30,7 +30,7 @@ #include "ir.h" #include "ir_visitor.h" #include "ir_optimization.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" namespace { diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp index cd58213c019..83effb7424c 100644 --- a/src/glsl/opt_tree_grafting.cpp +++ b/src/glsl/opt_tree_grafting.cpp @@ -52,7 +52,7 @@ #include "ir_variable_refcount.h" #include "ir_basic_block.h" #include "ir_optimization.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" namespace { diff --git a/src/glsl/opt_vectorize.cpp b/src/glsl/opt_vectorize.cpp index 2f71a83583b..88318cd8a6e 100644 --- a/src/glsl/opt_vectorize.cpp +++ b/src/glsl/opt_vectorize.cpp @@ -55,7 +55,7 @@ #include "ir.h" #include "ir_visitor.h" #include "ir_optimization.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "program/prog_instruction.h" namespace { diff --git a/src/mesa/SConscript b/src/mesa/SConscript index c986326d2bf..5b80a216fef 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -16,7 +16,6 @@ env.Append(CPPPATH = [ '#/src', '#/src/mapi', '#/src/glsl', - '#/src/glsl/nir', '#/src/mesa', '#/src/gallium/include', '#/src/gallium/auxiliary', diff --git a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp index 33d2048e657..3d6aa74d830 100644 --- a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp +++ b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp @@ -30,7 +30,7 @@ * \author Eric Anholt */ -#include "glsl/nir/glsl_types.h" +#include "compiler/glsl_types.h" #include "glsl/ir.h" #include "program/prog_instruction.h" /* For WRITEMASK_* */ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index cbeab6f616c..41a3f81b9d8 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -38,7 +38,7 @@ #include "brw_cfg.h" #include "brw_program.h" #include "brw_dead_control_flow.h" -#include "glsl/nir/glsl_types.h" +#include "compiler/glsl_types.h" using namespace brw; diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 21f0b703d00..22877797522 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -43,7 +43,7 @@ #include "glsl/ir.h" #include "glsl/ir_expression_flattening.h" -#include "glsl/nir/glsl_types.h" +#include "compiler/glsl_types.h" class ir_channel_expressions_visitor : public ir_hierarchical_visitor { public: diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 0c8ad76aa50..0970959bbd6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -40,7 +40,7 @@ #include 
"main/imports.h" #include "glsl/ir.h" #include "glsl/ir_rvalue_visitor.h" -#include "glsl/nir/glsl_types.h" +#include "compiler/glsl_types.h" #include "util/hash_table.h" static bool debug = false; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 25240ad65fa..aad512f4be6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -28,7 +28,7 @@ * in the GLSL IR or in the native code. */ #include "brw_fs.h" -#include "glsl/nir/glsl_types.h" +#include "compiler/glsl_types.h" using namespace brw; diff --git a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp index 4219d471def..eafbdf68ea0 100644 --- a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp +++ b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp @@ -31,7 +31,7 @@ * \author Chris Forbes */ -#include "glsl/nir/glsl_types.h" +#include "compiler/glsl_types.h" #include "glsl/ir.h" #include "glsl/ir_builder.h" diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index e63d0f1ec55..b2b1ee9ba44 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -40,7 +40,7 @@ #include "glsl/ir_optimization.h" #include "glsl/glsl_parser_extras.h" #include "glsl/glsl_symbol_table.h" -#include "glsl/nir/glsl_types.h" +#include "compiler/glsl_types.h" #include "program/ir_to_mesa.h" #include "program/program.h" #include "program/programopt.h" diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 47f80ce2001..47010badbc6 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -44,7 +44,7 @@ #include "main/uniforms.h" #include "main/enums.h" #include "ir_uniform.h" -#include "glsl_types.h" +#include "compiler/glsl_types.h" #include "program/program.h" /** diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h index 2f88b65043d..baf49a752b1 100644 --- a/src/mesa/main/uniforms.h +++ b/src/mesa/main/uniforms.h @@ -27,7 +27,7 @@ #define UNIFORMS_H #include "main/glheader.h" -#include "glsl/nir/glsl_types.h" +#include "compiler/glsl_types.h" #include "glsl/ir_uniform.h" #include "program/prog_parameter.h" diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 9cde28dfc0a..70c477ec4fd 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -42,7 +42,7 @@ #include "glsl/ir_optimization.h" #include "glsl/ir_uniform.h" #include "glsl/glsl_parser_extras.h" -#include "glsl/nir/glsl_types.h" +#include "compiler/glsl_types.h" #include "glsl/linker.h" #include "glsl/program.h" #include "program/hash_table.h" diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index 84e2504baba..9137d1858e9 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -24,7 +24,7 @@ */ #include "main/mtypes.h" -#include "glsl/nir/glsl_types.h" +#include "compiler/glsl_types.h" #include "glsl/ir.h" #include "glsl/ir_uniform.h" #include "glsl/ir_visitor.h" -- cgit v1.2.3 From a39a8fbbaa129f4e52f2a3ad2747182e9a74d910 Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 18 Jan 2016 12:54:03 +0200 Subject: nir: move to compiler/ Signed-off-by: Emil Velikov Acked-by: Matt Turner Acked-by: Jose Fonseca --- src/compiler/Android.gen.mk | 91 + src/compiler/Android.mk | 23 + src/compiler/Makefile.am | 78 + src/compiler/Makefile.sources | 71 + src/compiler/nir/.gitignore | 5 + 
src/compiler/nir/README | 118 ++ src/compiler/nir/glsl_to_nir.cpp | 2031 +++++++++++++++++++ src/compiler/nir/glsl_to_nir.h | 41 + src/compiler/nir/nir.c | 1665 +++++++++++++++ src/compiler/nir/nir.h | 2111 ++++++++++++++++++++ src/compiler/nir/nir_algebraic.py | 305 +++ src/compiler/nir/nir_array.h | 96 + src/compiler/nir/nir_builder.h | 364 ++++ src/compiler/nir/nir_builder_opcodes_h.py | 38 + src/compiler/nir/nir_clone.c | 659 ++++++ src/compiler/nir/nir_constant_expressions.h | 31 + src/compiler/nir/nir_constant_expressions.py | 336 ++++ src/compiler/nir/nir_control_flow.c | 808 ++++++++ src/compiler/nir/nir_control_flow.h | 162 ++ src/compiler/nir/nir_control_flow_private.h | 37 + src/compiler/nir/nir_dominance.c | 350 ++++ src/compiler/nir/nir_from_ssa.c | 805 ++++++++ src/compiler/nir/nir_gs_count_vertices.c | 93 + src/compiler/nir/nir_instr_set.c | 519 +++++ src/compiler/nir/nir_instr_set.h | 62 + src/compiler/nir/nir_intrinsics.c | 49 + src/compiler/nir/nir_intrinsics.h | 316 +++ src/compiler/nir/nir_liveness.c | 297 +++ src/compiler/nir/nir_lower_alu_to_scalar.c | 210 ++ src/compiler/nir/nir_lower_atomics.c | 166 ++ src/compiler/nir/nir_lower_clip.c | 339 ++++ src/compiler/nir/nir_lower_global_vars_to_local.c | 113 ++ src/compiler/nir/nir_lower_gs_intrinsics.c | 219 ++ src/compiler/nir/nir_lower_idiv.c | 151 ++ src/compiler/nir/nir_lower_io.c | 350 ++++ src/compiler/nir/nir_lower_load_const_to_scalar.c | 89 + src/compiler/nir/nir_lower_locals_to_regs.c | 396 ++++ .../nir/nir_lower_outputs_to_temporaries.c | 133 ++ src/compiler/nir/nir_lower_phis_to_scalar.c | 293 +++ src/compiler/nir/nir_lower_samplers.c | 187 ++ src/compiler/nir/nir_lower_system_values.c | 98 + src/compiler/nir/nir_lower_tex.c | 355 ++++ src/compiler/nir/nir_lower_to_source_mods.c | 196 ++ src/compiler/nir/nir_lower_two_sided_color.c | 212 ++ src/compiler/nir/nir_lower_var_copies.c | 190 ++ src/compiler/nir/nir_lower_vars_to_ssa.c | 973 +++++++++ src/compiler/nir/nir_lower_vec_to_movs.c | 310 +++ src/compiler/nir/nir_metadata.c | 90 + src/compiler/nir/nir_move_vec_src_uses_to_dest.c | 197 ++ src/compiler/nir/nir_normalize_cubemap_coords.c | 120 ++ src/compiler/nir/nir_opcodes.py | 668 +++++++ src/compiler/nir/nir_opcodes_c.py | 55 + src/compiler/nir/nir_opcodes_h.py | 47 + src/compiler/nir/nir_opt_algebraic.py | 285 +++ src/compiler/nir/nir_opt_constant_folding.c | 201 ++ src/compiler/nir/nir_opt_copy_propagate.c | 290 +++ src/compiler/nir/nir_opt_cse.c | 93 + src/compiler/nir/nir_opt_dce.c | 183 ++ src/compiler/nir/nir_opt_dead_cf.c | 358 ++++ src/compiler/nir/nir_opt_gcm.c | 494 +++++ src/compiler/nir/nir_opt_global_to_local.c | 102 + src/compiler/nir/nir_opt_peephole_select.c | 256 +++ src/compiler/nir/nir_opt_remove_phis.c | 130 ++ src/compiler/nir/nir_opt_undef.c | 104 + src/compiler/nir/nir_print.c | 1069 ++++++++++ src/compiler/nir/nir_remove_dead_variables.c | 141 ++ src/compiler/nir/nir_search.c | 379 ++++ src/compiler/nir/nir_search.h | 99 + src/compiler/nir/nir_split_var_copies.c | 285 +++ src/compiler/nir/nir_sweep.c | 173 ++ src/compiler/nir/nir_to_ssa.c | 536 +++++ src/compiler/nir/nir_validate.c | 1071 ++++++++++ src/compiler/nir/nir_vla.h | 54 + src/compiler/nir/nir_worklist.c | 144 ++ src/compiler/nir/nir_worklist.h | 91 + src/compiler/nir/tests/control_flow_tests.cpp | 148 ++ src/gallium/auxiliary/Makefile.am | 2 +- src/gallium/auxiliary/nir/tgsi_to_nir.c | 8 +- src/gallium/auxiliary/nir/tgsi_to_nir.h | 2 +- src/gallium/drivers/freedreno/Makefile.am | 4 +- 
src/gallium/drivers/freedreno/ir3/ir3_nir.h | 2 +- .../drivers/freedreno/ir3/ir3_nir_lower_if_else.c | 4 +- src/gallium/drivers/vc4/Makefile.am | 2 +- src/gallium/drivers/vc4/vc4_nir_lower_blend.c | 2 +- src/gallium/drivers/vc4/vc4_nir_lower_io.c | 2 +- src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c | 2 +- src/gallium/drivers/vc4/vc4_program.c | 4 +- src/gallium/drivers/vc4/vc4_qir.h | 2 +- src/gallium/targets/pipe-loader/Makefile.am | 2 +- src/gallium/targets/xa/Makefile.am | 2 +- src/glsl/Android.gen.mk | 54 - src/glsl/Makefile.am | 62 +- src/glsl/nir/.gitignore | 5 - src/glsl/nir/README | 118 -- src/glsl/nir/glsl_to_nir.cpp | 2031 ------------------- src/glsl/nir/glsl_to_nir.h | 41 - src/glsl/nir/nir.c | 1665 --------------- src/glsl/nir/nir.h | 2111 -------------------- src/glsl/nir/nir_algebraic.py | 305 --- src/glsl/nir/nir_array.h | 96 - src/glsl/nir/nir_builder.h | 364 ---- src/glsl/nir/nir_builder_opcodes_h.py | 38 - src/glsl/nir/nir_clone.c | 659 ------ src/glsl/nir/nir_constant_expressions.h | 31 - src/glsl/nir/nir_constant_expressions.py | 336 ---- src/glsl/nir/nir_control_flow.c | 808 -------- src/glsl/nir/nir_control_flow.h | 162 -- src/glsl/nir/nir_control_flow_private.h | 37 - src/glsl/nir/nir_dominance.c | 350 ---- src/glsl/nir/nir_from_ssa.c | 805 -------- src/glsl/nir/nir_gs_count_vertices.c | 93 - src/glsl/nir/nir_instr_set.c | 519 ----- src/glsl/nir/nir_instr_set.h | 62 - src/glsl/nir/nir_intrinsics.c | 49 - src/glsl/nir/nir_intrinsics.h | 316 --- src/glsl/nir/nir_liveness.c | 297 --- src/glsl/nir/nir_lower_alu_to_scalar.c | 210 -- src/glsl/nir/nir_lower_atomics.c | 166 -- src/glsl/nir/nir_lower_clip.c | 339 ---- src/glsl/nir/nir_lower_global_vars_to_local.c | 113 -- src/glsl/nir/nir_lower_gs_intrinsics.c | 219 -- src/glsl/nir/nir_lower_idiv.c | 151 -- src/glsl/nir/nir_lower_io.c | 350 ---- src/glsl/nir/nir_lower_load_const_to_scalar.c | 89 - src/glsl/nir/nir_lower_locals_to_regs.c | 396 ---- src/glsl/nir/nir_lower_outputs_to_temporaries.c | 133 -- src/glsl/nir/nir_lower_phis_to_scalar.c | 293 --- src/glsl/nir/nir_lower_samplers.c | 187 -- src/glsl/nir/nir_lower_system_values.c | 98 - src/glsl/nir/nir_lower_tex.c | 355 ---- src/glsl/nir/nir_lower_to_source_mods.c | 196 -- src/glsl/nir/nir_lower_two_sided_color.c | 212 -- src/glsl/nir/nir_lower_var_copies.c | 190 -- src/glsl/nir/nir_lower_vars_to_ssa.c | 973 --------- src/glsl/nir/nir_lower_vec_to_movs.c | 310 --- src/glsl/nir/nir_metadata.c | 90 - src/glsl/nir/nir_move_vec_src_uses_to_dest.c | 197 -- src/glsl/nir/nir_normalize_cubemap_coords.c | 120 -- src/glsl/nir/nir_opcodes.py | 668 ------- src/glsl/nir/nir_opcodes_c.py | 55 - src/glsl/nir/nir_opcodes_h.py | 47 - src/glsl/nir/nir_opt_algebraic.py | 285 --- src/glsl/nir/nir_opt_constant_folding.c | 201 -- src/glsl/nir/nir_opt_copy_propagate.c | 290 --- src/glsl/nir/nir_opt_cse.c | 93 - src/glsl/nir/nir_opt_dce.c | 183 -- src/glsl/nir/nir_opt_dead_cf.c | 358 ---- src/glsl/nir/nir_opt_gcm.c | 494 ----- src/glsl/nir/nir_opt_global_to_local.c | 102 - src/glsl/nir/nir_opt_peephole_select.c | 256 --- src/glsl/nir/nir_opt_remove_phis.c | 130 -- src/glsl/nir/nir_opt_undef.c | 104 - src/glsl/nir/nir_print.c | 1069 ---------- src/glsl/nir/nir_remove_dead_variables.c | 141 -- src/glsl/nir/nir_search.c | 379 ---- src/glsl/nir/nir_search.h | 99 - src/glsl/nir/nir_split_var_copies.c | 285 --- src/glsl/nir/nir_sweep.c | 173 -- src/glsl/nir/nir_to_ssa.c | 536 ----- src/glsl/nir/nir_validate.c | 1071 ---------- src/glsl/nir/nir_vla.h | 54 - src/glsl/nir/nir_worklist.c | 144 -- 
src/glsl/nir/nir_worklist.h | 91 - src/glsl/nir/tests/control_flow_tests.cpp | 148 -- src/mesa/Android.libmesa_dricore.mk | 2 +- src/mesa/Android.libmesa_glsl_utils.mk | 4 +- src/mesa/Android.libmesa_st_mesa.mk | 2 +- src/mesa/Makefile.sources | 3 +- src/mesa/drivers/dri/i965/Makefile.am | 3 +- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- src/mesa/drivers/dri/i965/brw_nir.c | 4 +- src/mesa/drivers/dri/i965/brw_nir.h | 2 +- src/mesa/drivers/dri/i965/brw_vec4.h | 2 +- src/mesa/drivers/dri/i965/intel_asm_annotation.c | 2 +- src/mesa/drivers/dri/i965/intel_screen.c | 2 +- src/mesa/program/Android.mk | 2 +- src/mesa/program/prog_to_nir.c | 4 +- 177 files changed, 24444 insertions(+), 24291 deletions(-) create mode 100644 src/compiler/Android.gen.mk create mode 100644 src/compiler/nir/.gitignore create mode 100644 src/compiler/nir/README create mode 100644 src/compiler/nir/glsl_to_nir.cpp create mode 100644 src/compiler/nir/glsl_to_nir.h create mode 100644 src/compiler/nir/nir.c create mode 100644 src/compiler/nir/nir.h create mode 100644 src/compiler/nir/nir_algebraic.py create mode 100644 src/compiler/nir/nir_array.h create mode 100644 src/compiler/nir/nir_builder.h create mode 100644 src/compiler/nir/nir_builder_opcodes_h.py create mode 100644 src/compiler/nir/nir_clone.c create mode 100644 src/compiler/nir/nir_constant_expressions.h create mode 100644 src/compiler/nir/nir_constant_expressions.py create mode 100644 src/compiler/nir/nir_control_flow.c create mode 100644 src/compiler/nir/nir_control_flow.h create mode 100644 src/compiler/nir/nir_control_flow_private.h create mode 100644 src/compiler/nir/nir_dominance.c create mode 100644 src/compiler/nir/nir_from_ssa.c create mode 100644 src/compiler/nir/nir_gs_count_vertices.c create mode 100644 src/compiler/nir/nir_instr_set.c create mode 100644 src/compiler/nir/nir_instr_set.h create mode 100644 src/compiler/nir/nir_intrinsics.c create mode 100644 src/compiler/nir/nir_intrinsics.h create mode 100644 src/compiler/nir/nir_liveness.c create mode 100644 src/compiler/nir/nir_lower_alu_to_scalar.c create mode 100644 src/compiler/nir/nir_lower_atomics.c create mode 100644 src/compiler/nir/nir_lower_clip.c create mode 100644 src/compiler/nir/nir_lower_global_vars_to_local.c create mode 100644 src/compiler/nir/nir_lower_gs_intrinsics.c create mode 100644 src/compiler/nir/nir_lower_idiv.c create mode 100644 src/compiler/nir/nir_lower_io.c create mode 100644 src/compiler/nir/nir_lower_load_const_to_scalar.c create mode 100644 src/compiler/nir/nir_lower_locals_to_regs.c create mode 100644 src/compiler/nir/nir_lower_outputs_to_temporaries.c create mode 100644 src/compiler/nir/nir_lower_phis_to_scalar.c create mode 100644 src/compiler/nir/nir_lower_samplers.c create mode 100644 src/compiler/nir/nir_lower_system_values.c create mode 100644 src/compiler/nir/nir_lower_tex.c create mode 100644 src/compiler/nir/nir_lower_to_source_mods.c create mode 100644 src/compiler/nir/nir_lower_two_sided_color.c create mode 100644 src/compiler/nir/nir_lower_var_copies.c create mode 100644 src/compiler/nir/nir_lower_vars_to_ssa.c create mode 100644 src/compiler/nir/nir_lower_vec_to_movs.c create mode 100644 src/compiler/nir/nir_metadata.c create mode 100644 src/compiler/nir/nir_move_vec_src_uses_to_dest.c create mode 100644 src/compiler/nir/nir_normalize_cubemap_coords.c create mode 100644 src/compiler/nir/nir_opcodes.py create mode 100644 src/compiler/nir/nir_opcodes_c.py create mode 100644 src/compiler/nir/nir_opcodes_h.py create mode 100644 
src/compiler/nir/nir_opt_algebraic.py create mode 100644 src/compiler/nir/nir_opt_constant_folding.c create mode 100644 src/compiler/nir/nir_opt_copy_propagate.c create mode 100644 src/compiler/nir/nir_opt_cse.c create mode 100644 src/compiler/nir/nir_opt_dce.c create mode 100644 src/compiler/nir/nir_opt_dead_cf.c create mode 100644 src/compiler/nir/nir_opt_gcm.c create mode 100644 src/compiler/nir/nir_opt_global_to_local.c create mode 100644 src/compiler/nir/nir_opt_peephole_select.c create mode 100644 src/compiler/nir/nir_opt_remove_phis.c create mode 100644 src/compiler/nir/nir_opt_undef.c create mode 100644 src/compiler/nir/nir_print.c create mode 100644 src/compiler/nir/nir_remove_dead_variables.c create mode 100644 src/compiler/nir/nir_search.c create mode 100644 src/compiler/nir/nir_search.h create mode 100644 src/compiler/nir/nir_split_var_copies.c create mode 100644 src/compiler/nir/nir_sweep.c create mode 100644 src/compiler/nir/nir_to_ssa.c create mode 100644 src/compiler/nir/nir_validate.c create mode 100644 src/compiler/nir/nir_vla.h create mode 100644 src/compiler/nir/nir_worklist.c create mode 100644 src/compiler/nir/nir_worklist.h create mode 100644 src/compiler/nir/tests/control_flow_tests.cpp delete mode 100644 src/glsl/nir/.gitignore delete mode 100644 src/glsl/nir/README delete mode 100644 src/glsl/nir/glsl_to_nir.cpp delete mode 100644 src/glsl/nir/glsl_to_nir.h delete mode 100644 src/glsl/nir/nir.c delete mode 100644 src/glsl/nir/nir.h delete mode 100644 src/glsl/nir/nir_algebraic.py delete mode 100644 src/glsl/nir/nir_array.h delete mode 100644 src/glsl/nir/nir_builder.h delete mode 100644 src/glsl/nir/nir_builder_opcodes_h.py delete mode 100644 src/glsl/nir/nir_clone.c delete mode 100644 src/glsl/nir/nir_constant_expressions.h delete mode 100644 src/glsl/nir/nir_constant_expressions.py delete mode 100644 src/glsl/nir/nir_control_flow.c delete mode 100644 src/glsl/nir/nir_control_flow.h delete mode 100644 src/glsl/nir/nir_control_flow_private.h delete mode 100644 src/glsl/nir/nir_dominance.c delete mode 100644 src/glsl/nir/nir_from_ssa.c delete mode 100644 src/glsl/nir/nir_gs_count_vertices.c delete mode 100644 src/glsl/nir/nir_instr_set.c delete mode 100644 src/glsl/nir/nir_instr_set.h delete mode 100644 src/glsl/nir/nir_intrinsics.c delete mode 100644 src/glsl/nir/nir_intrinsics.h delete mode 100644 src/glsl/nir/nir_liveness.c delete mode 100644 src/glsl/nir/nir_lower_alu_to_scalar.c delete mode 100644 src/glsl/nir/nir_lower_atomics.c delete mode 100644 src/glsl/nir/nir_lower_clip.c delete mode 100644 src/glsl/nir/nir_lower_global_vars_to_local.c delete mode 100644 src/glsl/nir/nir_lower_gs_intrinsics.c delete mode 100644 src/glsl/nir/nir_lower_idiv.c delete mode 100644 src/glsl/nir/nir_lower_io.c delete mode 100644 src/glsl/nir/nir_lower_load_const_to_scalar.c delete mode 100644 src/glsl/nir/nir_lower_locals_to_regs.c delete mode 100644 src/glsl/nir/nir_lower_outputs_to_temporaries.c delete mode 100644 src/glsl/nir/nir_lower_phis_to_scalar.c delete mode 100644 src/glsl/nir/nir_lower_samplers.c delete mode 100644 src/glsl/nir/nir_lower_system_values.c delete mode 100644 src/glsl/nir/nir_lower_tex.c delete mode 100644 src/glsl/nir/nir_lower_to_source_mods.c delete mode 100644 src/glsl/nir/nir_lower_two_sided_color.c delete mode 100644 src/glsl/nir/nir_lower_var_copies.c delete mode 100644 src/glsl/nir/nir_lower_vars_to_ssa.c delete mode 100644 src/glsl/nir/nir_lower_vec_to_movs.c delete mode 100644 src/glsl/nir/nir_metadata.c delete mode 100644 
src/glsl/nir/nir_move_vec_src_uses_to_dest.c delete mode 100644 src/glsl/nir/nir_normalize_cubemap_coords.c delete mode 100644 src/glsl/nir/nir_opcodes.py delete mode 100644 src/glsl/nir/nir_opcodes_c.py delete mode 100644 src/glsl/nir/nir_opcodes_h.py delete mode 100644 src/glsl/nir/nir_opt_algebraic.py delete mode 100644 src/glsl/nir/nir_opt_constant_folding.c delete mode 100644 src/glsl/nir/nir_opt_copy_propagate.c delete mode 100644 src/glsl/nir/nir_opt_cse.c delete mode 100644 src/glsl/nir/nir_opt_dce.c delete mode 100644 src/glsl/nir/nir_opt_dead_cf.c delete mode 100644 src/glsl/nir/nir_opt_gcm.c delete mode 100644 src/glsl/nir/nir_opt_global_to_local.c delete mode 100644 src/glsl/nir/nir_opt_peephole_select.c delete mode 100644 src/glsl/nir/nir_opt_remove_phis.c delete mode 100644 src/glsl/nir/nir_opt_undef.c delete mode 100644 src/glsl/nir/nir_print.c delete mode 100644 src/glsl/nir/nir_remove_dead_variables.c delete mode 100644 src/glsl/nir/nir_search.c delete mode 100644 src/glsl/nir/nir_search.h delete mode 100644 src/glsl/nir/nir_split_var_copies.c delete mode 100644 src/glsl/nir/nir_sweep.c delete mode 100644 src/glsl/nir/nir_to_ssa.c delete mode 100644 src/glsl/nir/nir_validate.c delete mode 100644 src/glsl/nir/nir_vla.h delete mode 100644 src/glsl/nir/nir_worklist.c delete mode 100644 src/glsl/nir/nir_worklist.h delete mode 100644 src/glsl/nir/tests/control_flow_tests.cpp (limited to 'src/mesa') diff --git a/src/compiler/Android.gen.mk b/src/compiler/Android.gen.mk new file mode 100644 index 00000000000..fcd5f94d459 --- /dev/null +++ b/src/compiler/Android.gen.mk @@ -0,0 +1,91 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# included by glsl Android.mk for source generation + +ifeq ($(LOCAL_MODULE_CLASS),) +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +endif + +intermediates := $(call local-generated-sources-dir) + +LOCAL_SRC_FILES := $(LOCAL_SRC_FILES) + +LOCAL_C_INCLUDES += \ + $(intermediates)/nir \ + $(MESA_TOP)/src/compiler/nir + +LOCAL_EXPORT_C_INCLUDE_DIRS += \ + $(intermediates)/nir \ + $(MESA_TOP)/src/compiler/nir + +LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ + $(NIR_GENERATED_FILES)) + + +nir_builder_opcodes_gen := $(LOCAL_PATH)/nir/nir_builder_opcodes_h.py +nir_builder_opcodes_deps := \ + $(LOCAL_PATH)/nir/nir_opcodes.py \ + $(LOCAL_PATH)/nir/nir_builder_opcodes_h.py + +$(intermediates)/nir/nir_builder_opcodes.h: $(nir_builder_opcodes_deps) + @mkdir -p $(dir $@) + $(hide) $(MESA_PYTHON2) $(nir_builder_opcodes_gen) $< > $@ + +nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py +nir_constant_expressions_deps := \ + $(LOCAL_PATH)/nir/nir_opcodes.py \ + $(LOCAL_PATH)/nir/nir_constant_expressions.py + +$(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps) + @mkdir -p $(dir $@) + $(hide) $(MESA_PYTHON2) $(nir_constant_expressions_gen) $< > $@ + +nir_opcodes_h_gen := $(LOCAL_PATH)/nir/nir_opcodes_h.py +nir_opcodes_h_deps := \ + $(LOCAL_PATH)/nir/nir_opcodes.py \ + $(LOCAL_PATH)/nir/nir_opcodes_h.py + +$(intermediates)/nir/nir_opcodes.h: $(nir_opcodes_h_deps) + @mkdir -p $(dir $@) + $(hide) $(MESA_PYTHON2) $(nir_opcodes_h_gen) $< > $@ + +$(LOCAL_PATH)/nir/nir.h: $(intermediates)/nir/nir_opcodes.h + +nir_opcodes_c_gen := $(LOCAL_PATH)/nir/nir_opcodes_c.py +nir_opcodes_c_deps := \ + $(LOCAL_PATH)/nir/nir_opcodes.py \ + $(LOCAL_PATH)/nir/nir_opcodes_c.py + +$(intermediates)/nir/nir_opcodes.c: $(nir_opcodes_c_deps) + @mkdir -p $(dir $@) + $(hide) $(MESA_PYTHON2) $(nir_opcodes_c_gen) $< > $@ + +nir_opt_algebraic_gen := $(LOCAL_PATH)/nir/nir_opt_algebraic.py +nir_opt_algebraic_deps := \ + $(LOCAL_PATH)/nir/nir_opt_algebraic.py \ + $(LOCAL_PATH)/nir/nir_algebraic.py + +$(intermediates)/nir/nir_opt_algebraic.c: $(nir_opt_algebraic_deps) + @mkdir -p $(dir $@) + $(hide) $(MESA_PYTHON2) $(nir_opt_algebraic_gen) $< > $@ diff --git a/src/compiler/Android.mk b/src/compiler/Android.mk index 8ebd49778ef..888780ba3fb 100644 --- a/src/compiler/Android.mk +++ b/src/compiler/Android.mk @@ -42,3 +42,26 @@ LOCAL_MODULE := libmesa_compiler include $(MESA_COMMON_MK) include $(BUILD_STATIC_LIBRARY) + +# --------------------------------------- +# Build libmesa_nir +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(NIR_FILES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary + +LOCAL_STATIC_LIBRARIES := libmesa_compiler + +LOCAL_MODULE := libmesa_nir + +include $(LOCAL_PATH)/Android.gen.mk +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am index fd1dd4b6d30..1e3778df8d5 100644 --- a/src/compiler/Makefile.am +++ b/src/compiler/Makefile.am @@ -29,6 +29,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/mesa/ \ -I$(top_srcdir)/src/gallium/include \ -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/gtest/include \ $(DEFINES) AM_CFLAGS = \ @@ -43,4 +44,81 @@ noinst_LTLIBRARIES = libcompiler.la libcompiler_la_SOURCES = $(LIBCOMPILER_FILES) +check_PROGRAMS = +TESTS = +BUILT_SOURCES = +CLEANFILES = EXTRA_DIST = SConscript + + +noinst_LTLIBRARIES += 
nir/libnir.la + +nir_libnir_la_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_srcdir)/src/compiler/nir + +nir_libnir_la_LIBADD = \ + libcompiler.la + +nir_libnir_la_SOURCES = \ + $(NIR_FILES) \ + $(NIR_GENERATED_FILES) + +PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) + +nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py + $(MKDIR_GEN) + $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ || ($(RM) $@; false) + +nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py + $(MKDIR_GEN) + $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@ || ($(RM) $@; false) + +nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py + $(MKDIR_GEN) + $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@ || ($(RM) $@; false) + +nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py + $(MKDIR_GEN) + $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@ || ($(RM) $@; false) + +nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py + $(MKDIR_GEN) + $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false) + + +check_PROGRAMS += nir/tests/control_flow_tests + +nir_tests_control_flow_tests_CPPFLAGS = \ + $(AM_CPPFLAGS) \ + -I$(top_builddir)/src/compiler/nir \ + -I$(top_srcdir)/src/compiler/nir + +nir_tests_control_flow_tests_SOURCES = \ + nir/tests/control_flow_tests.cpp +nir_tests_control_flow_tests_CFLAGS = \ + $(PTHREAD_CFLAGS) +nir_tests_control_flow_tests_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/compiler/nir/libnir.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(PTHREAD_LIBS) + + +TESTS += nir/tests/control_flow_tests + + +BUILT_SOURCES += $(NIR_GENERATED_FILES) +CLEANFILES += $(NIR_GENERATED_FILES) + +EXTRA_DIST += \ + nir/nir_algebraic.py \ + nir/nir_builder_opcodes_h.py \ + nir/nir_constant_expressions.py \ + nir/nir_opcodes.py \ + nir/nir_opcodes_c.py \ + nir/nir_opcodes_h.py \ + nir/nir_opt_algebraic.py \ + nir/tests \ + nir/Makefile.sources diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index e1228cafa21..fe12e419afb 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -6,3 +6,74 @@ LIBCOMPILER_FILES = \ nir_types.h \ shader_enums.c \ shader_enums.h + +NIR_GENERATED_FILES = \ + nir/nir_builder_opcodes.h \ + nir/nir_constant_expressions.c \ + nir/nir_opcodes.c \ + nir/nir_opcodes.h \ + nir/nir_opt_algebraic.c + +NIR_FILES = \ + nir/glsl_to_nir.cpp \ + nir/glsl_to_nir.h \ + nir/nir.c \ + nir/nir.h \ + nir/nir_array.h \ + nir/nir_builder.h \ + nir/nir_clone.c \ + nir/nir_constant_expressions.h \ + nir/nir_control_flow.c \ + nir/nir_control_flow.h \ + nir/nir_control_flow_private.h \ + nir/nir_dominance.c \ + nir/nir_from_ssa.c \ + nir/nir_gs_count_vertices.c \ + nir/nir_intrinsics.c \ + nir/nir_intrinsics.h \ + nir/nir_instr_set.c \ + nir/nir_instr_set.h \ + nir/nir_liveness.c \ + nir/nir_lower_alu_to_scalar.c \ + nir/nir_lower_atomics.c \ + nir/nir_lower_clip.c \ + nir/nir_lower_global_vars_to_local.c \ + nir/nir_lower_gs_intrinsics.c \ + nir/nir_lower_load_const_to_scalar.c \ + nir/nir_lower_locals_to_regs.c \ + nir/nir_lower_idiv.c \ + nir/nir_lower_io.c \ + nir/nir_lower_outputs_to_temporaries.c \ + nir/nir_lower_phis_to_scalar.c \ + nir/nir_lower_samplers.c \ + nir/nir_lower_system_values.c \ + nir/nir_lower_tex.c \ + nir/nir_lower_to_source_mods.c \ + nir/nir_lower_two_sided_color.c \ + nir/nir_lower_vars_to_ssa.c \ + nir/nir_lower_var_copies.c \ + nir/nir_lower_vec_to_movs.c 
\ + nir/nir_metadata.c \ + nir/nir_move_vec_src_uses_to_dest.c \ + nir/nir_normalize_cubemap_coords.c \ + nir/nir_opt_constant_folding.c \ + nir/nir_opt_copy_propagate.c \ + nir/nir_opt_cse.c \ + nir/nir_opt_dce.c \ + nir/nir_opt_dead_cf.c \ + nir/nir_opt_gcm.c \ + nir/nir_opt_global_to_local.c \ + nir/nir_opt_peephole_select.c \ + nir/nir_opt_remove_phis.c \ + nir/nir_opt_undef.c \ + nir/nir_print.c \ + nir/nir_remove_dead_variables.c \ + nir/nir_search.c \ + nir/nir_search.h \ + nir/nir_split_var_copies.c \ + nir/nir_sweep.c \ + nir/nir_to_ssa.c \ + nir/nir_validate.c \ + nir/nir_vla.h \ + nir/nir_worklist.c \ + nir/nir_worklist.h diff --git a/src/compiler/nir/.gitignore b/src/compiler/nir/.gitignore new file mode 100644 index 00000000000..64828eba6d3 --- /dev/null +++ b/src/compiler/nir/.gitignore @@ -0,0 +1,5 @@ +nir_builder_opcodes.h +nir_opt_algebraic.c +nir_opcodes.c +nir_opcodes.h +nir_constant_expressions.c diff --git a/src/compiler/nir/README b/src/compiler/nir/README new file mode 100644 index 00000000000..2c81db9db61 --- /dev/null +++ b/src/compiler/nir/README @@ -0,0 +1,118 @@ +New IR, or NIR, is an IR for Mesa intended to sit below GLSL IR and Mesa IR. +Its design inherits from the various IR's that Mesa has used in the past, as +well as Direct3D assembly, and it includes a few new ideas as well. It is a +flat (in terms of using instructions instead of expressions), typeless IR, +similar to TGSI and Mesa IR. It also supports SSA (although it doesn't require +it). + +Variables +========= + +NIR includes support for source-level GLSL variables through a structure mostly +copied from GLSL IR. These will be used for linking and conversion from GLSL IR +(and later, from an AST), but for the most part, they will be lowered to +registers (see below) and loads/stores. + +Registers +========= + +Registers are light-weight; they consist of a structure that only contains its +size, its index for liveness analysis, and an optional name for debugging. In +addition, registers can be local to a function or global to the entire shader; +the latter will be used in ARB_shader_subroutine for passing parameters and +getting return values from subroutines. Registers can also be an array, in which +case they can be accessed indirectly. Each ALU instruction (add, subtract, etc.) +works directly with registers or SSA values (see below). + +SSA +======== + +Everywhere a register can be loaded/stored, an SSA value can be used instead. +The only exception is that arrays/indirect addressing are not supported with +SSA; although research has been done on extensions of SSA to arrays before, it's +usually for the purpose of parallelization (which we're not interested in), and +adds some overhead in the form of adding copies or extra arrays (which is much +more expensive than introducing copies between non-array registers). SSA uses +point directly to their corresponding definition, which in turn points to the +instruction it is part of. This creates an implicit use-def chain and avoids the +need for an external structure for each SSA register. + +Functions +========= + +Support for function calls is mostly similar to GLSL IR. Each shader contains a +list of functions, and each function has a list of overloads. Each overload +contains a list of parameters, and may contain an implementation which specifies +the variables that correspond to the parameters and return value. 
Inlining a +function, assuming it has a single return point, is as simple as copying its +instructions, registers, and local variables into the target function and then +inserting copies to and from the new parameters as appropriate. After functions +are inlined and any non-subroutine functions are deleted, parameters and return +variables will be converted to global variables and then global registers. We +don't do this lowering earlier (i.e. the fortranizer idea) for a few reasons: + +- If we want to do optimizations before link time, we need to have the function +signature available during link-time. + +- If we do any inlining before link time, then we might wind up with the +inlined function and the non-inlined function using the same global +variables/registers which would preclude optimization. + +Intrinsics +========= + +Any operation (other than function calls and textures) which touches a variable +or is not referentially transparent is represented by an intrinsic. Intrinsics +are similar to the idea of a "builtin function," i.e. a function declaration +whose implementation is provided by the backend, except they are more powerful +in the following ways: + +- They can also load and store registers when appropriate, which limits the +number of variables needed in later stages of the IR while obviating the need +for a separate load/store variable instruction. + +- Intrinsics can be marked as side-effect free, which permits them to be +treated like any other instruction when it comes to optimizations. This allows +load intrinsics to be represented as intrinsics while still being optimized +away by dead code elimination, common subexpression elimination, etc. + +Intrinsics are used for: + +- Atomic operations +- Memory barriers +- Subroutine calls +- Geometry shader emitVertex and endPrimitive +- Loading and storing variables (before lowering) +- Loading and storing uniforms, shader inputs and outputs, etc (after lowering) +- Copying variables (cases where in GLSL the destination is a structure or +array) +- The kitchen sink +- ... + +Textures +========= + +Unfortunately, there are far too many texture operations to represent each one +of them with an intrinsic, so there's a special texture instruction similar to +the GLSL IR one. The biggest difference is that, while the texture instruction +has a sampler dereference field used just like in GLSL IR, this gets lowered to +a texture unit index (with a possible indirect offset) while the type +information of the original sampler is kept around for backends. Also, all the +non-constant sources are stored in a single array to make it easier for +optimization passes to iterate over all the sources. + +Control Flow +========= + +Like in GLSL IR, control flow consists of a tree of "control flow nodes", which +include if statements and loops, and jump instructions (break, continue, and +return). Unlike GLSL IR, though, the leaves of the tree aren't statements but +basic blocks. Each basic block also keeps track of its successors and +predecessors, and function implementations keep track of the beginning basic +block (the first basic block of the function) and the ending basic block (a fake +basic block that every return statement points to). Together, these elements +make up the control flow graph, in this case a redundant piece of information on +top of the control flow tree that will be used by almost all the optimizations. 
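As a minimal sketch of the recursive walk this tree structure implies: most passes recurse over each node's child lists, treating basic blocks as the leaves. The function name visit_cf_list below is hypothetical and the snippet assumes #include "nir.h", but nir_cf_node, nir_cf_node_as_if, nir_cf_node_as_loop, and foreach_list_typed are the actual helpers the in-tree passes use:

/* Hypothetical pass skeleton (assumes #include "nir.h"): recurse over a
 * list of control flow nodes, treating basic blocks as the leaves.
 */
static void
visit_cf_list(struct exec_list *cf_list)
{
   foreach_list_typed(nir_cf_node, node, node, cf_list) {
      switch (node->type) {
      case nir_cf_node_block:
         /* Leaf: a basic block; per-instruction work happens here. */
         break;
      case nir_cf_node_if:
         /* An if node carries two child lists of control flow nodes. */
         visit_cf_list(&nir_cf_node_as_if(node)->then_list);
         visit_cf_list(&nir_cf_node_as_if(node)->else_list);
         break;
      case nir_cf_node_loop:
         /* A loop node carries a single body list. */
         visit_cf_list(&nir_cf_node_as_loop(node)->body);
         break;
      default:
         break;
      }
   }
}

A pass that mutates control flow would additionally go through the helper functions described next, so the successor/predecessor links of the surrounding blocks stay consistent.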
+There are helper functions to add and remove control flow nodes that also update +the control flow graph, and so usually it doesn't need to be touched by passes +that modify control flow nodes. diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp new file mode 100644 index 00000000000..33b1f5c7b9e --- /dev/null +++ b/src/compiler/nir/glsl_to_nir.cpp @@ -0,0 +1,2031 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "glsl_to_nir.h" +#include "nir_control_flow.h" +#include "nir_builder.h" +#include "glsl/ir_visitor.h" +#include "glsl/ir_hierarchical_visitor.h" +#include "glsl/ir.h" +#include "main/imports.h" + +/* + * pass to lower GLSL IR to NIR + * + * This will lower variable dereferences to loads/stores of corresponding + * variables in NIR - the variables will be converted to registers in a later + * pass. 
+ */ + +namespace { + +class nir_visitor : public ir_visitor +{ +public: + nir_visitor(nir_shader *shader); + ~nir_visitor(); + + virtual void visit(ir_variable *); + virtual void visit(ir_function *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_loop *); + virtual void visit(ir_if *); + virtual void visit(ir_discard *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_return *); + virtual void visit(ir_call *); + virtual void visit(ir_assignment *); + virtual void visit(ir_emit_vertex *); + virtual void visit(ir_end_primitive *); + virtual void visit(ir_expression *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_texture *); + virtual void visit(ir_constant *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_barrier *); + + void create_function(ir_function_signature *ir); + +private: + void add_instr(nir_instr *instr, unsigned num_components); + nir_ssa_def *evaluate_rvalue(ir_rvalue *ir); + + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs); + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1); + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1, + nir_ssa_def *src2); + nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1, + nir_ssa_def *src2, nir_ssa_def *src3); + + bool supports_ints; + + nir_shader *shader; + nir_function_impl *impl; + nir_builder b; + nir_ssa_def *result; /* result of the expression tree last visited */ + + nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir); + + /* the head of the dereference chain we're creating */ + nir_deref_var *deref_head; + /* the tail of the dereference chain we're creating */ + nir_deref *deref_tail; + + nir_variable *var; /* variable created by ir_variable visitor */ + + /* whether the IR we're operating on is per-function or global */ + bool is_global; + + /* map of ir_variable -> nir_variable */ + struct hash_table *var_table; + + /* map of ir_function_signature -> nir_function_overload */ + struct hash_table *overload_table; +}; + +/* + * This visitor runs before the main visitor, calling create_function() for + * each function so that the main visitor can resolve forward references in + * calls. 
+ */ + +class nir_function_visitor : public ir_hierarchical_visitor +{ +public: + nir_function_visitor(nir_visitor *v) : visitor(v) + { + } + virtual ir_visitor_status visit_enter(ir_function *); + +private: + nir_visitor *visitor; +}; + +}; /* end of anonymous namespace */ + +nir_shader * +glsl_to_nir(const struct gl_shader_program *shader_prog, + gl_shader_stage stage, + const nir_shader_compiler_options *options) +{ + struct gl_shader *sh = shader_prog->_LinkedShaders[stage]; + + nir_shader *shader = nir_shader_create(NULL, stage, options); + + nir_visitor v1(shader); + nir_function_visitor v2(&v1); + v2.run(sh->ir); + visit_exec_list(sh->ir, &v1); + + nir_lower_outputs_to_temporaries(shader); + + shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); + if (shader_prog->Label) + shader->info.label = ralloc_strdup(shader, shader_prog->Label); + shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed); + shader->info.num_ubos = sh->NumUniformBlocks; + shader->info.num_abos = shader_prog->NumAtomicBuffers; + shader->info.num_ssbos = sh->NumShaderStorageBlocks; + shader->info.num_images = sh->NumImages; + shader->info.inputs_read = sh->Program->InputsRead; + shader->info.outputs_written = sh->Program->OutputsWritten; + shader->info.patch_inputs_read = sh->Program->PatchInputsRead; + shader->info.patch_outputs_written = sh->Program->PatchOutputsWritten; + shader->info.system_values_read = sh->Program->SystemValuesRead; + shader->info.uses_texture_gather = sh->Program->UsesGather; + shader->info.uses_clip_distance_out = + sh->Program->ClipDistanceArraySize != 0; + shader->info.separate_shader = shader_prog->SeparateShader; + shader->info.has_transform_feedback_varyings = + shader_prog->TransformFeedback.NumVarying > 0; + + switch (stage) { + case MESA_SHADER_TESS_CTRL: + shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut; + break; + + case MESA_SHADER_GEOMETRY: + shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn; + shader->info.gs.output_primitive = sh->Geom.OutputType; + shader->info.gs.vertices_out = sh->Geom.VerticesOut; + shader->info.gs.invocations = sh->Geom.Invocations; + shader->info.gs.uses_end_primitive = shader_prog->Geom.UsesEndPrimitive; + shader->info.gs.uses_streams = shader_prog->Geom.UsesStreams; + break; + + case MESA_SHADER_FRAGMENT: { + struct gl_fragment_program *fp = + (struct gl_fragment_program *)sh->Program; + + shader->info.fs.uses_discard = fp->UsesKill; + shader->info.fs.early_fragment_tests = sh->EarlyFragmentTests; + shader->info.fs.depth_layout = fp->FragDepthLayout; + break; + } + + case MESA_SHADER_COMPUTE: { + struct gl_compute_program *cp = (struct gl_compute_program *)sh->Program; + shader->info.cs.local_size[0] = cp->LocalSize[0]; + shader->info.cs.local_size[1] = cp->LocalSize[1]; + shader->info.cs.local_size[2] = cp->LocalSize[2]; + break; + } + + default: + break; /* No stage-specific info */ + } + + return shader; +} + +nir_visitor::nir_visitor(nir_shader *shader) +{ + this->supports_ints = shader->options->native_integers; + this->shader = shader; + this->is_global = true; + this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + +nir_visitor::~nir_visitor() +{ + _mesa_hash_table_destroy(this->var_table, NULL); + _mesa_hash_table_destroy(this->overload_table, NULL); +} + +nir_deref_var * +nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir) 
+{ + ir->accept(this); + ralloc_steal(mem_ctx, this->deref_head); + return this->deref_head; +} + +static nir_constant * +constant_copy(ir_constant *ir, void *mem_ctx) +{ + if (ir == NULL) + return NULL; + + nir_constant *ret = ralloc(mem_ctx, nir_constant); + + unsigned total_elems = ir->type->components(); + unsigned i; + + ret->num_elements = 0; + switch (ir->type->base_type) { + case GLSL_TYPE_UINT: + for (i = 0; i < total_elems; i++) + ret->value.u[i] = ir->value.u[i]; + break; + + case GLSL_TYPE_INT: + for (i = 0; i < total_elems; i++) + ret->value.i[i] = ir->value.i[i]; + break; + + case GLSL_TYPE_FLOAT: + for (i = 0; i < total_elems; i++) + ret->value.f[i] = ir->value.f[i]; + break; + + case GLSL_TYPE_BOOL: + for (i = 0; i < total_elems; i++) + ret->value.b[i] = ir->value.b[i]; + break; + + case GLSL_TYPE_STRUCT: + ret->elements = ralloc_array(mem_ctx, nir_constant *, + ir->type->length); + ret->num_elements = ir->type->length; + + i = 0; + foreach_in_list(ir_constant, field, &ir->components) { + ret->elements[i] = constant_copy(field, mem_ctx); + i++; + } + break; + + case GLSL_TYPE_ARRAY: + ret->elements = ralloc_array(mem_ctx, nir_constant *, + ir->type->length); + ret->num_elements = ir->type->length; + + for (i = 0; i < ir->type->length; i++) + ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx); + break; + + default: + unreachable("not reached"); + } + + return ret; +} + +void +nir_visitor::visit(ir_variable *ir) +{ + nir_variable *var = ralloc(shader, nir_variable); + var->type = ir->type; + var->name = ralloc_strdup(var, ir->name); + + var->data.read_only = ir->data.read_only; + var->data.centroid = ir->data.centroid; + var->data.sample = ir->data.sample; + var->data.patch = ir->data.patch; + var->data.invariant = ir->data.invariant; + var->data.location = ir->data.location; + + switch(ir->data.mode) { + case ir_var_auto: + case ir_var_temporary: + if (is_global) + var->data.mode = nir_var_global; + else + var->data.mode = nir_var_local; + break; + + case ir_var_function_in: + case ir_var_function_out: + case ir_var_function_inout: + case ir_var_const_in: + var->data.mode = nir_var_local; + break; + + case ir_var_shader_in: + if (shader->stage == MESA_SHADER_FRAGMENT && + ir->data.location == VARYING_SLOT_FACE) { + /* For whatever reason, GLSL IR makes gl_FrontFacing an input */ + var->data.location = SYSTEM_VALUE_FRONT_FACE; + var->data.mode = nir_var_system_value; + } else if (shader->stage == MESA_SHADER_GEOMETRY && + ir->data.location == VARYING_SLOT_PRIMITIVE_ID) { + /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */ + var->data.location = SYSTEM_VALUE_PRIMITIVE_ID; + var->data.mode = nir_var_system_value; + } else { + var->data.mode = nir_var_shader_in; + } + break; + + case ir_var_shader_out: + var->data.mode = nir_var_shader_out; + break; + + case ir_var_uniform: + var->data.mode = nir_var_uniform; + break; + + case ir_var_shader_storage: + var->data.mode = nir_var_shader_storage; + break; + + case ir_var_system_value: + var->data.mode = nir_var_system_value; + break; + + default: + unreachable("not reached"); + } + + var->data.interpolation = ir->data.interpolation; + var->data.origin_upper_left = ir->data.origin_upper_left; + var->data.pixel_center_integer = ir->data.pixel_center_integer; + var->data.explicit_location = ir->data.explicit_location; + var->data.explicit_index = ir->data.explicit_index; + var->data.explicit_binding = ir->data.explicit_binding; + var->data.has_initializer = ir->data.has_initializer; + 
var->data.location_frac = ir->data.location_frac; + var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array; + var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray; + + switch (ir->data.depth_layout) { + case ir_depth_layout_none: + var->data.depth_layout = nir_depth_layout_none; + break; + case ir_depth_layout_any: + var->data.depth_layout = nir_depth_layout_any; + break; + case ir_depth_layout_greater: + var->data.depth_layout = nir_depth_layout_greater; + break; + case ir_depth_layout_less: + var->data.depth_layout = nir_depth_layout_less; + break; + case ir_depth_layout_unchanged: + var->data.depth_layout = nir_depth_layout_unchanged; + break; + default: + unreachable("not reached"); + } + + var->data.index = ir->data.index; + var->data.binding = ir->data.binding; + var->data.offset = ir->data.offset; + var->data.image.read_only = ir->data.image_read_only; + var->data.image.write_only = ir->data.image_write_only; + var->data.image.coherent = ir->data.image_coherent; + var->data.image._volatile = ir->data.image_volatile; + var->data.image.restrict_flag = ir->data.image_restrict; + var->data.image.format = ir->data.image_format; + var->data.max_array_access = ir->data.max_array_access; + + var->num_state_slots = ir->get_num_state_slots(); + if (var->num_state_slots > 0) { + var->state_slots = ralloc_array(var, nir_state_slot, + var->num_state_slots); + + ir_state_slot *state_slots = ir->get_state_slots(); + for (unsigned i = 0; i < var->num_state_slots; i++) { + for (unsigned j = 0; j < 5; j++) + var->state_slots[i].tokens[j] = state_slots[i].tokens[j]; + var->state_slots[i].swizzle = state_slots[i].swizzle; + } + } else { + var->state_slots = NULL; + } + + var->constant_initializer = constant_copy(ir->constant_initializer, var); + + var->interface_type = ir->get_interface_type(); + + if (var->data.mode == nir_var_local) + nir_function_impl_add_variable(impl, var); + else + nir_shader_add_variable(shader, var); + + _mesa_hash_table_insert(var_table, ir, var); + this->var = var; +} + +ir_visitor_status +nir_function_visitor::visit_enter(ir_function *ir) +{ + foreach_in_list(ir_function_signature, sig, &ir->signatures) { + visitor->create_function(sig); + } + return visit_continue_with_parent; +} + +void +nir_visitor::create_function(ir_function_signature *ir) +{ + if (ir->is_intrinsic) + return; + + nir_function *func = nir_function_create(shader, ir->function_name()); + + unsigned num_params = ir->parameters.length(); + func->num_params = num_params; + func->params = ralloc_array(shader, nir_parameter, num_params); + + unsigned i = 0; + foreach_in_list(ir_variable, param, &ir->parameters) { + switch (param->data.mode) { + case ir_var_function_in: + func->params[i].param_type = nir_parameter_in; + break; + + case ir_var_function_out: + func->params[i].param_type = nir_parameter_out; + break; + + case ir_var_function_inout: + func->params[i].param_type = nir_parameter_inout; + break; + + default: + unreachable("not reached"); + } + + func->params[i].type = param->type; + i++; + } + + func->return_type = ir->return_type; + + _mesa_hash_table_insert(this->overload_table, ir, func); +} + +void +nir_visitor::visit(ir_function *ir) +{ + foreach_in_list(ir_function_signature, sig, &ir->signatures) + sig->accept(this); +} + +void +nir_visitor::visit(ir_function_signature *ir) +{ + if (ir->is_intrinsic) + return; + + struct hash_entry *entry = + _mesa_hash_table_search(this->overload_table, ir); + + assert(entry); + nir_function *func = (nir_function 
*) entry->data; + + if (ir->is_defined) { + nir_function_impl *impl = nir_function_impl_create(func); + this->impl = impl; + + unsigned num_params = func->num_params; + impl->num_params = num_params; + impl->params = ralloc_array(this->shader, nir_variable *, num_params); + unsigned i = 0; + foreach_in_list(ir_variable, param, &ir->parameters) { + param->accept(this); + impl->params[i] = this->var; + i++; + } + + if (func->return_type == glsl_type::void_type) { + impl->return_var = NULL; + } else { + impl->return_var = ralloc(this->shader, nir_variable); + impl->return_var->name = ralloc_strdup(impl->return_var, + "return_var"); + impl->return_var->type = func->return_type; + } + + this->is_global = false; + + nir_builder_init(&b, impl); + b.cursor = nir_after_cf_list(&impl->body); + visit_exec_list(&ir->body, this); + + this->is_global = true; + } else { + func->impl = NULL; + } +} + +void +nir_visitor::visit(ir_loop *ir) +{ + nir_loop *loop = nir_loop_create(this->shader); + nir_builder_cf_insert(&b, &loop->cf_node); + + b.cursor = nir_after_cf_list(&loop->body); + visit_exec_list(&ir->body_instructions, this); + b.cursor = nir_after_cf_node(&loop->cf_node); +} + +void +nir_visitor::visit(ir_if *ir) +{ + nir_src condition = + nir_src_for_ssa(evaluate_rvalue(ir->condition)); + + nir_if *if_stmt = nir_if_create(this->shader); + if_stmt->condition = condition; + nir_builder_cf_insert(&b, &if_stmt->cf_node); + + b.cursor = nir_after_cf_list(&if_stmt->then_list); + visit_exec_list(&ir->then_instructions, this); + + b.cursor = nir_after_cf_list(&if_stmt->else_list); + visit_exec_list(&ir->else_instructions, this); + + b.cursor = nir_after_cf_node(&if_stmt->cf_node); +} + +void +nir_visitor::visit(ir_discard *ir) +{ + /* + * discards aren't treated as control flow, because before we lower them + * they can appear anywhere in the shader and the stuff after them may still + * be executed (yay, crazy GLSL rules!). However, after lowering, all the + * discards will be immediately followed by a return. 
+ */ + + nir_intrinsic_instr *discard; + if (ir->condition) { + discard = nir_intrinsic_instr_create(this->shader, + nir_intrinsic_discard_if); + discard->src[0] = + nir_src_for_ssa(evaluate_rvalue(ir->condition)); + } else { + discard = nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard); + } + + nir_builder_instr_insert(&b, &discard->instr); +} + +void +nir_visitor::visit(ir_emit_vertex *ir) +{ + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex); + instr->const_index[0] = ir->stream_id(); + nir_builder_instr_insert(&b, &instr->instr); +} + +void +nir_visitor::visit(ir_end_primitive *ir) +{ + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive); + instr->const_index[0] = ir->stream_id(); + nir_builder_instr_insert(&b, &instr->instr); +} + +void +nir_visitor::visit(ir_loop_jump *ir) +{ + nir_jump_type type; + switch (ir->mode) { + case ir_loop_jump::jump_break: + type = nir_jump_break; + break; + case ir_loop_jump::jump_continue: + type = nir_jump_continue; + break; + default: + unreachable("not reached"); + } + + nir_jump_instr *instr = nir_jump_instr_create(this->shader, type); + nir_builder_instr_insert(&b, &instr->instr); +} + +void +nir_visitor::visit(ir_return *ir) +{ + if (ir->value != NULL) { + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); + + copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var); + copy->variables[1] = evaluate_deref(&copy->instr, ir->value); + } + + nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return); + nir_builder_instr_insert(&b, &instr->instr); +} + +void +nir_visitor::visit(ir_call *ir) +{ + if (ir->callee->is_intrinsic) { + nir_intrinsic_op op; + if (strcmp(ir->callee_name(), "__intrinsic_atomic_read") == 0) { + op = nir_intrinsic_atomic_counter_read_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_increment") == 0) { + op = nir_intrinsic_atomic_counter_inc_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) { + op = nir_intrinsic_atomic_counter_dec_var; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) { + op = nir_intrinsic_image_load; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) { + op = nir_intrinsic_image_store; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) { + op = nir_intrinsic_image_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) { + op = nir_intrinsic_image_atomic_min; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) { + op = nir_intrinsic_image_atomic_max; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) { + op = nir_intrinsic_image_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) { + op = nir_intrinsic_image_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) { + op = nir_intrinsic_image_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) { + op = nir_intrinsic_image_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) { + op = nir_intrinsic_image_atomic_comp_swap; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) { + op = nir_intrinsic_memory_barrier; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) { + op = 
nir_intrinsic_image_size; + } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) { + op = nir_intrinsic_image_samples; + } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) { + op = nir_intrinsic_store_ssbo; + } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) { + op = nir_intrinsic_load_ssbo; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_ssbo") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_ssbo_atomic_imin; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_ssbo_atomic_umin; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_ssbo") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_ssbo_atomic_imax; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_ssbo_atomic_umax; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_ssbo") == 0) { + op = nir_intrinsic_ssbo_atomic_comp_swap; + } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) { + op = nir_intrinsic_shader_clock; + } else if (strcmp(ir->callee_name(), "__intrinsic_group_memory_barrier") == 0) { + op = nir_intrinsic_group_memory_barrier; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_atomic_counter") == 0) { + op = nir_intrinsic_memory_barrier_atomic_counter; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_buffer") == 0) { + op = nir_intrinsic_memory_barrier_buffer; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_image") == 0) { + op = nir_intrinsic_memory_barrier_image; + } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) { + op = nir_intrinsic_memory_barrier_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) { + op = nir_intrinsic_load_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) { + op = nir_intrinsic_store_shared; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) { + op = nir_intrinsic_shared_atomic_add; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) { + op = nir_intrinsic_shared_atomic_and; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) { + op = nir_intrinsic_shared_atomic_or; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) { + op = nir_intrinsic_shared_atomic_xor; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_shared_atomic_imin; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_shared_atomic_umin; + else + unreachable("Invalid type"); + } else if 
(strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) { + assert(ir->return_deref); + if (ir->return_deref->type == glsl_type::int_type) + op = nir_intrinsic_shared_atomic_imax; + else if (ir->return_deref->type == glsl_type::uint_type) + op = nir_intrinsic_shared_atomic_umax; + else + unreachable("Invalid type"); + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) { + op = nir_intrinsic_shared_atomic_exchange; + } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) { + op = nir_intrinsic_shared_atomic_comp_swap; + } else { + unreachable("not reached"); + } + + nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); + nir_dest *dest = &instr->dest; + + switch (op) { + case nir_intrinsic_atomic_counter_read_var: + case nir_intrinsic_atomic_counter_inc_var: + case nir_intrinsic_atomic_counter_dec_var: { + ir_dereference *param = + (ir_dereference *) ir->actual_parameters.get_head(); + instr->variables[0] = evaluate_deref(&instr->instr, param); + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_image_load: + case nir_intrinsic_image_store: + case nir_intrinsic_image_atomic_add: + case nir_intrinsic_image_atomic_min: + case nir_intrinsic_image_atomic_max: + case nir_intrinsic_image_atomic_and: + case nir_intrinsic_image_atomic_or: + case nir_intrinsic_image_atomic_xor: + case nir_intrinsic_image_atomic_exchange: + case nir_intrinsic_image_atomic_comp_swap: + case nir_intrinsic_image_samples: + case nir_intrinsic_image_size: { + nir_ssa_undef_instr *instr_undef = + nir_ssa_undef_instr_create(shader, 1); + nir_builder_instr_insert(&b, &instr_undef->instr); + + /* Set the image variable dereference. */ + exec_node *param = ir->actual_parameters.get_head(); + ir_dereference *image = (ir_dereference *)param; + const glsl_type *type = + image->variable_referenced()->type->without_array(); + + instr->variables[0] = evaluate_deref(&instr->instr, image); + param = param->get_next(); + + /* Set the intrinsic destination. */ + if (ir->return_deref) { + const nir_intrinsic_info *info = + &nir_intrinsic_infos[instr->intrinsic]; + nir_ssa_dest_init(&instr->instr, &instr->dest, + info->dest_components, NULL); + } + + if (op == nir_intrinsic_image_size || + op == nir_intrinsic_image_samples) { + nir_builder_instr_insert(&b, &instr->instr); + break; + } + + /* Set the address argument, extending the coordinate vector to four + * components. + */ + nir_ssa_def *src_addr = + evaluate_rvalue((ir_dereference *)param); + nir_ssa_def *srcs[4]; + + for (int i = 0; i < 4; i++) { + if (i < type->coordinate_components()) + srcs[i] = nir_channel(&b, src_addr, i); + else + srcs[i] = &instr_undef->def; + } + + instr->src[0] = nir_src_for_ssa(nir_vec(&b, srcs, 4)); + param = param->get_next(); + + /* Set the sample argument, which is undefined for single-sample + * images. + */ + if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { + instr->src[1] = + nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); + param = param->get_next(); + } else { + instr->src[1] = nir_src_for_ssa(&instr_undef->def); + } + + /* Set the intrinsic parameters. 
*/ + if (!param->is_tail_sentinel()) { + instr->src[2] = + nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); + param = param->get_next(); + } + + if (!param->is_tail_sentinel()) { + instr->src[3] = + nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); + param = param->get_next(); + } + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_memory_barrier: + case nir_intrinsic_group_memory_barrier: + case nir_intrinsic_memory_barrier_atomic_counter: + case nir_intrinsic_memory_barrier_buffer: + case nir_intrinsic_memory_barrier_image: + case nir_intrinsic_memory_barrier_shared: + nir_builder_instr_insert(&b, &instr->instr); + break; + case nir_intrinsic_shader_clock: + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + case nir_intrinsic_store_ssbo: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); + instr->const_index[0] = write_mask->value.u[0]; + instr->num_components = val->type->vector_elements; + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_load_ssbo: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block)); + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); + + const glsl_type *type = ir->return_deref->var->type; + instr->num_components = type->vector_elements; + + /* Setup destination register */ + nir_ssa_dest_init(&instr->instr, &instr->dest, + type->vector_elements, NULL); + + /* Insert the created nir instruction now since in the case of boolean + * result we will need to emit another instruction after it + */ + nir_builder_instr_insert(&b, &instr->instr); + + /* + * In SSBO/UBO's, a true boolean value is any non-zero value, but we + * consider a true boolean to be ~0. Fix this up with a != 0 + * comparison. 
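
Both SSBO paths above follow a fixed source layout. As a reference point, the store side reduces to a few builder calls when written directly; in this sketch the three SSA defs (val, block, offset) are assumed inputs rather than anything taken from this patch:

   /* Sketch: emit a full-writemask SSBO store with the layout used above:
    * src[0] = value, src[1] = block index, src[2] = offset, and
    * const_index[0] = writemask. */
   static void
   emit_store_ssbo_sketch(nir_builder *b, nir_shader *shader, nir_ssa_def *val,
                          nir_ssa_def *block, nir_ssa_def *offset)
   {
      nir_intrinsic_instr *store =
         nir_intrinsic_instr_create(shader, nir_intrinsic_store_ssbo);
      store->src[0] = nir_src_for_ssa(val);
      store->src[1] = nir_src_for_ssa(block);
      store->src[2] = nir_src_for_ssa(offset);
      store->const_index[0] = (1 << val->num_components) - 1;
      store->num_components = val->num_components;
      nir_builder_instr_insert(b, &store->instr);
   }
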
+ */ + if (type->base_type == GLSL_TYPE_BOOL) { + nir_alu_instr *load_ssbo_compare = + nir_alu_instr_create(shader, nir_op_ine); + load_ssbo_compare->src[0].src.is_ssa = true; + load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa; + load_ssbo_compare->src[1].src = + nir_src_for_ssa(nir_imm_int(&b, 0)); + for (unsigned i = 0; i < type->vector_elements; i++) + load_ssbo_compare->src[1].swizzle[i] = 0; + nir_ssa_dest_init(&load_ssbo_compare->instr, + &load_ssbo_compare->dest.dest, + type->vector_elements, NULL); + load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1; + nir_builder_instr_insert(&b, &load_ssbo_compare->instr); + dest = &load_ssbo_compare->dest.dest; + } + break; + } + case nir_intrinsic_ssbo_atomic_add: + case nir_intrinsic_ssbo_atomic_imin: + case nir_intrinsic_ssbo_atomic_umin: + case nir_intrinsic_ssbo_atomic_imax: + case nir_intrinsic_ssbo_atomic_umax: + case nir_intrinsic_ssbo_atomic_and: + case nir_intrinsic_ssbo_atomic_or: + case nir_intrinsic_ssbo_atomic_xor: + case nir_intrinsic_ssbo_atomic_exchange: + case nir_intrinsic_ssbo_atomic_comp_swap: { + int param_count = ir->actual_parameters.length(); + assert(param_count == 3 || param_count == 4); + + /* Block index */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* Offset */ + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data1 parameter (this is always present) */ + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data2 parameter (only with atomic_comp_swap) */ + if (param_count == 4) { + assert(op == nir_intrinsic_ssbo_atomic_comp_swap); + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + } + + /* Atomic result */ + assert(ir->return_deref); + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_load_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + instr->const_index[0] = 0; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); + + const glsl_type *type = ir->return_deref->var->type; + instr->num_components = type->vector_elements; + + /* Setup destination register */ + nir_ssa_dest_init(&instr->instr, &instr->dest, + type->vector_elements, NULL); + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_store_shared: { + exec_node *param = ir->actual_parameters.get_head(); + ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); + + param = param->get_next(); + ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); + assert(write_mask); + + instr->const_index[0] = 0; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); + + instr->const_index[1] = write_mask->value.u[0]; + + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); + instr->num_components = val->type->vector_elements; + + nir_builder_instr_insert(&b, &instr->instr); + break; + } + case nir_intrinsic_shared_atomic_add: + case nir_intrinsic_shared_atomic_imin: + case 
nir_intrinsic_shared_atomic_umin: + case nir_intrinsic_shared_atomic_imax: + case nir_intrinsic_shared_atomic_umax: + case nir_intrinsic_shared_atomic_and: + case nir_intrinsic_shared_atomic_or: + case nir_intrinsic_shared_atomic_xor: + case nir_intrinsic_shared_atomic_exchange: + case nir_intrinsic_shared_atomic_comp_swap: { + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* Offset */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data1 parameter (this is always present) */ + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + + /* data2 parameter (only with atomic_comp_swap) */ + if (param_count == 3) { + assert(op == nir_intrinsic_shared_atomic_comp_swap); + param = param->get_next(); + inst = (ir_instruction *) param; + instr->src[2] = + nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); + } + + /* Atomic result */ + assert(ir->return_deref); + nir_ssa_dest_init(&instr->instr, &instr->dest, + ir->return_deref->type->vector_elements, NULL); + nir_builder_instr_insert(&b, &instr->instr); + break; + } + default: + unreachable("not reached"); + } + + if (ir->return_deref) { + nir_intrinsic_instr *store_instr = + nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); + store_instr->num_components = ir->return_deref->type->vector_elements; + store_instr->const_index[0] = (1 << store_instr->num_components) - 1; + + store_instr->variables[0] = + evaluate_deref(&store_instr->instr, ir->return_deref); + store_instr->src[0] = nir_src_for_ssa(&dest->ssa); + + nir_builder_instr_insert(&b, &store_instr->instr); + } + + return; + } + + struct hash_entry *entry = + _mesa_hash_table_search(this->overload_table, ir->callee); + assert(entry); + nir_function *callee = (nir_function *) entry->data; + + nir_call_instr *instr = nir_call_instr_create(this->shader, callee); + + unsigned i = 0; + foreach_in_list(ir_dereference, param, &ir->actual_parameters) { + instr->params[i] = evaluate_deref(&instr->instr, param); + i++; + } + + instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref); + nir_builder_instr_insert(&b, &instr->instr); +} + +void +nir_visitor::visit(ir_assignment *ir) +{ + unsigned num_components = ir->lhs->type->vector_elements; + + if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) && + (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) { + /* We're doing a plain-as-can-be copy, so emit a copy_var */ + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); + + copy->variables[0] = evaluate_deref(&copy->instr, ir->lhs); + copy->variables[1] = evaluate_deref(&copy->instr, ir->rhs); + + if (ir->condition) { + nir_if *if_stmt = nir_if_create(this->shader); + if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition)); + nir_builder_cf_insert(&b, &if_stmt->cf_node); + nir_instr_insert_after_cf_list(&if_stmt->then_list, &copy->instr); + b.cursor = nir_after_cf_node(&if_stmt->cf_node); + } else { + nir_builder_instr_insert(&b, &copy->instr); + } + return; + } + + assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector()); + + ir->lhs->accept(this); + nir_deref_var *lhs_deref = this->deref_head; + nir_ssa_def *src = evaluate_rvalue(ir->rhs); + + if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) { + /* GLSL IR will give us 
the input to the write-masked assignment in a + * single packed vector. So, for example, if the writemask is xzw, then + * we have to swizzle x -> x, y -> z, and z -> w and get the y component + * from the load. + */ + unsigned swiz[4]; + unsigned component = 0; + for (unsigned i = 0; i < 4; i++) { + swiz[i] = ir->write_mask & (1 << i) ? component++ : 0; + } + src = nir_swizzle(&b, src, swiz, num_components, !supports_ints); + } + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); + store->num_components = ir->lhs->type->vector_elements; + store->const_index[0] = ir->write_mask; + nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref); + store->variables[0] = nir_deref_as_var(store_deref); + store->src[0] = nir_src_for_ssa(src); + + if (ir->condition) { + nir_if *if_stmt = nir_if_create(this->shader); + if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition)); + nir_builder_cf_insert(&b, &if_stmt->cf_node); + nir_instr_insert_after_cf_list(&if_stmt->then_list, &store->instr); + b.cursor = nir_after_cf_node(&if_stmt->cf_node); + } else { + nir_builder_instr_insert(&b, &store->instr); + } +} + +/* + * Given an instruction, returns a pointer to its destination or NULL if there + * is no destination. + * + * Note that this only handles instructions we generate at this level. + */ +static nir_dest * +get_instr_dest(nir_instr *instr) +{ + nir_alu_instr *alu_instr; + nir_intrinsic_instr *intrinsic_instr; + nir_tex_instr *tex_instr; + + switch (instr->type) { + case nir_instr_type_alu: + alu_instr = nir_instr_as_alu(instr); + return &alu_instr->dest.dest; + + case nir_instr_type_intrinsic: + intrinsic_instr = nir_instr_as_intrinsic(instr); + if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest) + return &intrinsic_instr->dest; + else + return NULL; + + case nir_instr_type_tex: + tex_instr = nir_instr_as_tex(instr); + return &tex_instr->dest; + + default: + unreachable("not reached"); + } + + return NULL; +} + +void +nir_visitor::add_instr(nir_instr *instr, unsigned num_components) +{ + nir_dest *dest = get_instr_dest(instr); + + if (dest) + nir_ssa_dest_init(instr, dest, num_components, NULL); + + nir_builder_instr_insert(&b, instr); + + if (dest) { + assert(dest->is_ssa); + this->result = &dest->ssa; + } +} + +nir_ssa_def * +nir_visitor::evaluate_rvalue(ir_rvalue* ir) +{ + ir->accept(this); + if (ir->as_dereference() || ir->as_constant()) { + /* + * A dereference is being used on the right hand side, which means we + * must emit a variable load. + */ + + nir_intrinsic_instr *load_instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var); + load_instr->num_components = ir->type->vector_elements; + load_instr->variables[0] = this->deref_head; + ralloc_steal(load_instr, load_instr->variables[0]); + add_instr(&load_instr->instr, ir->type->vector_elements); + } + + return this->result; +} + +void +nir_visitor::visit(ir_expression *ir) +{ + /* Some special cases */ + switch (ir->operation) { + case ir_binop_ubo_load: { + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); + load->num_components = ir->type->vector_elements; + load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); + load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); + add_instr(&load->instr, ir->type->vector_elements); + + /* + * In UBO's, a true boolean value is any non-zero value, but we consider + * a true boolean to be ~0. Fix this up with a != 0 comparison. 
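
Before moving on, a worked example of the write-mask swizzle computed above may help; assume a writemask of xzw (0b1101), so the packed RHS carries the three written values in .xyz:

   unsigned write_mask = 0xd;   /* xzw */
   unsigned swiz[4];
   unsigned component = 0;
   for (unsigned i = 0; i < 4; i++)
      swiz[i] = (write_mask & (1 << i)) ? component++ : 0;
   /* swiz is now {0, 0, 1, 2}: x <- x, z <- y, w <- z, and the unwritten
    * y entry is a don't-care that the store's writemask ignores. */
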
+ */ + + if (ir->type->base_type == GLSL_TYPE_BOOL) + this->result = nir_ine(&b, &load->dest.ssa, nir_imm_int(&b, 0)); + + return; + } + + case ir_unop_interpolate_at_centroid: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: { + ir_dereference *deref = ir->operands[0]->as_dereference(); + ir_swizzle *swizzle = NULL; + if (!deref) { + /* the api does not allow a swizzle here, but the varying packing code + * may have pushed one into here. + */ + swizzle = ir->operands[0]->as_swizzle(); + assert(swizzle); + deref = swizzle->val->as_dereference(); + assert(deref); + } + + deref->accept(this); + + nir_intrinsic_op op; + if (this->deref_head->var->data.mode == nir_var_shader_in) { + switch (ir->operation) { + case ir_unop_interpolate_at_centroid: + op = nir_intrinsic_interp_var_at_centroid; + break; + case ir_binop_interpolate_at_offset: + op = nir_intrinsic_interp_var_at_offset; + break; + case ir_binop_interpolate_at_sample: + op = nir_intrinsic_interp_var_at_sample; + break; + default: + unreachable("Invalid interpolation intrinsic"); + } + } else { + /* This case can happen if the vertex shader does not write the + * given varying. In this case, the linker will lower it to a + * global variable. Since interpolating a variable makes no + * sense, we'll just turn it into a load which will probably + * eventually end up as an SSA definition. + */ + assert(this->deref_head->var->data.mode == nir_var_global); + op = nir_intrinsic_load_var; + } + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op); + intrin->num_components = deref->type->vector_elements; + intrin->variables[0] = this->deref_head; + ralloc_steal(intrin, intrin->variables[0]); + + if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset || + intrin->intrinsic == nir_intrinsic_interp_var_at_sample) + intrin->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); + + add_instr(&intrin->instr, deref->type->vector_elements); + + if (swizzle) { + unsigned swiz[4] = { + swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w + }; + + result = nir_swizzle(&b, result, swiz, + swizzle->type->vector_elements, false); + } + + return; + } + + default: + break; + } + + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < ir->get_num_operands(); i++) + srcs[i] = evaluate_rvalue(ir->operands[i]); + + glsl_base_type types[4]; + for (unsigned i = 0; i < ir->get_num_operands(); i++) + if (supports_ints) + types[i] = ir->operands[i]->type->base_type; + else + types[i] = GLSL_TYPE_FLOAT; + + glsl_base_type out_type; + if (supports_ints) + out_type = ir->type->base_type; + else + out_type = GLSL_TYPE_FLOAT; + + switch (ir->operation) { + case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break; + case ir_unop_logic_not: + result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]); + break; + case ir_unop_neg: + result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fneg(&b, srcs[0]) + : nir_ineg(&b, srcs[0]); + break; + case ir_unop_abs: + result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fabs(&b, srcs[0]) + : nir_iabs(&b, srcs[0]); + break; + case ir_unop_saturate: + assert(types[0] == GLSL_TYPE_FLOAT); + result = nir_fsat(&b, srcs[0]); + break; + case ir_unop_sign: + result = (types[0] == GLSL_TYPE_FLOAT) ? 
nir_fsign(&b, srcs[0]) + : nir_isign(&b, srcs[0]); + break; + case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break; + case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break; + case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break; + case ir_unop_exp: unreachable("ir_unop_exp should have been lowered"); + case ir_unop_log: unreachable("ir_unop_log should have been lowered"); + case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break; + case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break; + case ir_unop_i2f: + result = supports_ints ? nir_i2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); + break; + case ir_unop_u2f: + result = supports_ints ? nir_u2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); + break; + case ir_unop_b2f: + result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); + break; + case ir_unop_f2i: result = nir_f2i(&b, srcs[0]); break; + case ir_unop_f2u: result = nir_f2u(&b, srcs[0]); break; + case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break; + case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break; + case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break; + case ir_unop_i2u: + case ir_unop_u2i: + case ir_unop_bitcast_i2f: + case ir_unop_bitcast_f2i: + case ir_unop_bitcast_u2f: + case ir_unop_bitcast_f2u: + case ir_unop_subroutine_to_int: + /* no-op */ + result = nir_imov(&b, srcs[0]); + break; + case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break; + case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break; + case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break; + case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break; + case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break; + case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break; + case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break; + case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break; + case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break; + case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break; + case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break; + case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break; + case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break; + case ir_unop_pack_snorm_2x16: + result = nir_pack_snorm_2x16(&b, srcs[0]); + break; + case ir_unop_pack_snorm_4x8: + result = nir_pack_snorm_4x8(&b, srcs[0]); + break; + case ir_unop_pack_unorm_2x16: + result = nir_pack_unorm_2x16(&b, srcs[0]); + break; + case ir_unop_pack_unorm_4x8: + result = nir_pack_unorm_4x8(&b, srcs[0]); + break; + case ir_unop_pack_half_2x16: + result = nir_pack_half_2x16(&b, srcs[0]); + break; + case ir_unop_unpack_snorm_2x16: + result = nir_unpack_snorm_2x16(&b, srcs[0]); + break; + case ir_unop_unpack_snorm_4x8: + result = nir_unpack_snorm_4x8(&b, srcs[0]); + break; + case ir_unop_unpack_unorm_2x16: + result = nir_unpack_unorm_2x16(&b, srcs[0]); + break; + case ir_unop_unpack_unorm_4x8: + result = nir_unpack_unorm_4x8(&b, srcs[0]); + break; + case ir_unop_unpack_half_2x16: + result = nir_unpack_half_2x16(&b, srcs[0]); + break; + case ir_unop_unpack_half_2x16_split_x: + result = nir_unpack_half_2x16_split_x(&b, srcs[0]); + break; + case ir_unop_unpack_half_2x16_split_y: + result = nir_unpack_half_2x16_split_y(&b, srcs[0]); + break; + case ir_unop_bitfield_reverse: + result = nir_bitfield_reverse(&b, srcs[0]); + break; + case ir_unop_bit_count: + result = nir_bit_count(&b, srcs[0]); + break; + case ir_unop_find_msb: + switch (types[0]) { + case GLSL_TYPE_UINT: + result = nir_ufind_msb(&b, srcs[0]); + break; + 
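
A pattern that recurs through the rest of this switch: when the hardware has no native integers or booleans, comparisons fall back to the SEQ/SLT-style float opcodes that return 1.0/0.0 instead of ~0/0. Reduced to a helper (a sketch; x and y are assumed float scalars):

   static nir_ssa_def *
   emit_less_sketch(nir_builder *b, bool supports_ints,
                    nir_ssa_def *x, nir_ssa_def *y)
   {
      /* Mirrors the ir_binop_less handling below. */
      return supports_ints ? nir_flt(b, x, y) : nir_slt(b, x, y);
   }
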
case GLSL_TYPE_INT: + result = nir_ifind_msb(&b, srcs[0]); + break; + default: + unreachable("Invalid type for findMSB()"); + } + break; + case ir_unop_find_lsb: + result = nir_find_lsb(&b, srcs[0]); + break; + + case ir_unop_noise: + switch (ir->type->vector_elements) { + case 1: + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fnoise1_1(&b, srcs[0]); break; + case 2: result = nir_fnoise1_2(&b, srcs[0]); break; + case 3: result = nir_fnoise1_3(&b, srcs[0]); break; + case 4: result = nir_fnoise1_4(&b, srcs[0]); break; + default: unreachable("not reached"); + } + break; + case 2: + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fnoise2_1(&b, srcs[0]); break; + case 2: result = nir_fnoise2_2(&b, srcs[0]); break; + case 3: result = nir_fnoise2_3(&b, srcs[0]); break; + case 4: result = nir_fnoise2_4(&b, srcs[0]); break; + default: unreachable("not reached"); + } + break; + case 3: + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fnoise3_1(&b, srcs[0]); break; + case 2: result = nir_fnoise3_2(&b, srcs[0]); break; + case 3: result = nir_fnoise3_3(&b, srcs[0]); break; + case 4: result = nir_fnoise3_4(&b, srcs[0]); break; + default: unreachable("not reached"); + } + break; + case 4: + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fnoise4_1(&b, srcs[0]); break; + case 2: result = nir_fnoise4_2(&b, srcs[0]); break; + case 3: result = nir_fnoise4_3(&b, srcs[0]); break; + case 4: result = nir_fnoise4_4(&b, srcs[0]); break; + default: unreachable("not reached"); + } + break; + default: + unreachable("not reached"); + } + break; + case ir_unop_get_buffer_size: { + nir_intrinsic_instr *load = nir_intrinsic_instr_create( + this->shader, + nir_intrinsic_get_buffer_size); + load->num_components = ir->type->vector_elements; + load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); + add_instr(&load->instr, ir->type->vector_elements); + return; + } + + case ir_binop_add: + result = (out_type == GLSL_TYPE_FLOAT) ? nir_fadd(&b, srcs[0], srcs[1]) + : nir_iadd(&b, srcs[0], srcs[1]); + break; + case ir_binop_sub: + result = (out_type == GLSL_TYPE_FLOAT) ? nir_fsub(&b, srcs[0], srcs[1]) + : nir_isub(&b, srcs[0], srcs[1]); + break; + case ir_binop_mul: + result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmul(&b, srcs[0], srcs[1]) + : nir_imul(&b, srcs[0], srcs[1]); + break; + case ir_binop_div: + if (out_type == GLSL_TYPE_FLOAT) + result = nir_fdiv(&b, srcs[0], srcs[1]); + else if (out_type == GLSL_TYPE_INT) + result = nir_idiv(&b, srcs[0], srcs[1]); + else + result = nir_udiv(&b, srcs[0], srcs[1]); + break; + case ir_binop_mod: + result = (out_type == GLSL_TYPE_FLOAT) ? 
nir_fmod(&b, srcs[0], srcs[1]) + : nir_umod(&b, srcs[0], srcs[1]); + break; + case ir_binop_min: + if (out_type == GLSL_TYPE_FLOAT) + result = nir_fmin(&b, srcs[0], srcs[1]); + else if (out_type == GLSL_TYPE_INT) + result = nir_imin(&b, srcs[0], srcs[1]); + else + result = nir_umin(&b, srcs[0], srcs[1]); + break; + case ir_binop_max: + if (out_type == GLSL_TYPE_FLOAT) + result = nir_fmax(&b, srcs[0], srcs[1]); + else if (out_type == GLSL_TYPE_INT) + result = nir_imax(&b, srcs[0], srcs[1]); + else + result = nir_umax(&b, srcs[0], srcs[1]); + break; + case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break; + case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break; + case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break; + case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break; + case ir_binop_logic_and: + result = supports_ints ? nir_iand(&b, srcs[0], srcs[1]) + : nir_fand(&b, srcs[0], srcs[1]); + break; + case ir_binop_logic_or: + result = supports_ints ? nir_ior(&b, srcs[0], srcs[1]) + : nir_for(&b, srcs[0], srcs[1]); + break; + case ir_binop_logic_xor: + result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1]) + : nir_fxor(&b, srcs[0], srcs[1]); + break; + case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break; + case ir_binop_rshift: + result = (out_type == GLSL_TYPE_INT) ? nir_ishr(&b, srcs[0], srcs[1]) + : nir_ushr(&b, srcs[0], srcs[1]); + break; + case ir_binop_imul_high: + result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1]) + : nir_umul_high(&b, srcs[0], srcs[1]); + break; + case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break; + case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break; + case ir_binop_less: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_flt(&b, srcs[0], srcs[1]); + else if (types[0] == GLSL_TYPE_INT) + result = nir_ilt(&b, srcs[0], srcs[1]); + else + result = nir_ult(&b, srcs[0], srcs[1]); + } else { + result = nir_slt(&b, srcs[0], srcs[1]); + } + break; + case ir_binop_greater: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_flt(&b, srcs[1], srcs[0]); + else if (types[0] == GLSL_TYPE_INT) + result = nir_ilt(&b, srcs[1], srcs[0]); + else + result = nir_ult(&b, srcs[1], srcs[0]); + } else { + result = nir_slt(&b, srcs[1], srcs[0]); + } + break; + case ir_binop_lequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_fge(&b, srcs[1], srcs[0]); + else if (types[0] == GLSL_TYPE_INT) + result = nir_ige(&b, srcs[1], srcs[0]); + else + result = nir_uge(&b, srcs[1], srcs[0]); + } else { + result = nir_slt(&b, srcs[1], srcs[0]); + } + break; + case ir_binop_gequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_fge(&b, srcs[0], srcs[1]); + else if (types[0] == GLSL_TYPE_INT) + result = nir_ige(&b, srcs[0], srcs[1]); + else + result = nir_uge(&b, srcs[0], srcs[1]); + } else { + result = nir_slt(&b, srcs[0], srcs[1]); + } + break; + case ir_binop_equal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_feq(&b, srcs[0], srcs[1]); + else + result = nir_ieq(&b, srcs[0], srcs[1]); + } else { + result = nir_seq(&b, srcs[0], srcs[1]); + } + break; + case ir_binop_nequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) + result = nir_fne(&b, srcs[0], srcs[1]); + else + result = nir_ine(&b, srcs[0], srcs[1]); + } else { + result = nir_sne(&b, srcs[0], srcs[1]); + } + break; + case ir_binop_all_equal: + if (supports_ints) { + 
if (types[0] == GLSL_TYPE_FLOAT) { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_feq(&b, srcs[0], srcs[1]); break; + case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break; + case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_seq(&b, srcs[0], srcs[1]); break; + case 2: result = nir_fall_equal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_fall_equal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_fall_equal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } + break; + case ir_binop_any_nequal: + if (supports_ints) { + if (types[0] == GLSL_TYPE_FLOAT) { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_fne(&b, srcs[0], srcs[1]); break; + case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_ine(&b, srcs[0], srcs[1]); break; + case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } + } else { + switch (ir->operands[0]->type->vector_elements) { + case 1: result = nir_sne(&b, srcs[0], srcs[1]); break; + case 2: result = nir_fany_nequal2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_fany_nequal3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_fany_nequal4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + } + break; + case ir_binop_dot: + switch (ir->operands[0]->type->vector_elements) { + case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break; + case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break; + case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break; + default: + unreachable("not reached"); + } + break; + + case ir_binop_pack_half_2x16_split: + result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]); + break; + case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break; + case ir_triop_fma: + result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]); + break; + case ir_triop_lrp: + result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]); + break; + case ir_triop_csel: + if (supports_ints) + result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]); + else + result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]); + break; + case ir_triop_bitfield_extract: + result = (out_type == GLSL_TYPE_INT) ? 
+ nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) : + nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]); + break; + case ir_quadop_bitfield_insert: + result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]); + break; + case ir_quadop_vector: + result = nir_vec(&b, srcs, ir->type->vector_elements); + break; + + default: + unreachable("not reached"); + } +} + +void +nir_visitor::visit(ir_swizzle *ir) +{ + unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w }; + result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle, + ir->type->vector_elements, !supports_ints); +} + +void +nir_visitor::visit(ir_texture *ir) +{ + unsigned num_srcs; + nir_texop op; + switch (ir->op) { + case ir_tex: + op = nir_texop_tex; + num_srcs = 1; /* coordinate */ + break; + + case ir_txb: + case ir_txl: + op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl; + num_srcs = 2; /* coordinate, bias/lod */ + break; + + case ir_txd: + op = nir_texop_txd; /* coordinate, dPdx, dPdy */ + num_srcs = 3; + break; + + case ir_txf: + op = nir_texop_txf; + if (ir->lod_info.lod != NULL) + num_srcs = 2; /* coordinate, lod */ + else + num_srcs = 1; /* coordinate */ + break; + + case ir_txf_ms: + op = nir_texop_txf_ms; + num_srcs = 2; /* coordinate, sample_index */ + break; + + case ir_txs: + op = nir_texop_txs; + if (ir->lod_info.lod != NULL) + num_srcs = 1; /* lod */ + else + num_srcs = 0; + break; + + case ir_lod: + op = nir_texop_lod; + num_srcs = 1; /* coordinate */ + break; + + case ir_tg4: + op = nir_texop_tg4; + num_srcs = 1; /* coordinate */ + break; + + case ir_query_levels: + op = nir_texop_query_levels; + num_srcs = 0; + break; + + case ir_texture_samples: + op = nir_texop_texture_samples; + num_srcs = 0; + break; + + case ir_samples_identical: + op = nir_texop_samples_identical; + num_srcs = 1; /* coordinate */ + break; + + default: + unreachable("not reached"); + } + + if (ir->projector != NULL) + num_srcs++; + if (ir->shadow_comparitor != NULL) + num_srcs++; + if (ir->offset != NULL && ir->offset->as_constant() == NULL) + num_srcs++; + + nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); + + instr->op = op; + instr->sampler_dim = + (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality; + instr->is_array = ir->sampler->type->sampler_array; + instr->is_shadow = ir->sampler->type->sampler_shadow; + if (instr->is_shadow) + instr->is_new_style_shadow = (ir->type->vector_elements == 1); + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + instr->dest_type = nir_type_float; + break; + case GLSL_TYPE_INT: + instr->dest_type = nir_type_int; + break; + case GLSL_TYPE_BOOL: + case GLSL_TYPE_UINT: + instr->dest_type = nir_type_uint; + break; + default: + unreachable("not reached"); + } + + instr->sampler = evaluate_deref(&instr->instr, ir->sampler); + + unsigned src_number = 0; + + if (ir->coordinate != NULL) { + instr->coord_components = ir->coordinate->type->vector_elements; + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->coordinate)); + instr->src[src_number].src_type = nir_tex_src_coord; + src_number++; + } + + if (ir->projector != NULL) { + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->projector)); + instr->src[src_number].src_type = nir_tex_src_projector; + src_number++; + } + + if (ir->shadow_comparitor != NULL) { + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparitor)); + instr->src[src_number].src_type = nir_tex_src_comparitor; + src_number++; + } + + if (ir->offset != NULL) { + 
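
As a sanity check on the num_srcs accounting above, consider a hypothetical shadow textureLodOffset() call with a non-constant offset (a worked count, not a case taken from this patch):

   unsigned num_srcs = 2;   /* coordinate + lod for ir_txl */
   num_srcs++;              /* shadow comparitor */
   num_srcs++;              /* non-constant texel offset */
   /* nir_tex_instr_create(shader, num_srcs) is asked for 4 sources, and
    * the assert(src_number == num_srcs) below verifies the same total. */
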
/* we don't support multiple offsets yet */ + assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); + + ir_constant *const_offset = ir->offset->as_constant(); + if (const_offset != NULL) { + for (unsigned i = 0; i < const_offset->type->vector_elements; i++) + instr->const_offset[i] = const_offset->value.i[i]; + } else { + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->offset)); + instr->src[src_number].src_type = nir_tex_src_offset; + src_number++; + } + } + + switch (ir->op) { + case ir_txb: + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias)); + instr->src[src_number].src_type = nir_tex_src_bias; + src_number++; + break; + + case ir_txl: + case ir_txf: + case ir_txs: + if (ir->lod_info.lod != NULL) { + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod)); + instr->src[src_number].src_type = nir_tex_src_lod; + src_number++; + } + break; + + case ir_txd: + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx)); + instr->src[src_number].src_type = nir_tex_src_ddx; + src_number++; + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy)); + instr->src[src_number].src_type = nir_tex_src_ddy; + src_number++; + break; + + case ir_txf_ms: + instr->src[src_number].src = + nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index)); + instr->src[src_number].src_type = nir_tex_src_ms_index; + src_number++; + break; + + case ir_tg4: + instr->component = ir->lod_info.component->as_constant()->value.u[0]; + break; + + default: + break; + } + + assert(src_number == num_srcs); + + add_instr(&instr->instr, nir_tex_instr_dest_size(instr)); +} + +void +nir_visitor::visit(ir_constant *ir) +{ + /* + * We don't know if this variable is an array or struct that gets + * dereferenced, so do the safe thing and make it a variable with a + * constant initializer and return a dereference. 
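
The const_temp round trip described here has a matching helper on the NIR side: nir_deref_get_const_initializer_load(), defined in nir.c later in this patch, turns a deref of such a variable back into an immediate. A sketch of that use, assuming a builder b and a deref that really points at a variable with a constant initializer:

   nir_load_const_instr *load =
      nir_deref_get_const_initializer_load(shader, deref);
   nir_builder_instr_insert(&b, &load->instr);
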
+ */ + + nir_variable *var = + nir_local_variable_create(this->impl, ir->type, "const_temp"); + var->data.read_only = true; + var->constant_initializer = constant_copy(ir, var); + + this->deref_head = nir_deref_var_create(this->shader, var); + this->deref_tail = &this->deref_head->deref; +} + +void +nir_visitor::visit(ir_dereference_variable *ir) +{ + struct hash_entry *entry = + _mesa_hash_table_search(this->var_table, ir->var); + assert(entry); + nir_variable *var = (nir_variable *) entry->data; + + nir_deref_var *deref = nir_deref_var_create(this->shader, var); + this->deref_head = deref; + this->deref_tail = &deref->deref; +} + +void +nir_visitor::visit(ir_dereference_record *ir) +{ + ir->record->accept(this); + + int field_index = this->deref_tail->type->field_index(ir->field); + assert(field_index >= 0); + + nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index); + deref->deref.type = ir->type; + this->deref_tail->child = &deref->deref; + this->deref_tail = &deref->deref; +} + +void +nir_visitor::visit(ir_dereference_array *ir) +{ + nir_deref_array *deref = nir_deref_array_create(this->shader); + deref->deref.type = ir->type; + + ir_constant *const_index = ir->array_index->as_constant(); + if (const_index != NULL) { + deref->deref_array_type = nir_deref_array_type_direct; + deref->base_offset = const_index->value.u[0]; + } else { + deref->deref_array_type = nir_deref_array_type_indirect; + deref->indirect = + nir_src_for_ssa(evaluate_rvalue(ir->array_index)); + } + + ir->array->accept(this); + + this->deref_tail->child = &deref->deref; + ralloc_steal(this->deref_tail, deref); + this->deref_tail = &deref->deref; +} + +void +nir_visitor::visit(ir_barrier *ir) +{ + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier); + nir_builder_instr_insert(&b, &instr->instr); +} diff --git a/src/compiler/nir/glsl_to_nir.h b/src/compiler/nir/glsl_to_nir.h new file mode 100644 index 00000000000..20d2a380a26 --- /dev/null +++ b/src/compiler/nir/glsl_to_nir.h @@ -0,0 +1,41 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include "glsl/glsl_parser_extras.h" + +#ifdef __cplusplus +extern "C" { +#endif + +nir_shader *glsl_to_nir(const struct gl_shader_program *shader_prog, + gl_shader_stage stage, + const nir_shader_compiler_options *options); + +#ifdef __cplusplus +} +#endif diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c new file mode 100644 index 00000000000..21bf678c04e --- /dev/null +++ b/src/compiler/nir/nir.c @@ -0,0 +1,1665 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include "nir_control_flow_private.h" +#include <assert.h> + +nir_shader * +nir_shader_create(void *mem_ctx, + gl_shader_stage stage, + const nir_shader_compiler_options *options) +{ + nir_shader *shader = ralloc(mem_ctx, nir_shader); + + exec_list_make_empty(&shader->uniforms); + exec_list_make_empty(&shader->inputs); + exec_list_make_empty(&shader->outputs); + + shader->options = options; + memset(&shader->info, 0, sizeof(shader->info)); + + exec_list_make_empty(&shader->functions); + exec_list_make_empty(&shader->registers); + exec_list_make_empty(&shader->globals); + exec_list_make_empty(&shader->system_values); + shader->reg_alloc = 0; + + shader->num_inputs = 0; + shader->num_outputs = 0; + shader->num_uniforms = 0; + + shader->stage = stage; + + return shader; +} + +static nir_register * +reg_create(void *mem_ctx, struct exec_list *list) +{ + nir_register *reg = ralloc(mem_ctx, nir_register); + + list_inithead(&reg->uses); + list_inithead(&reg->defs); + list_inithead(&reg->if_uses); + + reg->num_components = 0; + reg->num_array_elems = 0; + reg->is_packed = false; + reg->name = NULL; + + exec_list_push_tail(list, &reg->node); + + return reg; +} + +nir_register * +nir_global_reg_create(nir_shader *shader) +{ + nir_register *reg = reg_create(shader, &shader->registers); + reg->index = shader->reg_alloc++; + reg->is_global = true; + + return reg; +} + +nir_register * +nir_local_reg_create(nir_function_impl *impl) +{ + nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers); + reg->index = impl->reg_alloc++; + reg->is_global = false; + + return reg; +} + +void +nir_reg_remove(nir_register *reg) +{ + exec_node_remove(&reg->node); +} + +void +nir_shader_add_variable(nir_shader *shader, nir_variable *var) +{ + switch (var->data.mode) { + case nir_var_all: + 
assert(!"invalid mode"); + break; + + case nir_var_local: + assert(!"nir_shader_add_variable cannot be used for local variables"); + break; + + case nir_var_global: + exec_list_push_tail(&shader->globals, &var->node); + break; + + case nir_var_shader_in: + exec_list_push_tail(&shader->inputs, &var->node); + break; + + case nir_var_shader_out: + exec_list_push_tail(&shader->outputs, &var->node); + break; + + case nir_var_uniform: + case nir_var_shader_storage: + exec_list_push_tail(&shader->uniforms, &var->node); + break; + + case nir_var_system_value: + exec_list_push_tail(&shader->system_values, &var->node); + break; + } +} + +nir_variable * +nir_variable_create(nir_shader *shader, nir_variable_mode mode, + const struct glsl_type *type, const char *name) +{ + nir_variable *var = rzalloc(shader, nir_variable); + var->name = ralloc_strdup(var, name); + var->type = type; + var->data.mode = mode; + + if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) || + (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT)) + var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + + if (mode == nir_var_shader_in || mode == nir_var_uniform) + var->data.read_only = true; + + nir_shader_add_variable(shader, var); + + return var; +} + +nir_variable * +nir_local_variable_create(nir_function_impl *impl, + const struct glsl_type *type, const char *name) +{ + nir_variable *var = rzalloc(impl->function->shader, nir_variable); + var->name = ralloc_strdup(var, name); + var->type = type; + var->data.mode = nir_var_local; + + nir_function_impl_add_variable(impl, var); + + return var; +} + +nir_function * +nir_function_create(nir_shader *shader, const char *name) +{ + nir_function *func = ralloc(shader, nir_function); + + exec_list_push_tail(&shader->functions, &func->node); + + func->name = ralloc_strdup(func, name); + func->shader = shader; + func->num_params = 0; + func->params = NULL; + func->return_type = glsl_void_type(); + func->impl = NULL; + + return func; +} + +void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx) +{ + dest->is_ssa = src->is_ssa; + if (src->is_ssa) { + dest->ssa = src->ssa; + } else { + dest->reg.base_offset = src->reg.base_offset; + dest->reg.reg = src->reg.reg; + if (src->reg.indirect) { + dest->reg.indirect = ralloc(mem_ctx, nir_src); + nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx); + } else { + dest->reg.indirect = NULL; + } + } +} + +void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr) +{ + /* Copying an SSA definition makes no sense whatsoever. 
*/ + assert(!src->is_ssa); + + dest->is_ssa = false; + + dest->reg.base_offset = src->reg.base_offset; + dest->reg.reg = src->reg.reg; + if (src->reg.indirect) { + dest->reg.indirect = ralloc(instr, nir_src); + nir_src_copy(dest->reg.indirect, src->reg.indirect, instr); + } else { + dest->reg.indirect = NULL; + } +} + +void +nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, + nir_alu_instr *instr) +{ + nir_src_copy(&dest->src, &src->src, &instr->instr); + dest->abs = src->abs; + dest->negate = src->negate; + for (unsigned i = 0; i < 4; i++) + dest->swizzle[i] = src->swizzle[i]; +} + +void +nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, + nir_alu_instr *instr) +{ + nir_dest_copy(&dest->dest, &src->dest, &instr->instr); + dest->write_mask = src->write_mask; + dest->saturate = src->saturate; +} + + +static void +cf_init(nir_cf_node *node, nir_cf_node_type type) +{ + exec_node_init(&node->node); + node->parent = NULL; + node->type = type; +} + +nir_function_impl * +nir_function_impl_create(nir_function *function) +{ + assert(function->impl == NULL); + + void *mem_ctx = ralloc_parent(function); + + nir_function_impl *impl = ralloc(mem_ctx, nir_function_impl); + + function->impl = impl; + impl->function = function; + + cf_init(&impl->cf_node, nir_cf_node_function); + + exec_list_make_empty(&impl->body); + exec_list_make_empty(&impl->registers); + exec_list_make_empty(&impl->locals); + impl->num_params = 0; + impl->params = NULL; + impl->return_var = NULL; + impl->reg_alloc = 0; + impl->ssa_alloc = 0; + impl->valid_metadata = nir_metadata_none; + + /* create start & end blocks */ + nir_block *start_block = nir_block_create(mem_ctx); + nir_block *end_block = nir_block_create(mem_ctx); + start_block->cf_node.parent = &impl->cf_node; + end_block->cf_node.parent = &impl->cf_node; + impl->end_block = end_block; + + exec_list_push_tail(&impl->body, &start_block->cf_node.node); + + start_block->successors[0] = end_block; + _mesa_set_add(end_block->predecessors, start_block); + return impl; +} + +nir_block * +nir_block_create(nir_shader *shader) +{ + nir_block *block = ralloc(shader, nir_block); + + cf_init(&block->cf_node, nir_cf_node_block); + + block->successors[0] = block->successors[1] = NULL; + block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, + _mesa_key_pointer_equal); + block->imm_dom = NULL; + /* XXX maybe it would be worth it to defer allocation? This + * way it doesn't get allocated for shader refs that never run + * nir_calc_dominance? For example, state-tracker creates an + * initial IR, clones that, runs appropriate lowering pass, passes + * to driver which does common lowering/opt, and then stores ref + * which is later used to do state specific lowering and further + * opt. Do any of the references not need dominance metadata? 
+ */ + block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + exec_list_make_empty(&block->instr_list); + + return block; +} + +static inline void +src_init(nir_src *src) +{ + src->is_ssa = false; + src->reg.reg = NULL; + src->reg.indirect = NULL; + src->reg.base_offset = 0; +} + +nir_if * +nir_if_create(nir_shader *shader) +{ + nir_if *if_stmt = ralloc(shader, nir_if); + + cf_init(&if_stmt->cf_node, nir_cf_node_if); + src_init(&if_stmt->condition); + + nir_block *then = nir_block_create(shader); + exec_list_make_empty(&if_stmt->then_list); + exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node); + then->cf_node.parent = &if_stmt->cf_node; + + nir_block *else_stmt = nir_block_create(shader); + exec_list_make_empty(&if_stmt->else_list); + exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node); + else_stmt->cf_node.parent = &if_stmt->cf_node; + + return if_stmt; +} + +nir_loop * +nir_loop_create(nir_shader *shader) +{ + nir_loop *loop = ralloc(shader, nir_loop); + + cf_init(&loop->cf_node, nir_cf_node_loop); + + nir_block *body = nir_block_create(shader); + exec_list_make_empty(&loop->body); + exec_list_push_tail(&loop->body, &body->cf_node.node); + body->cf_node.parent = &loop->cf_node; + + body->successors[0] = body; + _mesa_set_add(body->predecessors, body); + + return loop; +} + +static void +instr_init(nir_instr *instr, nir_instr_type type) +{ + instr->type = type; + instr->block = NULL; + exec_node_init(&instr->node); +} + +static void +dest_init(nir_dest *dest) +{ + dest->is_ssa = false; + dest->reg.reg = NULL; + dest->reg.indirect = NULL; + dest->reg.base_offset = 0; +} + +static void +alu_dest_init(nir_alu_dest *dest) +{ + dest_init(&dest->dest); + dest->saturate = false; + dest->write_mask = 0xf; +} + +static void +alu_src_init(nir_alu_src *src) +{ + src_init(&src->src); + src->abs = src->negate = false; + src->swizzle[0] = 0; + src->swizzle[1] = 1; + src->swizzle[2] = 2; + src->swizzle[3] = 3; +} + +nir_alu_instr * +nir_alu_instr_create(nir_shader *shader, nir_op op) +{ + unsigned num_srcs = nir_op_infos[op].num_inputs; + nir_alu_instr *instr = + ralloc_size(shader, + sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); + + instr_init(&instr->instr, nir_instr_type_alu); + instr->op = op; + alu_dest_init(&instr->dest); + for (unsigned i = 0; i < num_srcs; i++) + alu_src_init(&instr->src[i]); + + return instr; +} + +nir_jump_instr * +nir_jump_instr_create(nir_shader *shader, nir_jump_type type) +{ + nir_jump_instr *instr = ralloc(shader, nir_jump_instr); + instr_init(&instr->instr, nir_instr_type_jump); + instr->type = type; + return instr; +} + +nir_load_const_instr * +nir_load_const_instr_create(nir_shader *shader, unsigned num_components) +{ + nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr); + instr_init(&instr->instr, nir_instr_type_load_const); + + nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); + + return instr; +} + +nir_intrinsic_instr * +nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) +{ + unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; + nir_intrinsic_instr *instr = + ralloc_size(shader, + sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); + + instr_init(&instr->instr, nir_instr_type_intrinsic); + instr->intrinsic = op; + + if (nir_intrinsic_infos[op].has_dest) + dest_init(&instr->dest); + + for (unsigned i = 0; i < num_srcs; i++) + src_init(&instr->src[i]); + + return instr; +} + +nir_call_instr * +nir_call_instr_create(nir_shader 
*shader, nir_function *callee) +{ + nir_call_instr *instr = ralloc(shader, nir_call_instr); + instr_init(&instr->instr, nir_instr_type_call); + + instr->callee = callee; + instr->num_params = callee->num_params; + instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params); + instr->return_deref = NULL; + + return instr; +} + +nir_tex_instr * +nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) +{ + nir_tex_instr *instr = rzalloc(shader, nir_tex_instr); + instr_init(&instr->instr, nir_instr_type_tex); + + dest_init(&instr->dest); + + instr->num_srcs = num_srcs; + instr->src = ralloc_array(instr, nir_tex_src, num_srcs); + for (unsigned i = 0; i < num_srcs; i++) + src_init(&instr->src[i].src); + + instr->sampler_index = 0; + instr->sampler_array_size = 0; + instr->sampler = NULL; + + return instr; +} + +nir_phi_instr * +nir_phi_instr_create(nir_shader *shader) +{ + nir_phi_instr *instr = ralloc(shader, nir_phi_instr); + instr_init(&instr->instr, nir_instr_type_phi); + + dest_init(&instr->dest); + exec_list_make_empty(&instr->srcs); + return instr; +} + +nir_parallel_copy_instr * +nir_parallel_copy_instr_create(nir_shader *shader) +{ + nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr); + instr_init(&instr->instr, nir_instr_type_parallel_copy); + + exec_list_make_empty(&instr->entries); + + return instr; +} + +nir_ssa_undef_instr * +nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components) +{ + nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr); + instr_init(&instr->instr, nir_instr_type_ssa_undef); + + nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); + + return instr; +} + +nir_deref_var * +nir_deref_var_create(void *mem_ctx, nir_variable *var) +{ + nir_deref_var *deref = ralloc(mem_ctx, nir_deref_var); + deref->deref.deref_type = nir_deref_type_var; + deref->deref.child = NULL; + deref->deref.type = var->type; + deref->var = var; + return deref; +} + +nir_deref_array * +nir_deref_array_create(void *mem_ctx) +{ + nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array); + deref->deref.deref_type = nir_deref_type_array; + deref->deref.child = NULL; + deref->deref_array_type = nir_deref_array_type_direct; + src_init(&deref->indirect); + deref->base_offset = 0; + return deref; +} + +nir_deref_struct * +nir_deref_struct_create(void *mem_ctx, unsigned field_index) +{ + nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct); + deref->deref.deref_type = nir_deref_type_struct; + deref->deref.child = NULL; + deref->index = field_index; + return deref; +} + +static nir_deref_var * +copy_deref_var(void *mem_ctx, nir_deref_var *deref) +{ + nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var); + ret->deref.type = deref->deref.type; + if (deref->deref.child) + ret->deref.child = nir_copy_deref(ret, deref->deref.child); + return ret; +} + +static nir_deref_array * +copy_deref_array(void *mem_ctx, nir_deref_array *deref) +{ + nir_deref_array *ret = nir_deref_array_create(mem_ctx); + ret->base_offset = deref->base_offset; + ret->deref_array_type = deref->deref_array_type; + if (deref->deref_array_type == nir_deref_array_type_indirect) { + nir_src_copy(&ret->indirect, &deref->indirect, mem_ctx); + } + ret->deref.type = deref->deref.type; + if (deref->deref.child) + ret->deref.child = nir_copy_deref(ret, deref->deref.child); + return ret; +} + +static nir_deref_struct * +copy_deref_struct(void *mem_ctx, nir_deref_struct *deref) +{ + nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index); + 
ret->deref.type = deref->deref.type; + if (deref->deref.child) + ret->deref.child = nir_copy_deref(ret, deref->deref.child); + return ret; +} + +nir_deref * +nir_copy_deref(void *mem_ctx, nir_deref *deref) +{ + switch (deref->deref_type) { + case nir_deref_type_var: + return ©_deref_var(mem_ctx, nir_deref_as_var(deref))->deref; + case nir_deref_type_array: + return ©_deref_array(mem_ctx, nir_deref_as_array(deref))->deref; + case nir_deref_type_struct: + return ©_deref_struct(mem_ctx, nir_deref_as_struct(deref))->deref; + default: + unreachable("Invalid dereference type"); + } + + return NULL; +} + +/* Returns a load_const instruction that represents the constant + * initializer for the given deref chain. The caller is responsible for + * ensuring that there actually is a constant initializer. + */ +nir_load_const_instr * +nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref) +{ + nir_constant *constant = deref->var->constant_initializer; + assert(constant); + + const nir_deref *tail = &deref->deref; + unsigned matrix_offset = 0; + while (tail->child) { + switch (tail->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(tail->child); + assert(arr->deref_array_type == nir_deref_array_type_direct); + if (glsl_type_is_matrix(tail->type)) { + assert(arr->deref.child == NULL); + matrix_offset = arr->base_offset; + } else { + constant = constant->elements[arr->base_offset]; + } + break; + } + + case nir_deref_type_struct: { + constant = constant->elements[nir_deref_as_struct(tail->child)->index]; + break; + } + + default: + unreachable("Invalid deref child type"); + } + + tail = tail->child; + } + + nir_load_const_instr *load = + nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type)); + + matrix_offset *= load->def.num_components; + for (unsigned i = 0; i < load->def.num_components; i++) { + switch (glsl_get_base_type(tail->type)) { + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + load->value.u[i] = constant->value.u[matrix_offset + i]; + break; + case GLSL_TYPE_BOOL: + load->value.u[i] = constant->value.b[matrix_offset + i] ? + NIR_TRUE : NIR_FALSE; + break; + default: + unreachable("Invalid immediate type"); + } + } + + return load; +} + +nir_function_impl * +nir_cf_node_get_function(nir_cf_node *node) +{ + while (node->type != nir_cf_node_function) { + node = node->parent; + } + + return nir_cf_node_as_function(node); +} + +static bool +add_use_cb(nir_src *src, void *state) +{ + nir_instr *instr = state; + + src->parent_instr = instr; + list_addtail(&src->use_link, + src->is_ssa ? 
&src->ssa->uses : &src->reg.reg->uses); + + return true; +} + +static bool +add_ssa_def_cb(nir_ssa_def *def, void *state) +{ + nir_instr *instr = state; + + if (instr->block && def->index == UINT_MAX) { + nir_function_impl *impl = + nir_cf_node_get_function(&instr->block->cf_node); + + def->index = impl->ssa_alloc++; + } + + return true; +} + +static bool +add_reg_def_cb(nir_dest *dest, void *state) +{ + nir_instr *instr = state; + + if (!dest->is_ssa) { + dest->reg.parent_instr = instr; + list_addtail(&dest->reg.def_link, &dest->reg.reg->defs); + } + + return true; +} + +static void +add_defs_uses(nir_instr *instr) +{ + nir_foreach_src(instr, add_use_cb, instr); + nir_foreach_dest(instr, add_reg_def_cb, instr); + nir_foreach_ssa_def(instr, add_ssa_def_cb, instr); +} + +void +nir_instr_insert(nir_cursor cursor, nir_instr *instr) +{ + switch (cursor.option) { + case nir_cursor_before_block: + /* Only allow inserting jumps into empty blocks. */ + if (instr->type == nir_instr_type_jump) + assert(exec_list_is_empty(&cursor.block->instr_list)); + + instr->block = cursor.block; + add_defs_uses(instr); + exec_list_push_head(&cursor.block->instr_list, &instr->node); + break; + case nir_cursor_after_block: { + /* Inserting instructions after a jump is illegal. */ + nir_instr *last = nir_block_last_instr(cursor.block); + assert(last == NULL || last->type != nir_instr_type_jump); + (void) last; + + instr->block = cursor.block; + add_defs_uses(instr); + exec_list_push_tail(&cursor.block->instr_list, &instr->node); + break; + } + case nir_cursor_before_instr: + assert(instr->type != nir_instr_type_jump); + instr->block = cursor.instr->block; + add_defs_uses(instr); + exec_node_insert_node_before(&cursor.instr->node, &instr->node); + break; + case nir_cursor_after_instr: + /* Inserting instructions after a jump is illegal. */ + assert(cursor.instr->type != nir_instr_type_jump); + + /* Only allow inserting jumps at the end of the block. */ + if (instr->type == nir_instr_type_jump) + assert(cursor.instr == nir_block_last_instr(cursor.instr->block)); + + instr->block = cursor.instr->block; + add_defs_uses(instr); + exec_node_insert_after(&cursor.instr->node, &instr->node); + break; + } + + if (instr->type == nir_instr_type_jump) + nir_handle_add_jump(instr->block); +} + +static bool +src_is_valid(const nir_src *src) +{ + return src->is_ssa ? 
(src->ssa != NULL) : (src->reg.reg != NULL); +} + +static bool +remove_use_cb(nir_src *src, void *state) +{ + if (src_is_valid(src)) + list_del(&src->use_link); + + return true; +} + +static bool +remove_def_cb(nir_dest *dest, void *state) +{ + if (!dest->is_ssa) + list_del(&dest->reg.def_link); + + return true; +} + +static void +remove_defs_uses(nir_instr *instr) +{ + nir_foreach_dest(instr, remove_def_cb, instr); + nir_foreach_src(instr, remove_use_cb, instr); +} + +void nir_instr_remove(nir_instr *instr) +{ + remove_defs_uses(instr); + exec_node_remove(&instr->node); + + if (instr->type == nir_instr_type_jump) { + nir_jump_instr *jump_instr = nir_instr_as_jump(instr); + nir_handle_remove_jump(instr->block, jump_instr->type); + } +} + +/*@}*/ + +void +nir_index_local_regs(nir_function_impl *impl) +{ + unsigned index = 0; + foreach_list_typed(nir_register, reg, node, &impl->registers) { + reg->index = index++; + } + impl->reg_alloc = index; +} + +void +nir_index_global_regs(nir_shader *shader) +{ + unsigned index = 0; + foreach_list_typed(nir_register, reg, node, &shader->registers) { + reg->index = index++; + } + shader->reg_alloc = index; +} + +static bool +visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state) +{ + return cb(&instr->dest.dest, state); +} + +static bool +visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb, + void *state) +{ + if (nir_intrinsic_infos[instr->intrinsic].has_dest) + return cb(&instr->dest, state); + + return true; +} + +static bool +visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb, + void *state) +{ + return cb(&instr->dest, state); +} + +static bool +visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state) +{ + return cb(&instr->dest, state); +} + +static bool +visit_parallel_copy_dest(nir_parallel_copy_instr *instr, + nir_foreach_dest_cb cb, void *state) +{ + nir_foreach_parallel_copy_entry(instr, entry) { + if (!cb(&entry->dest, state)) + return false; + } + + return true; +} + +bool +nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state) +{ + switch (instr->type) { + case nir_instr_type_alu: + return visit_alu_dest(nir_instr_as_alu(instr), cb, state); + case nir_instr_type_intrinsic: + return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state); + case nir_instr_type_tex: + return visit_texture_dest(nir_instr_as_tex(instr), cb, state); + case nir_instr_type_phi: + return visit_phi_dest(nir_instr_as_phi(instr), cb, state); + case nir_instr_type_parallel_copy: + return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr), + cb, state); + + case nir_instr_type_load_const: + case nir_instr_type_ssa_undef: + case nir_instr_type_call: + case nir_instr_type_jump: + break; + + default: + unreachable("Invalid instruction type"); + break; + } + + return true; +} + +struct foreach_ssa_def_state { + nir_foreach_ssa_def_cb cb; + void *client_state; +}; + +static inline bool +nir_ssa_def_visitor(nir_dest *dest, void *void_state) +{ + struct foreach_ssa_def_state *state = void_state; + + if (dest->is_ssa) + return state->cb(&dest->ssa, state->client_state); + else + return true; +} + +bool +nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state) +{ + switch (instr->type) { + case nir_instr_type_alu: + case nir_instr_type_tex: + case nir_instr_type_intrinsic: + case nir_instr_type_phi: + case nir_instr_type_parallel_copy: { + struct foreach_ssa_def_state foreach_state = {cb, state}; + return nir_foreach_dest(instr, nir_ssa_def_visitor, 
&foreach_state); + } + + case nir_instr_type_load_const: + return cb(&nir_instr_as_load_const(instr)->def, state); + case nir_instr_type_ssa_undef: + return cb(&nir_instr_as_ssa_undef(instr)->def, state); + case nir_instr_type_call: + case nir_instr_type_jump: + return true; + default: + unreachable("Invalid instruction type"); + } +} + +static bool +visit_src(nir_src *src, nir_foreach_src_cb cb, void *state) +{ + if (!cb(src, state)) + return false; + if (!src->is_ssa && src->reg.indirect) + return cb(src->reg.indirect, state); + return true; +} + +static bool +visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb, + void *state) +{ + if (deref->deref_array_type == nir_deref_array_type_indirect) + return visit_src(&deref->indirect, cb, state); + return true; +} + +static bool +visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state) +{ + nir_deref *cur = &deref->deref; + while (cur != NULL) { + if (cur->deref_type == nir_deref_type_array) + if (!visit_deref_array_src(nir_deref_as_array(cur), cb, state)) + return false; + + cur = cur->child; + } + + return true; +} + +static bool +visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state) +{ + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) + if (!visit_src(&instr->src[i].src, cb, state)) + return false; + + return true; +} + +static bool +visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state) +{ + for (unsigned i = 0; i < instr->num_srcs; i++) + if (!visit_src(&instr->src[i].src, cb, state)) + return false; + + if (instr->sampler != NULL) + if (!visit_deref_src(instr->sampler, cb, state)) + return false; + + return true; +} + +static bool +visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb, + void *state) +{ + unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; + for (unsigned i = 0; i < num_srcs; i++) + if (!visit_src(&instr->src[i], cb, state)) + return false; + + unsigned num_vars = + nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) + if (!visit_deref_src(instr->variables[i], cb, state)) + return false; + + return true; +} + +static bool +visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state) +{ + return true; +} + +static bool +visit_load_const_src(nir_load_const_instr *instr, nir_foreach_src_cb cb, + void *state) +{ + return true; +} + +static bool +visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state) +{ + nir_foreach_phi_src(instr, src) { + if (!visit_src(&src->src, cb, state)) + return false; + } + + return true; +} + +static bool +visit_parallel_copy_src(nir_parallel_copy_instr *instr, + nir_foreach_src_cb cb, void *state) +{ + nir_foreach_parallel_copy_entry(instr, entry) { + if (!visit_src(&entry->src, cb, state)) + return false; + } + + return true; +} + +typedef struct { + void *state; + nir_foreach_src_cb cb; +} visit_dest_indirect_state; + +static bool +visit_dest_indirect(nir_dest *dest, void *_state) +{ + visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state; + + if (!dest->is_ssa && dest->reg.indirect) + return state->cb(dest->reg.indirect, state->state); + + return true; +} + +bool +nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) +{ + switch (instr->type) { + case nir_instr_type_alu: + if (!visit_alu_src(nir_instr_as_alu(instr), cb, state)) + return false; + break; + case nir_instr_type_intrinsic: + if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state)) + return false; + 
break; + case nir_instr_type_tex: + if (!visit_tex_src(nir_instr_as_tex(instr), cb, state)) + return false; + break; + case nir_instr_type_call: + if (!visit_call_src(nir_instr_as_call(instr), cb, state)) + return false; + break; + case nir_instr_type_load_const: + if (!visit_load_const_src(nir_instr_as_load_const(instr), cb, state)) + return false; + break; + case nir_instr_type_phi: + if (!visit_phi_src(nir_instr_as_phi(instr), cb, state)) + return false; + break; + case nir_instr_type_parallel_copy: + if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr), + cb, state)) + return false; + break; + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + return true; + + default: + unreachable("Invalid instruction type"); + break; + } + + visit_dest_indirect_state dest_state; + dest_state.state = state; + dest_state.cb = cb; + return nir_foreach_dest(instr, visit_dest_indirect, &dest_state); +} + +nir_const_value * +nir_src_as_const_value(nir_src src) +{ + if (!src.is_ssa) + return NULL; + + if (src.ssa->parent_instr->type != nir_instr_type_load_const) + return NULL; + + nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); + + return &load->value; +} + +/** + * Returns true if the source is known to be dynamically uniform. Otherwise it + * returns false which means it may or may not be dynamically uniform but it + * can't be determined. + */ +bool +nir_src_is_dynamically_uniform(nir_src src) +{ + if (!src.is_ssa) + return false; + + /* Constants are trivially dynamically uniform */ + if (src.ssa->parent_instr->type == nir_instr_type_load_const) + return true; + + /* As are uniform variables */ + if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr); + + if (intr->intrinsic == nir_intrinsic_load_uniform) + return true; + } + + /* XXX: this could have many more tests, such as when a sampler function is + * called with dynamically uniform arguments. + */ + return false; +} + +static void +src_remove_all_uses(nir_src *src) +{ + for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { + if (!src_is_valid(src)) + continue; + + list_del(&src->use_link); + } +} + +static void +src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) +{ + for (; src; src = src->is_ssa ? 
NULL : src->reg.indirect) { + if (!src_is_valid(src)) + continue; + + if (parent_instr) { + src->parent_instr = parent_instr; + if (src->is_ssa) + list_addtail(&src->use_link, &src->ssa->uses); + else + list_addtail(&src->use_link, &src->reg.reg->uses); + } else { + assert(parent_if); + src->parent_if = parent_if; + if (src->is_ssa) + list_addtail(&src->use_link, &src->ssa->if_uses); + else + list_addtail(&src->use_link, &src->reg.reg->if_uses); + } + } +} + +void +nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src) +{ + assert(!src_is_valid(src) || src->parent_instr == instr); + + src_remove_all_uses(src); + *src = new_src; + src_add_all_uses(src, instr, NULL); +} + +void +nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src) +{ + assert(!src_is_valid(dest) || dest->parent_instr == dest_instr); + + src_remove_all_uses(dest); + src_remove_all_uses(src); + *dest = *src; + *src = NIR_SRC_INIT; + src_add_all_uses(dest, dest_instr, NULL); +} + +void +nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src) +{ + nir_src *src = &if_stmt->condition; + assert(!src_is_valid(src) || src->parent_if == if_stmt); + + src_remove_all_uses(src); + *src = new_src; + src_add_all_uses(src, NULL, if_stmt); +} + +void +nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest) +{ + if (dest->is_ssa) { + /* We can only overwrite an SSA destination if it has no uses. */ + assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses)); + } else { + list_del(&dest->reg.def_link); + if (dest->reg.indirect) + src_remove_all_uses(dest->reg.indirect); + } + + /* We can't re-write with an SSA def */ + assert(!new_dest.is_ssa); + + nir_dest_copy(dest, &new_dest, instr); + + dest->reg.parent_instr = instr; + list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs); + + if (dest->reg.indirect) + src_add_all_uses(dest->reg.indirect, instr, NULL); +} + +void +nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, + unsigned num_components, const char *name) +{ + def->name = name; + def->parent_instr = instr; + list_inithead(&def->uses); + list_inithead(&def->if_uses); + def->num_components = num_components; + + if (instr->block) { + nir_function_impl *impl = + nir_cf_node_get_function(&instr->block->cf_node); + + def->index = impl->ssa_alloc++; + } else { + def->index = UINT_MAX; + } +} + +void +nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, + unsigned num_components, const char *name) +{ + dest->is_ssa = true; + nir_ssa_def_init(instr, &dest->ssa, num_components, name); +} + +void +nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src) +{ + assert(!new_src.is_ssa || def != new_src.ssa); + + nir_foreach_use_safe(def, use_src) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + + nir_foreach_if_use_safe(def, use_src) + nir_if_rewrite_condition(use_src->parent_if, new_src); +} + +static bool +is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between) +{ + assert(start->block == end->block); + + if (between->block != start->block) + return false; + + /* Search backwards looking for "between" */ + while (start != end) { + if (between == end) + return true; + + end = nir_instr_prev(end); + assert(end); + } + + return false; +} + +/* Replaces all uses of the given SSA def with the given source but only if + * the use comes after the after_me instruction. 
This can be useful if you + * are emitting code to fix up the result of some instruction: you can freely + * use the result in that code and then call rewrite_uses_after and pass the + * last fixup instruction as after_me and it will replace all of the uses you + * want without touching the fixup code. + * + * This function assumes that after_me is in the same block as + * def->parent_instr and that after_me comes after def->parent_instr. + */ +void +nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, + nir_instr *after_me) +{ + assert(!new_src.is_ssa || def != new_src.ssa); + + nir_foreach_use_safe(def, use_src) { + assert(use_src->parent_instr != def->parent_instr); + /* Since def already dominates all of its uses, the only way a use can + * not be dominated by after_me is if it is between def and after_me in + * the instruction list. + */ + if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr)) + nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); + } + + nir_foreach_if_use_safe(def, use_src) + nir_if_rewrite_condition(use_src->parent_if, new_src); +} + +static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, + bool reverse, void *state); + +static inline bool +foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state) +{ + if (reverse) { + foreach_list_typed_reverse_safe(nir_cf_node, node, node, + &if_stmt->else_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + + foreach_list_typed_reverse_safe(nir_cf_node, node, node, + &if_stmt->then_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } else { + foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->then_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + + foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->else_list) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } + + return true; +} + +static inline bool +foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state) +{ + if (reverse) { + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &loop->body) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } else { + foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) { + if (!foreach_cf_node(node, cb, reverse, state)) + return false; + } + } + + return true; +} + +static bool +foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, + bool reverse, void *state) +{ + switch (node->type) { + case nir_cf_node_block: + return cb(nir_cf_node_as_block(node), state); + case nir_cf_node_if: + return foreach_if(nir_cf_node_as_if(node), cb, reverse, state); + case nir_cf_node_loop: + return foreach_loop(nir_cf_node_as_loop(node), cb, reverse, state); + break; + + default: + unreachable("Invalid CFG node type"); + break; + } + + return false; +} + +bool +nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, + void *state) +{ + return foreach_cf_node(node, cb, false, state); +} + +bool +nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state) +{ + foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) { + if (!foreach_cf_node(node, cb, false, state)) + return false; + } + + return cb(impl->end_block, state); +} + +bool +nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb, + void *state) +{ + if (!cb(impl->end_block, state)) + return false; + + foreach_list_typed_reverse_safe(nir_cf_node, node, node, &impl->body) 
{ + if (!foreach_cf_node(node, cb, true, state)) + return false; + } + + return true; +} + +nir_if * +nir_block_get_following_if(nir_block *block) +{ + if (exec_node_is_tail_sentinel(&block->cf_node.node)) + return NULL; + + if (nir_cf_node_is_last(&block->cf_node)) + return NULL; + + nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); + + if (next_node->type != nir_cf_node_if) + return NULL; + + return nir_cf_node_as_if(next_node); +} + +nir_loop * +nir_block_get_following_loop(nir_block *block) +{ + if (exec_node_is_tail_sentinel(&block->cf_node.node)) + return NULL; + + if (nir_cf_node_is_last(&block->cf_node)) + return NULL; + + nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); + + if (next_node->type != nir_cf_node_loop) + return NULL; + + return nir_cf_node_as_loop(next_node); +} +static bool +index_block(nir_block *block, void *state) +{ + unsigned *index = state; + block->index = (*index)++; + return true; +} + +void +nir_index_blocks(nir_function_impl *impl) +{ + unsigned index = 0; + + if (impl->valid_metadata & nir_metadata_block_index) + return; + + nir_foreach_block(impl, index_block, &index); + + impl->num_blocks = index; +} + +static bool +index_ssa_def_cb(nir_ssa_def *def, void *state) +{ + unsigned *index = (unsigned *) state; + def->index = (*index)++; + + return true; +} + +static bool +index_ssa_block(nir_block *block, void *state) +{ + nir_foreach_instr(block, instr) + nir_foreach_ssa_def(instr, index_ssa_def_cb, state); + + return true; +} + +/** + * The indices are applied top-to-bottom which has the very nice property + * that, if A dominates B, then A->index <= B->index. + */ +void +nir_index_ssa_defs(nir_function_impl *impl) +{ + unsigned index = 0; + nir_foreach_block(impl, index_ssa_block, &index); + impl->ssa_alloc = index; +} + +static bool +index_instrs_block(nir_block *block, void *state) +{ + unsigned *index = state; + nir_foreach_instr(block, instr) + instr->index = (*index)++; + + return true; +} + +/** + * The indices are applied top-to-bottom which has the very nice property + * that, if A dominates B, then A->index <= B->index. 
+ */ +unsigned +nir_index_instrs(nir_function_impl *impl) +{ + unsigned index = 0; + nir_foreach_block(impl, index_instrs_block, &index); + return index; +} + +nir_intrinsic_op +nir_intrinsic_from_system_value(gl_system_value val) +{ + switch (val) { + case SYSTEM_VALUE_VERTEX_ID: + return nir_intrinsic_load_vertex_id; + case SYSTEM_VALUE_INSTANCE_ID: + return nir_intrinsic_load_instance_id; + case SYSTEM_VALUE_DRAW_ID: + return nir_intrinsic_load_draw_id; + case SYSTEM_VALUE_BASE_INSTANCE: + return nir_intrinsic_load_base_instance; + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: + return nir_intrinsic_load_vertex_id_zero_base; + case SYSTEM_VALUE_BASE_VERTEX: + return nir_intrinsic_load_base_vertex; + case SYSTEM_VALUE_INVOCATION_ID: + return nir_intrinsic_load_invocation_id; + case SYSTEM_VALUE_FRONT_FACE: + return nir_intrinsic_load_front_face; + case SYSTEM_VALUE_SAMPLE_ID: + return nir_intrinsic_load_sample_id; + case SYSTEM_VALUE_SAMPLE_POS: + return nir_intrinsic_load_sample_pos; + case SYSTEM_VALUE_SAMPLE_MASK_IN: + return nir_intrinsic_load_sample_mask_in; + case SYSTEM_VALUE_LOCAL_INVOCATION_ID: + return nir_intrinsic_load_local_invocation_id; + case SYSTEM_VALUE_WORK_GROUP_ID: + return nir_intrinsic_load_work_group_id; + case SYSTEM_VALUE_NUM_WORK_GROUPS: + return nir_intrinsic_load_num_work_groups; + case SYSTEM_VALUE_PRIMITIVE_ID: + return nir_intrinsic_load_primitive_id; + case SYSTEM_VALUE_TESS_COORD: + return nir_intrinsic_load_tess_coord; + case SYSTEM_VALUE_TESS_LEVEL_OUTER: + return nir_intrinsic_load_tess_level_outer; + case SYSTEM_VALUE_TESS_LEVEL_INNER: + return nir_intrinsic_load_tess_level_inner; + case SYSTEM_VALUE_VERTICES_IN: + return nir_intrinsic_load_patch_vertices_in; + case SYSTEM_VALUE_HELPER_INVOCATION: + return nir_intrinsic_load_helper_invocation; + default: + unreachable("system value does not directly correspond to intrinsic"); + } +} + +gl_system_value +nir_system_value_from_intrinsic(nir_intrinsic_op intrin) +{ + switch (intrin) { + case nir_intrinsic_load_vertex_id: + return SYSTEM_VALUE_VERTEX_ID; + case nir_intrinsic_load_instance_id: + return SYSTEM_VALUE_INSTANCE_ID; + case nir_intrinsic_load_draw_id: + return SYSTEM_VALUE_DRAW_ID; + case nir_intrinsic_load_base_instance: + return SYSTEM_VALUE_BASE_INSTANCE; + case nir_intrinsic_load_vertex_id_zero_base: + return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + case nir_intrinsic_load_base_vertex: + return SYSTEM_VALUE_BASE_VERTEX; + case nir_intrinsic_load_invocation_id: + return SYSTEM_VALUE_INVOCATION_ID; + case nir_intrinsic_load_front_face: + return SYSTEM_VALUE_FRONT_FACE; + case nir_intrinsic_load_sample_id: + return SYSTEM_VALUE_SAMPLE_ID; + case nir_intrinsic_load_sample_pos: + return SYSTEM_VALUE_SAMPLE_POS; + case nir_intrinsic_load_sample_mask_in: + return SYSTEM_VALUE_SAMPLE_MASK_IN; + case nir_intrinsic_load_local_invocation_id: + return SYSTEM_VALUE_LOCAL_INVOCATION_ID; + case nir_intrinsic_load_num_work_groups: + return SYSTEM_VALUE_NUM_WORK_GROUPS; + case nir_intrinsic_load_work_group_id: + return SYSTEM_VALUE_WORK_GROUP_ID; + case nir_intrinsic_load_primitive_id: + return SYSTEM_VALUE_PRIMITIVE_ID; + case nir_intrinsic_load_tess_coord: + return SYSTEM_VALUE_TESS_COORD; + case nir_intrinsic_load_tess_level_outer: + return SYSTEM_VALUE_TESS_LEVEL_OUTER; + case nir_intrinsic_load_tess_level_inner: + return SYSTEM_VALUE_TESS_LEVEL_INNER; + case nir_intrinsic_load_patch_vertices_in: + return SYSTEM_VALUE_VERTICES_IN; + case nir_intrinsic_load_helper_invocation: + return SYSTEM_VALUE_HELPER_INVOCATION; 
+ default: + unreachable("intrinsic doesn't produce a system value"); + } +} diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h new file mode 100644 index 00000000000..54e23eb4754 --- /dev/null +++ b/src/compiler/nir/nir.h @@ -0,0 +1,2111 @@ +/* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#pragma once + +#include "util/hash_table.h" +#include "glsl/list.h" +#include "GL/gl.h" /* GLenum */ +#include "util/list.h" +#include "util/ralloc.h" +#include "util/set.h" +#include "util/bitset.h" +#include "compiler/nir_types.h" +#include "compiler/shader_enums.h" +#include + +#include "nir_opcodes.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct gl_program; +struct gl_shader_program; + +#define NIR_FALSE 0u +#define NIR_TRUE (~0u) + +/** Defines a cast function + * + * This macro defines a cast function from in_type to out_type where + * out_type is some structure type that contains a field of type out_type. + * + * Note that you have to be a bit careful as the generated cast function + * destroys constness. + */ +#define NIR_DEFINE_CAST(name, in_type, out_type, field) \ +static inline out_type * \ +name(const in_type *parent) \ +{ \ + return exec_node_data(out_type, parent, field); \ +} + +struct nir_function; +struct nir_shader; +struct nir_instr; + + +/** + * Description of built-in state associated with a uniform + * + * \sa nir_variable::state_slots + */ +typedef struct { + int tokens[5]; + int swizzle; +} nir_state_slot; + +typedef enum { + nir_var_all = -1, + nir_var_shader_in, + nir_var_shader_out, + nir_var_global, + nir_var_local, + nir_var_uniform, + nir_var_shader_storage, + nir_var_system_value +} nir_variable_mode; + +/** + * Data stored in an nir_constant + */ +union nir_constant_data { + unsigned u[16]; + int i[16]; + float f[16]; + bool b[16]; +}; + +typedef struct nir_constant { + /** + * Value of the constant. + * + * The field used to back the values supplied by the constant is determined + * by the type associated with the \c nir_variable. Constants may be + * scalars, vectors, or matrices. + */ + union nir_constant_data value; + + /* we could get this from the var->type but makes clone *much* easier to + * not have to care about the type. 
+ */ + unsigned num_elements; + + /* Array elements / Structure Fields */ + struct nir_constant **elements; +} nir_constant; + +/** + * \brief Layout qualifiers for gl_FragDepth. + * + * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared + * with a layout qualifier. + */ +typedef enum { + nir_depth_layout_none, /**< No depth layout is specified. */ + nir_depth_layout_any, + nir_depth_layout_greater, + nir_depth_layout_less, + nir_depth_layout_unchanged +} nir_depth_layout; + +/** + * Either a uniform, global variable, shader input, or shader output. Based on + * ir_variable - it should be easy to translate between the two. + */ + +typedef struct nir_variable { + struct exec_node node; + + /** + * Declared type of the variable + */ + const struct glsl_type *type; + + /** + * Declared name of the variable + */ + char *name; + + struct nir_variable_data { + + /** + * Is the variable read-only? + * + * This is set for variables declared as \c const, shader inputs, + * and uniforms. + */ + unsigned read_only:1; + unsigned centroid:1; + unsigned sample:1; + unsigned patch:1; + unsigned invariant:1; + + /** + * Storage class of the variable. + * + * \sa nir_variable_mode + */ + nir_variable_mode mode:4; + + /** + * Interpolation mode for shader inputs / outputs + * + * \sa glsl_interp_qualifier + */ + unsigned interpolation:2; + + /** + * \name ARB_fragment_coord_conventions + * @{ + */ + unsigned origin_upper_left:1; + unsigned pixel_center_integer:1; + /*@}*/ + + /** + * Was the location explicitly set in the shader? + * + * If the location is explicitly set in the shader, it \b cannot be changed + * by the linker or by the API (e.g., calls to \c glBindAttribLocation have + * no effect). + */ + unsigned explicit_location:1; + unsigned explicit_index:1; + + /** + * Was an initial binding explicitly set in the shader? + * + * If so, constant_initializer contains an integer nir_constant + * representing the initial binding point. + */ + unsigned explicit_binding:1; + + /** + * Does this variable have an initializer? + * + * This is used by the linker to cross-validiate initializers of global + * variables. + */ + unsigned has_initializer:1; + + /** + * If non-zero, then this variable may be packed along with other variables + * into a single varying slot, so this offset should be applied when + * accessing components. For example, an offset of 1 means that the x + * component of this variable is actually stored in component y of the + * location specified by \c location. + */ + unsigned location_frac:2; + + /** + * Non-zero if this variable was created by lowering a named interface + * block which was not an array. + * + * Note that this variable and \c from_named_ifc_block_array will never + * both be non-zero. + */ + unsigned from_named_ifc_block_nonarray:1; + + /** + * Non-zero if this variable was created by lowering a named interface + * block which was an array. + * + * Note that this variable and \c from_named_ifc_block_nonarray will never + * both be non-zero. + */ + unsigned from_named_ifc_block_array:1; + + /** + * \brief Layout qualifier for gl_FragDepth. + * + * This is not equal to \c ir_depth_layout_none if and only if this + * variable is \c gl_FragDepth and a layout qualifier is specified. + */ + nir_depth_layout depth_layout; + + /** + * Storage location of the base of this variable + * + * The precise meaning of this field depends on the nature of the variable. + * + * - Vertex shader input: one of the values from \c gl_vert_attrib. 
+ * - Vertex shader output: one of the values from \c gl_varying_slot. + * - Geometry shader input: one of the values from \c gl_varying_slot. + * - Geometry shader output: one of the values from \c gl_varying_slot. + * - Fragment shader input: one of the values from \c gl_varying_slot. + * - Fragment shader output: one of the values from \c gl_frag_result. + * - Uniforms: Per-stage uniform slot number for default uniform block. + * - Uniforms: Index within the uniform block definition for UBO members. + * - Non-UBO Uniforms: uniform slot number. + * - Other: This field is not currently used. + * + * If the variable is a uniform, shader input, or shader output, and the + * slot has not been assigned, the value will be -1. + */ + int location; + + /** + * The actual location of the variable in the IR. Only valid for inputs + * and outputs. + */ + unsigned int driver_location; + + /** + * output index for dual source blending. + */ + int index; + + /** + * Initial binding point for a sampler or UBO. + * + * For array types, this represents the binding point for the first element. + */ + int binding; + + /** + * Location an atomic counter is stored at. + */ + unsigned offset; + + /** + * ARB_shader_image_load_store qualifiers. + */ + struct { + bool read_only; /**< "readonly" qualifier. */ + bool write_only; /**< "writeonly" qualifier. */ + bool coherent; + bool _volatile; + bool restrict_flag; + + /** Image internal format if specified explicitly, otherwise GL_NONE. */ + GLenum format; + } image; + + /** + * Highest element accessed with a constant expression array index + * + * Not used for non-array variables. + */ + unsigned max_array_access; + + } data; + + /** + * Built-in state that backs this uniform + * + * Once set at variable creation, \c state_slots must remain invariant. + * This is because, ideally, this array would be shared by all clones of + * this variable in the IR tree. In other words, we'd really like for it + * to be a fly-weight. + * + * If the variable is not a uniform, \c num_state_slots will be zero and + * \c state_slots will be \c NULL. + */ + /*@{*/ + unsigned num_state_slots; /**< Number of state slots used */ + nir_state_slot *state_slots; /**< State descriptors. */ + /*@}*/ + + /** + * Constant expression assigned in the initializer of the variable + */ + nir_constant *constant_initializer; + + /** + * For variables that are in an interface block or are an instance of an + * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. + * + * \sa ir_variable::location + */ + const struct glsl_type *interface_type; +} nir_variable; + +#define nir_foreach_variable(var, var_list) \ + foreach_list_typed(nir_variable, var, node, var_list) + +typedef struct nir_register { + struct exec_node node; + + unsigned num_components; /** < number of vector components */ + unsigned num_array_elems; /** < size of array (0 for no array) */ + + /** generic register index. */ + unsigned index; + + /** only for debug purposes, can be NULL */ + const char *name; + + /** whether this register is local (per-function) or global (per-shader) */ + bool is_global; + + /** + * If this flag is set to true, then accessing channels >= num_components + * is well-defined, and simply spills over to the next array element. This + * is useful for backends that can do per-component accessing, in + * particular scalar backends. 
By setting this flag and making + * num_components equal to 1, structures can be packed tightly into + * registers and then registers can be accessed per-component to get to + * each structure member, even if it crosses vec4 boundaries. + */ + bool is_packed; + + /** set of nir_src's where this register is used (read from) */ + struct list_head uses; + + /** set of nir_dest's where this register is defined (written to) */ + struct list_head defs; + + /** set of nir_if's where this register is used as a condition */ + struct list_head if_uses; +} nir_register; + +typedef enum { + nir_instr_type_alu, + nir_instr_type_call, + nir_instr_type_tex, + nir_instr_type_intrinsic, + nir_instr_type_load_const, + nir_instr_type_jump, + nir_instr_type_ssa_undef, + nir_instr_type_phi, + nir_instr_type_parallel_copy, +} nir_instr_type; + +typedef struct nir_instr { + struct exec_node node; + nir_instr_type type; + struct nir_block *block; + + /** generic instruction index. */ + unsigned index; + + /* A temporary for optimization and analysis passes to use for storing + * flags. For instance, DCE uses this to store the "dead/live" info. + */ + uint8_t pass_flags; +} nir_instr; + +static inline nir_instr * +nir_instr_next(nir_instr *instr) +{ + struct exec_node *next = exec_node_get_next(&instr->node); + if (exec_node_is_tail_sentinel(next)) + return NULL; + else + return exec_node_data(nir_instr, next, node); +} + +static inline nir_instr * +nir_instr_prev(nir_instr *instr) +{ + struct exec_node *prev = exec_node_get_prev(&instr->node); + if (exec_node_is_head_sentinel(prev)) + return NULL; + else + return exec_node_data(nir_instr, prev, node); +} + +static inline bool +nir_instr_is_first(nir_instr *instr) +{ + return exec_node_is_head_sentinel(exec_node_get_prev(&instr->node)); +} + +static inline bool +nir_instr_is_last(nir_instr *instr) +{ + return exec_node_is_tail_sentinel(exec_node_get_next(&instr->node)); +} + +typedef struct nir_ssa_def { + /** for debugging only, can be NULL */ + const char* name; + + /** generic SSA definition index. 
*/ + unsigned index; + + /** Index into the live_in and live_out bitfields */ + unsigned live_index; + + nir_instr *parent_instr; + + /** set of nir_instr's where this register is used (read from) */ + struct list_head uses; + + /** set of nir_if's where this register is used as a condition */ + struct list_head if_uses; + + uint8_t num_components; +} nir_ssa_def; + +struct nir_src; + +typedef struct { + nir_register *reg; + struct nir_src *indirect; /** < NULL for no indirect offset */ + unsigned base_offset; + + /* TODO use-def chain goes here */ +} nir_reg_src; + +typedef struct { + nir_instr *parent_instr; + struct list_head def_link; + + nir_register *reg; + struct nir_src *indirect; /** < NULL for no indirect offset */ + unsigned base_offset; + + /* TODO def-use chain goes here */ +} nir_reg_dest; + +struct nir_if; + +typedef struct nir_src { + union { + nir_instr *parent_instr; + struct nir_if *parent_if; + }; + + struct list_head use_link; + + union { + nir_reg_src reg; + nir_ssa_def *ssa; + }; + + bool is_ssa; +} nir_src; + +#define NIR_SRC_INIT (nir_src) { { NULL } } + +#define nir_foreach_use(reg_or_ssa_def, src) \ + list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) + +#define nir_foreach_use_safe(reg_or_ssa_def, src) \ + list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) + +#define nir_foreach_if_use(reg_or_ssa_def, src) \ + list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) + +#define nir_foreach_if_use_safe(reg_or_ssa_def, src) \ + list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) + +typedef struct { + union { + nir_reg_dest reg; + nir_ssa_def ssa; + }; + + bool is_ssa; +} nir_dest; + +#define NIR_DEST_INIT (nir_dest) { { { NULL } } } + +#define nir_foreach_def(reg, dest) \ + list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) + +#define nir_foreach_def_safe(reg, dest) \ + list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) + +static inline nir_src +nir_src_for_ssa(nir_ssa_def *def) +{ + nir_src src = NIR_SRC_INIT; + + src.is_ssa = true; + src.ssa = def; + + return src; +} + +static inline nir_src +nir_src_for_reg(nir_register *reg) +{ + nir_src src = NIR_SRC_INIT; + + src.is_ssa = false; + src.reg.reg = reg; + src.reg.indirect = NULL; + src.reg.base_offset = 0; + + return src; +} + +static inline nir_dest +nir_dest_for_reg(nir_register *reg) +{ + nir_dest dest = NIR_DEST_INIT; + + dest.reg.reg = reg; + + return dest; +} + +void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); +void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); + +typedef struct { + nir_src src; + + /** + * \name input modifiers + */ + /*@{*/ + /** + * For inputs interpreted as floating point, flips the sign bit. For + * inputs interpreted as integers, performs the two's complement negation. + */ + bool negate; + + /** + * Clears the sign bit for floating point values, and computes the integer + * absolute value for integers. Note that the negate modifier acts after + * the absolute value modifier, therefore if both are set then all inputs + * will become negative. + */ + bool abs; + /*@}*/ + + /** + * For each input component, says which component of the register it is + * chosen from. 
Note that which elements of the swizzle are used and which + * are ignored are based on the write mask for most opcodes - for example, + * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and + * a swizzle of {2, x, 1, 0} where x means "don't care." + */ + uint8_t swizzle[4]; +} nir_alu_src; + +typedef struct { + nir_dest dest; + + /** + * \name saturate output modifier + * + * Only valid for opcodes that output floating-point numbers. Clamps the + * output to between 0.0 and 1.0 inclusive. + */ + + bool saturate; + + unsigned write_mask : 4; /* ignored if dest.is_ssa is true */ +} nir_alu_dest; + +typedef enum { + nir_type_invalid = 0, /* Not a valid type */ + nir_type_float, + nir_type_int, + nir_type_uint, + nir_type_bool +} nir_alu_type; + +typedef enum { + NIR_OP_IS_COMMUTATIVE = (1 << 0), + NIR_OP_IS_ASSOCIATIVE = (1 << 1), +} nir_op_algebraic_property; + +typedef struct { + const char *name; + + unsigned num_inputs; + + /** + * The number of components in the output + * + * If non-zero, this is the size of the output and input sizes are + * explicitly given; swizzle and writemask are still in effect, but if + * the output component is masked out, then the input component may + * still be in use. + * + * If zero, the opcode acts in the standard, per-component manner; the + * operation is performed on each component (except the ones that are + * masked out) with the input being taken from the input swizzle for + * that component. + * + * The size of some of the inputs may be given (i.e. non-zero) even + * though output_size is zero; in that case, the inputs with a zero + * size act per-component, while the inputs with non-zero size don't. + */ + unsigned output_size; + + /** + * The type of vector that the instruction outputs. Note that the + * staurate modifier is only allowed on outputs with the float type. + */ + + nir_alu_type output_type; + + /** + * The number of components in each input + */ + unsigned input_sizes[4]; + + /** + * The type of vector that each input takes. Note that negate and + * absolute value are only allowed on inputs with int or float type and + * behave differently on the two. + */ + nir_alu_type input_types[4]; + + nir_op_algebraic_property algebraic_properties; +} nir_op_info; + +extern const nir_op_info nir_op_infos[nir_num_opcodes]; + +typedef struct nir_alu_instr { + nir_instr instr; + nir_op op; + nir_alu_dest dest; + nir_alu_src src[]; +} nir_alu_instr; + +void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, + nir_alu_instr *instr); +void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, + nir_alu_instr *instr); + +/* is this source channel used? 
*/ +static inline bool +nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel) +{ + if (nir_op_infos[instr->op].input_sizes[src] > 0) + return channel < nir_op_infos[instr->op].input_sizes[src]; + + return (instr->dest.write_mask >> channel) & 1; +} + +/* + * For instructions whose destinations are SSA, get the number of channels + * used for a source + */ +static inline unsigned +nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src) +{ + assert(instr->dest.dest.is_ssa); + + if (nir_op_infos[instr->op].input_sizes[src] > 0) + return nir_op_infos[instr->op].input_sizes[src]; + + return instr->dest.dest.ssa.num_components; +} + +typedef enum { + nir_deref_type_var, + nir_deref_type_array, + nir_deref_type_struct +} nir_deref_type; + +typedef struct nir_deref { + nir_deref_type deref_type; + struct nir_deref *child; + const struct glsl_type *type; +} nir_deref; + +typedef struct { + nir_deref deref; + + nir_variable *var; +} nir_deref_var; + +/* This enum describes how the array is referenced. If the deref is + * direct then the base_offset is used. If the deref is indirect then then + * offset is given by base_offset + indirect. If the deref is a wildcard + * then the deref refers to all of the elements of the array at the same + * time. Wildcard dereferences are only ever allowed in copy_var + * intrinsics and the source and destination derefs must have matching + * wildcards. + */ +typedef enum { + nir_deref_array_type_direct, + nir_deref_array_type_indirect, + nir_deref_array_type_wildcard, +} nir_deref_array_type; + +typedef struct { + nir_deref deref; + + nir_deref_array_type deref_array_type; + unsigned base_offset; + nir_src indirect; +} nir_deref_array; + +typedef struct { + nir_deref deref; + + unsigned index; +} nir_deref_struct; + +NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref) +NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref) +NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref) + +/* Returns the last deref in the chain. */ +static inline nir_deref * +nir_deref_tail(nir_deref *deref) +{ + while (deref->child) + deref = deref->child; + return deref; +} + +typedef struct { + nir_instr instr; + + unsigned num_params; + nir_deref_var **params; + nir_deref_var *return_deref; + + struct nir_function *callee; +} nir_call_instr; + +#define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \ + num_variables, num_indices, flags) \ + nir_intrinsic_##name, + +#define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name, + +typedef enum { +#include "nir_intrinsics.h" + nir_num_intrinsics = nir_last_intrinsic + 1 +} nir_intrinsic_op; + +#undef INTRINSIC +#undef LAST_INTRINSIC + +/** Represents an intrinsic + * + * An intrinsic is an instruction type for handling things that are + * more-or-less regular operations but don't just consume and produce SSA + * values like ALU operations do. Intrinsics are not for things that have + * special semantic meaning such as phi nodes and parallel copies. + * Examples of intrinsics include variable load/store operations, system + * value loads, and the like. Even though texturing more-or-less falls + * under this category, texturing is its own instruction type because + * trying to represent texturing with intrinsics would lead to a + * combinatorial explosion of intrinsic opcodes. 
+ * + * By having a single instruction type for handling a lot of different + * cases, optimization passes can look for intrinsics and, for the most + * part, completely ignore them. Each intrinsic type also has a few + * possible flags that govern whether or not they can be reordered or + * eliminated. That way passes like dead code elimination can still work + * on intrisics without understanding the meaning of each. + * + * Each intrinsic has some number of constant indices, some number of + * variables, and some number of sources. What these sources, variables, + * and indices mean depends on the intrinsic and is documented with the + * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture + * instructions are the only types of instruction that can operate on + * variables. + */ +typedef struct { + nir_instr instr; + + nir_intrinsic_op intrinsic; + + nir_dest dest; + + /** number of components if this is a vectorized intrinsic + * + * Similarly to ALU operations, some intrinsics are vectorized. + * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0. + * For vectorized intrinsics, the num_components field specifies the + * number of destination components and the number of source components + * for all sources with nir_intrinsic_infos.src_components[i] == 0. + */ + uint8_t num_components; + + int const_index[3]; + + nir_deref_var *variables[2]; + + nir_src src[]; +} nir_intrinsic_instr; + +/** + * \name NIR intrinsics semantic flags + * + * information about what the compiler can do with the intrinsics. + * + * \sa nir_intrinsic_info::flags + */ +typedef enum { + /** + * whether the intrinsic can be safely eliminated if none of its output + * value is not being used. + */ + NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0), + + /** + * Whether the intrinsic can be reordered with respect to any other + * intrinsic, i.e. whether the only reordering dependencies of the + * intrinsic are due to the register reads/writes. + */ + NIR_INTRINSIC_CAN_REORDER = (1 << 1), +} nir_intrinsic_semantic_flag; + +#define NIR_INTRINSIC_MAX_INPUTS 4 + +typedef struct { + const char *name; + + unsigned num_srcs; /** < number of register/SSA inputs */ + + /** number of components of each input register + * + * If this value is 0, the number of components is given by the + * num_components field of nir_intrinsic_instr. + */ + unsigned src_components[NIR_INTRINSIC_MAX_INPUTS]; + + bool has_dest; + + /** number of components of the output register + * + * If this value is 0, the number of components is given by the + * num_components field of nir_intrinsic_instr. + */ + unsigned dest_components; + + /** the number of inputs/outputs that are variables */ + unsigned num_variables; + + /** the number of constant indices used by the intrinsic */ + unsigned num_indices; + + /** semantic flags for calls to this intrinsic */ + nir_intrinsic_semantic_flag flags; +} nir_intrinsic_info; + +extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; + +/** + * \group texture information + * + * This gives semantic information about textures which is useful to the + * frontend, the backend, and lowering passes, but not the optimizer. 
+ */ + +typedef enum { + nir_tex_src_coord, + nir_tex_src_projector, + nir_tex_src_comparitor, /* shadow comparitor */ + nir_tex_src_offset, + nir_tex_src_bias, + nir_tex_src_lod, + nir_tex_src_ms_index, /* MSAA sample index */ + nir_tex_src_ddx, + nir_tex_src_ddy, + nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ + nir_num_tex_src_types +} nir_tex_src_type; + +typedef struct { + nir_src src; + nir_tex_src_type src_type; +} nir_tex_src; + +typedef enum { + nir_texop_tex, /**< Regular texture look-up */ + nir_texop_txb, /**< Texture look-up with LOD bias */ + nir_texop_txl, /**< Texture look-up with explicit LOD */ + nir_texop_txd, /**< Texture look-up with partial derivatvies */ + nir_texop_txf, /**< Texel fetch with explicit LOD */ + nir_texop_txf_ms, /**< Multisample texture fetch */ + nir_texop_txs, /**< Texture size */ + nir_texop_lod, /**< Texture lod query */ + nir_texop_tg4, /**< Texture gather */ + nir_texop_query_levels, /**< Texture levels query */ + nir_texop_texture_samples, /**< Texture samples query */ + nir_texop_samples_identical, /**< Query whether all samples are definitely + * identical. + */ +} nir_texop; + +typedef struct { + nir_instr instr; + + enum glsl_sampler_dim sampler_dim; + nir_alu_type dest_type; + + nir_texop op; + nir_dest dest; + nir_tex_src *src; + unsigned num_srcs, coord_components; + bool is_array, is_shadow; + + /** + * If is_shadow is true, whether this is the old-style shadow that outputs 4 + * components or the new-style shadow that outputs 1 component. + */ + bool is_new_style_shadow; + + /* constant offset - must be 0 if the offset source is used */ + int const_offset[4]; + + /* gather component selector */ + unsigned component : 2; + + /** The sampler index + * + * If this texture instruction has a nir_tex_src_sampler_offset source, + * then the sampler index is given by sampler_index + sampler_offset. + */ + unsigned sampler_index; + + /** The size of the sampler array or 0 if it's not an array */ + unsigned sampler_array_size; + + nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */ +} nir_tex_instr; + +static inline unsigned +nir_tex_instr_dest_size(nir_tex_instr *instr) +{ + switch (instr->op) { + case nir_texop_txs: { + unsigned ret; + switch (instr->sampler_dim) { + case GLSL_SAMPLER_DIM_1D: + case GLSL_SAMPLER_DIM_BUF: + ret = 1; + break; + case GLSL_SAMPLER_DIM_2D: + case GLSL_SAMPLER_DIM_CUBE: + case GLSL_SAMPLER_DIM_MS: + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_EXTERNAL: + ret = 2; + break; + case GLSL_SAMPLER_DIM_3D: + ret = 3; + break; + default: + unreachable("not reached"); + } + if (instr->is_array) + ret++; + return ret; + } + + case nir_texop_lod: + return 2; + + case nir_texop_texture_samples: + case nir_texop_query_levels: + case nir_texop_samples_identical: + return 1; + + default: + if (instr->is_shadow && instr->is_new_style_shadow) + return 1; + + return 4; + } +} + +/* Returns true if this texture operation queries something about the texture + * rather than actually sampling it. 
+ */
+static inline bool
+nir_tex_instr_is_query(nir_tex_instr *instr)
+{
+   switch (instr->op) {
+   case nir_texop_txs:
+   case nir_texop_lod:
+   case nir_texop_texture_samples:
+   case nir_texop_query_levels:
+      return true;
+   case nir_texop_tex:
+   case nir_texop_txb:
+   case nir_texop_txl:
+   case nir_texop_txd:
+   case nir_texop_txf:
+   case nir_texop_txf_ms:
+   case nir_texop_tg4:
+   case nir_texop_samples_identical: /* reads texel data, so not a query */
+      return false;
+   default:
+      unreachable("Invalid texture opcode");
+   }
+}
+
+static inline unsigned
+nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src)
+{
+   if (instr->src[src].src_type == nir_tex_src_coord)
+      return instr->coord_components;
+
+   if (instr->src[src].src_type == nir_tex_src_offset ||
+       instr->src[src].src_type == nir_tex_src_ddx ||
+       instr->src[src].src_type == nir_tex_src_ddy) {
+      if (instr->is_array)
+         return instr->coord_components - 1;
+      else
+         return instr->coord_components;
+   }
+
+   return 1;
+}
+
+static inline int
+nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
+{
+   for (unsigned i = 0; i < instr->num_srcs; i++)
+      if (instr->src[i].src_type == type)
+         return (int) i;
+
+   return -1;
+}
+
+typedef struct {
+   union {
+      float f[4];
+      int32_t i[4];
+      uint32_t u[4];
+   };
+} nir_const_value;
+
+typedef struct {
+   nir_instr instr;
+
+   nir_const_value value;
+
+   nir_ssa_def def;
+} nir_load_const_instr;
+
+typedef enum {
+   nir_jump_return,
+   nir_jump_break,
+   nir_jump_continue,
+} nir_jump_type;
+
+typedef struct {
+   nir_instr instr;
+   nir_jump_type type;
+} nir_jump_instr;
+
+/* creates a new SSA variable in an undefined state */
+
+typedef struct {
+   nir_instr instr;
+   nir_ssa_def def;
+} nir_ssa_undef_instr;
+
+typedef struct {
+   struct exec_node node;
+
+   /* The predecessor block corresponding to this source */
+   struct nir_block *pred;
+
+   nir_src src;
+} nir_phi_src;
+
+#define nir_foreach_phi_src(phi, entry) \
+   foreach_list_typed(nir_phi_src, entry, node, &(phi)->srcs)
+#define nir_foreach_phi_src_safe(phi, entry) \
+   foreach_list_typed_safe(nir_phi_src, entry, node, &(phi)->srcs)
+
+typedef struct {
+   nir_instr instr;
+
+   struct exec_list srcs; /** < list of nir_phi_src */
+
+   nir_dest dest;
+} nir_phi_instr;
+
+typedef struct {
+   struct exec_node node;
+   nir_src src;
+   nir_dest dest;
+} nir_parallel_copy_entry;
+
+#define nir_foreach_parallel_copy_entry(pcopy, entry) \
+   foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries)
+
+typedef struct {
+   nir_instr instr;
+
+   /* A list of nir_parallel_copy_entry's.  The sources of all of the
+    * entries are copied to the corresponding destinations "in parallel".
+    * In other words, if we have two entries: a -> b and b -> a, the values
+    * get swapped.
+    */
+   struct exec_list entries;
+} nir_parallel_copy_instr;
+
+NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr)
+NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr,
+                nir_parallel_copy_instr, instr)
+
+/*
+ * Control flow
+ *
+ * Control flow consists of a tree of control flow nodes, which include
+ * if-statements and loops.
+ * The leaves of the tree are basic blocks: lists of instructions that
+ * always run start-to-finish.  Each basic block also keeps track of its
+ * successors (blocks which may run immediately after the current block)
+ * and predecessors (blocks which could have run immediately before the
+ * current block).  Each function also has a start block and an end block;
+ * the end block is always empty, and all return statements point to it.
+ * Together, all the blocks with their predecessors and successors make up
+ * the control flow graph (CFG) of the function.  There are helpers that
+ * modify the tree of control flow nodes while updating the CFG
+ * appropriately; these should be used instead of modifying the tree
+ * directly.
+ */
+
+typedef enum {
+   nir_cf_node_block,
+   nir_cf_node_if,
+   nir_cf_node_loop,
+   nir_cf_node_function
+} nir_cf_node_type;
+
+typedef struct nir_cf_node {
+   struct exec_node node;
+   nir_cf_node_type type;
+   struct nir_cf_node *parent;
+} nir_cf_node;
+
+typedef struct nir_block {
+   nir_cf_node cf_node;
+
+   struct exec_list instr_list; /** < list of nir_instr */
+
+   /** generic block index; generated by nir_index_blocks */
+   unsigned index;
+
+   /*
+    * Each block can only have up to 2 successors, so we put them in a simple
+    * array - no need for anything more complicated.
+    */
+   struct nir_block *successors[2];
+
+   /* Set of nir_block predecessors in the CFG */
+   struct set *predecessors;
+
+   /*
+    * this node's immediate dominator in the dominance tree - set to NULL for
+    * the start block.
+    */
+   struct nir_block *imm_dom;
+
+   /* This node's children in the dominance tree */
+   unsigned num_dom_children;
+   struct nir_block **dom_children;
+
+   /* Set of nir_block's on the dominance frontier of this block */
+   struct set *dom_frontier;
+
+   /*
+    * These two indices have the property that dom_{pre,post}_index for each
+    * child of this block in the dominance tree will always be between
+    * dom_pre_index and dom_post_index for this block, which makes testing if
+    * a given block is dominated by another block an O(1) operation.
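+    *
+    * In other words, the O(1) test is (a sketch of the invariant, not
+    * code from this patch):
+    *
+    *    a dominates b  iff  a->dom_pre_index <= b->dom_pre_index &&
+    *                        a->dom_post_index >= b->dom_post_index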
+    */
+   unsigned dom_pre_index, dom_post_index;
+
+   /* live in and out for this block; used for liveness analysis */
+   BITSET_WORD *live_in;
+   BITSET_WORD *live_out;
+} nir_block;
+
+static inline nir_instr *
+nir_block_first_instr(nir_block *block)
+{
+   struct exec_node *head = exec_list_get_head(&block->instr_list);
+   return exec_node_data(nir_instr, head, node);
+}
+
+static inline nir_instr *
+nir_block_last_instr(nir_block *block)
+{
+   struct exec_node *tail = exec_list_get_tail(&block->instr_list);
+   return exec_node_data(nir_instr, tail, node);
+}
+
+#define nir_foreach_instr(block, instr) \
+   foreach_list_typed(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_reverse(block, instr) \
+   foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_safe(block, instr) \
+   foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list)
+#define nir_foreach_instr_reverse_safe(block, instr) \
+   foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list)
+
+typedef struct nir_if {
+   nir_cf_node cf_node;
+   nir_src condition;
+
+   struct exec_list then_list; /** < list of nir_cf_node */
+   struct exec_list else_list; /** < list of nir_cf_node */
+} nir_if;
+
+static inline nir_cf_node *
+nir_if_first_then_node(nir_if *if_stmt)
+{
+   struct exec_node *head = exec_list_get_head(&if_stmt->then_list);
+   return exec_node_data(nir_cf_node, head, node);
+}
+
+static inline nir_cf_node *
+nir_if_last_then_node(nir_if *if_stmt)
+{
+   struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list);
+   return exec_node_data(nir_cf_node, tail, node);
+}
+
+static inline nir_cf_node *
+nir_if_first_else_node(nir_if *if_stmt)
+{
+   struct exec_node *head = exec_list_get_head(&if_stmt->else_list);
+   return exec_node_data(nir_cf_node, head, node);
+}
+
+static inline nir_cf_node *
+nir_if_last_else_node(nir_if *if_stmt)
+{
+   struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list);
+   return exec_node_data(nir_cf_node, tail, node);
+}
+
+typedef struct {
+   nir_cf_node cf_node;
+
+   struct exec_list body; /** < list of nir_cf_node */
+} nir_loop;
+
+static inline nir_cf_node *
+nir_loop_first_cf_node(nir_loop *loop)
+{
+   return exec_node_data(nir_cf_node, exec_list_get_head(&loop->body), node);
+}
+
+static inline nir_cf_node *
+nir_loop_last_cf_node(nir_loop *loop)
+{
+   return exec_node_data(nir_cf_node, exec_list_get_tail(&loop->body), node);
+}
+
+/**
+ * Various bits of metadata that may be created or required by
+ * optimization and analysis passes
+ */
+typedef enum {
+   nir_metadata_none = 0x0,
+   nir_metadata_block_index = 0x1,
+   nir_metadata_dominance = 0x2,
+   nir_metadata_live_ssa_defs = 0x4,
+   nir_metadata_not_properly_reset = 0x8,
+} nir_metadata;
+
+typedef struct {
+   nir_cf_node cf_node;
+
+   /** pointer to the function of which this is an implementation */
+   struct nir_function *function;
+
+   struct exec_list body; /** < list of nir_cf_node */
+
+   nir_block *end_block;
+
+   /** list for all local variables in the function */
+   struct exec_list locals;
+
+   /** array of variables used as parameters */
+   unsigned num_params;
+   nir_variable **params;
+
+   /** variable used to hold the result of the function */
+   nir_variable *return_var;
+
+   /** list of local registers in the function */
+   struct exec_list registers;
+
+   /** next available local register index */
+   unsigned reg_alloc;
+
+   /** next available SSA value index */
+   unsigned ssa_alloc;
+
+   /* total number of basic blocks, only valid when
block_index_dirty = false */ + unsigned num_blocks; + + nir_metadata valid_metadata; +} nir_function_impl; + +static inline nir_block * +nir_start_block(nir_function_impl *impl) +{ + return (nir_block *) exec_list_get_head(&impl->body); +} + +static inline nir_cf_node * +nir_cf_node_next(nir_cf_node *node) +{ + struct exec_node *next = exec_node_get_next(&node->node); + if (exec_node_is_tail_sentinel(next)) + return NULL; + else + return exec_node_data(nir_cf_node, next, node); +} + +static inline nir_cf_node * +nir_cf_node_prev(nir_cf_node *node) +{ + struct exec_node *prev = exec_node_get_prev(&node->node); + if (exec_node_is_head_sentinel(prev)) + return NULL; + else + return exec_node_data(nir_cf_node, prev, node); +} + +static inline bool +nir_cf_node_is_first(const nir_cf_node *node) +{ + return exec_node_is_head_sentinel(node->node.prev); +} + +static inline bool +nir_cf_node_is_last(const nir_cf_node *node) +{ + return exec_node_is_tail_sentinel(node->node.next); +} + +NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node) +NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node) +NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node) +NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, nir_function_impl, cf_node) + +typedef enum { + nir_parameter_in, + nir_parameter_out, + nir_parameter_inout, +} nir_parameter_type; + +typedef struct { + nir_parameter_type param_type; + const struct glsl_type *type; +} nir_parameter; + +typedef struct nir_function { + struct exec_node node; + + const char *name; + struct nir_shader *shader; + + unsigned num_params; + nir_parameter *params; + const struct glsl_type *return_type; + + /** The implementation of this function. + * + * If the function is only declared and not implemented, this is NULL. + */ + nir_function_impl *impl; +} nir_function; + +typedef struct nir_shader_compiler_options { + bool lower_fdiv; + bool lower_ffma; + bool lower_flrp; + bool lower_fpow; + bool lower_fsat; + bool lower_fsqrt; + bool lower_fmod; + bool lower_bitfield_extract; + bool lower_bitfield_insert; + bool lower_uadd_carry; + bool lower_usub_borrow; + /** lowers fneg and ineg to fsub and isub. */ + bool lower_negate; + /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ + bool lower_sub; + + /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ + bool lower_scmp; + + /* Does the native fdot instruction replicate its result for four + * components? If so, then opt_algebraic_late will turn all fdotN + * instructions into fdot_replicatedN instructions. + */ + bool fdot_replicates; + + /** lowers ffract to fsub+ffloor: */ + bool lower_ffract; + + /** + * Does the driver support real 32-bit integers? (Otherwise, integers + * are simulated by floats.) 
+    */
+   bool native_integers;
+} nir_shader_compiler_options;
+
+typedef struct nir_shader_info {
+   const char *name;
+
+   /* Descriptive name provided by the client; may be NULL */
+   const char *label;
+
+   /* Number of textures used by this shader */
+   unsigned num_textures;
+   /* Number of uniform buffers used by this shader */
+   unsigned num_ubos;
+   /* Number of atomic buffers used by this shader */
+   unsigned num_abos;
+   /* Number of shader storage buffers used by this shader */
+   unsigned num_ssbos;
+   /* Number of images used by this shader */
+   unsigned num_images;
+
+   /* Which inputs are actually read */
+   uint64_t inputs_read;
+   /* Which outputs are actually written */
+   uint64_t outputs_written;
+   /* Which system values are actually read */
+   uint64_t system_values_read;
+
+   /* Which patch inputs are actually read */
+   uint32_t patch_inputs_read;
+   /* Which patch outputs are actually written */
+   uint32_t patch_outputs_written;
+
+   /* Whether or not this shader ever uses textureGather() */
+   bool uses_texture_gather;
+
+   /* Whether or not this shader uses the gl_ClipDistance output */
+   bool uses_clip_distance_out;
+
+   /* Whether or not separate shader objects were used */
+   bool separate_shader;
+
+   /** Was this shader linked with any transform feedback varyings? */
+   bool has_transform_feedback_varyings;
+
+   union {
+      struct {
+         /** The number of vertices received per input primitive */
+         unsigned vertices_in;
+
+         /** The output primitive type (GL enum value) */
+         unsigned output_primitive;
+
+         /** The maximum number of vertices the geometry shader might write. */
+         unsigned vertices_out;
+
+         /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+         unsigned invocations;
+
+         /** Whether or not this shader uses EndPrimitive */
+         bool uses_end_primitive;
+
+         /** Whether or not this shader uses non-zero streams */
+         bool uses_streams;
+      } gs;
+
+      struct {
+         bool uses_discard;
+
+         /**
+          * Whether early fragment tests are enabled as defined by
+          * ARB_shader_image_load_store.
+          */
+         bool early_fragment_tests;
+
+         /** gl_FragDepth layout for ARB_conservative_depth. */
+         enum gl_frag_depth_layout depth_layout;
+      } fs;
+
+      struct {
+         unsigned local_size[3];
+      } cs;
+
+      struct {
+         /** The number of vertices in the TCS output patch. */
+         unsigned vertices_out;
+      } tcs;
+   };
+} nir_shader_info;
+
+typedef struct nir_shader {
+   /** list of uniforms (nir_variable) */
+   struct exec_list uniforms;
+
+   /** list of inputs (nir_variable) */
+   struct exec_list inputs;
+
+   /** list of outputs (nir_variable) */
+   struct exec_list outputs;
+
+   /** Set of driver-specific options for the shader.
+    *
+    * The memory for the options is expected to be kept in a single static
+    * copy by the driver.
+    */
+   const struct nir_shader_compiler_options *options;
+
+   /** Various bits of compile-time information about a given shader */
+   struct nir_shader_info info;
+
+   /** list of global variables in the shader (nir_variable) */
+   struct exec_list globals;
+
+   /** list of system value variables in the shader (nir_variable) */
+   struct exec_list system_values;
+
+   struct exec_list functions; /** < list of nir_function */
+
+   /** list of global registers in the shader */
+   struct exec_list registers;
+
+   /** next available global register index */
+   unsigned reg_alloc;
+
+   /**
+    * the highest index a load_input_*, load_uniform_*, etc. intrinsic can
+    * access plus one
+    */
+   unsigned num_inputs, num_uniforms, num_outputs;
+
+   /** The shader stage, such as MESA_SHADER_VERTEX. */
+   gl_shader_stage stage;
+} nir_shader;
+
+#define nir_foreach_function(shader, func) \
+   foreach_list_typed(nir_function, func, node, &(shader)->functions)
+
+nir_shader *nir_shader_create(void *mem_ctx,
+                              gl_shader_stage stage,
+                              const nir_shader_compiler_options *options);
+
+/** creates a register, including assigning it an index and adding it to the list */
+nir_register *nir_global_reg_create(nir_shader *shader);
+
+nir_register *nir_local_reg_create(nir_function_impl *impl);
+
+void nir_reg_remove(nir_register *reg);
+
+/** Adds a variable to the appropriate list in nir_shader */
+void nir_shader_add_variable(nir_shader *shader, nir_variable *var);
+
+static inline void
+nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var)
+{
+   assert(var->data.mode == nir_var_local);
+   exec_list_push_tail(&impl->locals, &var->node);
+}
+
+/** creates a variable, sets a few defaults, and adds it to the list */
+nir_variable *nir_variable_create(nir_shader *shader,
+                                  nir_variable_mode mode,
+                                  const struct glsl_type *type,
+                                  const char *name);
+/** creates a local variable and adds it to the list */
+nir_variable *nir_local_variable_create(nir_function_impl *impl,
+                                        const struct glsl_type *type,
+                                        const char *name);
+
+/** creates a function and adds it to the shader's list of functions */
+nir_function *nir_function_create(nir_shader *shader, const char *name);
+
+nir_function_impl *nir_function_impl_create(nir_function *func);
+
+nir_block *nir_block_create(nir_shader *shader);
+nir_if *nir_if_create(nir_shader *shader);
+nir_loop *nir_loop_create(nir_shader *shader);
+
+nir_function_impl *nir_cf_node_get_function(nir_cf_node *node);
+
+/** requests that the given pieces of metadata be generated */
+void nir_metadata_require(nir_function_impl *impl, nir_metadata required);
+/** dirties all but the preserved metadata */
+void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved);
+
+/** creates an instruction with default swizzle/writemask/etc. with NULL registers */
+nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op);
+
+nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type);
+
+nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader,
+                                                  unsigned num_components);
+
+nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader,
+                                                nir_intrinsic_op op);
+
+nir_call_instr *nir_call_instr_create(nir_shader *shader,
+                                      nir_function *callee);
+
+nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs);
+
+nir_phi_instr *nir_phi_instr_create(nir_shader *shader);
+
+nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader);
+
+nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader,
+                                                unsigned num_components);
+
+nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var);
+nir_deref_array *nir_deref_array_create(void *mem_ctx);
+nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index);
+
+nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref);
+
+nir_load_const_instr *
+nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref);
+
+/**
+ * NIR Cursors and Instruction Insertion API
+ * @{
+ *
+ * A tiny struct representing a point to insert/extract instructions or
+ * control flow nodes.  Helps reduce the combinatorial explosion of possible
+ * points to insert/extract.
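+ *
+ * A brief usage sketch (all of the helpers used here are defined below
+ * in this header; "block" and "new_instr" are illustrative):
+ *
+ *    nir_cursor c = nir_after_block_before_jump(block);
+ *    nir_instr_insert(c, &new_instr->instr);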
+ * + * \sa nir_control_flow.h + */ +typedef enum { + nir_cursor_before_block, + nir_cursor_after_block, + nir_cursor_before_instr, + nir_cursor_after_instr, +} nir_cursor_option; + +typedef struct { + nir_cursor_option option; + union { + nir_block *block; + nir_instr *instr; + }; +} nir_cursor; + +static inline nir_cursor +nir_before_block(nir_block *block) +{ + nir_cursor cursor; + cursor.option = nir_cursor_before_block; + cursor.block = block; + return cursor; +} + +static inline nir_cursor +nir_after_block(nir_block *block) +{ + nir_cursor cursor; + cursor.option = nir_cursor_after_block; + cursor.block = block; + return cursor; +} + +static inline nir_cursor +nir_before_instr(nir_instr *instr) +{ + nir_cursor cursor; + cursor.option = nir_cursor_before_instr; + cursor.instr = instr; + return cursor; +} + +static inline nir_cursor +nir_after_instr(nir_instr *instr) +{ + nir_cursor cursor; + cursor.option = nir_cursor_after_instr; + cursor.instr = instr; + return cursor; +} + +static inline nir_cursor +nir_after_block_before_jump(nir_block *block) +{ + nir_instr *last_instr = nir_block_last_instr(block); + if (last_instr && last_instr->type == nir_instr_type_jump) { + return nir_before_instr(last_instr); + } else { + return nir_after_block(block); + } +} + +static inline nir_cursor +nir_before_cf_node(nir_cf_node *node) +{ + if (node->type == nir_cf_node_block) + return nir_before_block(nir_cf_node_as_block(node)); + + return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); +} + +static inline nir_cursor +nir_after_cf_node(nir_cf_node *node) +{ + if (node->type == nir_cf_node_block) + return nir_after_block(nir_cf_node_as_block(node)); + + return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); +} + +static inline nir_cursor +nir_before_cf_list(struct exec_list *cf_list) +{ + nir_cf_node *first_node = exec_node_data(nir_cf_node, + exec_list_get_head(cf_list), node); + return nir_before_cf_node(first_node); +} + +static inline nir_cursor +nir_after_cf_list(struct exec_list *cf_list) +{ + nir_cf_node *last_node = exec_node_data(nir_cf_node, + exec_list_get_tail(cf_list), node); + return nir_after_cf_node(last_node); +} + +/** + * Insert a NIR instruction at the given cursor. + * + * Note: This does not update the cursor. 
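+ *
+ * (For a cursor that advances as instructions are inserted, see
+ * nir_builder_instr_insert() in nir_builder.h, added later in this
+ * patch, which wraps this function.)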
+ */ +void nir_instr_insert(nir_cursor cursor, nir_instr *instr); + +static inline void +nir_instr_insert_before(nir_instr *instr, nir_instr *before) +{ + nir_instr_insert(nir_before_instr(instr), before); +} + +static inline void +nir_instr_insert_after(nir_instr *instr, nir_instr *after) +{ + nir_instr_insert(nir_after_instr(instr), after); +} + +static inline void +nir_instr_insert_before_block(nir_block *block, nir_instr *before) +{ + nir_instr_insert(nir_before_block(block), before); +} + +static inline void +nir_instr_insert_after_block(nir_block *block, nir_instr *after) +{ + nir_instr_insert(nir_after_block(block), after); +} + +static inline void +nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) +{ + nir_instr_insert(nir_before_cf_node(node), before); +} + +static inline void +nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) +{ + nir_instr_insert(nir_after_cf_node(node), after); +} + +static inline void +nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) +{ + nir_instr_insert(nir_before_cf_list(list), before); +} + +static inline void +nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) +{ + nir_instr_insert(nir_after_cf_list(list), after); +} + +void nir_instr_remove(nir_instr *instr); + +/** @} */ + +typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state); +typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); +typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); +bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, + void *state); +bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); +bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); + +nir_const_value *nir_src_as_const_value(nir_src src); +bool nir_src_is_dynamically_uniform(nir_src src); +bool nir_srcs_equal(nir_src src1, nir_src src2); +void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); +void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); +void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); +void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, + nir_dest new_dest); + +void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, + unsigned num_components, const char *name); +void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, + unsigned num_components, const char *name); +void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); +void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, + nir_instr *after_me); + +/* visits basic blocks in source-code order */ +typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state); +bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, + void *state); +bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb, + void *state); +bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, + void *state); + +/* If the following CF node is an if, this function returns that if. + * Otherwise, it returns NULL. 
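+ *
+ * A hedged usage sketch:
+ *
+ *    nir_if *nif = nir_block_get_following_if(block);
+ *    if (nif) {
+ *       ... inspect nif->condition, nif->then_list, nif->else_list ...
+ *    }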
+ */ +nir_if *nir_block_get_following_if(nir_block *block); + +nir_loop *nir_block_get_following_loop(nir_block *block); + +void nir_index_local_regs(nir_function_impl *impl); +void nir_index_global_regs(nir_shader *shader); +void nir_index_ssa_defs(nir_function_impl *impl); +unsigned nir_index_instrs(nir_function_impl *impl); + +void nir_index_blocks(nir_function_impl *impl); + +void nir_print_shader(nir_shader *shader, FILE *fp); +void nir_print_instr(const nir_instr *instr, FILE *fp); + +nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s); + +#ifdef DEBUG +void nir_validate_shader(nir_shader *shader); +void nir_metadata_set_validation_flag(nir_shader *shader); +void nir_metadata_check_validation_flag(nir_shader *shader); + +#include "util/debug.h" +static inline bool +should_clone_nir(void) +{ + static int should_clone = -1; + if (should_clone < 0) + should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); + + return should_clone; +} +#else +static inline void nir_validate_shader(nir_shader *shader) { (void) shader; } +static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } +static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } +static inline bool should_clone_nir(void) { return false; } +#endif /* DEBUG */ + +#define _PASS(nir, do_pass) do { \ + do_pass \ + nir_validate_shader(nir); \ + if (should_clone_nir()) { \ + nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ + ralloc_free(nir); \ + nir = clone; \ + } \ +} while (0) + +#define NIR_PASS(progress, nir, pass, ...) _PASS(nir, \ + nir_metadata_set_validation_flag(nir); \ + if (pass(nir, ##__VA_ARGS__)) { \ + progress = true; \ + nir_metadata_check_validation_flag(nir); \ + } \ +) + +#define NIR_PASS_V(nir, pass, ...) 
_PASS(nir, \
+   pass(nir, ##__VA_ARGS__); \
+)
+
+void nir_calc_dominance_impl(nir_function_impl *impl);
+void nir_calc_dominance(nir_shader *shader);
+
+nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2);
+bool nir_block_dominates(nir_block *parent, nir_block *child);
+
+void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_dom_tree(nir_shader *shader, FILE *fp);
+
+void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_dom_frontier(nir_shader *shader, FILE *fp);
+
+void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp);
+void nir_dump_cfg(nir_shader *shader, FILE *fp);
+
+int nir_gs_count_vertices(const nir_shader *shader);
+
+bool nir_split_var_copies(nir_shader *shader);
+
+void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx);
+void nir_lower_var_copies(nir_shader *shader);
+
+bool nir_lower_global_vars_to_local(nir_shader *shader);
+
+bool nir_lower_locals_to_regs(nir_shader *shader);
+
+void nir_lower_outputs_to_temporaries(nir_shader *shader);
+
+void nir_assign_var_locations(struct exec_list *var_list,
+                              unsigned *size,
+                              int (*type_size)(const struct glsl_type *));
+
+void nir_lower_io(nir_shader *shader,
+                  nir_variable_mode mode,
+                  int (*type_size)(const struct glsl_type *));
+nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr);
+nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr);
+
+void nir_lower_vars_to_ssa(nir_shader *shader);
+
+bool nir_remove_dead_variables(nir_shader *shader);
+
+void nir_move_vec_src_uses_to_dest(nir_shader *shader);
+bool nir_lower_vec_to_movs(nir_shader *shader);
+void nir_lower_alu_to_scalar(nir_shader *shader);
+void nir_lower_load_const_to_scalar(nir_shader *shader);
+
+void nir_lower_phis_to_scalar(nir_shader *shader);
+
+void nir_lower_samplers(nir_shader *shader,
+                        const struct gl_shader_program *shader_program);
+
+bool nir_lower_system_values(nir_shader *shader);
+
+typedef struct nir_lower_tex_options {
+   /**
+    * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which
+    * sampler types a texture projector is lowered.
+    */
+   unsigned lower_txp;
+
+   /**
+    * If true, lower rect textures to 2D, using txs to fetch the
+    * texture dimensions and dividing the texture coords by the
+    * texture dims to normalize.
+    */
+   bool lower_rect;
+
+   /**
+    * To emulate certain texture wrap modes, this can be used
+    * to saturate the specified tex coord to [0.0, 1.0].  The
+    * bits are according to sampler #, i.e. if, for example,
+    *
+    *    (conf->saturate_s & (1 << n))
+    *
+    * is true, then the s coord for sampler n is saturated.
+    *
+    * Note that clamping must happen *after* projector lowering
+    * so any projected texture sample instruction with a clamped
+    * coordinate gets automatically lowered, regardless of the
+    * 'lower_txp' setting.
+    */
+   unsigned saturate_s;
+   unsigned saturate_t;
+   unsigned saturate_r;
+
+   /* Bitmask of samplers that need swizzling.
+    *
+    * If (swizzle_result & (1 << sampler_index)), then the swizzle in
+    * swizzles[sampler_index] is applied to the result of the texturing
+    * operation.
+    */
+   unsigned swizzle_result;
+
+   /* A swizzle for each sampler.  Values 0-3 represent x, y, z, or w swizzles
+    * while 4 and 5 represent 0 and 1 respectively.
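+    *
+    * For example (an illustrative sketch), to make sampler n return
+    * (x, x, x, 1) for a LUMINANCE-style texture:
+    *
+    *    options.swizzle_result |= 1u << n;
+    *    options.swizzles[n][0] = 0;
+    *    options.swizzles[n][1] = 0;
+    *    options.swizzles[n][2] = 0;
+    *    options.swizzles[n][3] = 5;   (5 encodes the constant 1)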
+ */ + uint8_t swizzles[32][4]; +} nir_lower_tex_options; + +bool nir_lower_tex(nir_shader *shader, + const nir_lower_tex_options *options); + +void nir_lower_idiv(nir_shader *shader); + +void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables); +void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); + +void nir_lower_two_sided_color(nir_shader *shader); + +void nir_lower_atomics(nir_shader *shader, + const struct gl_shader_program *shader_program); +void nir_lower_to_source_mods(nir_shader *shader); + +bool nir_lower_gs_intrinsics(nir_shader *shader); + +bool nir_normalize_cubemap_coords(nir_shader *shader); + +void nir_live_ssa_defs_impl(nir_function_impl *impl); +bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); + +void nir_convert_to_ssa_impl(nir_function_impl *impl); +void nir_convert_to_ssa(nir_shader *shader); + +/* If phi_webs_only is true, only convert SSA values involved in phi nodes to + * registers. If false, convert all values (even those not involved in a phi + * node) to registers. + */ +void nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only); + +bool nir_opt_algebraic(nir_shader *shader); +bool nir_opt_algebraic_late(nir_shader *shader); +bool nir_opt_constant_folding(nir_shader *shader); + +bool nir_opt_global_to_local(nir_shader *shader); + +bool nir_copy_prop(nir_shader *shader); + +bool nir_opt_cse(nir_shader *shader); + +bool nir_opt_dce(nir_shader *shader); + +bool nir_opt_dead_cf(nir_shader *shader); + +void nir_opt_gcm(nir_shader *shader); + +bool nir_opt_peephole_select(nir_shader *shader); + +bool nir_opt_remove_phis(nir_shader *shader); + +bool nir_opt_undef(nir_shader *shader); + +void nir_sweep(nir_shader *shader); + +nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); +gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); + +#ifdef __cplusplus +} /* extern "C" */ +#endif diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py new file mode 100644 index 00000000000..a30652f2afd --- /dev/null +++ b/src/compiler/nir/nir_algebraic.py @@ -0,0 +1,305 @@ +#! /usr/bin/env python +# +# Copyright (C) 2014 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+# +# Authors: +# Jason Ekstrand (jason@jlekstrand.net) + +import itertools +import struct +import sys +import mako.template +import re + +# Represents a set of variables, each with a unique id +class VarSet(object): + def __init__(self): + self.names = {} + self.ids = itertools.count() + self.immutable = False; + + def __getitem__(self, name): + if name not in self.names: + assert not self.immutable, "Unknown replacement variable: " + name + self.names[name] = self.ids.next() + + return self.names[name] + + def lock(self): + self.immutable = True + +class Value(object): + @staticmethod + def create(val, name_base, varset): + if isinstance(val, tuple): + return Expression(val, name_base, varset) + elif isinstance(val, Expression): + return val + elif isinstance(val, (str, unicode)): + return Variable(val, name_base, varset) + elif isinstance(val, (bool, int, long, float)): + return Constant(val, name_base) + + __template = mako.template.Template(""" +static const ${val.c_type} ${val.name} = { + { ${val.type_enum} }, +% if isinstance(val, Constant): + { ${hex(val)} /* ${val.value} */ }, +% elif isinstance(val, Variable): + ${val.index}, /* ${val.var_name} */ + ${'true' if val.is_constant else 'false'}, + nir_type_${ val.required_type or 'invalid' }, +% elif isinstance(val, Expression): + nir_op_${val.opcode}, + { ${', '.join(src.c_ptr for src in val.sources)} }, +% endif +};""") + + def __init__(self, name, type_str): + self.name = name + self.type_str = type_str + + @property + def type_enum(self): + return "nir_search_value_" + self.type_str + + @property + def c_type(self): + return "nir_search_" + self.type_str + + @property + def c_ptr(self): + return "&{0}.value".format(self.name) + + def render(self): + return self.__template.render(val=self, + Constant=Constant, + Variable=Variable, + Expression=Expression) + +class Constant(Value): + def __init__(self, val, name): + Value.__init__(self, name, "constant") + self.value = val + + def __hex__(self): + # Even if it's an integer, we still need to unpack as an unsigned + # int. This is because, without C99, we can only assign to the first + # element of a union in an initializer. 
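+        # For instance (illustrative values): Constant(-1, ...) renders
+        # as 0xffffffff, and Constant(1.0, ...) renders as 0x3f800000,
+        # the bit pattern of 1.0f.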
+        if isinstance(self.value, (bool)):
+            return 'NIR_TRUE' if self.value else 'NIR_FALSE'
+        if isinstance(self.value, (int, long)):
+            return hex(struct.unpack('I', struct.pack('i', self.value))[0])
+        elif isinstance(self.value, float):
+            return hex(struct.unpack('I', struct.pack('f', self.value))[0])
+        else:
+            assert False
+
+_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")
+
+class Variable(Value):
+    def __init__(self, val, name, varset):
+        Value.__init__(self, name, "variable")
+
+        m = _var_name_re.match(val)
+        assert m and m.group('name') is not None
+
+        self.var_name = m.group('name')
+        self.is_constant = m.group('const') is not None
+        self.required_type = m.group('type')
+
+        if self.required_type is not None:
+            assert self.required_type in ('float', 'bool', 'int', 'unsigned')
+
+        self.index = varset[self.var_name]
+
+class Expression(Value):
+    def __init__(self, expr, name_base, varset):
+        Value.__init__(self, name_base, "expression")
+        assert isinstance(expr, tuple)
+
+        self.opcode = expr[0]
+        self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
+                         for (i, src) in enumerate(expr[1:]) ]
+
+    def render(self):
+        srcs = "\n".join(src.render() for src in self.sources)
+        return srcs + super(Expression, self).render()
+
+_optimization_ids = itertools.count()
+
+condition_list = ['true']
+
+class SearchAndReplace(object):
+    def __init__(self, transform):
+        self.id = _optimization_ids.next()
+
+        search = transform[0]
+        replace = transform[1]
+        if len(transform) > 2:
+            self.condition = transform[2]
+        else:
+            self.condition = 'true'
+
+        if self.condition not in condition_list:
+            condition_list.append(self.condition)
+        self.condition_index = condition_list.index(self.condition)
+
+        varset = VarSet()
+        if isinstance(search, Expression):
+            self.search = search
+        else:
+            self.search = Expression(search, "search{0}".format(self.id), varset)
+
+        varset.lock()
+
+        if isinstance(replace, Value):
+            self.replace = replace
+        else:
+            self.replace = Value.create(replace, "replace{0}".format(self.id), varset)
+
+_algebraic_pass_template = mako.template.Template("""
+#include "nir.h"
+#include "nir_search.h"
+
+#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
+
+struct transform {
+   const nir_search_expression *search;
+   const nir_search_value *replace;
+   unsigned condition_offset;
+};
+
+struct opt_state {
+   void *mem_ctx;
+   bool progress;
+   const bool *condition_flags;
+};
+
+#endif
+
+% for (opcode, xform_list) in xform_dict.iteritems():
+% for xform in xform_list:
+   ${xform.search.render()}
+   ${xform.replace.render()}
+% endfor
+
+static const struct transform ${pass_name}_${opcode}_xforms[] = {
+% for xform in xform_list:
+   { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} },
+% endfor
+};
+% endfor
+
+static bool
+${pass_name}_block(nir_block *block, void *void_state)
+{
+   struct opt_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *alu = nir_instr_as_alu(instr);
+      if (!alu->dest.dest.is_ssa)
+         continue;
+
+      switch (alu->op) {
+      % for opcode in xform_dict.keys():
+      case nir_op_${opcode}:
+         for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
+            const struct transform *xform = &${pass_name}_${opcode}_xforms[i];
+            if (state->condition_flags[xform->condition_offset] &&
+                nir_replace_instr(alu, xform->search, xform->replace,
+                                  state->mem_ctx)) {
+               state->progress = true;
+               break;
+            }
+         }
+         break;
+      % endfor
+      default:
+         break;
+      }
+   }
+
+   return true;
+}
+
+static bool
+${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
+{
+   struct opt_state state;
+
+   state.mem_ctx = ralloc_parent(impl);
+   state.progress = false;
+   state.condition_flags = condition_flags;
+
+   nir_foreach_block(impl, ${pass_name}_block, &state);
+
+   if (state.progress)
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+
+   return state.progress;
+}
+
+
+bool
+${pass_name}(nir_shader *shader)
+{
+   bool progress = false;
+   bool condition_flags[${len(condition_list)}];
+   const nir_shader_compiler_options *options = shader->options;
+
+   % for index, condition in enumerate(condition_list):
+   condition_flags[${index}] = ${condition};
+   % endfor
+
+   nir_foreach_function(shader, function) {
+      if (function->impl)
+         progress |= ${pass_name}_impl(function->impl, condition_flags);
+   }
+
+   return progress;
+}
+""")
+
+class AlgebraicPass(object):
+    def __init__(self, pass_name, transforms):
+        self.xform_dict = {}
+        self.pass_name = pass_name
+
+        for xform in transforms:
+            if not isinstance(xform, SearchAndReplace):
+                xform = SearchAndReplace(xform)
+
+            if xform.search.opcode not in self.xform_dict:
+                self.xform_dict[xform.search.opcode] = []
+
+            self.xform_dict[xform.search.opcode].append(xform)
+
+    def render(self):
+        return _algebraic_pass_template.render(pass_name=self.pass_name,
+                                               xform_dict=self.xform_dict,
+                                               condition_list=condition_list)
diff --git a/src/compiler/nir/nir_array.h b/src/compiler/nir/nir_array.h
new file mode 100644
index 00000000000..1db4e8cea36
--- /dev/null
+++ b/src/compiler/nir/nir_array.h
@@ -0,0 +1,96 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+   void *mem_ctx;
+   size_t size;
+   size_t alloc;
+   void *data;
+} nir_array;
+
+static inline void
+nir_array_init(nir_array *arr, void *mem_ctx)
+{
+   arr->mem_ctx = mem_ctx;
+   arr->size = 0;
+   arr->alloc = 0;
+   arr->data = NULL;
+}
+
+static inline void
+nir_array_fini(nir_array *arr)
+{
+   if (arr->mem_ctx)
+      ralloc_free(arr->data);
+   else
+      free(arr->data);
+}
+
+#define NIR_ARRAY_INITIAL_SIZE 64
+
+/* Increments the size of the array by the given amount and returns a
+ * pointer to the beginning of the newly added space.
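+ *
+ * A short usage sketch, relying on the add/foreach macros defined below
+ * ("mem_ctx" and "process" are illustrative):
+ *
+ *    nir_array arr;
+ *    nir_array_init(&arr, mem_ctx);
+ *    nir_array_add(&arr, uint32_t, 42);
+ *    nir_array_foreach(&arr, uint32_t, elem)
+ *       process(*elem);
+ *    nir_array_fini(&arr);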
+ */ +static inline void * +nir_array_grow(nir_array *arr, size_t additional) +{ + size_t new_size = arr->size + additional; + if (new_size > arr->alloc) { + if (arr->alloc == 0) + arr->alloc = NIR_ARRAY_INITIAL_SIZE; + + while (new_size > arr->alloc) + arr->alloc *= 2; + + if (arr->mem_ctx) + arr->data = reralloc_size(arr->mem_ctx, arr->data, arr->alloc); + else + arr->data = realloc(arr->data, arr->alloc); + } + + void *ptr = (void *)((char *)arr->data + arr->size); + arr->size = new_size; + + return ptr; +} + +#define nir_array_add(arr, type, elem) \ + *(type *)nir_array_grow(arr, sizeof(type)) = (elem) + +#define nir_array_foreach(arr, type, elem) \ + for (type *elem = (type *)(arr)->data; \ + elem < (type *)((char *)(arr)->data + (arr)->size); elem++) + +#ifdef __cplusplus +} /* extern "C" */ +#endif diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h new file mode 100644 index 00000000000..88ba3a1c269 --- /dev/null +++ b/src/compiler/nir/nir_builder.h @@ -0,0 +1,364 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef NIR_BUILDER_H +#define NIR_BUILDER_H + +#include "nir_control_flow.h" + +struct exec_list; + +typedef struct nir_builder { + nir_cursor cursor; + + nir_shader *shader; + nir_function_impl *impl; +} nir_builder; + +static inline void +nir_builder_init(nir_builder *build, nir_function_impl *impl) +{ + memset(build, 0, sizeof(*build)); + build->impl = impl; + build->shader = impl->function->shader; +} + +static inline void +nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx, + gl_shader_stage stage, + const nir_shader_compiler_options *options) +{ + build->shader = nir_shader_create(mem_ctx, stage, options); + nir_function *func = nir_function_create(build->shader, "main"); + build->impl = nir_function_impl_create(func); + build->cursor = nir_after_cf_list(&build->impl->body); +} + +static inline void +nir_builder_instr_insert(nir_builder *build, nir_instr *instr) +{ + nir_instr_insert(build->cursor, instr); + + /* Move the cursor forward. 
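+    * so that the next nir_builder_instr_insert() call appends right
+    * after the instruction we just inserted.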
*/ + build->cursor = nir_after_instr(instr); +} + +static inline void +nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf) +{ + nir_cf_node_insert(build->cursor, cf); +} + +static inline nir_ssa_def * +nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value) +{ + nir_load_const_instr *load_const = + nir_load_const_instr_create(build->shader, num_components); + if (!load_const) + return NULL; + + load_const->value = value; + + nir_builder_instr_insert(build, &load_const->instr); + + return &load_const->def; +} + +static inline nir_ssa_def * +nir_imm_float(nir_builder *build, float x) +{ + nir_const_value v; + + memset(&v, 0, sizeof(v)); + v.f[0] = x; + + return nir_build_imm(build, 1, v); +} + +static inline nir_ssa_def * +nir_imm_vec4(nir_builder *build, float x, float y, float z, float w) +{ + nir_const_value v; + + memset(&v, 0, sizeof(v)); + v.f[0] = x; + v.f[1] = y; + v.f[2] = z; + v.f[3] = w; + + return nir_build_imm(build, 4, v); +} + +static inline nir_ssa_def * +nir_imm_int(nir_builder *build, int x) +{ + nir_const_value v; + + memset(&v, 0, sizeof(v)); + v.i[0] = x; + + return nir_build_imm(build, 1, v); +} + +static inline nir_ssa_def * +nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w) +{ + nir_const_value v; + + memset(&v, 0, sizeof(v)); + v.i[0] = x; + v.i[1] = y; + v.i[2] = z; + v.i[3] = w; + + return nir_build_imm(build, 4, v); +} + +static inline nir_ssa_def * +nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, + nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) +{ + const nir_op_info *op_info = &nir_op_infos[op]; + nir_alu_instr *instr = nir_alu_instr_create(build->shader, op); + if (!instr) + return NULL; + + instr->src[0].src = nir_src_for_ssa(src0); + if (src1) + instr->src[1].src = nir_src_for_ssa(src1); + if (src2) + instr->src[2].src = nir_src_for_ssa(src2); + if (src3) + instr->src[3].src = nir_src_for_ssa(src3); + + /* Guess the number of components the destination temporary should have + * based on our input sizes, if it's not fixed for the op. + */ + unsigned num_components = op_info->output_size; + if (num_components == 0) { + for (unsigned i = 0; i < op_info->num_inputs; i++) { + if (op_info->input_sizes[i] == 0) + num_components = MAX2(num_components, + instr->src[i].src.ssa->num_components); + } + } + assert(num_components != 0); + + /* Make sure we don't swizzle from outside of our source vector (like if a + * scalar value was passed into a multiply with a vector). 
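+    * For example: for fmul(vec4, scalar), lanes 1..3 of the scalar
+    * source are pointed back at its only component, component 0.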
+ */ + for (unsigned i = 0; i < op_info->num_inputs; i++) { + for (unsigned j = instr->src[i].src.ssa->num_components; j < 4; j++) { + instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1; + } + } + + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); + instr->dest.write_mask = (1 << num_components) - 1; + + nir_builder_instr_insert(build, &instr->instr); + + return &instr->dest.dest.ssa; +} + +#define ALU1(op) \ +static inline nir_ssa_def * \ +nir_##op(nir_builder *build, nir_ssa_def *src0) \ +{ \ + return nir_build_alu(build, nir_op_##op, src0, NULL, NULL, NULL); \ +} + +#define ALU2(op) \ +static inline nir_ssa_def * \ +nir_##op(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) \ +{ \ + return nir_build_alu(build, nir_op_##op, src0, src1, NULL, NULL); \ +} + +#define ALU3(op) \ +static inline nir_ssa_def * \ +nir_##op(nir_builder *build, nir_ssa_def *src0, \ + nir_ssa_def *src1, nir_ssa_def *src2) \ +{ \ + return nir_build_alu(build, nir_op_##op, src0, src1, src2, NULL); \ +} + +#define ALU4(op) \ +static inline nir_ssa_def * \ +nir_##op(nir_builder *build, nir_ssa_def *src0, \ + nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) \ +{ \ + return nir_build_alu(build, nir_op_##op, src0, src1, src2, src3); \ +} + +#include "nir_builder_opcodes.h" + +static inline nir_ssa_def * +nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components) +{ + switch (num_components) { + case 4: + return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]); + case 3: + return nir_vec3(build, comp[0], comp[1], comp[2]); + case 2: + return nir_vec2(build, comp[0], comp[1]); + case 1: + return comp[0]; + default: + unreachable("bad component count"); + return NULL; + } +} + +/** + * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def. + */ +static inline nir_ssa_def * +nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) +{ + nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); + mov->dest.write_mask = (1 << num_components) - 1; + mov->src[0] = src; + nir_builder_instr_insert(build, &mov->instr); + + return &mov->dest.dest.ssa; +} + +static inline nir_ssa_def * +nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) +{ + nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); + mov->dest.write_mask = (1 << num_components) - 1; + mov->src[0] = src; + nir_builder_instr_insert(build, &mov->instr); + + return &mov->dest.dest.ssa; +} + +/** + * Construct an fmov or imov that reswizzles the source's components. + */ +static inline nir_ssa_def * +nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], + unsigned num_components, bool use_fmov) +{ + nir_alu_src alu_src = { NIR_SRC_INIT }; + alu_src.src = nir_src_for_ssa(src); + for (unsigned i = 0; i < num_components; i++) + alu_src.swizzle[i] = swiz[i]; + + return use_fmov ? nir_fmov_alu(build, alu_src, num_components) : + nir_imov_alu(build, alu_src, num_components); +} + +static inline nir_ssa_def * +nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c) +{ + unsigned swizzle[4] = {c, c, c, c}; + return nir_swizzle(b, def, swizzle, 1, false); +} + +/** + * Turns a nir_src into a nir_ssa_def * so it can be passed to + * nir_build_alu()-based builder calls. + * + * See nir_ssa_for_alu_src() for alu instructions. 
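+ *
+ * A hedged usage sketch ("b" and "intrin" are illustrative):
+ *
+ *    nir_ssa_def *val = nir_ssa_for_src(b, intrin->src[0], 1);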
+ */ +static inline nir_ssa_def * +nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) +{ + if (src.is_ssa && src.ssa->num_components == num_components) + return src.ssa; + + nir_alu_src alu = { NIR_SRC_INIT }; + alu.src = src; + for (int j = 0; j < 4; j++) + alu.swizzle[j] = j; + + return nir_imov_alu(build, alu, num_components); +} + +/** + * Similar to nir_ssa_for_src(), but for alu src's, respecting the + * nir_alu_src's swizzle. + */ +static inline nir_ssa_def * +nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn) +{ + static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 }; + nir_alu_src *src = &instr->src[srcn]; + unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn); + + if (src->src.is_ssa && (src->src.ssa->num_components == num_components) && + !src->abs && !src->negate && + (memcmp(src->swizzle, trivial_swizzle, num_components) == 0)) + return src->src.ssa; + + return nir_imov_alu(build, *src, num_components); +} + +static inline nir_ssa_def * +nir_load_var(nir_builder *build, nir_variable *var) +{ + const unsigned num_components = glsl_get_vector_elements(var->type); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var); + load->num_components = num_components; + load->variables[0] = nir_deref_var_create(load, var); + nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); + nir_builder_instr_insert(build, &load->instr); + return &load->dest.ssa; +} + +static inline void +nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value, + unsigned writemask) +{ + const unsigned num_components = glsl_get_vector_elements(var->type); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var); + store->num_components = num_components; + store->const_index[0] = writemask; + store->variables[0] = nir_deref_var_create(store, var); + store->src[0] = nir_src_for_ssa(value); + nir_builder_instr_insert(build, &store->instr); +} + +static inline nir_ssa_def * +nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) +{ + nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op); + load->num_components = nir_intrinsic_infos[op].dest_components; + load->const_index[0] = index; + nir_ssa_dest_init(&load->instr, &load->dest, + nir_intrinsic_infos[op].dest_components, NULL); + nir_builder_instr_insert(build, &load->instr); + return &load->dest.ssa; +} + +#endif /* NIR_BUILDER_H */ diff --git a/src/compiler/nir/nir_builder_opcodes_h.py b/src/compiler/nir/nir_builder_opcodes_h.py new file mode 100644 index 00000000000..e27206ea8fc --- /dev/null +++ b/src/compiler/nir/nir_builder_opcodes_h.py @@ -0,0 +1,38 @@ +#! /usr/bin/env python + +template = """\ +/* Copyright (C) 2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#ifndef _NIR_BUILDER_OPCODES_ +#define _NIR_BUILDER_OPCODES_ + +% for name, opcode in sorted(opcodes.iteritems()): +ALU${opcode.num_inputs}(${name}); +% endfor + +#endif /* _NIR_BUILDER_OPCODES_ */""" + +from nir_opcodes import opcodes +from mako.template import Template + +print Template(template).render(opcodes=opcodes) diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c new file mode 100644 index 00000000000..5eff743d835 --- /dev/null +++ b/src/compiler/nir/nir_clone.c @@ -0,0 +1,659 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_control_flow_private.h" + +/* Secret Decoder Ring: + * clone_foo(): + * Allocate and clone a foo. + * __clone_foo(): + * Clone body of foo (ie. parent class, embedded struct, etc) + */ + +typedef struct { + /* maps orig ptr -> cloned ptr: */ + struct hash_table *ptr_table; + + /* List of phi sources. 
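+    * A phi source may refer to a block that has not been cloned yet,
+    * so (a sketch of the scheme) sources are parked on this list while
+    * cloning and fixed up once every block exists.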
*/ + struct list_head phi_srcs; + + /* new shader object, used as memctx for just about everything else: */ + nir_shader *ns; +} clone_state; + +static void +init_clone_state(clone_state *state) +{ + state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + list_inithead(&state->phi_srcs); +} + +static void +free_clone_state(clone_state *state) +{ + _mesa_hash_table_destroy(state->ptr_table, NULL); +} + +static void * +lookup_ptr(clone_state *state, const void *ptr) +{ + struct hash_entry *entry; + + if (!ptr) + return NULL; + + entry = _mesa_hash_table_search(state->ptr_table, ptr); + assert(entry && "Failed to find pointer!"); + if (!entry) + return NULL; + + return entry->data; +} + +static void +store_ptr(clone_state *state, void *nptr, const void *ptr) +{ + _mesa_hash_table_insert(state->ptr_table, ptr, nptr); +} + +static nir_constant * +clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) +{ + nir_constant *nc = ralloc(nvar, nir_constant); + + nc->value = c->value; + nc->num_elements = c->num_elements; + nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements); + for (unsigned i = 0; i < c->num_elements; i++) { + nc->elements[i] = clone_constant(state, c->elements[i], nvar); + } + + return nc; +} + +/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid + * having to deal with locals and globals separately: + */ +static nir_variable * +clone_variable(clone_state *state, const nir_variable *var) +{ + nir_variable *nvar = rzalloc(state->ns, nir_variable); + store_ptr(state, nvar, var); + + nvar->type = var->type; + nvar->name = ralloc_strdup(nvar, var->name); + nvar->data = var->data; + nvar->num_state_slots = var->num_state_slots; + nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); + memcpy(nvar->state_slots, var->state_slots, + var->num_state_slots * sizeof(nir_state_slot)); + if (var->constant_initializer) { + nvar->constant_initializer = + clone_constant(state, var->constant_initializer, nvar); + } + nvar->interface_type = var->interface_type; + + return nvar; +} + +/* clone list of nir_variable: */ +static void +clone_var_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + exec_list_make_empty(dst); + foreach_list_typed(nir_variable, var, node, list) { + nir_variable *nvar = clone_variable(state, var); + exec_list_push_tail(dst, &nvar->node); + } +} + +/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create() + * to avoid having to deal with locals and globals separately: + */ +static nir_register * +clone_register(clone_state *state, const nir_register *reg) +{ + nir_register *nreg = rzalloc(state->ns, nir_register); + store_ptr(state, nreg, reg); + + nreg->num_components = reg->num_components; + nreg->num_array_elems = reg->num_array_elems; + nreg->index = reg->index; + nreg->name = ralloc_strdup(nreg, reg->name); + nreg->is_global = reg->is_global; + nreg->is_packed = reg->is_packed; + + /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */ + list_inithead(&nreg->uses); + list_inithead(&nreg->defs); + list_inithead(&nreg->if_uses); + + return nreg; +} + +/* clone list of nir_register: */ +static void +clone_reg_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + exec_list_make_empty(dst); + foreach_list_typed(nir_register, reg, node, list) { + nir_register *nreg = clone_register(state, reg); + exec_list_push_tail(dst, &nreg->node); + } +} + +static void 
+__clone_src(clone_state *state, void *ninstr_or_if, + nir_src *nsrc, const nir_src *src) +{ + nsrc->is_ssa = src->is_ssa; + if (src->is_ssa) { + nsrc->ssa = lookup_ptr(state, src->ssa); + } else { + nsrc->reg.reg = lookup_ptr(state, src->reg.reg); + if (src->reg.indirect) { + nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); + __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); + } + nsrc->reg.base_offset = src->reg.base_offset; + } +} + +static void +__clone_dst(clone_state *state, nir_instr *ninstr, + nir_dest *ndst, const nir_dest *dst) +{ + ndst->is_ssa = dst->is_ssa; + if (dst->is_ssa) { + nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name); + store_ptr(state, &ndst->ssa, &dst->ssa); + } else { + ndst->reg.reg = lookup_ptr(state, dst->reg.reg); + if (dst->reg.indirect) { + ndst->reg.indirect = ralloc(ninstr, nir_src); + __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); + } + ndst->reg.base_offset = dst->reg.base_offset; + } +} + +static nir_deref *clone_deref(clone_state *state, const nir_deref *deref, + nir_instr *ninstr, nir_deref *parent); + +static nir_deref_var * +clone_deref_var(clone_state *state, const nir_deref_var *dvar, + nir_instr *ninstr) +{ + nir_variable *nvar = lookup_ptr(state, dvar->var); + nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar); + + if (dvar->deref.child) + ndvar->deref.child = clone_deref(state, dvar->deref.child, + ninstr, &ndvar->deref); + + return ndvar; +} + +static nir_deref_array * +clone_deref_array(clone_state *state, const nir_deref_array *darr, + nir_instr *ninstr, nir_deref *parent) +{ + nir_deref_array *ndarr = nir_deref_array_create(parent); + + ndarr->deref.type = darr->deref.type; + if (darr->deref.child) + ndarr->deref.child = clone_deref(state, darr->deref.child, + ninstr, &ndarr->deref); + + ndarr->deref_array_type = darr->deref_array_type; + ndarr->base_offset = darr->base_offset; + if (ndarr->deref_array_type == nir_deref_array_type_indirect) + __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect); + + return ndarr; +} + +static nir_deref_struct * +clone_deref_struct(clone_state *state, const nir_deref_struct *dstr, + nir_instr *ninstr, nir_deref *parent) +{ + nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index); + + ndstr->deref.type = dstr->deref.type; + if (dstr->deref.child) + ndstr->deref.child = clone_deref(state, dstr->deref.child, + ninstr, &ndstr->deref); + + return ndstr; +} + +static nir_deref * +clone_deref(clone_state *state, const nir_deref *dref, + nir_instr *ninstr, nir_deref *parent) +{ + switch (dref->deref_type) { + case nir_deref_type_array: + return &clone_deref_array(state, nir_deref_as_array(dref), + ninstr, parent)->deref; + case nir_deref_type_struct: + return &clone_deref_struct(state, nir_deref_as_struct(dref), + ninstr, parent)->deref; + default: + unreachable("bad deref type"); + return NULL; + } +} + +static nir_alu_instr * +clone_alu(clone_state *state, const nir_alu_instr *alu) +{ + nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); + + __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest); + nalu->dest.saturate = alu->dest.saturate; + nalu->dest.write_mask = alu->dest.write_mask; + + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src); + nalu->src[i].negate = alu->src[i].negate; + nalu->src[i].abs = alu->src[i].abs; + memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, + 
sizeof(nalu->src[i].swizzle)); + } + + return nalu; +} + +static nir_intrinsic_instr * +clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr) +{ + nir_intrinsic_instr *nitr = + nir_intrinsic_instr_create(state->ns, itr->intrinsic); + + unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables; + unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs; + + if (nir_intrinsic_infos[itr->intrinsic].has_dest) + __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest); + + nitr->num_components = itr->num_components; + memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index)); + + for (unsigned i = 0; i < num_variables; i++) { + nitr->variables[i] = clone_deref_var(state, itr->variables[i], + &nitr->instr); + } + + for (unsigned i = 0; i < num_srcs; i++) + __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]); + + return nitr; +} + +static nir_load_const_instr * +clone_load_const(clone_state *state, const nir_load_const_instr *lc) +{ + nir_load_const_instr *nlc = + nir_load_const_instr_create(state->ns, lc->def.num_components); + + memcpy(&nlc->value, &lc->value, sizeof(nlc->value)); + + store_ptr(state, &nlc->def, &lc->def); + + return nlc; +} + +static nir_ssa_undef_instr * +clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa) +{ + nir_ssa_undef_instr *nsa = + nir_ssa_undef_instr_create(state->ns, sa->def.num_components); + + store_ptr(state, &nsa->def, &sa->def); + + return nsa; +} + +static nir_tex_instr * +clone_tex(clone_state *state, const nir_tex_instr *tex) +{ + nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs); + + ntex->sampler_dim = tex->sampler_dim; + ntex->dest_type = tex->dest_type; + ntex->op = tex->op; + __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest); + for (unsigned i = 0; i < ntex->num_srcs; i++) { + ntex->src[i].src_type = tex->src[i].src_type; + __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src); + } + ntex->coord_components = tex->coord_components; + ntex->is_array = tex->is_array; + ntex->is_shadow = tex->is_shadow; + ntex->is_new_style_shadow = tex->is_new_style_shadow; + memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset)); + ntex->component = tex->component; + ntex->sampler_index = tex->sampler_index; + ntex->sampler_array_size = tex->sampler_array_size; + if (tex->sampler) + ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr); + + return ntex; +} + +static nir_phi_instr * +clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk) +{ + nir_phi_instr *nphi = nir_phi_instr_create(state->ns); + + __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest); + + /* Cloning a phi node is a bit different from other instructions. The + * sources of phi instructions are the only time where we can use an SSA + * def before it is defined. In order to handle this, we just copy over + * the sources from the old phi instruction directly and then fix them up + * in a second pass once all the instrutions in the function have been + * properly cloned. + * + * In order to ensure that the copied sources (which are the same as the + * old phi instruction's sources for now) don't get inserted into the old + * shader's use-def lists, we have to add the phi instruction *before* we + * set up its sources. + */ + nir_instr_insert_after_block(nblk, &nphi->instr); + + foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { + nir_phi_src *nsrc = ralloc(nphi, nir_phi_src); + + /* Just copy the old source for now. 
*/ + memcpy(nsrc, src, sizeof(*src)); + + /* Since we're not letting nir_insert_instr handle use/def stuff for us, + * we have to set the parent_instr manually. It doesn't really matter + * when we do it, so we might as well do it here. + */ + nsrc->src.parent_instr = &nphi->instr; + + /* Stash it in the list of phi sources. We'll walk this list and fix up + * sources at the very end of clone_function_impl. + */ + list_add(&nsrc->src.use_link, &state->phi_srcs); + + exec_list_push_tail(&nphi->srcs, &nsrc->node); + } + + return nphi; +} + +static nir_jump_instr * +clone_jump(clone_state *state, const nir_jump_instr *jmp) +{ + nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type); + + return njmp; +} + +static nir_call_instr * +clone_call(clone_state *state, const nir_call_instr *call) +{ + nir_function *ncallee = lookup_ptr(state, call->callee); + nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee); + + for (unsigned i = 0; i < ncall->num_params; i++) + ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr); + + ncall->return_deref = clone_deref_var(state, call->return_deref, + &ncall->instr); + + return ncall; +} + +static nir_instr * +clone_instr(clone_state *state, const nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + return &clone_alu(state, nir_instr_as_alu(instr))->instr; + case nir_instr_type_intrinsic: + return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr; + case nir_instr_type_load_const: + return &clone_load_const(state, nir_instr_as_load_const(instr))->instr; + case nir_instr_type_ssa_undef: + return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr; + case nir_instr_type_tex: + return &clone_tex(state, nir_instr_as_tex(instr))->instr; + case nir_instr_type_phi: + unreachable("Cannot clone phis with clone_instr"); + case nir_instr_type_jump: + return &clone_jump(state, nir_instr_as_jump(instr))->instr; + case nir_instr_type_call: + return &clone_call(state, nir_instr_as_call(instr))->instr; + case nir_instr_type_parallel_copy: + unreachable("Cannot clone parallel copies"); + default: + unreachable("bad instr type"); + return NULL; + } +} + +static nir_block * +clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk) +{ + /* Don't actually create a new block. Just use the one from the tail of + * the list. NIR guarantees that the tail of the list is a block and that + * no two blocks are side-by-side in the IR; It should be empty. + */ + nir_block *nblk = + exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); + assert(nblk->cf_node.type == nir_cf_node_block); + assert(exec_list_is_empty(&nblk->instr_list)); + + /* We need this for phi sources */ + store_ptr(state, nblk, blk); + + nir_foreach_instr(blk, instr) { + if (instr->type == nir_instr_type_phi) { + /* Phi instructions are a bit of a special case when cloning because + * we don't want inserting the instruction to automatically handle + * use/defs for us. Instead, we need to wait until all the + * blocks/instructions are in so that we can set their sources up. 
+ */ + clone_phi(state, nir_instr_as_phi(instr), nblk); + } else { + nir_instr *ninstr = clone_instr(state, instr); + nir_instr_insert_after_block(nblk, ninstr); + } + } + + return nblk; +} + +static void +clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list); + +static nir_if * +clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i) +{ + nir_if *ni = nir_if_create(state->ns); + + __clone_src(state, ni, &ni->condition, &i->condition); + + nir_cf_node_insert_end(cf_list, &ni->cf_node); + + clone_cf_list(state, &ni->then_list, &i->then_list); + clone_cf_list(state, &ni->else_list, &i->else_list); + + return ni; +} + +static nir_loop * +clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop) +{ + nir_loop *nloop = nir_loop_create(state->ns); + + nir_cf_node_insert_end(cf_list, &nloop->cf_node); + + clone_cf_list(state, &nloop->body, &loop->body); + + return nloop; +} + +/* clone list of nir_cf_node: */ +static void +clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) +{ + foreach_list_typed(nir_cf_node, cf, node, list) { + switch (cf->type) { + case nir_cf_node_block: + clone_block(state, dst, nir_cf_node_as_block(cf)); + break; + case nir_cf_node_if: + clone_if(state, dst, nir_cf_node_as_if(cf)); + break; + case nir_cf_node_loop: + clone_loop(state, dst, nir_cf_node_as_loop(cf)); + break; + default: + unreachable("bad cf type"); + } + } +} + +static nir_function_impl * +clone_function_impl(clone_state *state, const nir_function_impl *fi, + nir_function *nfxn) +{ + nir_function_impl *nfi = nir_function_impl_create(nfxn); + + clone_var_list(state, &nfi->locals, &fi->locals); + clone_reg_list(state, &nfi->registers, &fi->registers); + nfi->reg_alloc = fi->reg_alloc; + + nfi->num_params = fi->num_params; + nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); + for (unsigned i = 0; i < fi->num_params; i++) { + nfi->params[i] = lookup_ptr(state, fi->params[i]); + } + nfi->return_var = lookup_ptr(state, fi->return_var); + + assert(list_empty(&state->phi_srcs)); + + clone_cf_list(state, &nfi->body, &fi->body); + + /* After we've cloned almost everything, we have to walk the list of phi + * sources and fix them up. Thanks to loops, the block and SSA value for a + * phi source may not be defined when we first encounter it. Instead, we + * add it to the phi_srcs list and we fix it up here. + */ + list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { + src->pred = lookup_ptr(state, src->pred); + assert(src->src.is_ssa); + src->src.ssa = lookup_ptr(state, src->src.ssa); + + /* Remove from this list and place in the uses of the SSA def */ + list_del(&src->src.use_link); + list_addtail(&src->src.use_link, &src->src.ssa->uses); + } + assert(list_empty(&state->phi_srcs)); + + /* All metadata is invalidated in the cloning process */ + nfi->valid_metadata = 0; + + return nfi; +} + +static nir_function * +clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) +{ + assert(ns == state->ns); + nir_function *nfxn = nir_function_create(ns, fxn->name); + + /* Needed for call instructions */ + store_ptr(state, nfxn, fxn); + + nfxn->num_params = fxn->num_params; + nfxn->params = ralloc_array(state->ns, nir_parameter, fxn->num_params); + memcpy(nfxn->params, fxn->params, sizeof(nir_parameter) * fxn->num_params); + + nfxn->return_type = fxn->return_type; + + /* At first glance, it looks like we should clone the function_impl here. 
+ * However, call instructions need to be able to reference at least the + * function and those will get processed as we clone the function_impl's. + * We stop here and do function_impls as a second pass. + */ + + return nfxn; +} + +nir_shader * +nir_shader_clone(void *mem_ctx, const nir_shader *s) +{ + clone_state state; + init_clone_state(&state); + + nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options); + state.ns = ns; + + clone_var_list(&state, &ns->uniforms, &s->uniforms); + clone_var_list(&state, &ns->inputs, &s->inputs); + clone_var_list(&state, &ns->outputs, &s->outputs); + clone_var_list(&state, &ns->globals, &s->globals); + clone_var_list(&state, &ns->system_values, &s->system_values); + + /* Go through and clone functions */ + foreach_list_typed(nir_function, fxn, node, &s->functions) + clone_function(&state, fxn, ns); + + /* Only after all functions are cloned can we clone the actual function + * implementations. This is because nir_call_instr's need to reference the + * functions of other functions and we don't know what order the functions + * will have in the list. + */ + nir_foreach_function(s, fxn) { + nir_function *nfxn = lookup_ptr(&state, fxn); + clone_function_impl(&state, fxn->impl, nfxn); + } + + clone_reg_list(&state, &ns->registers, &s->registers); + ns->reg_alloc = s->reg_alloc; + + ns->info = s->info; + ns->info.name = ralloc_strdup(ns, ns->info.name); + if (ns->info.label) + ns->info.label = ralloc_strdup(ns, ns->info.label); + + ns->num_inputs = s->num_inputs; + ns->num_uniforms = s->num_uniforms; + ns->num_outputs = s->num_outputs; + + free_clone_state(&state); + + return ns; +} diff --git a/src/compiler/nir/nir_constant_expressions.h b/src/compiler/nir/nir_constant_expressions.h new file mode 100644 index 00000000000..97997f2e514 --- /dev/null +++ b/src/compiler/nir/nir_constant_expressions.h @@ -0,0 +1,31 @@ +/* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, + nir_const_value *src); diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py new file mode 100644 index 00000000000..32784f6398d --- /dev/null +++ b/src/compiler/nir/nir_constant_expressions.py @@ -0,0 +1,336 @@ +#! 
/usr/bin/python2 +template = """\ +/* + * Copyright (C) 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + */ + +#include +#include "main/core.h" +#include "util/rounding.h" /* for _mesa_roundeven */ +#include "util/half_float.h" +#include "nir_constant_expressions.h" + +/** + * Evaluate one component of packSnorm4x8. + */ +static uint8_t +pack_snorm_1x8(float x) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packSnorm4x8 + * ------------ + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + * + * We must first cast the float to an int, because casting a negative + * float to a uint is undefined. + */ + return (uint8_t) (int) + _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f); +} + +/** + * Evaluate one component of packSnorm2x16. + */ +static uint16_t +pack_snorm_1x16(float x) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packSnorm2x16 + * ------------- + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) + * + * We must first cast the float to an int, because casting a negative + * float to a uint is undefined. + */ + return (uint16_t) (int) + _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f); +} + +/** + * Evaluate one component of unpackSnorm4x8. + */ +static float +unpack_snorm_1x8(uint8_t u) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackSnorm4x8 + * -------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm4x8: clamp(f / 127.0, -1, +1) + */ + return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); +} + +/** + * Evaluate one component of unpackSnorm2x16. + */ +static float +unpack_snorm_1x16(uint16_t u) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackSnorm2x16 + * --------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm2x16: clamp(f / 32767.0, -1, +1) + */ + return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f); +} + +/** + * Evaluate one component packUnorm4x8. 
+ */ +static uint8_t +pack_unorm_1x8(float x) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packUnorm4x8 + * ------------ + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) + */ + return (uint8_t) (int) + _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f); +} + +/** + * Evaluate one component packUnorm2x16. + */ +static uint16_t +pack_unorm_1x16(float x) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packUnorm2x16 + * ------------- + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) + */ + return (uint16_t) (int) + _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f); +} + +/** + * Evaluate one component of unpackUnorm4x8. + */ +static float +unpack_unorm_1x8(uint8_t u) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackUnorm4x8 + * -------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm4x8: f / 255.0 + */ + return (float) u / 255.0f; +} + +/** + * Evaluate one component of unpackUnorm2x16. + */ +static float +unpack_unorm_1x16(uint16_t u) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackUnorm2x16 + * --------------- + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm2x16: f / 65535.0 + */ + return (float) u / 65535.0f; +} + +/** + * Evaluate one component of packHalf2x16. + */ +static uint16_t +pack_half_1x16(float x) +{ + return _mesa_float_to_half(x); +} + +/** + * Evaluate one component of unpackHalf2x16. + */ +static float +unpack_half_1x16(uint16_t u) +{ + return _mesa_half_to_float(u); +} + +/* Some typed vector structures to make things like src0.y work */ +% for type in ["float", "int", "uint", "bool"]: +struct ${type}_vec { + ${type} x; + ${type} y; + ${type} z; + ${type} w; +}; +% endfor + +% for name, op in sorted(opcodes.iteritems()): +static nir_const_value +evaluate_${name}(unsigned num_components, nir_const_value *_src) +{ + nir_const_value _dst_val = { { {0, 0, 0, 0} } }; + + ## For each non-per-component input, create a variable srcN that + ## contains x, y, z, and w elements which are filled in with the + ## appropriately-typed values. + % for j in range(op.num_inputs): + % if op.input_sizes[j] == 0: + <% continue %> + % elif "src" + str(j) not in op.const_expr: + ## Avoid unused variable warnings + <% continue %> + %endif + + struct ${op.input_types[j]}_vec src${j} = { + % for k in range(op.input_sizes[j]): + % if op.input_types[j] == "bool": + _src[${j}].u[${k}] != 0, + % else: + _src[${j}].${op.input_types[j][:1]}[${k}], + % endif + % endfor + }; + % endfor + + % if op.output_size == 0: + ## For per-component instructions, we need to iterate over the + ## components and apply the constant expression one component + ## at a time. + for (unsigned _i = 0; _i < num_components; _i++) { + ## For each per-component input, create a variable srcN that + ## contains the value of the current (_i'th) component. 
+ % for j in range(op.num_inputs): + % if op.input_sizes[j] != 0: + <% continue %> + % elif "src" + str(j) not in op.const_expr: + ## Avoid unused variable warnings + <% continue %> + % elif op.input_types[j] == "bool": + bool src${j} = _src[${j}].u[_i] != 0; + % else: + ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i]; + % endif + % endfor + + ## Create an appropriately-typed variable dst and assign the + ## result of the const_expr to it. If const_expr already contains + ## writes to dst, just include const_expr directly. + % if "dst" in op.const_expr: + ${op.output_type} dst; + ${op.const_expr} + % else: + ${op.output_type} dst = ${op.const_expr}; + % endif + + ## Store the current component of the actual destination to the + ## value of dst. + % if op.output_type == "bool": + ## Sanitize the C value to a proper NIR bool + _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; + % else: + _dst_val.${op.output_type[:1]}[_i] = dst; + % endif + } + % else: + ## In the non-per-component case, create a struct dst with + ## appropriately-typed elements x, y, z, and w and assign the result + ## of the const_expr to all components of dst, or include the + ## const_expr directly if it writes to dst already. + struct ${op.output_type}_vec dst; + + % if "dst" in op.const_expr: + ${op.const_expr} + % else: + ## Splat the value to all components. This way expressions which + ## write the same value to all components don't need to explicitly + ## write to dest. One such example is fnoise which has a + ## const_expr of 0.0f. + dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; + % endif + + ## For each component in the destination, copy the value of dst to + ## the actual destination. + % for k in range(op.output_size): + % if op.output_type == "bool": + ## Sanitize the C value to a proper NIR bool + _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE; + % else: + _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]}; + % endif + % endfor + % endif + + return _dst_val; +} +% endfor + +nir_const_value +nir_eval_const_opcode(nir_op op, unsigned num_components, + nir_const_value *src) +{ + switch (op) { +% for name in sorted(opcodes.iterkeys()): + case nir_op_${name}: { + return evaluate_${name}(num_components, src); + break; + } +% endfor + default: + unreachable("shouldn't get here"); + } +}""" + +from nir_opcodes import opcodes +from mako.template import Template + +print Template(template).render(opcodes=opcodes) diff --git a/src/compiler/nir/nir_control_flow.c b/src/compiler/nir/nir_control_flow.c new file mode 100644 index 00000000000..96395a41615 --- /dev/null +++ b/src/compiler/nir/nir_control_flow.c @@ -0,0 +1,808 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir_control_flow_private.h" + +/** + * \name Control flow modification + * + * These functions modify the control flow tree while keeping the control flow + * graph up-to-date. The invariants respected are: + * 1. Each then statement, else statement, or loop body must have at least one + * control flow node. + * 2. Each if-statement and loop must have one basic block before it and one + * after. + * 3. Two basic blocks cannot be directly next to each other. + * 4. If a basic block has a jump instruction, there must be only one and it + * must be at the end of the block. + * 5. The CFG must always be connected - this means that we must insert a fake + * CFG edge for loops with no break statement. + * + * The purpose of the second one is so that we have places to insert code during + * GCM, as well as eliminating the possibility of critical edges. + */ +/*@{*/ + +static bool +block_ends_in_jump(nir_block *block) +{ + return !exec_list_is_empty(&block->instr_list) && + nir_block_last_instr(block)->type == nir_instr_type_jump; +} + +static inline void +block_add_pred(nir_block *block, nir_block *pred) +{ + _mesa_set_add(block->predecessors, pred); +} + +static inline void +block_remove_pred(nir_block *block, nir_block *pred) +{ + struct set_entry *entry = _mesa_set_search(block->predecessors, pred); + + assert(entry); + + _mesa_set_remove(block->predecessors, entry); +} + +static void +link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2) +{ + pred->successors[0] = succ1; + if (succ1 != NULL) + block_add_pred(succ1, pred); + + pred->successors[1] = succ2; + if (succ2 != NULL) + block_add_pred(succ2, pred); +} + +static void +unlink_blocks(nir_block *pred, nir_block *succ) +{ + if (pred->successors[0] == succ) { + pred->successors[0] = pred->successors[1]; + pred->successors[1] = NULL; + } else { + assert(pred->successors[1] == succ); + pred->successors[1] = NULL; + } + + block_remove_pred(succ, pred); +} + +static void +unlink_block_successors(nir_block *block) +{ + if (block->successors[1] != NULL) + unlink_blocks(block, block->successors[1]); + if (block->successors[0] != NULL) + unlink_blocks(block, block->successors[0]); +} + +static void +link_non_block_to_block(nir_cf_node *node, nir_block *block) +{ + if (node->type == nir_cf_node_if) { + /* + * We're trying to link an if to a block after it; this just means linking + * the last block of the then and else branches. 
+ */ + + nir_if *if_stmt = nir_cf_node_as_if(node); + + nir_cf_node *last_then = nir_if_last_then_node(if_stmt); + assert(last_then->type == nir_cf_node_block); + nir_block *last_then_block = nir_cf_node_as_block(last_then); + + nir_cf_node *last_else = nir_if_last_else_node(if_stmt); + assert(last_else->type == nir_cf_node_block); + nir_block *last_else_block = nir_cf_node_as_block(last_else); + + if (!block_ends_in_jump(last_then_block)) { + unlink_block_successors(last_then_block); + link_blocks(last_then_block, block, NULL); + } + + if (!block_ends_in_jump(last_else_block)) { + unlink_block_successors(last_else_block); + link_blocks(last_else_block, block, NULL); + } + } else { + assert(node->type == nir_cf_node_loop); + + /* + * We can only get to this codepath if we're inserting a new loop, or + * at least a loop with no break statements; we can't insert break + * statements into a loop when we haven't inserted it into the CFG + * because we wouldn't know which block comes after the loop + * and therefore, which block should be the successor of the block with + * the break). Therefore, we need to insert a fake edge (see invariant + * #5). + */ + + nir_loop *loop = nir_cf_node_as_loop(node); + + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + + last_block->successors[1] = block; + block_add_pred(block, last_block); + } +} + +static void +link_block_to_non_block(nir_block *block, nir_cf_node *node) +{ + if (node->type == nir_cf_node_if) { + /* + * We're trying to link a block to an if after it; this just means linking + * the block to the first block of the then and else branches. + */ + + nir_if *if_stmt = nir_cf_node_as_if(node); + + nir_cf_node *first_then = nir_if_first_then_node(if_stmt); + assert(first_then->type == nir_cf_node_block); + nir_block *first_then_block = nir_cf_node_as_block(first_then); + + nir_cf_node *first_else = nir_if_first_else_node(if_stmt); + assert(first_else->type == nir_cf_node_block); + nir_block *first_else_block = nir_cf_node_as_block(first_else); + + unlink_block_successors(block); + link_blocks(block, first_then_block, first_else_block); + } else { + /* + * For similar reasons as the corresponding case in + * link_non_block_to_block(), don't worry about if the loop header has + * any predecessors that need to be unlinked. + */ + + assert(node->type == nir_cf_node_loop); + + nir_loop *loop = nir_cf_node_as_loop(node); + + nir_cf_node *loop_header = nir_loop_first_cf_node(loop); + assert(loop_header->type == nir_cf_node_block); + nir_block *loop_header_block = nir_cf_node_as_block(loop_header); + + unlink_block_successors(block); + link_blocks(block, loop_header_block, NULL); + } + +} + +/** + * Replace a block's successor with a different one. + */ +static void +replace_successor(nir_block *block, nir_block *old_succ, nir_block *new_succ) +{ + if (block->successors[0] == old_succ) { + block->successors[0] = new_succ; + } else { + assert(block->successors[1] == old_succ); + block->successors[1] = new_succ; + } + + block_remove_pred(old_succ, block); + block_add_pred(new_succ, block); +} + +/** + * Takes a basic block and inserts a new empty basic block before it, making its + * predecessors point to the new block. This essentially splits the block into + * an empty header and a body so that another non-block CF node can be inserted + * between the two. 
Note that this does *not* link the two basic blocks, so + * some kind of cleanup *must* be performed after this call. + */ + +static nir_block * +split_block_beginning(nir_block *block) +{ + nir_block *new_block = nir_block_create(ralloc_parent(block)); + new_block->cf_node.parent = block->cf_node.parent; + exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node); + + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + replace_successor(pred, block, new_block); + } + + /* Any phi nodes must stay part of the new block, or else their + * sourcse will be messed up. This will reverse the order of the phi's, but + * order shouldn't matter. + */ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + exec_node_remove(&instr->node); + instr->block = new_block; + exec_list_push_head(&new_block->instr_list, &instr->node); + } + + return new_block; +} + +static void +rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src(phi, src) { + if (src->pred == old_pred) { + src->pred = new_pred; + break; + } + } + } +} + +static void +insert_phi_undef(nir_block *block, nir_block *pred) +{ + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(ralloc_parent(phi), + phi->dest.ssa.num_components); + nir_instr_insert_before_cf_list(&impl->body, &undef->instr); + nir_phi_src *src = ralloc(phi, nir_phi_src); + src->pred = pred; + src->src.parent_instr = &phi->instr; + src->src.is_ssa = true; + src->src.ssa = &undef->def; + + list_addtail(&src->src.use_link, &undef->def.uses); + + exec_list_push_tail(&phi->srcs, &src->node); + } +} + +/** + * Moves the successors of source to the successors of dest, leaving both + * successors of source NULL. + */ + +static void +move_successors(nir_block *source, nir_block *dest) +{ + nir_block *succ1 = source->successors[0]; + nir_block *succ2 = source->successors[1]; + + if (succ1) { + unlink_blocks(source, succ1); + rewrite_phi_preds(succ1, source, dest); + } + + if (succ2) { + unlink_blocks(source, succ2); + rewrite_phi_preds(succ2, source, dest); + } + + unlink_block_successors(dest); + link_blocks(dest, succ1, succ2); +} + +/* Given a basic block with no successors that has been inserted into the + * control flow tree, gives it the successors it would normally have assuming + * it doesn't end in a jump instruction. Also inserts phi sources with undefs + * if necessary. 
+ */ +static void +block_add_normal_succs(nir_block *block) +{ + if (exec_node_is_tail_sentinel(block->cf_node.node.next)) { + nir_cf_node *parent = block->cf_node.parent; + if (parent->type == nir_cf_node_if) { + nir_cf_node *next = nir_cf_node_next(parent); + assert(next->type == nir_cf_node_block); + nir_block *next_block = nir_cf_node_as_block(next); + + link_blocks(block, next_block, NULL); + } else { + assert(parent->type == nir_cf_node_loop); + nir_loop *loop = nir_cf_node_as_loop(parent); + + nir_cf_node *head = nir_loop_first_cf_node(loop); + assert(head->type == nir_cf_node_block); + nir_block *head_block = nir_cf_node_as_block(head); + + link_blocks(block, head_block, NULL); + insert_phi_undef(head_block, block); + } + } else { + nir_cf_node *next = nir_cf_node_next(&block->cf_node); + if (next->type == nir_cf_node_if) { + nir_if *next_if = nir_cf_node_as_if(next); + + nir_cf_node *first_then = nir_if_first_then_node(next_if); + assert(first_then->type == nir_cf_node_block); + nir_block *first_then_block = nir_cf_node_as_block(first_then); + + nir_cf_node *first_else = nir_if_first_else_node(next_if); + assert(first_else->type == nir_cf_node_block); + nir_block *first_else_block = nir_cf_node_as_block(first_else); + + link_blocks(block, first_then_block, first_else_block); + } else { + assert(next->type == nir_cf_node_loop); + nir_loop *next_loop = nir_cf_node_as_loop(next); + + nir_cf_node *first = nir_loop_first_cf_node(next_loop); + assert(first->type == nir_cf_node_block); + nir_block *first_block = nir_cf_node_as_block(first); + + link_blocks(block, first_block, NULL); + insert_phi_undef(first_block, block); + } + } +} + +static nir_block * +split_block_end(nir_block *block) +{ + nir_block *new_block = nir_block_create(ralloc_parent(block)); + new_block->cf_node.parent = block->cf_node.parent; + exec_node_insert_after(&block->cf_node.node, &new_block->cf_node.node); + + if (block_ends_in_jump(block)) { + /* Figure out what successor block would've had if it didn't have a jump + * instruction, and make new_block have that successor. + */ + block_add_normal_succs(new_block); + } else { + move_successors(block, new_block); + } + + return new_block; +} + +static nir_block * +split_block_before_instr(nir_instr *instr) +{ + assert(instr->type != nir_instr_type_phi); + nir_block *new_block = split_block_beginning(instr->block); + + nir_foreach_instr_safe(instr->block, cur_instr) { + if (cur_instr == instr) + break; + + exec_node_remove(&cur_instr->node); + cur_instr->block = new_block; + exec_list_push_tail(&new_block->instr_list, &cur_instr->node); + } + + return new_block; +} + +/* Splits a basic block at the point specified by the cursor. The "before" and + * "after" arguments are filled out with the blocks resulting from the split + * if non-NULL. Note that the "beginning" of the block is actually interpreted + * as before the first non-phi instruction, and it's illegal to split a block + * before a phi instruction. 
+ */ + +static void +split_block_cursor(nir_cursor cursor, + nir_block **_before, nir_block **_after) +{ + nir_block *before, *after; + switch (cursor.option) { + case nir_cursor_before_block: + after = cursor.block; + before = split_block_beginning(cursor.block); + break; + + case nir_cursor_after_block: + before = cursor.block; + after = split_block_end(cursor.block); + break; + + case nir_cursor_before_instr: + after = cursor.instr->block; + before = split_block_before_instr(cursor.instr); + break; + + case nir_cursor_after_instr: + /* We lower this to split_block_before_instr() so that we can keep the + * after-a-jump-instr case contained to split_block_end(). + */ + if (nir_instr_is_last(cursor.instr)) { + before = cursor.instr->block; + after = split_block_end(cursor.instr->block); + } else { + after = cursor.instr->block; + before = split_block_before_instr(nir_instr_next(cursor.instr)); + } + break; + + default: + unreachable("not reached"); + } + + if (_before) + *_before = before; + if (_after) + *_after = after; +} + +/** + * Inserts a non-basic block between two basic blocks and links them together. + */ + +static void +insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after) +{ + node->parent = before->cf_node.parent; + exec_node_insert_after(&before->cf_node.node, &node->node); + link_block_to_non_block(before, node); + link_non_block_to_block(node, after); +} + +/* walk up the control flow tree to find the innermost enclosed loop */ +static nir_loop * +nearest_loop(nir_cf_node *node) +{ + while (node->type != nir_cf_node_loop) { + node = node->parent; + } + + return nir_cf_node_as_loop(node); +} + +/* + * update the CFG after a jump instruction has been added to the end of a block + */ + +void +nir_handle_add_jump(nir_block *block) +{ + nir_instr *instr = nir_block_last_instr(block); + nir_jump_instr *jump_instr = nir_instr_as_jump(instr); + + unlink_block_successors(block); + + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_metadata_preserve(impl, nir_metadata_none); + + if (jump_instr->type == nir_jump_break || + jump_instr->type == nir_jump_continue) { + nir_loop *loop = nearest_loop(&block->cf_node); + + if (jump_instr->type == nir_jump_continue) { + nir_cf_node *first_node = nir_loop_first_cf_node(loop); + assert(first_node->type == nir_cf_node_block); + nir_block *first_block = nir_cf_node_as_block(first_node); + link_blocks(block, first_block, NULL); + } else { + nir_cf_node *after = nir_cf_node_next(&loop->cf_node); + assert(after->type == nir_cf_node_block); + nir_block *after_block = nir_cf_node_as_block(after); + link_blocks(block, after_block, NULL); + + /* If we inserted a fake link, remove it */ + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + if (last_block->successors[1] != NULL) + unlink_blocks(last_block, after_block); + } + } else { + assert(jump_instr->type == nir_jump_return); + link_blocks(block, impl->end_block, NULL); + } +} + +static void +remove_phi_src(nir_block *block, nir_block *pred) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_foreach_phi_src_safe(phi, src) { + if (src->pred == pred) { + list_del(&src->src.use_link); + exec_node_remove(&src->node); + } + } + } +} + +/* Removes the successor of a block with a jump, and inserts a fake edge for + * infinite loops. 
Note that the jump to be eliminated may be free-floating. + */ + +static void +unlink_jump(nir_block *block, nir_jump_type type, bool add_normal_successors) +{ + nir_block *next = block->successors[0]; + + if (block->successors[0]) + remove_phi_src(block->successors[0], block); + if (block->successors[1]) + remove_phi_src(block->successors[1], block); + + unlink_block_successors(block); + if (add_normal_successors) + block_add_normal_succs(block); + + /* If we've just removed a break, and the block we were jumping to (after + * the loop) now has zero predecessors, we've created a new infinite loop. + * + * NIR doesn't allow blocks (other than the start block) to have zero + * predecessors. In particular, dominance assumes all blocks are reachable. + * So, we insert a "fake link" by making successors[1] point after the loop. + * + * Note that we have to do this after unlinking/recreating the block's + * successors. If we removed a "break" at the end of the loop, then + * block == last_block, so block->successors[0] would already be "next", + * and adding a fake link would create two identical successors. Doing + * this afterward works, as we'll have changed block->successors[0] to + * be the top of the loop. + */ + if (type == nir_jump_break && next->predecessors->entries == 0) { + nir_loop *loop = + nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node)); + + /* insert fake link */ + nir_cf_node *last = nir_loop_last_cf_node(loop); + assert(last->type == nir_cf_node_block); + nir_block *last_block = nir_cf_node_as_block(last); + + last_block->successors[1] = next; + block_add_pred(next, last_block); + } +} + +void +nir_handle_remove_jump(nir_block *block, nir_jump_type type) +{ + unlink_jump(block, type, true); + + nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); + nir_metadata_preserve(impl, nir_metadata_none); +} + +static void +update_if_uses(nir_cf_node *node) +{ + if (node->type != nir_cf_node_if) + return; + + nir_if *if_stmt = nir_cf_node_as_if(node); + + if_stmt->condition.parent_if = if_stmt; + if (if_stmt->condition.is_ssa) { + list_addtail(&if_stmt->condition.use_link, + &if_stmt->condition.ssa->if_uses); + } else { + list_addtail(&if_stmt->condition.use_link, + &if_stmt->condition.reg.reg->if_uses); + } +} + +/** + * Stitch two basic blocks together into one. The aggregate must have the same + * predecessors as the first and the same successors as the second. + */ + +static void +stitch_blocks(nir_block *before, nir_block *after) +{ + /* + * We move after into before, so we have to deal with up to 2 successors vs. + * possibly a large number of predecessors. + * + * TODO: special case when before is empty and after isn't? 
+ */ + + if (block_ends_in_jump(before)) { + assert(exec_list_is_empty(&after->instr_list)); + if (after->successors[0]) + remove_phi_src(after->successors[0], after); + if (after->successors[1]) + remove_phi_src(after->successors[1], after); + unlink_block_successors(after); + exec_node_remove(&after->cf_node.node); + } else { + move_successors(after, before); + + foreach_list_typed(nir_instr, instr, node, &after->instr_list) { + instr->block = before; + } + + exec_list_append(&before->instr_list, &after->instr_list); + exec_node_remove(&after->cf_node.node); + } +} + +void +nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node) +{ + nir_block *before, *after; + + split_block_cursor(cursor, &before, &after); + + if (node->type == nir_cf_node_block) { + nir_block *block = nir_cf_node_as_block(node); + exec_node_insert_after(&before->cf_node.node, &block->cf_node.node); + block->cf_node.parent = before->cf_node.parent; + /* stitch_blocks() assumes that any block that ends with a jump has + * already been setup with the correct successors, so we need to set + * up jumps here as the block is being inserted. + */ + if (block_ends_in_jump(block)) + nir_handle_add_jump(block); + + stitch_blocks(block, after); + stitch_blocks(before, block); + } else { + update_if_uses(node); + insert_non_block(before, node, after); + } +} + +static bool +replace_ssa_def_uses(nir_ssa_def *def, void *void_impl) +{ + nir_function_impl *impl = void_impl; + void *mem_ctx = ralloc_parent(impl); + + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(mem_ctx, def->num_components); + nir_instr_insert_before_cf_list(&impl->body, &undef->instr); + nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def)); + return true; +} + +static void +cleanup_cf_node(nir_cf_node *node, nir_function_impl *impl) +{ + switch (node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(node); + /* We need to walk the instructions and clean up defs/uses */ + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_jump) { + nir_jump_type jump_type = nir_instr_as_jump(instr)->type; + unlink_jump(block, jump_type, false); + } else { + nir_foreach_ssa_def(instr, replace_ssa_def_uses, impl); + nir_instr_remove(instr); + } + } + break; + } + + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(node); + foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list) + cleanup_cf_node(child, impl); + foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) + cleanup_cf_node(child, impl); + + list_del(&if_stmt->condition.use_link); + break; + } + + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + foreach_list_typed(nir_cf_node, child, node, &loop->body) + cleanup_cf_node(child, impl); + break; + } + case nir_cf_node_function: { + nir_function_impl *impl = nir_cf_node_as_function(node); + foreach_list_typed(nir_cf_node, child, node, &impl->body) + cleanup_cf_node(child, impl); + break; + } + default: + unreachable("Invalid CF node type"); + } +} + +void +nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end) +{ + nir_block *block_begin, *block_end, *block_before, *block_after; + + /* In the case where begin points to an instruction in some basic block and + * end points to the end of the same basic block, we rely on the fact that + * splitting on an instruction moves earlier instructions into a new basic + * block. 
If the later instructions were moved instead, then the end cursor + * would be pointing to the same place that begin used to point to, which + * is obviously not what we want. + */ + split_block_cursor(begin, &block_before, &block_begin); + split_block_cursor(end, &block_end, &block_after); + + extracted->impl = nir_cf_node_get_function(&block_begin->cf_node); + exec_list_make_empty(&extracted->list); + + /* Dominance and other block-related information is toast. */ + nir_metadata_preserve(extracted->impl, nir_metadata_none); + + nir_cf_node *cf_node = &block_begin->cf_node; + nir_cf_node *cf_node_end = &block_end->cf_node; + while (true) { + nir_cf_node *next = nir_cf_node_next(cf_node); + + exec_node_remove(&cf_node->node); + cf_node->parent = NULL; + exec_list_push_tail(&extracted->list, &cf_node->node); + + if (cf_node == cf_node_end) + break; + + cf_node = next; + } + + stitch_blocks(block_before, block_after); +} + +void +nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor) +{ + nir_block *before, *after; + + split_block_cursor(cursor, &before, &after); + + foreach_list_typed_safe(nir_cf_node, node, node, &cf_list->list) { + exec_node_remove(&node->node); + node->parent = before->cf_node.parent; + exec_node_insert_node_before(&after->cf_node.node, &node->node); + } + + stitch_blocks(before, + nir_cf_node_as_block(nir_cf_node_next(&before->cf_node))); + stitch_blocks(nir_cf_node_as_block(nir_cf_node_prev(&after->cf_node)), + after); +} + +void +nir_cf_delete(nir_cf_list *cf_list) +{ + foreach_list_typed(nir_cf_node, node, node, &cf_list->list) { + cleanup_cf_node(node, cf_list->impl); + } +} diff --git a/src/compiler/nir/nir_control_flow.h b/src/compiler/nir/nir_control_flow.h new file mode 100644 index 00000000000..b71382fc597 --- /dev/null +++ b/src/compiler/nir/nir_control_flow.h @@ -0,0 +1,162 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +/** NIR Control Flow Modification + * + * This file contains various API's that make modifying control flow in NIR, + * while maintaining the invariants checked by the validator, much easier. + * There are two parts to this: + * + * 1. Inserting control flow (if's and loops) in various places, for creating + * IR either from scratch or as part of some lowering pass. + * 2. 
Taking existing pieces of the IR and either moving them around or + * deleting them. + */ + +/** Control flow insertion. */ + +/** puts a control flow node where the cursor is */ +void nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node); + +/** puts a control flow node immediately after another control flow node */ +static inline void +nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after) +{ + nir_cf_node_insert(nir_after_cf_node(node), after); +} + +/** puts a control flow node immediately before another control flow node */ +static inline void +nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before) +{ + nir_cf_node_insert(nir_before_cf_node(node), before); +} + +/** puts a control flow node at the beginning of a list from an if, loop, or function */ +static inline void +nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node) +{ + nir_cf_node_insert(nir_before_cf_list(list), node); +} + +/** puts a control flow node at the end of a list from an if, loop, or function */ +static inline void +nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node) +{ + nir_cf_node_insert(nir_after_cf_list(list), node); +} + + +/** Control flow motion. + * + * These functions let you take a part of a control flow list (basically + * equivalent to a series of statement in GLSL) and "extract" it from the IR, + * so that it's a free-floating piece of IR that can be either re-inserted + * somewhere else or deleted entirely. A few notes on using it: + * + * 1. Phi nodes are considered attached to the piece of control flow that + * their sources come from. There are three places where phi nodes can + * occur, which are the three places where a block can have multiple + * predecessors: + * + * 1) After an if statement, if neither branch ends in a jump. + * 2) After a loop, if there are multiple break's. + * 3) At the beginning of a loop. + * + * For #1, the phi node is considered to be part of the if, and for #2 and + * #3 the phi node is considered to be part of the loop. This allows us to + * keep phi's intact, but it means that phi nodes cannot be separated from + * the control flow they come from. For example, extracting an if without + * extracting all the phi nodes after it is not allowed, and neither is + * extracting only some of the phi nodes at the beginning of a block. It + * also means that extracting from the beginning of a basic block actually + * means extracting from the first non-phi instruction, since there's no + * situation where extracting phi nodes without extracting what comes + * before them makes any sense. + * + * 2. Phi node sources are guaranteed to remain valid, meaning that they still + * correspond one-to-one with the predecessors of the basic block they're + * part of. In addition, the original sources will be preserved unless they + * correspond to a break or continue that was deleted. However, no attempt + * is made to ensure that SSA form is maintained. In particular, it is + * *not* guaranteed that definitions of SSA values will dominate all their + * uses after all is said and done. Either the caller must ensure that this + * is the case, or it must insert extra phi nodes to restore SSA. + * + * 3. It is invalid to move a piece of IR with a break/continue outside of the + * loop it references. Doing this will result in invalid + * successors/predecessors and phi node sources. + * + * 4. It is invalid to move a piece of IR from one function implementation to + * another. + * + * 5. 
Extracting a control flow list will leave lots of dangling references to + * and from other pieces of the IR. It also leaves things in a not 100% + * consistent state. This means that some things (e.g. inserting + * instructions) might not work reliably on the extracted control flow. It + * also means that extracting control flow without re-inserting it or + * deleting it is a Bad Thing (tm). + */ + +typedef struct { + struct exec_list list; + nir_function_impl *impl; /* for cleaning up if the list is deleted */ +} nir_cf_list; + +void nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end); + +void nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor); + +void nir_cf_delete(nir_cf_list *cf_list); + +static inline void +nir_cf_list_extract(nir_cf_list *extracted, struct exec_list *cf_list) +{ + nir_cf_extract(extracted, nir_before_cf_list(cf_list), + nir_after_cf_list(cf_list)); +} + +/** removes a control flow node, doing any cleanup necessary */ +static inline void +nir_cf_node_remove(nir_cf_node *node) +{ + nir_cf_list list; + nir_cf_extract(&list, nir_before_cf_node(node), nir_after_cf_node(node)); + nir_cf_delete(&list); +} + +#ifdef __cplusplus +} +#endif diff --git a/src/compiler/nir/nir_control_flow_private.h b/src/compiler/nir/nir_control_flow_private.h new file mode 100644 index 00000000000..f32b57a8cef --- /dev/null +++ b/src/compiler/nir/nir_control_flow_private.h @@ -0,0 +1,37 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir_control_flow.h" + +#pragma once + +/* Internal control-flow modification functions used when inserting/removing + * instructions. 
+ */ + +void nir_handle_add_jump(nir_block *block); +void nir_handle_remove_jump(nir_block *block, nir_jump_type type); diff --git a/src/compiler/nir/nir_dominance.c b/src/compiler/nir/nir_dominance.c new file mode 100644 index 00000000000..b345b85e8a0 --- /dev/null +++ b/src/compiler/nir/nir_dominance.c @@ -0,0 +1,350 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +/* + * Implements the algorithms for computing the dominance tree and the + * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper, + * Harvey, and Kennedy. + */ + +typedef struct { + nir_function_impl *impl; + bool progress; +} dom_state; + +static bool +init_block_cb(nir_block *block, void *_state) +{ + dom_state *state = (dom_state *) _state; + if (block == nir_start_block(state->impl)) + block->imm_dom = block; + else + block->imm_dom = NULL; + block->num_dom_children = 0; + + struct set_entry *entry; + set_foreach(block->dom_frontier, entry) { + _mesa_set_remove(block->dom_frontier, entry); + } + + return true; +} + +static nir_block * +intersect(nir_block *b1, nir_block *b2) +{ + while (b1 != b2) { + /* + * Note, the comparisons here are the opposite of what the paper says + * because we index blocks from beginning -> end (i.e. reverse + * post-order) instead of post-order like they assume. 
+ */ + while (b1->index > b2->index) + b1 = b1->imm_dom; + while (b2->index > b1->index) + b2 = b2->imm_dom; + } + + return b1; +} + +static bool +calc_dominance_cb(nir_block *block, void *_state) +{ + dom_state *state = (dom_state *) _state; + if (block == nir_start_block(state->impl)) + return true; + + nir_block *new_idom = NULL; + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + if (pred->imm_dom) { + if (new_idom) + new_idom = intersect(pred, new_idom); + else + new_idom = pred; + } + } + + assert(new_idom); + if (block->imm_dom != new_idom) { + block->imm_dom = new_idom; + state->progress = true; + } + + return true; +} + +static bool +calc_dom_frontier_cb(nir_block *block, void *state) +{ + (void) state; + + if (block->predecessors->entries > 1) { + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *runner = (nir_block *) entry->key; + while (runner != block->imm_dom) { + _mesa_set_add(runner->dom_frontier, block); + runner = runner->imm_dom; + } + } + } + + return true; +} + +/* + * Compute each node's children in the dominance tree from the immediate + * dominator information. We do this in three stages: + * + * 1. Calculate the number of children each node has + * 2. Allocate arrays, setting the number of children to 0 again + * 3. For each node, add itself to its parent's list of children, using + * num_dom_children as an index - at the end of this step, num_dom_children + * for each node will be the same as it was at the end of step #1. + */ + +static bool +block_count_children(nir_block *block, void *state) +{ + (void) state; + + if (block->imm_dom) + block->imm_dom->num_dom_children++; + + return true; +} + +static bool +block_alloc_children(nir_block *block, void *state) +{ + void *mem_ctx = state; + + block->dom_children = ralloc_array(mem_ctx, nir_block *, + block->num_dom_children); + block->num_dom_children = 0; + + return true; +} + +static bool +block_add_child(nir_block *block, void *state) +{ + (void) state; + + if (block->imm_dom) + block->imm_dom->dom_children[block->imm_dom->num_dom_children++] = block; + + return true; +} + +static void +calc_dom_children(nir_function_impl* impl) +{ + void *mem_ctx = ralloc_parent(impl); + + nir_foreach_block(impl, block_count_children, NULL); + nir_foreach_block(impl, block_alloc_children, mem_ctx); + nir_foreach_block(impl, block_add_child, NULL); +} + +static void +calc_dfs_indicies(nir_block *block, unsigned *index) +{ + block->dom_pre_index = (*index)++; + + for (unsigned i = 0; i < block->num_dom_children; i++) + calc_dfs_indicies(block->dom_children[i], index); + + block->dom_post_index = (*index)++; +} + +void +nir_calc_dominance_impl(nir_function_impl *impl) +{ + if (impl->valid_metadata & nir_metadata_dominance) + return; + + nir_metadata_require(impl, nir_metadata_block_index); + + dom_state state; + state.impl = impl; + state.progress = true; + + nir_foreach_block(impl, init_block_cb, &state); + + while (state.progress) { + state.progress = false; + nir_foreach_block(impl, calc_dominance_cb, &state); + } + + nir_foreach_block(impl, calc_dom_frontier_cb, &state); + + nir_block *start_block = nir_start_block(impl); + start_block->imm_dom = NULL; + + calc_dom_children(impl); + + unsigned dfs_index = 0; + calc_dfs_indicies(start_block, &dfs_index); +} + +void +nir_calc_dominance(nir_shader *shader) +{ + nir_foreach_function(shader, function) { + if (function->impl) + nir_calc_dominance_impl(function->impl); + } +} + +/** + 
* Computes the least common anscestor of two blocks. If one of the blocks + * is null, the other block is returned. + */ +nir_block * +nir_dominance_lca(nir_block *b1, nir_block *b2) +{ + if (b1 == NULL) + return b2; + + if (b2 == NULL) + return b1; + + assert(nir_cf_node_get_function(&b1->cf_node) == + nir_cf_node_get_function(&b2->cf_node)); + + assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata & + nir_metadata_dominance); + + return intersect(b1, b2); +} + +/** + * Returns true if parent dominates child + */ +bool +nir_block_dominates(nir_block *parent, nir_block *child) +{ + assert(nir_cf_node_get_function(&parent->cf_node) == + nir_cf_node_get_function(&child->cf_node)); + + assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata & + nir_metadata_dominance); + + return child->dom_pre_index >= parent->dom_pre_index && + child->dom_post_index <= parent->dom_post_index; +} + +static bool +dump_block_dom(nir_block *block, void *state) +{ + FILE *fp = state; + if (block->imm_dom) + fprintf(fp, "\t%u -> %u\n", block->imm_dom->index, block->index); + return true; +} + +void +nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp) +{ + fprintf(fp, "digraph doms_%s {\n", impl->function->name); + nir_foreach_block(impl, dump_block_dom, fp); + fprintf(fp, "}\n\n"); +} + +void +nir_dump_dom_tree(nir_shader *shader, FILE *fp) +{ + nir_foreach_function(shader, function) { + if (function->impl) + nir_dump_dom_tree_impl(function->impl, fp); + } +} + +static bool +dump_block_dom_frontier(nir_block *block, void *state) +{ + FILE *fp = state; + + fprintf(fp, "DF(%u) = {", block->index); + struct set_entry *entry; + set_foreach(block->dom_frontier, entry) { + nir_block *df = (nir_block *) entry->key; + fprintf(fp, "%u, ", df->index); + } + fprintf(fp, "}\n"); + return true; +} + +void +nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp) +{ + nir_foreach_block(impl, dump_block_dom_frontier, fp); +} + +void +nir_dump_dom_frontier(nir_shader *shader, FILE *fp) +{ + nir_foreach_function(shader, function) { + if (function->impl) + nir_dump_dom_frontier_impl(function->impl, fp); + } +} + +static bool +dump_block_succs(nir_block *block, void *state) +{ + FILE *fp = state; + if (block->successors[0]) + fprintf(fp, "\t%u -> %u\n", block->index, block->successors[0]->index); + if (block->successors[1]) + fprintf(fp, "\t%u -> %u\n", block->index, block->successors[1]->index); + return true; +} + +void +nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp) +{ + fprintf(fp, "digraph cfg_%s {\n", impl->function->name); + nir_foreach_block(impl, dump_block_succs, fp); + fprintf(fp, "}\n\n"); +} + +void +nir_dump_cfg(nir_shader *shader, FILE *fp) +{ + nir_foreach_function(shader, function) { + if (function->impl) + nir_dump_cfg_impl(function->impl, fp); + } +} diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c new file mode 100644 index 00000000000..8bc9f24e406 --- /dev/null +++ b/src/compiler/nir/nir_from_ssa.c @@ -0,0 +1,805 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice 
and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" +#include "nir_vla.h" + +/* + * This file implements an out-of-SSA pass as described in "Revisiting + * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by + * Boissinot et. al. + */ + +struct from_ssa_state { + void *mem_ctx; + void *dead_ctx; + bool phi_webs_only; + struct hash_table *merge_node_table; + nir_instr *instr; + nir_function_impl *impl; +}; + +/* Returns true if a dominates b */ +static bool +ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b) +{ + if (a->live_index == 0) { + /* SSA undefs always dominate */ + return true; + } else if (b->live_index < a->live_index) { + return false; + } else if (a->parent_instr->block == b->parent_instr->block) { + return a->live_index <= b->live_index; + } else { + return nir_block_dominates(a->parent_instr->block, + b->parent_instr->block); + } +} + + +/* The following data structure, which I have named merge_set is a way of + * representing a set registers of non-interfering registers. This is + * based on the concept of a "dominence forest" presented in "Fast Copy + * Coalescing and Live-Range Identification" by Budimlic et. al. but the + * implementation concept is taken from "Revisiting Out-of-SSA Translation + * for Correctness, Code Quality, and Efficiency" by Boissinot et. al.. + * + * Each SSA definition is associated with a merge_node and the association + * is represented by a combination of a hash table and the "def" parameter + * in the merge_node structure. The merge_set stores a linked list of + * merge_node's in dominence order of the ssa definitions. (Since the + * liveness analysis pass indexes the SSA values in dominence order for us, + * this is an easy thing to keep up.) It is assumed that no pair of the + * nodes in a given set interfere. Merging two sets or checking for + * interference can be done in a single linear-time merge-sort walk of the + * two lists of nodes. 
+ */ +struct merge_set; + +typedef struct { + struct exec_node node; + struct merge_set *set; + nir_ssa_def *def; +} merge_node; + +typedef struct merge_set { + struct exec_list nodes; + unsigned size; + nir_register *reg; +} merge_set; + +#if 0 +static void +merge_set_dump(merge_set *set, FILE *fp) +{ + nir_ssa_def *dom[set->size]; + int dom_idx = -1; + + foreach_list_typed(merge_node, node, node, &set->nodes) { + while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def)) + dom_idx--; + + for (int i = 0; i <= dom_idx; i++) + fprintf(fp, " "); + + if (node->def->name) + fprintf(fp, "ssa_%d /* %s */\n", node->def->index, node->def->name); + else + fprintf(fp, "ssa_%d\n", node->def->index); + + dom[++dom_idx] = node->def; + } +} +#endif + +static merge_node * +get_merge_node(nir_ssa_def *def, struct from_ssa_state *state) +{ + struct hash_entry *entry = + _mesa_hash_table_search(state->merge_node_table, def); + if (entry) + return entry->data; + + merge_set *set = ralloc(state->dead_ctx, merge_set); + exec_list_make_empty(&set->nodes); + set->size = 1; + set->reg = NULL; + + merge_node *node = ralloc(state->dead_ctx, merge_node); + node->set = set; + node->def = def; + exec_list_push_head(&set->nodes, &node->node); + + _mesa_hash_table_insert(state->merge_node_table, def, node); + + return node; +} + +static bool +merge_nodes_interfere(merge_node *a, merge_node *b) +{ + return nir_ssa_defs_interfere(a->def, b->def); +} + +/* Merges b into a */ +static merge_set * +merge_merge_sets(merge_set *a, merge_set *b) +{ + struct exec_node *an = exec_list_get_head(&a->nodes); + struct exec_node *bn = exec_list_get_head(&b->nodes); + while (!exec_node_is_tail_sentinel(bn)) { + merge_node *a_node = exec_node_data(merge_node, an, node); + merge_node *b_node = exec_node_data(merge_node, bn, node); + + if (exec_node_is_tail_sentinel(an) || + a_node->def->live_index > b_node->def->live_index) { + struct exec_node *next = bn->next; + exec_node_remove(bn); + exec_node_insert_node_before(an, bn); + exec_node_data(merge_node, bn, node)->set = a; + bn = next; + } else { + an = an->next; + } + } + + a->size += b->size; + b->size = 0; + + return a; +} + +/* Checks for any interference between two merge sets + * + * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA + * Translation for Correctness, Code Quality, and Efficiency" by + * Boissinot et. al. 
+ */ +static bool +merge_sets_interfere(merge_set *a, merge_set *b) +{ + NIR_VLA(merge_node *, dom, a->size + b->size); + int dom_idx = -1; + + struct exec_node *an = exec_list_get_head(&a->nodes); + struct exec_node *bn = exec_list_get_head(&b->nodes); + while (!exec_node_is_tail_sentinel(an) || + !exec_node_is_tail_sentinel(bn)) { + + merge_node *current; + if (exec_node_is_tail_sentinel(an)) { + current = exec_node_data(merge_node, bn, node); + bn = bn->next; + } else if (exec_node_is_tail_sentinel(bn)) { + current = exec_node_data(merge_node, an, node); + an = an->next; + } else { + merge_node *a_node = exec_node_data(merge_node, an, node); + merge_node *b_node = exec_node_data(merge_node, bn, node); + + if (a_node->def->live_index <= b_node->def->live_index) { + current = a_node; + an = an->next; + } else { + current = b_node; + bn = bn->next; + } + } + + while (dom_idx >= 0 && + !ssa_def_dominates(dom[dom_idx]->def, current->def)) + dom_idx--; + + if (dom_idx >= 0 && merge_nodes_interfere(current, dom[dom_idx])) + return true; + + dom[++dom_idx] = current; + } + + return false; +} + +static bool +add_parallel_copy_to_end_of_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + bool need_end_copy = false; + if (block->successors[0]) { + nir_instr *instr = nir_block_first_instr(block->successors[0]); + if (instr && instr->type == nir_instr_type_phi) + need_end_copy = true; + } + + if (block->successors[1]) { + nir_instr *instr = nir_block_first_instr(block->successors[1]); + if (instr && instr->type == nir_instr_type_phi) + need_end_copy = true; + } + + if (need_end_copy) { + /* If one of our successors has at least one phi node, we need to + * create a parallel copy at the end of the block but before the jump + * (if there is one). + */ + nir_parallel_copy_instr *pcopy = + nir_parallel_copy_instr_create(state->dead_ctx); + + nir_instr_insert(nir_after_block_before_jump(block), &pcopy->instr); + } + + return true; +} + +static nir_parallel_copy_instr * +get_parallel_copy_at_end_of_block(nir_block *block) +{ + nir_instr *last_instr = nir_block_last_instr(block); + if (last_instr == NULL) + return NULL; + + /* The last instruction may be a jump in which case the parallel copy is + * right before it. + */ + if (last_instr->type == nir_instr_type_jump) + last_instr = nir_instr_prev(last_instr); + + if (last_instr && last_instr->type == nir_instr_type_parallel_copy) + return nir_instr_as_parallel_copy(last_instr); + else + return NULL; +} + +/** Isolate phi nodes with parallel copies + * + * In order to solve the dependency problems with the sources and + * destinations of phi nodes, we first isolate them by adding parallel + * copies to the beginnings and ends of basic blocks. For every block with + * phi nodes, we add a parallel copy immediately following the last phi + * node that copies the destinations of all of the phi nodes to new SSA + * values. We also add a parallel copy to the end of every block that has + * a successor with phi nodes that, for each phi node in each successor, + * copies the corresponding sorce of the phi node and adjust the phi to + * used the destination of the parallel copy. + * + * In SSA form, each value has exactly one definition. What this does is + * ensure that each value used in a phi also has exactly one use. The + * destinations of phis are only used by the parallel copy immediately + * following the phi nodes and. 
Thanks to the parallel copy at the end of + * the predecessor block, the sources of phi nodes are are the only use of + * that value. This allows us to immediately assign all the sources and + * destinations of any given phi node to the same register without worrying + * about interference at all. We do coalescing to get rid of the parallel + * copies where possible. + * + * Before this pass can be run, we have to iterate over the blocks with + * add_parallel_copy_to_end_of_block to ensure that the parallel copies at + * the ends of blocks exist. We can create the ones at the beginnings as + * we go, but the ones at the ends of blocks need to be created ahead of + * time because of potential back-edges in the CFG. + */ +static bool +isolate_phi_nodes_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + nir_instr *last_phi_instr = NULL; + nir_foreach_instr(block, instr) { + /* Phi nodes only ever come at the start of a block */ + if (instr->type != nir_instr_type_phi) + break; + + last_phi_instr = instr; + } + + /* If we don't have any phi's, then there's nothing for us to do. */ + if (last_phi_instr == NULL) + return true; + + /* If we have phi nodes, we need to create a parallel copy at the + * start of this block but after the phi nodes. + */ + nir_parallel_copy_instr *block_pcopy = + nir_parallel_copy_instr_create(state->dead_ctx); + nir_instr_insert_after(last_phi_instr, &block_pcopy->instr); + + nir_foreach_instr(block, instr) { + /* Phi nodes only ever come at the start of a block */ + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + assert(phi->dest.is_ssa); + nir_foreach_phi_src(phi, src) { + nir_parallel_copy_instr *pcopy = + get_parallel_copy_at_end_of_block(src->pred); + assert(pcopy); + + nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx, + nir_parallel_copy_entry); + nir_ssa_dest_init(&pcopy->instr, &entry->dest, + phi->dest.ssa.num_components, src->src.ssa->name); + exec_list_push_tail(&pcopy->entries, &entry->node); + + assert(src->src.is_ssa); + nir_instr_rewrite_src(&pcopy->instr, &entry->src, src->src); + + nir_instr_rewrite_src(&phi->instr, &src->src, + nir_src_for_ssa(&entry->dest.ssa)); + } + + nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx, + nir_parallel_copy_entry); + nir_ssa_dest_init(&block_pcopy->instr, &entry->dest, + phi->dest.ssa.num_components, phi->dest.ssa.name); + exec_list_push_tail(&block_pcopy->entries, &entry->node); + + nir_ssa_def_rewrite_uses(&phi->dest.ssa, + nir_src_for_ssa(&entry->dest.ssa)); + + nir_instr_rewrite_src(&block_pcopy->instr, &entry->src, + nir_src_for_ssa(&phi->dest.ssa)); + } + + return true; +} + +static bool +coalesce_phi_nodes_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + nir_foreach_instr(block, instr) { + /* Phi nodes only ever come at the start of a block */ + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + + assert(phi->dest.is_ssa); + merge_node *dest_node = get_merge_node(&phi->dest.ssa, state); + + nir_foreach_phi_src(phi, src) { + assert(src->src.is_ssa); + merge_node *src_node = get_merge_node(src->src.ssa, state); + if (src_node->set != dest_node->set) + merge_merge_sets(dest_node->set, src_node->set); + } + } + + return true; +} + +static void +aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy, + struct from_ssa_state *state) +{ + nir_foreach_parallel_copy_entry(pcopy, entry) { + if 
(!entry->src.is_ssa) + continue; + + /* Since load_const instructions are SSA only, we can't replace their + * destinations with registers and, therefore, can't coalesce them. + */ + if (entry->src.ssa->parent_instr->type == nir_instr_type_load_const) + continue; + + /* Don't try and coalesce these */ + if (entry->dest.ssa.num_components != entry->src.ssa->num_components) + continue; + + merge_node *src_node = get_merge_node(entry->src.ssa, state); + merge_node *dest_node = get_merge_node(&entry->dest.ssa, state); + + if (src_node->set == dest_node->set) + continue; + + if (!merge_sets_interfere(src_node->set, dest_node->set)) + merge_merge_sets(src_node->set, dest_node->set); + } +} + +static bool +aggressive_coalesce_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + nir_parallel_copy_instr *start_pcopy = NULL; + nir_foreach_instr(block, instr) { + /* Phi nodes only ever come at the start of a block */ + if (instr->type != nir_instr_type_phi) { + if (instr->type != nir_instr_type_parallel_copy) + break; /* The parallel copy must be right after the phis */ + + start_pcopy = nir_instr_as_parallel_copy(instr); + + aggressive_coalesce_parallel_copy(start_pcopy, state); + + break; + } + } + + nir_parallel_copy_instr *end_pcopy = + get_parallel_copy_at_end_of_block(block); + + if (end_pcopy && end_pcopy != start_pcopy) + aggressive_coalesce_parallel_copy(end_pcopy, state); + + return true; +} + +static bool +rewrite_ssa_def(nir_ssa_def *def, void *void_state) +{ + struct from_ssa_state *state = void_state; + nir_register *reg; + + struct hash_entry *entry = + _mesa_hash_table_search(state->merge_node_table, def); + if (entry) { + /* In this case, we're part of a phi web. Use the web's register. */ + merge_node *node = (merge_node *)entry->data; + + /* If it doesn't have a register yet, create one. Note that all of + * the things in the merge set should be the same so it doesn't + * matter which node's definition we use. + */ + if (node->set->reg == NULL) { + node->set->reg = nir_local_reg_create(state->impl); + node->set->reg->name = def->name; + node->set->reg->num_components = def->num_components; + node->set->reg->num_array_elems = 0; + } + + reg = node->set->reg; + } else { + if (state->phi_webs_only) + return true; + + /* We leave load_const SSA values alone. They act as immediates to + * the backend. If it got coalesced into a phi, that's ok. + */ + if (def->parent_instr->type == nir_instr_type_load_const) + return true; + + reg = nir_local_reg_create(state->impl); + reg->name = def->name; + reg->num_components = def->num_components; + reg->num_array_elems = 0; + } + + nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg)); + assert(list_empty(&def->uses) && list_empty(&def->if_uses)); + + if (def->parent_instr->type == nir_instr_type_ssa_undef) { + /* If it's an ssa_undef instruction, remove it since we know we just got + * rid of all its uses. + */ + nir_instr *parent_instr = def->parent_instr; + nir_instr_remove(parent_instr); + ralloc_steal(state->dead_ctx, parent_instr); + return true; + } + + assert(def->parent_instr->type != nir_instr_type_load_const); + + /* At this point we know a priori that this SSA def is part of a + * nir_dest. We can use exec_node_data to get the dest pointer. + */ + nir_dest *dest = exec_node_data(nir_dest, def, ssa); + + nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg)); + + return true; +} + +/* Resolves ssa definitions to registers. While we're at it, we also + * remove phi nodes. 
+ */ +static bool +resolve_registers_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + state->instr = instr; + nir_foreach_ssa_def(instr, rewrite_ssa_def, state); + + if (instr->type == nir_instr_type_phi) { + nir_instr_remove(instr); + ralloc_steal(state->dead_ctx, instr); + } + } + state->instr = NULL; + + return true; +} + +static void +emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src, + void *mem_ctx) +{ + assert(!dest_src.is_ssa && + dest_src.reg.indirect == NULL && + dest_src.reg.base_offset == 0); + + if (src.is_ssa) + assert(src.ssa->num_components >= dest_src.reg.reg->num_components); + else + assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components); + + nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov); + nir_src_copy(&mov->src[0].src, &src, mov); + mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg); + mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1; + + nir_instr_insert_before(&pcopy->instr, &mov->instr); +} + +/* Resolves a single parallel copy operation into a sequence of mov's + * + * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for + * Correctness, Code Quality, and Efficiency" by Boissinot et. al.. + * However, I never got the algorithm to work as written, so this version + * is slightly modified. + * + * The algorithm works by playing this little shell game with the values. + * We start by recording where every source value is and which source value + * each destination value should receive. We then grab any copy whose + * destination is "empty", i.e. not used as a source, and do the following: + * - Find where its source value currently lives + * - Emit the move instruction + * - Set the location of the source value to the destination + * - Mark the location containing the source value + * - Mark the destination as no longer needing to be copied + * + * When we run out of "empty" destinations, we have a cycle and so we + * create a temporary register, copy to that register, and mark the value + * we copied as living in that temporary. Now, the cycle is broken, so we + * can continue with the above steps. + */ +static void +resolve_parallel_copy(nir_parallel_copy_instr *pcopy, + struct from_ssa_state *state) +{ + unsigned num_copies = 0; + nir_foreach_parallel_copy_entry(pcopy, entry) { + /* Sources may be SSA */ + if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg) + continue; + + num_copies++; + } + + if (num_copies == 0) { + /* Hooray, we don't need any copies! */ + nir_instr_remove(&pcopy->instr); + return; + } + + /* The register/source corresponding to the given index */ + NIR_VLA_ZERO(nir_src, values, num_copies * 2); + + /* The current location of a given piece of data. We will use -1 for "null" */ + NIR_VLA_FILL(int, loc, num_copies * 2, -1); + + /* The piece of data that the given piece of data is to be copied from. 
We will use -1 for "null" */ + NIR_VLA_FILL(int, pred, num_copies * 2, -1); + + /* The destinations we have yet to properly fill */ + NIR_VLA(int, to_do, num_copies * 2); + int to_do_idx = -1; + + /* Now we set everything up: + * - All values get assigned a temporary index + * - Current locations are set from sources + * - Predicessors are recorded from sources and destinations + */ + int num_vals = 0; + nir_foreach_parallel_copy_entry(pcopy, entry) { + /* Sources may be SSA */ + if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg) + continue; + + int src_idx = -1; + for (int i = 0; i < num_vals; ++i) { + if (nir_srcs_equal(values[i], entry->src)) + src_idx = i; + } + if (src_idx < 0) { + src_idx = num_vals++; + values[src_idx] = entry->src; + } + + nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg); + + int dest_idx = -1; + for (int i = 0; i < num_vals; ++i) { + if (nir_srcs_equal(values[i], dest_src)) { + /* Each destination of a parallel copy instruction should be + * unique. A destination may get used as a source, so we still + * have to walk the list. However, the predecessor should not, + * at this point, be set yet, so we should have -1 here. + */ + assert(pred[i] == -1); + dest_idx = i; + } + } + if (dest_idx < 0) { + dest_idx = num_vals++; + values[dest_idx] = dest_src; + } + + loc[src_idx] = src_idx; + pred[dest_idx] = src_idx; + + to_do[++to_do_idx] = dest_idx; + } + + /* Currently empty destinations we can go ahead and fill */ + NIR_VLA(int, ready, num_copies * 2); + int ready_idx = -1; + + /* Mark the ones that are ready for copying. We know an index is a + * destination if it has a predecessor and it's ready for copying if + * it's not marked as containing data. + */ + for (int i = 0; i < num_vals; i++) { + if (pred[i] != -1 && loc[i] == -1) + ready[++ready_idx] = i; + } + + while (to_do_idx >= 0) { + while (ready_idx >= 0) { + int b = ready[ready_idx--]; + int a = pred[b]; + emit_copy(pcopy, values[loc[a]], values[b], state->mem_ctx); + + /* If any other copies want a they can find it at b */ + loc[a] = b; + + /* b has been filled, mark it as not needing to be copied */ + pred[b] = -1; + + /* If a needs to be filled, it's ready for copying now */ + if (pred[a] != -1) + ready[++ready_idx] = a; + } + int b = to_do[to_do_idx--]; + if (pred[b] == -1) + continue; + + /* If we got here, then we don't have any more trivial copies that we + * can do. We have to break a cycle, so we create a new temporary + * register for that purpose. Normally, if going out of SSA after + * register allocation, you would want to avoid creating temporary + * registers. However, we are going out of SSA before register + * allocation, so we would rather not create extra register + * dependencies for the backend to deal with. If it wants, the + * backend can coalesce the (possibly multiple) temporaries. + */ + assert(num_vals < num_copies * 2); + nir_register *reg = nir_local_reg_create(state->impl); + reg->name = "copy_temp"; + reg->num_array_elems = 0; + if (values[b].is_ssa) + reg->num_components = values[b].ssa->num_components; + else + reg->num_components = values[b].reg.reg->num_components; + values[num_vals].is_ssa = false; + values[num_vals].reg.reg = reg; + + emit_copy(pcopy, values[b], values[num_vals], state->mem_ctx); + loc[b] = num_vals; + ready[++ready_idx] = b; + num_vals++; + } + + nir_instr_remove(&pcopy->instr); +} + +/* Resolves the parallel copies in a block. 
Each block can have at most + * two: One at the beginning, right after all the phi noces, and one at + * the end (or right before the final jump if it exists). + */ +static bool +resolve_parallel_copies_block(nir_block *block, void *void_state) +{ + struct from_ssa_state *state = void_state; + + /* At this point, we have removed all of the phi nodes. If a parallel + * copy existed right after the phi nodes in this block, it is now the + * first instruction. + */ + nir_instr *first_instr = nir_block_first_instr(block); + if (first_instr == NULL) + return true; /* Empty, nothing to do. */ + + if (first_instr->type == nir_instr_type_parallel_copy) { + nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr); + + resolve_parallel_copy(pcopy, state); + } + + /* It's possible that the above code already cleaned up the end parallel + * copy. However, doing so removed it form the instructions list so we + * won't find it here. Therefore, it's safe to go ahead and just look + * for one and clean it up if it exists. + */ + nir_parallel_copy_instr *end_pcopy = + get_parallel_copy_at_end_of_block(block); + if (end_pcopy) + resolve_parallel_copy(end_pcopy, state); + + return true; +} + +static void +nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only) +{ + struct from_ssa_state state; + + state.mem_ctx = ralloc_parent(impl); + state.dead_ctx = ralloc_context(NULL); + state.impl = impl; + state.phi_webs_only = phi_webs_only; + state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state); + nir_foreach_block(impl, isolate_phi_nodes_block, &state); + + /* Mark metadata as dirty before we ask for liveness analysis */ + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + nir_metadata_require(impl, nir_metadata_live_ssa_defs | + nir_metadata_dominance); + + nir_foreach_block(impl, coalesce_phi_nodes_block, &state); + nir_foreach_block(impl, aggressive_coalesce_block, &state); + + nir_foreach_block(impl, resolve_registers_block, &state); + + nir_foreach_block(impl, resolve_parallel_copies_block, &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + /* Clean up dead instructions and the hash tables */ + _mesa_hash_table_destroy(state.merge_node_table, NULL); + ralloc_free(state.dead_ctx); +} + +void +nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only) +{ + nir_foreach_function(shader, function) { + if (function->impl) + nir_convert_from_ssa_impl(function->impl, phi_webs_only); + } +} diff --git a/src/compiler/nir/nir_gs_count_vertices.c b/src/compiler/nir/nir_gs_count_vertices.c new file mode 100644 index 00000000000..db15d160ee7 --- /dev/null +++ b/src/compiler/nir/nir_gs_count_vertices.c @@ -0,0 +1,93 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" + +static nir_intrinsic_instr * +as_intrinsic(nir_instr *instr, nir_intrinsic_op op) +{ + if (instr->type != nir_instr_type_intrinsic) + return NULL; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != op) + return NULL; + + return intrin; +} + +static nir_intrinsic_instr * +as_set_vertex_count(nir_instr *instr) +{ + return as_intrinsic(instr, nir_intrinsic_set_vertex_count); +} + +/** + * If a geometry shader emits a constant number of vertices, return the + * number of vertices. Otherwise, return -1 (unknown). + * + * This only works if you've used nir_lower_gs_intrinsics() to do vertex + * counting at the NIR level. + */ +int +nir_gs_count_vertices(const nir_shader *shader) +{ + int count = -1; + + nir_foreach_function(shader, function) { + if (!function->impl) + continue; + + /* set_vertex_count intrinsics only appear in predecessors of the + * end block. So we don't need to walk all of them. + */ + struct set_entry *entry; + set_foreach(function->impl->end_block->predecessors, entry) { + nir_block *block = (nir_block *) entry->key; + + nir_foreach_instr_reverse(block, instr) { + nir_intrinsic_instr *intrin = as_set_vertex_count(instr); + if (!intrin) + continue; + + nir_const_value *val = nir_src_as_const_value(intrin->src[0]); + /* We've found a non-constant value. Bail. */ + if (!val) + return -1; + + if (count == -1) + count = val->i[0]; + + /* We've found contradictory set_vertex_count intrinsics. + * This can happen if there are early-returns in main() and + * different paths emit different numbers of vertices. + */ + if (count != val->i[0]) + return -1; + } + } + } + + return count; +} diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c new file mode 100644 index 00000000000..d3f939fe805 --- /dev/null +++ b/src/compiler/nir/nir_instr_set.c @@ -0,0 +1,519 @@ +/* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir_instr_set.h" +#include "nir_vla.h" + +#define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data)) + +static uint32_t +hash_src(uint32_t hash, const nir_src *src) +{ + assert(src->is_ssa); + hash = HASH(hash, src->ssa); + return hash; +} + +static uint32_t +hash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components) +{ + hash = HASH(hash, src->abs); + hash = HASH(hash, src->negate); + + for (unsigned i = 0; i < num_components; i++) + hash = HASH(hash, src->swizzle[i]); + + hash = hash_src(hash, &src->src); + return hash; +} + +static uint32_t +hash_alu(uint32_t hash, const nir_alu_instr *instr) +{ + hash = HASH(hash, instr->op); + hash = HASH(hash, instr->dest.dest.ssa.num_components); + + if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[instr->op].num_inputs == 2); + uint32_t hash0 = hash_alu_src(hash, &instr->src[0], + nir_ssa_alu_instr_src_components(instr, 0)); + uint32_t hash1 = hash_alu_src(hash, &instr->src[1], + nir_ssa_alu_instr_src_components(instr, 1)); + /* For commutative operations, we need some commutative way of + * combining the hashes. One option would be to XOR them but that + * means that anything with two identical sources will hash to 0 and + * that's common enough we probably don't want the guaranteed + * collision. Either addition or multiplication will also work. + */ + hash = hash0 * hash1; + } else { + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + hash = hash_alu_src(hash, &instr->src[i], + nir_ssa_alu_instr_src_components(instr, i)); + } + } + + return hash; +} + +static uint32_t +hash_load_const(uint32_t hash, const nir_load_const_instr *instr) +{ + hash = HASH(hash, instr->def.num_components); + + hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f, + instr->def.num_components + * sizeof(instr->value.f[0])); + + return hash; +} + +static int +cmp_phi_src(const void *data1, const void *data2) +{ + nir_phi_src *src1 = *(nir_phi_src **)data1; + nir_phi_src *src2 = *(nir_phi_src **)data2; + return src1->pred - src2->pred; +} + +static uint32_t +hash_phi(uint32_t hash, const nir_phi_instr *instr) +{ + hash = HASH(hash, instr->instr.block); + + /* sort sources by predecessor, since the order shouldn't matter */ + unsigned num_preds = instr->instr.block->predecessors->entries; + NIR_VLA(nir_phi_src *, srcs, num_preds); + unsigned i = 0; + nir_foreach_phi_src(instr, src) { + srcs[i++] = src; + } + + qsort(srcs, num_preds, sizeof(nir_phi_src *), cmp_phi_src); + + for (i = 0; i < num_preds; i++) { + hash = hash_src(hash, &srcs[i]->src); + hash = HASH(hash, srcs[i]->pred); + } + + return hash; +} + +static uint32_t +hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr) +{ + const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; + hash = HASH(hash, instr->intrinsic); + + if (info->has_dest) + hash = HASH(hash, instr->dest.ssa.num_components); + + assert(info->num_variables == 0); + + hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index, + info->num_indices + * sizeof(instr->const_index[0])); + return hash; +} + +static uint32_t +hash_tex(uint32_t hash, const nir_tex_instr *instr) +{ + hash = HASH(hash, instr->op); + hash = HASH(hash, 
instr->num_srcs); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + hash = HASH(hash, instr->src[i].src_type); + hash = hash_src(hash, &instr->src[i].src); + } + + hash = HASH(hash, instr->coord_components); + hash = HASH(hash, instr->sampler_dim); + hash = HASH(hash, instr->is_array); + hash = HASH(hash, instr->is_shadow); + hash = HASH(hash, instr->is_new_style_shadow); + hash = HASH(hash, instr->const_offset); + unsigned component = instr->component; + hash = HASH(hash, component); + hash = HASH(hash, instr->sampler_index); + hash = HASH(hash, instr->sampler_array_size); + + assert(!instr->sampler); + + return hash; +} + +/* Computes a hash of an instruction for use in a hash table. Note that this + * will only work for instructions where instr_can_rewrite() returns true, and + * it should return identical hashes for two instructions that are the same + * according nir_instrs_equal(). + */ + +static uint32_t +hash_instr(const void *data) +{ + const nir_instr *instr = data; + uint32_t hash = _mesa_fnv32_1a_offset_bias; + + switch (instr->type) { + case nir_instr_type_alu: + hash = hash_alu(hash, nir_instr_as_alu(instr)); + break; + case nir_instr_type_load_const: + hash = hash_load_const(hash, nir_instr_as_load_const(instr)); + break; + case nir_instr_type_phi: + hash = hash_phi(hash, nir_instr_as_phi(instr)); + break; + case nir_instr_type_intrinsic: + hash = hash_intrinsic(hash, nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_tex: + hash = hash_tex(hash, nir_instr_as_tex(instr)); + break; + default: + unreachable("Invalid instruction type"); + } + + return hash; +} + +bool +nir_srcs_equal(nir_src src1, nir_src src2) +{ + if (src1.is_ssa) { + if (src2.is_ssa) { + return src1.ssa == src2.ssa; + } else { + return false; + } + } else { + if (src2.is_ssa) { + return false; + } else { + if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL)) + return false; + + if (src1.reg.indirect) { + if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect)) + return false; + } + + return src1.reg.reg == src2.reg.reg && + src1.reg.base_offset == src2.reg.base_offset; + } + } +} + +static bool +nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, + unsigned src1, unsigned src2) +{ + if (alu1->src[src1].abs != alu2->src[src2].abs || + alu1->src[src1].negate != alu2->src[src2].negate) + return false; + + for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) { + if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i]) + return false; + } + + return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src); +} + +/* Returns "true" if two instructions are equal. Note that this will only + * work for the subset of instructions defined by instr_can_rewrite(). Also, + * it should only return "true" for instructions that hash_instr() will return + * the same hash for (ignoring collisions, of course). 
+ */ + +static bool +nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) +{ + if (instr1->type != instr2->type) + return false; + + switch (instr1->type) { + case nir_instr_type_alu: { + nir_alu_instr *alu1 = nir_instr_as_alu(instr1); + nir_alu_instr *alu2 = nir_instr_as_alu(instr2); + + if (alu1->op != alu2->op) + return false; + + /* TODO: We can probably acutally do something more inteligent such + * as allowing different numbers and taking a maximum or something + * here */ + if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components) + return false; + + if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[alu1->op].num_inputs == 2); + return (nir_alu_srcs_equal(alu1, alu2, 0, 0) && + nir_alu_srcs_equal(alu1, alu2, 1, 1)) || + (nir_alu_srcs_equal(alu1, alu2, 0, 1) && + nir_alu_srcs_equal(alu1, alu2, 1, 0)); + } else { + for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { + if (!nir_alu_srcs_equal(alu1, alu2, i, i)) + return false; + } + } + return true; + } + case nir_instr_type_tex: { + nir_tex_instr *tex1 = nir_instr_as_tex(instr1); + nir_tex_instr *tex2 = nir_instr_as_tex(instr2); + + if (tex1->op != tex2->op) + return false; + + if (tex1->num_srcs != tex2->num_srcs) + return false; + for (unsigned i = 0; i < tex1->num_srcs; i++) { + if (tex1->src[i].src_type != tex2->src[i].src_type || + !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) { + return false; + } + } + + if (tex1->coord_components != tex2->coord_components || + tex1->sampler_dim != tex2->sampler_dim || + tex1->is_array != tex2->is_array || + tex1->is_shadow != tex2->is_shadow || + tex1->is_new_style_shadow != tex2->is_new_style_shadow || + memcmp(tex1->const_offset, tex2->const_offset, + sizeof(tex1->const_offset)) != 0 || + tex1->component != tex2->component || + tex1->sampler_index != tex2->sampler_index || + tex1->sampler_array_size != tex2->sampler_array_size) { + return false; + } + + /* Don't support un-lowered sampler derefs currently. 
*/ + assert(!tex1->sampler && !tex2->sampler); + + return true; + } + case nir_instr_type_load_const: { + nir_load_const_instr *load1 = nir_instr_as_load_const(instr1); + nir_load_const_instr *load2 = nir_instr_as_load_const(instr2); + + if (load1->def.num_components != load2->def.num_components) + return false; + + return memcmp(load1->value.f, load2->value.f, + load1->def.num_components * sizeof(*load2->value.f)) == 0; + } + case nir_instr_type_phi: { + nir_phi_instr *phi1 = nir_instr_as_phi(instr1); + nir_phi_instr *phi2 = nir_instr_as_phi(instr2); + + if (phi1->instr.block != phi2->instr.block) + return false; + + nir_foreach_phi_src(phi1, src1) { + nir_foreach_phi_src(phi2, src2) { + if (src1->pred == src2->pred) { + if (!nir_srcs_equal(src1->src, src2->src)) + return false; + + break; + } + } + } + + return true; + } + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1); + nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2); + const nir_intrinsic_info *info = + &nir_intrinsic_infos[intrinsic1->intrinsic]; + + if (intrinsic1->intrinsic != intrinsic2->intrinsic || + intrinsic1->num_components != intrinsic2->num_components) + return false; + + if (info->has_dest && intrinsic1->dest.ssa.num_components != + intrinsic2->dest.ssa.num_components) + return false; + + for (unsigned i = 0; i < info->num_srcs; i++) { + if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i])) + return false; + } + + assert(info->num_variables == 0); + + for (unsigned i = 0; i < info->num_indices; i++) { + if (intrinsic1->const_index[i] != intrinsic2->const_index[i]) + return false; + } + + return true; + } + case nir_instr_type_call: + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + case nir_instr_type_parallel_copy: + default: + unreachable("Invalid instruction type"); + } + + return false; +} + +static bool +src_is_ssa(nir_src *src, void *data) +{ + (void) data; + return src->is_ssa; +} + +static bool +dest_is_ssa(nir_dest *dest, void *data) +{ + (void) data; + return dest->is_ssa; +} + +/* This function determines if uses of an instruction can safely be rewritten + * to use another identical instruction instead. Note that this function must + * be kept in sync with hash_instr() and nir_instrs_equal() -- only + * instructions that pass this test will be handed on to those functions, and + * conversely they must handle everything that this function returns true for. + */ + +static bool +instr_can_rewrite(nir_instr *instr) +{ + /* We only handle SSA. */ + if (!nir_foreach_dest(instr, dest_is_ssa, NULL) || + !nir_foreach_src(instr, src_is_ssa, NULL)) + return false; + + switch (instr->type) { + case nir_instr_type_alu: + case nir_instr_type_load_const: + case nir_instr_type_phi: + return true; + case nir_instr_type_tex: { + nir_tex_instr *tex = nir_instr_as_tex(instr); + + /* Don't support un-lowered sampler derefs currently. 
*/ + if (tex->sampler) + return false; + + return true; + } + case nir_instr_type_intrinsic: { + const nir_intrinsic_info *info = + &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic]; + return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && + (info->flags & NIR_INTRINSIC_CAN_REORDER) && + info->num_variables == 0; /* not implemented yet */ + } + case nir_instr_type_call: + case nir_instr_type_jump: + case nir_instr_type_ssa_undef: + return false; + case nir_instr_type_parallel_copy: + default: + unreachable("Invalid instruction type"); + } + + return false; +} + +static nir_ssa_def * +nir_instr_get_dest_ssa_def(nir_instr *instr) +{ + switch (instr->type) { + case nir_instr_type_alu: + assert(nir_instr_as_alu(instr)->dest.dest.is_ssa); + return &nir_instr_as_alu(instr)->dest.dest.ssa; + case nir_instr_type_load_const: + return &nir_instr_as_load_const(instr)->def; + case nir_instr_type_phi: + assert(nir_instr_as_phi(instr)->dest.is_ssa); + return &nir_instr_as_phi(instr)->dest.ssa; + case nir_instr_type_intrinsic: + assert(nir_instr_as_intrinsic(instr)->dest.is_ssa); + return &nir_instr_as_intrinsic(instr)->dest.ssa; + case nir_instr_type_tex: + assert(nir_instr_as_tex(instr)->dest.is_ssa); + return &nir_instr_as_tex(instr)->dest.ssa; + default: + unreachable("We never ask for any of these"); + } +} + +static bool +cmp_func(const void *data1, const void *data2) +{ + return nir_instrs_equal(data1, data2); +} + +struct set * +nir_instr_set_create(void *mem_ctx) +{ + return _mesa_set_create(mem_ctx, hash_instr, cmp_func); +} + +void +nir_instr_set_destroy(struct set *instr_set) +{ + _mesa_set_destroy(instr_set, NULL); +} + +bool +nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr) +{ + if (!instr_can_rewrite(instr)) + return false; + + struct set_entry *entry = _mesa_set_search(instr_set, instr); + if (entry) { + nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr); + nir_ssa_def *new_def = + nir_instr_get_dest_ssa_def((nir_instr *) entry->key); + nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def)); + return true; + } + + _mesa_set_add(instr_set, instr); + return false; +} + +void +nir_instr_set_remove(struct set *instr_set, nir_instr *instr) +{ + if (!instr_can_rewrite(instr)) + return; + + struct set_entry *entry = _mesa_set_search(instr_set, instr); + if (entry) + _mesa_set_remove(instr_set, entry); +} + diff --git a/src/compiler/nir/nir_instr_set.h b/src/compiler/nir/nir_instr_set.h new file mode 100644 index 00000000000..939e8ddbf58 --- /dev/null +++ b/src/compiler/nir/nir_instr_set.h @@ -0,0 +1,62 @@ +/* + * Copyright © 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +#include "nir.h" + +/** + * This file defines functions for creating, destroying, and manipulating an + * "instruction set," which is an abstraction for finding duplicate + * instructions using a hash set. Note that the question of whether an + * instruction is actually a duplicate (e.g. whether it has any side effects) + * is handled transparently. The user can pass any instruction to + * nir_instr_set_add_or_rewrite() and nir_instr_set_remove(), and if the + * instruction isn't safe to rewrite or isn't supported, it's silently + * removed. + */ + +/*@{*/ + +/** Creates an instruction set, using a given ralloc mem_ctx */ +struct set *nir_instr_set_create(void *mem_ctx); + +/** Destroys an instruction set. */ +void nir_instr_set_destroy(struct set *instr_set); + +/** + * Adds an instruction to an instruction set if it doesn't exist, or if it + * does already exist, rewrites all uses of it to point to the other + * already-inserted instruction. Returns 'true' if the uses of the instruction + * were rewritten. + */ +bool nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr); + +/** + * Removes an instruction from an instruction set, so that other instructions + * won't be merged with it. + */ +void nir_instr_set_remove(struct set *instr_set, nir_instr *instr); + +/*@}*/ + diff --git a/src/compiler/nir/nir_intrinsics.c b/src/compiler/nir/nir_intrinsics.c new file mode 100644 index 00000000000..a7c868c39af --- /dev/null +++ b/src/compiler/nir/nir_intrinsics.c @@ -0,0 +1,49 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+#define OPCODE(name) nir_intrinsic_##name
+
+#define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \
+                  _dest_components, _num_variables, _num_indices, _flags) \
+{ \
+   .name = #_name, \
+   .num_srcs = _num_srcs, \
+   .src_components = _src_components, \
+   .has_dest = _has_dest, \
+   .dest_components = _dest_components, \
+   .num_variables = _num_variables, \
+   .num_indices = _num_indices, \
+   .flags = _flags \
+},
+
+#define LAST_INTRINSIC(name)
+
+const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = {
+#include "nir_intrinsics.h"
+};
\ No newline at end of file
diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h
new file mode 100644
index 00000000000..62eead4878a
--- /dev/null
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -0,0 +1,316 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+/**
+ * This header file defines all the available intrinsics in one place. It
+ * expands to a list of macros of the form:
+ *
+ * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components,
+ *              num_variables, num_indices, flags)
+ *
+ * Each entry should correspond one-to-one with the nir_intrinsic_info
+ * structure. The list is included in nir.h to create the nir_intrinsic enum
+ * (with members of the form nir_intrinsic_(name)) and in nir_intrinsics.c to
+ * create nir_intrinsic_infos, which is a const array of nir_intrinsic_info
+ * structures, one for each intrinsic.
+ */
+
+#define ARR(...) { __VA_ARGS__ }
+
+
+INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0)
+INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0)
+
+/*
+ * Interpolation of input. The interp_var_at* intrinsics are similar to the
+ * load_var intrinsic acting on a shader input except that they interpolate
+ * the input differently. The at_sample and at_offset intrinsics take an
+ * additional source that is an integer sample id or a vec2 position offset,
+ * respectively.
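+ *
+ * For example, GLSL's interpolateAtSample(v, s) would map to an
+ * interp_var_at_sample intrinsic with v as its variable and the integer
+ * sample id s as its single source.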
+ */
+
+INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
+ * Ask the driver for the size of a given buffer. It takes the buffer index
+ * as source.
+ */
+INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0,
+          NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+/*
+ * A barrier is an intrinsic with no inputs/outputs but which can't be moved
+ * around/optimized in general.
+ */
+#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0)
+
+BARRIER(barrier)
+BARRIER(discard)
+
+/*
+ * Memory barrier with semantics analogous to the memoryBarrier() GLSL
+ * intrinsic.
+ */
+BARRIER(memory_barrier)
+
+/*
+ * Shader clock intrinsic with semantics analogous to the clock2x32ARB()
+ * GLSL intrinsic.
+ * The latter can be used as a code motion barrier, which is currently not
+ * feasible with NIR.
+ */
+INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+
+/*
+ * Memory barrier with semantics analogous to the compute shader
+ * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(),
+ * memoryBarrierImage() and memoryBarrierShared() GLSL intrinsics.
+ */
+BARRIER(group_memory_barrier)
+BARRIER(memory_barrier_atomic_counter)
+BARRIER(memory_barrier_buffer)
+BARRIER(memory_barrier_image)
+BARRIER(memory_barrier_shared)
+
+/** A conditional discard, with a single boolean source. */
+INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0)
+
+/**
+ * Basic Geometry Shader intrinsics.
+ *
+ * emit_vertex implements GLSL's EmitStreamVertex() built-in. It takes a single
+ * index, which is the stream ID to write to.
+ *
+ * end_primitive implements GLSL's EndPrimitive() built-in.
+ */
+INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0)
+INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0)
+
+/**
+ * Geometry Shader intrinsics with a vertex count.
+ *
+ * Alternatively, drivers may implement these intrinsics, and use
+ * nir_lower_gs_intrinsics() to convert from the basic intrinsics.
+ *
+ * These maintain a count of the number of vertices emitted, as an additional
+ * unsigned integer source.
+ */
+INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0)
+INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0)
+
+/*
+ * Atomic counters
+ *
+ * The *_var variants take an atomic_uint nir_variable, while the other,
+ * lowered, variants take a constant buffer index and register offset.
+ */
+
+#define ATOMIC(name, flags) \
+   INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \
+   INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags)
+
+ATOMIC(inc, 0)
+ATOMIC(dec, 0)
+ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE)
+
+/*
+ * Image load, store and atomic intrinsics.
+ *
+ * All image intrinsics take an image target passed as a nir_variable. Image
+ * variables contain a number of memory and layout qualifiers that influence
+ * the semantics of the intrinsic.
+ *
+ * All image intrinsics take a four-coordinate vector and a sample index as
+ * the first two sources, determining the location within the image that will
+ * be accessed by the intrinsic. Components not applicable to the image target
+ * in use are undefined.
Image store takes an additional four-component + * argument with the value to be written, and image atomic operations take + * either one or two additional scalar arguments with the same meaning as in + * the ARB_shader_image_load_store specification. + */ +INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE) +INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0) +INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0) +INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, + NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) + +/* + * SSBO atomic intrinsics + * + * All of the SSBO atomic memory operations read a value from memory, + * compute a new value using one of the operations below, write the new + * value to memory, and return the original value read. + * + * All operations take 3 sources except CompSwap that takes 4. These + * sources represent: + * + * 0: The SSBO buffer index. + * 1: The offset into the SSBO buffer of the variable that the atomic + * operation will operate on. + * 2: The data parameter to the atomic function (i.e. the value to add + * in ssbo_atomic_add, etc). + * 3: For CompSwap only: the second data parameter. + */ +INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) +INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) +INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) +INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) +INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) +INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) +INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) +INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) +INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) +INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0) + +/* + * CS shared variable atomic intrinsics + * + * All of the shared variable atomic memory operations read a value from + * memory, compute a new value using one of the operations below, write the + * new value to memory, and return the original value read. + * + * All operations take 2 sources except CompSwap that takes 3. These + * sources represent: + * + * 0: The offset into the shared variable storage region that the atomic + * operation will operate on. + * 1: The data parameter to the atomic function (i.e. the value to add + * in shared_atomic_add, etc). + * 2: For CompSwap only: the second data parameter. 
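+ *
+ * For example, a GLSL atomicAdd(s, x) on a shared variable s would be
+ * lowered to shared_atomic_add with src[0] the byte offset of s and
+ * src[1] = x, returning the value read from memory before the add.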
+ */
+INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0)
+INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0)
+
+#define SYSTEM_VALUE(name, components, num_indices) \
+   INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \
+   NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+
+SYSTEM_VALUE(front_face, 1, 0)
+SYSTEM_VALUE(vertex_id, 1, 0)
+SYSTEM_VALUE(vertex_id_zero_base, 1, 0)
+SYSTEM_VALUE(base_vertex, 1, 0)
+SYSTEM_VALUE(instance_id, 1, 0)
+SYSTEM_VALUE(base_instance, 1, 0)
+SYSTEM_VALUE(draw_id, 1, 0)
+SYSTEM_VALUE(sample_id, 1, 0)
+SYSTEM_VALUE(sample_pos, 2, 0)
+SYSTEM_VALUE(sample_mask_in, 1, 0)
+SYSTEM_VALUE(primitive_id, 1, 0)
+SYSTEM_VALUE(invocation_id, 1, 0)
+SYSTEM_VALUE(tess_coord, 3, 0)
+SYSTEM_VALUE(tess_level_outer, 4, 0)
+SYSTEM_VALUE(tess_level_inner, 2, 0)
+SYSTEM_VALUE(patch_vertices_in, 1, 0)
+SYSTEM_VALUE(local_invocation_id, 3, 0)
+SYSTEM_VALUE(work_group_id, 3, 0)
+SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */
+SYSTEM_VALUE(num_work_groups, 3, 0)
+SYSTEM_VALUE(helper_invocation, 1, 0)
+
+/*
+ * Load operations pull data from some piece of GPU memory. All load
+ * operations operate in terms of offsets into some piece of theoretical
+ * memory. Loads from externally visible memory (UBO and SSBO) simply take a
+ * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.)
+ * take a base+offset pair where the base (const_index[0]) gives the location
+ * of the start of the variable being loaded and the offset source is an
+ * offset into that variable.
+ *
+ * Some load operations such as UBO/SSBO load and per_vertex loads take an
+ * additional source to specify which UBO/SSBO/vertex to load from.
+ *
+ * The exact address type depends on the lowering pass that generates the
+ * load/store intrinsics. Typically, this is vec4 units for things such as
+ * varying slots and float units for fragment shader inputs. UBO and SSBO
+ * offsets are always in bytes.
+ */
+
+#define LOAD(name, srcs, indices, flags) \
+   INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags)
+
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { buffer_index, offset }. No const_index */
+LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { vertex, offset }. const_index[] = { base } */
+LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER)
+/* src[] = { buffer_index, offset }. No const_index */
+LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }. const_index[] = { base } */
+LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { vertex, offset }. const_index[] = { base } */
+LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+/* src[] = { offset }.
const_index[] = { base } */
+LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE)
+
+/*
+ * Stores work the same way as loads, except now the first source is the value
+ * to store and the second (and possibly third) source specifies where to
+ * store the value. SSBO and shared memory stores also have a write mask as
+ * const_index[0].
+ */
+
+#define STORE(name, srcs, indices, flags) \
+   INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags)
+
+/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+STORE(output, 2, 2, 0)
+/* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */
+STORE(per_vertex_output, 3, 2, 0)
+/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */
+STORE(ssbo, 3, 1, 0)
+/* src[] = { value, offset }. const_index[] = { base, write_mask } */
+STORE(shared, 2, 2, 0)
+
+LAST_INTRINSIC(store_shared)
diff --git a/src/compiler/nir/nir_liveness.c b/src/compiler/nir/nir_liveness.c
new file mode 100644
index 00000000000..05f79d7bc61
--- /dev/null
+++ b/src/compiler/nir/nir_liveness.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ */
+
+#include "nir.h"
+#include "nir_worklist.h"
+#include "nir_vla.h"
+
+/*
+ * Basic liveness analysis. This works only in SSA form.
+ *
+ * This liveness pass treats phi nodes as being melded to the space between
+ * blocks so that the destinations of a phi are in the live-in of the block
+ * in which it resides and the sources are in the live-out of the
+ * corresponding block. By formulating the liveness information in this
+ * way, we ensure that the definition of any variable dominates its entire
+ * live range. This is true because the only way that the definition of an
+ * SSA value may not dominate a use is if the use is in a phi node, and the
+ * uses in phi nodes are in the live-out of the corresponding predecessor
+ * block but not in the live-in of the block containing the phi node.
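+ *
+ * Concretely, for "x = phi(a from B1, b from B2)": x is in the live-in of
+ * the block containing the phi, while a and b are in the live-out of B1
+ * and B2, respectively.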
+ */ + +struct live_ssa_defs_state { + unsigned num_ssa_defs; + unsigned bitset_words; + + nir_block_worklist worklist; +}; + +static bool +index_ssa_def(nir_ssa_def *def, void *void_state) +{ + struct live_ssa_defs_state *state = void_state; + + if (def->parent_instr->type == nir_instr_type_ssa_undef) + def->live_index = 0; + else + def->live_index = state->num_ssa_defs++; + + return true; +} + +static bool +index_ssa_definitions_block(nir_block *block, void *state) +{ + nir_foreach_instr(block, instr) + nir_foreach_ssa_def(instr, index_ssa_def, state); + + return true; +} + +/* Initialize the liveness data to zero and add the given block to the + * worklist. + */ +static bool +init_liveness_block(nir_block *block, void *void_state) +{ + struct live_ssa_defs_state *state = void_state; + + block->live_in = reralloc(block, block->live_in, BITSET_WORD, + state->bitset_words); + memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD)); + + block->live_out = reralloc(block, block->live_out, BITSET_WORD, + state->bitset_words); + memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD)); + + nir_block_worklist_push_head(&state->worklist, block); + + return true; +} + +static bool +set_src_live(nir_src *src, void *void_live) +{ + BITSET_WORD *live = void_live; + + if (!src->is_ssa) + return true; + + if (src->ssa->live_index == 0) + return true; /* undefined variables are never live */ + + BITSET_SET(live, src->ssa->live_index); + + return true; +} + +static bool +set_ssa_def_dead(nir_ssa_def *def, void *void_live) +{ + BITSET_WORD *live = void_live; + + BITSET_CLEAR(live, def->live_index); + + return true; +} + +/** Propagates the live in of succ across the edge to the live out of pred + * + * Phi nodes exist "between" blocks and all the phi nodes at the start of a + * block act "in parallel". When we propagate from the live_in of one + * block to the live out of the other, we have to kill any writes from phis + * and make live any sources. + * + * Returns true if updating live out of pred added anything + */ +static bool +propagate_across_edge(nir_block *pred, nir_block *succ, + struct live_ssa_defs_state *state) +{ + NIR_VLA(BITSET_WORD, live, state->bitset_words); + memcpy(live, succ->live_in, state->bitset_words * sizeof *live); + + nir_foreach_instr(succ, instr) { + if (instr->type != nir_instr_type_phi) + break; + nir_phi_instr *phi = nir_instr_as_phi(instr); + + assert(phi->dest.is_ssa); + set_ssa_def_dead(&phi->dest.ssa, live); + } + + nir_foreach_instr(succ, instr) { + if (instr->type != nir_instr_type_phi) + break; + nir_phi_instr *phi = nir_instr_as_phi(instr); + + nir_foreach_phi_src(phi, src) { + if (src->pred == pred) { + set_src_live(&src->src, live); + break; + } + } + } + + BITSET_WORD progress = 0; + for (unsigned i = 0; i < state->bitset_words; ++i) { + progress |= live[i] & ~pred->live_out[i]; + pred->live_out[i] |= live[i]; + } + return progress != 0; +} + +void +nir_live_ssa_defs_impl(nir_function_impl *impl) +{ + struct live_ssa_defs_state state; + + /* We start at 1 because we reserve the index value of 0 for ssa_undef + * instructions. Those are never live, so their liveness information + * can be compacted into a single bit. 
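+    *
+    * (set_src_live above treats any source whose live_index is 0 as never
+    * live, which is what makes this compaction safe.)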
+ */ + state.num_ssa_defs = 1; + nir_foreach_block(impl, index_ssa_definitions_block, &state); + + nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL); + + /* We now know how many unique ssa definitions we have and we can go + * ahead and allocate live_in and live_out sets and add all of the + * blocks to the worklist. + */ + state.bitset_words = BITSET_WORDS(state.num_ssa_defs); + nir_foreach_block(impl, init_liveness_block, &state); + + /* We're now ready to work through the worklist and update the liveness + * sets of each of the blocks. By the time we get to this point, every + * block in the function implementation has been pushed onto the + * worklist in reverse order. As long as we keep the worklist + * up-to-date as we go, everything will get covered. + */ + while (!nir_block_worklist_is_empty(&state.worklist)) { + /* We pop them off in the reverse order we pushed them on. This way + * the first walk of the instructions is backwards so we only walk + * once in the case of no control flow. + */ + nir_block *block = nir_block_worklist_pop_head(&state.worklist); + + memcpy(block->live_in, block->live_out, + state.bitset_words * sizeof(BITSET_WORD)); + + nir_if *following_if = nir_block_get_following_if(block); + if (following_if) + set_src_live(&following_if->condition, block->live_in); + + nir_foreach_instr_reverse(block, instr) { + /* Phi nodes are handled seperately so we want to skip them. Since + * we are going backwards and they are at the beginning, we can just + * break as soon as we see one. + */ + if (instr->type == nir_instr_type_phi) + break; + + nir_foreach_ssa_def(instr, set_ssa_def_dead, block->live_in); + nir_foreach_src(instr, set_src_live, block->live_in); + } + + /* Walk over all of the predecessors of the current block updating + * their live in with the live out of this one. If anything has + * changed, add the predecessor to the work list so that we ensure + * that the new information is used. + */ + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + nir_block *pred = (nir_block *)entry->key; + if (propagate_across_edge(pred, block, &state)) + nir_block_worklist_push_tail(&state.worklist, pred); + } + } + + nir_block_worklist_fini(&state.worklist); +} + +static bool +src_does_not_use_def(nir_src *src, void *def) +{ + return !src->is_ssa || src->ssa != (nir_ssa_def *)def; +} + +static bool +search_for_use_after_instr(nir_instr *start, nir_ssa_def *def) +{ + /* Only look for a use strictly after the given instruction */ + struct exec_node *node = start->node.next; + while (!exec_node_is_tail_sentinel(node)) { + nir_instr *instr = exec_node_data(nir_instr, node, node); + if (!nir_foreach_src(instr, src_does_not_use_def, def)) + return true; + node = node->next; + } + return false; +} + +/* Returns true if def is live at instr assuming that def comes before + * instr in a pre DFS search of the dominance tree. + */ +static bool +nir_ssa_def_is_live_at(nir_ssa_def *def, nir_instr *instr) +{ + if (BITSET_TEST(instr->block->live_out, def->live_index)) { + /* Since def dominates instr, if def is in the liveout of the block, + * it's live at instr + */ + return true; + } else { + if (BITSET_TEST(instr->block->live_in, def->live_index) || + def->parent_instr->block == instr->block) { + /* In this case it is either live coming into instr's block or it + * is defined in the same block. In this case, we simply need to + * see if it is used after instr. 
+ */ + return search_for_use_after_instr(instr, def); + } else { + return false; + } + } +} + +bool +nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b) +{ + if (a->parent_instr == b->parent_instr) { + /* Two variables defined at the same time interfere assuming at + * least one isn't dead. + */ + return true; + } else if (a->live_index == 0 || b->live_index == 0) { + /* If either variable is an ssa_undef, then there's no interference */ + return false; + } else if (a->live_index < b->live_index) { + return nir_ssa_def_is_live_at(a, b->parent_instr); + } else { + return nir_ssa_def_is_live_at(b, a->parent_instr); + } +} diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c new file mode 100644 index 00000000000..0a27e66cf0f --- /dev/null +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c @@ -0,0 +1,210 @@ +/* + * Copyright © 2014-2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" + +/** @file nir_lower_alu_to_scalar.c + * + * Replaces nir_alu_instr operations with more than one channel used in the + * arguments with individual per-channel operations. 
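+ *
+ * For example, "fadd dest.xyz, a, b" becomes three single-channel fadds
+ * whose results are regathered with a vec3, and reductions such as fdot4
+ * become per-channel fmuls merged with fadds.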
+ */ + +static void +nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components) +{ + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); + instr->dest.write_mask = (1 << num_components) - 1; +} + +static void +lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op, + nir_builder *builder) +{ + unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; + + nir_ssa_def *last = NULL; + for (unsigned i = 0; i < num_components; i++) { + nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op); + nir_alu_ssa_dest_init(chan, 1); + nir_alu_src_copy(&chan->src[0], &instr->src[0], chan); + chan->src[0].swizzle[0] = chan->src[0].swizzle[i]; + if (nir_op_infos[chan_op].num_inputs > 1) { + assert(nir_op_infos[chan_op].num_inputs == 2); + nir_alu_src_copy(&chan->src[1], &instr->src[1], chan); + chan->src[1].swizzle[0] = chan->src[1].swizzle[i]; + } + + nir_builder_instr_insert(builder, &chan->instr); + + if (i == 0) { + last = &chan->dest.dest.ssa; + } else { + last = nir_build_alu(builder, merge_op, + last, &chan->dest.dest.ssa, NULL, NULL); + } + } + + assert(instr->dest.write_mask == 1); + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last)); + nir_instr_remove(&instr->instr); +} + +static void +lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) +{ + unsigned num_src = nir_op_infos[instr->op].num_inputs; + unsigned i, chan; + + assert(instr->dest.dest.is_ssa); + assert(instr->dest.write_mask != 0); + + b->cursor = nir_before_instr(&instr->instr); + +#define LOWER_REDUCTION(name, chan, merge) \ + case name##2: \ + case name##3: \ + case name##4: \ + lower_reduction(instr, chan, merge, b); \ + return; + + switch (instr->op) { + case nir_op_vec4: + case nir_op_vec3: + case nir_op_vec2: + /* We don't need to scalarize these ops, they're the ones generated to + * group up outputs into a value that can be SSAed. + */ + return; + + case nir_op_unpack_unorm_4x8: + case nir_op_unpack_snorm_4x8: + case nir_op_unpack_unorm_2x16: + case nir_op_unpack_snorm_2x16: + /* There is no scalar version of these ops, unless we were to break it + * down to bitshifts and math (which is definitely not intended). + */ + return; + + case nir_op_unpack_half_2x16: + /* We could split this into unpack_half_2x16_split_[xy], but should + * we? 
+ */ + return; + + case nir_op_fdph: { + nir_ssa_def *sum[4]; + for (unsigned i = 0; i < 3; i++) { + sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa, + instr->src[0].swizzle[i]), + nir_channel(b, instr->src[1].src.ssa, + instr->src[1].swizzle[i])); + } + sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]); + + nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]), + nir_fadd(b, sum[2], sum[3])); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); + nir_instr_remove(&instr->instr); + return; + } + + LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd); + LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand); + LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand); + LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior); + LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior); + LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand); + LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for); + + default: + break; + } + + if (instr->dest.dest.ssa.num_components == 1) + return; + + unsigned num_components = instr->dest.dest.ssa.num_components; + nir_ssa_def *comps[] = { NULL, NULL, NULL, NULL }; + + for (chan = 0; chan < 4; chan++) { + if (!(instr->dest.write_mask & (1 << chan))) + continue; + + nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op); + for (i = 0; i < num_src; i++) { + /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar + * args (input_sizes[] == 1). + */ + assert(nir_op_infos[instr->op].input_sizes[i] < 2); + unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ? + 0 : chan); + + nir_alu_src_copy(&lower->src[i], &instr->src[i], lower); + for (int j = 0; j < 4; j++) + lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan]; + } + + nir_alu_ssa_dest_init(lower, 1); + lower->dest.saturate = instr->dest.saturate; + comps[chan] = &lower->dest.dest.ssa; + + nir_builder_instr_insert(b, &lower->instr); + } + + nir_ssa_def *vec = nir_vec(b, comps, num_components); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); + + nir_instr_remove(&instr->instr); +} + +static bool +lower_alu_to_scalar_block(nir_block *block, void *builder) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_alu) + lower_alu_instr_scalar(nir_instr_as_alu(instr), builder); + } + + return true; +} + +static void +nir_lower_alu_to_scalar_impl(nir_function_impl *impl) +{ + nir_builder builder; + nir_builder_init(&builder, impl); + + nir_foreach_block(impl, lower_alu_to_scalar_block, &builder); +} + +void +nir_lower_alu_to_scalar(nir_shader *shader) +{ + nir_foreach_function(shader, function) { + if (function->impl) + nir_lower_alu_to_scalar_impl(function->impl); + } +} diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c new file mode 100644 index 00000000000..2cbc1b75348 --- /dev/null +++ b/src/compiler/nir/nir_lower_atomics.c @@ -0,0 +1,166 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission 
notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "glsl/ir_uniform.h"
+#include "nir.h"
+#include "main/config.h"
+#include <assert.h>
+
+typedef struct {
+   const struct gl_shader_program *shader_program;
+   nir_shader *shader;
+} lower_atomic_state;
+
+/*
+ * Replace atomic counter intrinsics that use a variable with intrinsics
+ * that directly store the buffer index and byte offset.
+ */
+
+static void
+lower_instr(nir_intrinsic_instr *instr,
+            lower_atomic_state *state)
+{
+   nir_intrinsic_op op;
+   switch (instr->intrinsic) {
+   case nir_intrinsic_atomic_counter_read_var:
+      op = nir_intrinsic_atomic_counter_read;
+      break;
+
+   case nir_intrinsic_atomic_counter_inc_var:
+      op = nir_intrinsic_atomic_counter_inc;
+      break;
+
+   case nir_intrinsic_atomic_counter_dec_var:
+      op = nir_intrinsic_atomic_counter_dec;
+      break;
+
+   default:
+      return;
+   }
+
+   if (instr->variables[0]->var->data.mode != nir_var_uniform &&
+       instr->variables[0]->var->data.mode != nir_var_shader_storage)
+      return; /* atomics passed as function arguments can't be lowered */
+
+   void *mem_ctx = ralloc_parent(instr);
+   unsigned uniform_loc = instr->variables[0]->var->data.location;
+
+   nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
+   new_instr->const_index[0] =
+      state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
+
+   nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
+   offset_const->value.u[0] = instr->variables[0]->var->data.offset;
+
+   nir_instr_insert_before(&instr->instr, &offset_const->instr);
+
+   nir_ssa_def *offset_def = &offset_const->def;
+
+   nir_deref *tail = &instr->variables[0]->deref;
+   while (tail->child != NULL) {
+      assert(tail->child->deref_type == nir_deref_type_array);
+      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
+      tail = tail->child;
+
+      unsigned child_array_elements = tail->child != NULL ?
+ glsl_get_aoa_size(tail->type) : 1; + + offset_const->value.u[0] += deref_array->base_offset * + child_array_elements * ATOMIC_COUNTER_SIZE; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_load_const_instr *atomic_counter_size = + nir_load_const_instr_create(mem_ctx, 1); + atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE; + nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr); + + nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul); + nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); + mul->dest.write_mask = 0x1; + nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul); + mul->src[1].src.is_ssa = true; + mul->src[1].src.ssa = &atomic_counter_size->def; + nir_instr_insert_before(&instr->instr, &mul->instr); + + nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd); + nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); + add->dest.write_mask = 0x1; + add->src[0].src.is_ssa = true; + add->src[0].src.ssa = &mul->dest.dest.ssa; + add->src[1].src.is_ssa = true; + add->src[1].src.ssa = offset_def; + nir_instr_insert_before(&instr->instr, &add->instr); + + offset_def = &add->dest.dest.ssa; + } + } + + new_instr->src[0].is_ssa = true; + new_instr->src[0].ssa = offset_def; + + if (instr->dest.is_ssa) { + nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, + instr->dest.ssa.num_components, NULL); + nir_ssa_def_rewrite_uses(&instr->dest.ssa, + nir_src_for_ssa(&new_instr->dest.ssa)); + } else { + nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx); + } + + nir_instr_insert_before(&instr->instr, &new_instr->instr); + nir_instr_remove(&instr->instr); +} + +static bool +lower_block(nir_block *block, void *state) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_intrinsic) + lower_instr(nir_instr_as_intrinsic(instr), + (lower_atomic_state *) state); + } + + return true; +} + +void +nir_lower_atomics(nir_shader *shader, + const struct gl_shader_program *shader_program) +{ + lower_atomic_state state = { + .shader = shader, + .shader_program = shader_program, + }; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_foreach_block(function->impl, lower_block, (void *) &state); + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + } +} diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c new file mode 100644 index 00000000000..0ca6a289396 --- /dev/null +++ b/src/compiler/nir/nir_lower_clip.c @@ -0,0 +1,339 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Rob Clark
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#define MAX_CLIP_PLANES 8
+
+/* Generates the lowering code for user-clip-planes, generating CLIPDIST
+ * from UCP[n] + CLIPVERTEX or POSITION. Additionally, an optional pass
+ * for fragment shaders to insert conditional kills based on the
+ * interpolated CLIPDIST.
+ *
+ * NOTE: should be run after nir_lower_outputs_to_temporaries() (or at
+ * least in scenarios where you can count on each output being written
+ * once and only once).
+ */
+
+
+static nir_variable *
+create_clipdist_var(nir_shader *shader, unsigned drvloc,
+                    bool output, gl_varying_slot slot)
+{
+   nir_variable *var = rzalloc(shader, nir_variable);
+
+   var->data.driver_location = drvloc;
+   var->type = glsl_vec4_type();
+   var->data.mode = output ? nir_var_shader_out : nir_var_shader_in;
+   var->name = ralloc_asprintf(var, "clipdist_%d", drvloc);
+   var->data.index = 0;
+   var->data.location = slot;
+
+   if (output) {
+      exec_list_push_tail(&shader->outputs, &var->node);
+      shader->num_outputs++; /* TODO use type_size() */
+   }
+   else {
+      exec_list_push_tail(&shader->inputs, &var->node);
+      shader->num_inputs++; /* TODO use type_size() */
+   }
+   return var;
+}
+
+static void
+store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val)
+{
+   nir_intrinsic_instr *store;
+
+   store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output);
+   store->num_components = 4;
+   store->const_index[0] = out->data.driver_location;
+   store->const_index[1] = 0xf; /* wrmask */
+   store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]);
+   store->src[0].is_ssa = true;
+   store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_builder_instr_insert(b, &store->instr);
+}
+
+static void
+load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
+{
+   nir_intrinsic_instr *load;
+
+   load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+   load->num_components = 4;
+   load->const_index[0] = in->data.driver_location;
+   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+   nir_builder_instr_insert(b, &load->instr);
+
+   val[0] = nir_channel(b, &load->dest.ssa, 0);
+   val[1] = nir_channel(b, &load->dest.ssa, 1);
+   val[2] = nir_channel(b, &load->dest.ssa, 2);
+   val[3] = nir_channel(b, &load->dest.ssa, 3);
+}
+
+struct find_output_state
+{
+   unsigned drvloc;
+   nir_ssa_def *def;
+};
+
+static bool
+find_output_in_block(nir_block *block, void *void_state)
+{
+   struct find_output_state *state = void_state;
+   nir_foreach_instr(block, instr) {
+
+      if (instr->type == nir_instr_type_intrinsic) {
+         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+         if ((intr->intrinsic == nir_intrinsic_store_output) &&
+             intr->const_index[0] == state->drvloc) {
+            assert(state->def == NULL);
+            assert(intr->src[0].is_ssa);
+            assert(nir_src_as_const_value(intr->src[1]));
+            state->def = intr->src[0].ssa;
+
+#if !defined(DEBUG)
+            /* for debug builds, scan the entire shader to assert
+             * if the output is written multiple times. For release
+             * builds just assume all is well and bail when we
+             * find the first one:
+             */
+            return false;
+#endif
+         }
+      }
+   }
+
+   return true;
+}
+
+/* TODO: maybe this would be a useful helper?
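+ * (it scans the shader in reverse for the store_output intrinsic with the
+ * given driver_location and returns the SSA value being stored)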
+ * NOTE: assumes each output is written exactly once (and unconditionally),
+ * so run nir_lower_outputs_to_temporaries() first if needed
+ */
+static nir_ssa_def *
+find_output(nir_shader *shader, unsigned drvloc)
+{
+   struct find_output_state state = {
+      .drvloc = drvloc,
+   };
+
+   nir_foreach_function(shader, function) {
+      if (function->impl) {
+         nir_foreach_block_reverse(function->impl,
+                                   find_output_in_block, &state);
+      }
+   }
+
+   return state.def;
+}
+
+/*
+ * VS lowering
+ */
+
+static void
+lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables,
+              nir_ssa_def *cv, nir_variable **out)
+{
+   nir_ssa_def *clipdist[MAX_CLIP_PLANES];
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+
+   /* NIR should ensure that, even in case of loops/if-else, there
+    * should be only a single predecessor block to end_block, which
+    * makes it the perfect place to insert the clipdist calculations.
+    *
+    * NOTE: in case of early returns, these would have to be lowered
+    * to jumps to the end_block predecessor in a previous pass. Not sure
+    * if there is a good way to sanity check this, but for now the
+    * users of this pass don't support sub-routines.
+    */
+   assert(impl->end_block->predecessors->entries == 1);
+   b.cursor = nir_after_cf_list(&impl->body);
+
+   for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
+      if (ucp_enables & (1 << plane)) {
+         nir_ssa_def *ucp =
+            nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane, plane);
+
+         /* calculate clipdist[plane] = dot(ucp, cv): */
+         clipdist[plane] = nir_fdot4(&b, ucp, cv);
+      }
+      else {
+         /* 0.0 == don't-clip == disabled: */
+         clipdist[plane] = nir_imm_float(&b, 0.0);
+      }
+   }
+
+   if (ucp_enables & 0x0f)
+      store_clipdist_output(&b, out[0], &clipdist[0]);
+   if (ucp_enables & 0xf0)
+      store_clipdist_output(&b, out[1], &clipdist[4]);
+
+   nir_metadata_preserve(impl, nir_metadata_dominance);
+}
+
+/* ucp_enables is a bitmask of enabled ucp's. The actual ucp values are
+ * passed in to the shader via user_clip_plane system-values.
+ */
+void
+nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables)
+{
+   int clipvertex = -1;
+   int position = -1;
+   int maxloc = -1;
+   nir_ssa_def *cv;
+   nir_variable *out[2] = { NULL };
+
+   if (!ucp_enables)
+      return;
+
+   /* find clipvertex/position outputs: */
+   nir_foreach_variable(var, &shader->outputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of the last used driver-location.. we'll be
+       * appending CLIP_DIST0/CLIP_DIST1 after the last existing
+       * output:
+       */
+      maxloc = MAX2(maxloc, loc);
+
+      switch (var->data.location) {
+      case VARYING_SLOT_POS:
+         position = loc;
+         break;
+      case VARYING_SLOT_CLIP_VERTEX:
+         clipvertex = loc;
+         break;
+      case VARYING_SLOT_CLIP_DIST0:
+      case VARYING_SLOT_CLIP_DIST1:
+         /* if the shader is already writing CLIPDIST, then
+          * there should be no user-clip-planes to deal
+          * with.
+ */
+         return;
+      }
+   }
+
+   if (clipvertex != -1)
+      cv = find_output(shader, clipvertex);
+   else if (position != -1)
+      cv = find_output(shader, position);
+   else
+      return;
+
+   /* insert CLIPDIST outputs: */
+   if (ucp_enables & 0x0f)
+      out[0] =
+         create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST0);
+   if (ucp_enables & 0xf0)
+      out[1] =
+         create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST1);
+
+   nir_foreach_function(shader, function) {
+      if (!strcmp(function->name, "main"))
+         lower_clip_vs(function->impl, ucp_enables, cv, out);
+   }
+}
+
+/*
+ * FS lowering
+ */
+
+static void
+lower_clip_fs(nir_function_impl *impl, unsigned ucp_enables,
+              nir_variable **in)
+{
+   nir_ssa_def *clipdist[MAX_CLIP_PLANES];
+   nir_builder b;
+
+   nir_builder_init(&b, impl);
+   b.cursor = nir_before_cf_list(&impl->body);
+
+   if (ucp_enables & 0x0f)
+      load_clipdist_input(&b, in[0], &clipdist[0]);
+   if (ucp_enables & 0xf0)
+      load_clipdist_input(&b, in[1], &clipdist[4]);
+
+   for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
+      if (ucp_enables & (1 << plane)) {
+         nir_intrinsic_instr *discard;
+         nir_ssa_def *cond;
+
+         cond = nir_flt(&b, clipdist[plane], nir_imm_float(&b, 0.0));
+
+         discard = nir_intrinsic_instr_create(b.shader,
+                                              nir_intrinsic_discard_if);
+         discard->src[0] = nir_src_for_ssa(cond);
+         nir_builder_instr_insert(&b, &discard->instr);
+      }
+   }
+}
+
+/* insert conditional kills based on the interpolated CLIPDIST
+ */
+void
+nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables)
+{
+   nir_variable *in[2];
+   int maxloc = -1;
+
+   if (!ucp_enables)
+      return;
+
+   nir_foreach_variable(var, &shader->inputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of the last used driver-location.. we'll be
+       * appending CLIP_DIST0/CLIP_DIST1 after the last existing
+       * input:
+       */
+      maxloc = MAX2(maxloc, loc);
+   }
+
+   /* The shader won't normally have CLIPDIST inputs, so we
+    * must add our own:
+    */
+   /* insert CLIPDIST inputs: */
+   if (ucp_enables & 0x0f)
+      in[0] =
+         create_clipdist_var(shader, ++maxloc, false,
+                             VARYING_SLOT_CLIP_DIST0);
+   if (ucp_enables & 0xf0)
+      in[1] =
+         create_clipdist_var(shader, ++maxloc, false,
+                             VARYING_SLOT_CLIP_DIST1);
+
+   nir_foreach_function(shader, function) {
+      if (!strcmp(function->name, "main"))
+         lower_clip_fs(function->impl, ucp_enables, in);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_global_vars_to_local.c b/src/compiler/nir/nir_lower_global_vars_to_local.c
new file mode 100644
index 00000000000..7b4cd4ee8dc
--- /dev/null
+++ b/src/compiler/nir/nir_lower_global_vars_to_local.c
@@ -0,0 +1,113 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +/* + * This lowering pass detects when a global variable is only being used by + * one function and makes it local to that function + */ + +#include "nir.h" + +struct global_to_local_state { + nir_function_impl *impl; + /* A hash table keyed on variable pointers that stores the unique + * nir_function_impl that uses the given variable. If a variable is + * used in multiple functions, the data for the given key will be NULL. + */ + struct hash_table *var_func_table; +}; + +static bool +mark_global_var_uses_block(nir_block *block, void *void_state) +{ + struct global_to_local_state *state = void_state; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + unsigned num_vars = nir_intrinsic_infos[intrin->intrinsic].num_variables; + + for (unsigned i = 0; i < num_vars; i++) { + nir_variable *var = intrin->variables[i]->var; + if (var->data.mode != nir_var_global) + continue; + + struct hash_entry *entry = + _mesa_hash_table_search(state->var_func_table, var); + + if (entry) { + if (entry->data != state->impl) + entry->data = NULL; + } else { + _mesa_hash_table_insert(state->var_func_table, var, state->impl); + } + } + } + + return true; +} + +bool +nir_lower_global_vars_to_local(nir_shader *shader) +{ + struct global_to_local_state state; + bool progress = false; + + state.var_func_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + nir_foreach_function(shader, function) { + if (function->impl) { + state.impl = function->impl; + nir_foreach_block(function->impl, mark_global_var_uses_block, &state); + } + } + + struct hash_entry *entry; + hash_table_foreach(state.var_func_table, entry) { + nir_variable *var = (void *)entry->key; + nir_function_impl *impl = entry->data; + + assert(var->data.mode == nir_var_global); + + if (impl != NULL) { + exec_node_remove(&var->node); + var->data.mode = nir_var_local; + exec_list_push_tail(&impl->locals, &var->node); + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance | + nir_metadata_live_ssa_defs); + progress = true; + } + } + + _mesa_hash_table_destroy(state.var_func_table, NULL); + + return progress; +} diff --git a/src/compiler/nir/nir_lower_gs_intrinsics.c b/src/compiler/nir/nir_lower_gs_intrinsics.c new file mode 100644 index 00000000000..fdff1656b4d --- /dev/null +++ b/src/compiler/nir/nir_lower_gs_intrinsics.c @@ -0,0 +1,219 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" +#include "nir_builder.h" + +/** + * \file nir_lower_gs_intrinsics.c + * + * Geometry Shaders can call EmitVertex()/EmitStreamVertex() to output an + * arbitrary number of vertices. However, the shader must declare the maximum + * number of vertices that it will ever output - further attempts to emit + * vertices result in undefined behavior according to the GLSL specification. + * + * Drivers might use this maximum number of vertices to allocate enough space + * to hold the geometry shader's output. Some drivers (such as i965) need to + * implement "safety checks" which ensure that the shader hasn't emitted too + * many vertices, to avoid overflowing that space and trashing other memory. + * + * The count of emitted vertices can also be useful in buffer offset + * calculations, so drivers know where to write the GS output. + * + * However, for simple geometry shaders that emit a statically determinable + * number of vertices, this extra bookkeeping is unnecessary and inefficient. + * By tracking the vertex count in NIR, we allow constant folding/propagation + * and dead control flow optimizations to eliminate most of it where possible. + * + * This pass introduces a new global variable which stores the current vertex + * count (initialized to 0), and converts emit_vertex/end_primitive intrinsics + * to their *_with_counter variants. emit_vertex is also wrapped in a safety + * check to avoid buffer overflows. Finally, it adds a set_vertex_count + * intrinsic at the end of the program, informing the driver of the final + * vertex count. + */ + +struct state { + nir_builder *builder; + nir_variable *vertex_count_var; + bool progress; +}; + +/** + * Replace emit_vertex intrinsics with: + * + * if (vertex_count < max_vertices) { + * emit_vertex_with_counter vertex_count ... + * vertex_count += 1 + * } + */ +static void +rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state) +{ + nir_builder *b = state->builder; + + /* Load the vertex count */ + b->cursor = nir_before_instr(&intrin->instr); + nir_ssa_def *count = nir_load_var(b, state->vertex_count_var); + + nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->info.gs.vertices_out); + + /* Create: if (vertex_count < max_vertices) and insert it. + * + * The new if statement needs to be hooked up to the control flow graph + * before we start inserting instructions into it. 
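+    *
+    * (nir_builder_cf_insert below does that splice at the builder's cursor;
+    * only then is it safe to point the cursor at the then_list.)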
+ */ + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(nir_ilt(b, count, max_vertices)); + nir_builder_cf_insert(b, &if_stmt->cf_node); + + /* Fill out the new then-block */ + b->cursor = nir_after_cf_list(&if_stmt->then_list); + + nir_intrinsic_instr *lowered = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_emit_vertex_with_counter); + lowered->const_index[0] = intrin->const_index[0]; + lowered->src[0] = nir_src_for_ssa(count); + nir_builder_instr_insert(b, &lowered->instr); + + /* Increment the vertex count by 1 */ + nir_store_var(b, state->vertex_count_var, + nir_iadd(b, count, nir_imm_int(b, 1)), + 0x1); /* .x */ + + nir_instr_remove(&intrin->instr); + + state->progress = true; +} + +/** + * Replace end_primitive with end_primitive_with_counter. + */ +static void +rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state) +{ + nir_builder *b = state->builder; + + b->cursor = nir_before_instr(&intrin->instr); + nir_ssa_def *count = nir_load_var(b, state->vertex_count_var); + + nir_intrinsic_instr *lowered = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_end_primitive_with_counter); + lowered->const_index[0] = intrin->const_index[0]; + lowered->src[0] = nir_src_for_ssa(count); + nir_builder_instr_insert(b, &lowered->instr); + + nir_instr_remove(&intrin->instr); + + state->progress = true; +} + +static bool +rewrite_intrinsics(nir_block *block, void *closure) +{ + struct state *state = closure; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + switch (intrin->intrinsic) { + case nir_intrinsic_emit_vertex: + rewrite_emit_vertex(intrin, state); + break; + case nir_intrinsic_end_primitive: + rewrite_end_primitive(intrin, state); + break; + default: + /* not interesting; skip this */ + break; + } + } + + return true; +} + +/** + * Add a set_vertex_count intrinsic at the end of the program + * (representing the final vertex count). + */ +static void +append_set_vertex_count(nir_block *end_block, struct state *state) +{ + nir_builder *b = state->builder; + nir_shader *shader = state->builder->shader; + + /* Insert the new intrinsic in all of the predecessors of the end block, + * but before any jump instructions (return). 
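+    * (nir_after_block_before_jump below yields exactly that cursor position
+    * for each predecessor.)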
+ */ + struct set_entry *entry; + set_foreach(end_block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + b->cursor = nir_after_block_before_jump(pred); + + nir_ssa_def *count = nir_load_var(b, state->vertex_count_var); + + nir_intrinsic_instr *set_vertex_count = + nir_intrinsic_instr_create(shader, nir_intrinsic_set_vertex_count); + set_vertex_count->src[0] = nir_src_for_ssa(count); + + nir_builder_instr_insert(b, &set_vertex_count->instr); + } +} + +bool +nir_lower_gs_intrinsics(nir_shader *shader) +{ + struct state state; + state.progress = false; + + /* Create the counter variable */ + nir_variable *var = rzalloc(shader, nir_variable); + var->data.mode = nir_var_global; + var->type = glsl_uint_type(); + var->name = "vertex_count"; + var->constant_initializer = rzalloc(shader, nir_constant); /* initialize to 0 */ + + exec_list_push_tail(&shader->globals, &var->node); + state.vertex_count_var = var; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_builder b; + nir_builder_init(&b, function->impl); + state.builder = &b; + + nir_foreach_block(function->impl, rewrite_intrinsics, &state); + + /* This only works because we have a single main() function. */ + append_set_vertex_count(function->impl->end_block, &state); + + nir_metadata_preserve(function->impl, 0); + } + } + + return state.progress; +} diff --git a/src/compiler/nir/nir_lower_idiv.c b/src/compiler/nir/nir_lower_idiv.c new file mode 100644 index 00000000000..a084ad9c0e5 --- /dev/null +++ b/src/compiler/nir/nir_lower_idiv.c @@ -0,0 +1,151 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Rob Clark + */ + +#include "nir.h" +#include "nir_builder.h" + +/* Lowers idiv/udiv/umod + * Based on NV50LegalizeSSA::handleDIV() + * + * Note that this is probably not enough precision for compute shaders. + * Perhaps we want a second higher precision (looping) version of this? + * Or perhaps we assume if you can do compute shaders you can also + * branch out to a pre-optimized shader library routine.. 
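+ *
+ * Roughly: compute a float approximation q = a * rcp(b), measure the error
+ * r = a - q * b, and apply integer corrections to q, with a final sign
+ * fixup in the signed case.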
+ */ + +static void +convert_instr(nir_builder *bld, nir_alu_instr *alu) +{ + nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r; + nir_op op = alu->op; + bool is_signed; + + if ((op != nir_op_idiv) && + (op != nir_op_udiv) && + (op != nir_op_umod)) + return; + + is_signed = (op == nir_op_idiv); + + bld->cursor = nir_before_instr(&alu->instr); + + numer = nir_ssa_for_alu_src(bld, alu, 0); + denom = nir_ssa_for_alu_src(bld, alu, 1); + + if (is_signed) { + af = nir_i2f(bld, numer); + bf = nir_i2f(bld, denom); + af = nir_fabs(bld, af); + bf = nir_fabs(bld, bf); + a = nir_iabs(bld, numer); + b = nir_iabs(bld, denom); + } else { + af = nir_u2f(bld, numer); + bf = nir_u2f(bld, denom); + a = numer; + b = denom; + } + + /* get first result: */ + bf = nir_frcp(bld, bf); + bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */ + q = nir_fmul(bld, af, bf); + + if (is_signed) { + q = nir_f2i(bld, q); + } else { + q = nir_f2u(bld, q); + } + + /* get error of first result: */ + r = nir_imul(bld, q, b); + r = nir_isub(bld, a, r); + r = nir_u2f(bld, r); + r = nir_fmul(bld, r, bf); + r = nir_f2u(bld, r); + + /* add quotients: */ + q = nir_iadd(bld, q, r); + + /* correction: if modulus >= divisor, add 1 */ + r = nir_imul(bld, q, b); + r = nir_isub(bld, a, r); + + r = nir_uge(bld, r, b); + r = nir_b2i(bld, r); + + q = nir_iadd(bld, q, r); + if (is_signed) { + /* fix the sign: */ + r = nir_ixor(bld, numer, denom); + r = nir_ushr(bld, r, nir_imm_int(bld, 31)); + r = nir_i2b(bld, r); + b = nir_ineg(bld, q); + q = nir_bcsel(bld, r, b, q); + } + + if (op == nir_op_umod) { + /* division result in q */ + r = nir_imul(bld, q, b); + q = nir_isub(bld, a, r); + } + + assert(alu->dest.dest.is_ssa); + nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q)); +} + +static bool +convert_block(nir_block *block, void *state) +{ + nir_builder *b = state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_alu) + convert_instr(b, nir_instr_as_alu(instr)); + } + + return true; +} + +static void +convert_impl(nir_function_impl *impl) +{ + nir_builder b; + nir_builder_init(&b, impl); + + nir_foreach_block(impl, convert_block, &b); + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_lower_idiv(nir_shader *shader) +{ + nir_foreach_function(shader, function) { + if (function->impl) + convert_impl(function->impl); + } +} diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c new file mode 100644 index 00000000000..80c5151f0ea --- /dev/null +++ b/src/compiler/nir/nir_lower_io.c @@ -0,0 +1,350 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+/*
+ * This lowering pass converts loads and stores of input/output variables
+ * into the corresponding input/output intrinsics.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+struct lower_io_state {
+   nir_builder builder;
+   void *mem_ctx;
+   int (*type_size)(const struct glsl_type *type);
+   nir_variable_mode mode;
+};
+
+void
+nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
+                         int (*type_size)(const struct glsl_type *))
+{
+   unsigned location = 0;
+
+   nir_foreach_variable(var, var_list) {
+      /*
+       * UBOs have their own address spaces, so don't count them towards the
+       * number of global uniforms
+       */
+      if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
+          var->interface_type != NULL)
+         continue;
+
+      var->data.driver_location = location;
+      location += type_size(var->type);
+   }
+
+   *size = location;
+}
+
+/**
+ * Returns true if we're processing a stage whose inputs are arrays indexed
+ * by a vertex number (such as geometry shader inputs).
+ */
+static bool
+is_per_vertex_input(struct lower_io_state *state, nir_variable *var)
+{
+   gl_shader_stage stage = state->builder.shader->stage;
+
+   return var->data.mode == nir_var_shader_in && !var->data.patch &&
+          (stage == MESA_SHADER_TESS_CTRL ||
+           stage == MESA_SHADER_TESS_EVAL ||
+           stage == MESA_SHADER_GEOMETRY);
+}
+
+static bool
+is_per_vertex_output(struct lower_io_state *state, nir_variable *var)
+{
+   gl_shader_stage stage = state->builder.shader->stage;
+   return var->data.mode == nir_var_shader_out && !var->data.patch &&
+          stage == MESA_SHADER_TESS_CTRL;
+}
+
+static nir_ssa_def *
+get_io_offset(nir_builder *b, nir_deref_var *deref,
+              nir_ssa_def **vertex_index,
+              int (*type_size)(const struct glsl_type *))
+{
+   nir_deref *tail = &deref->deref;
+
+   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
+    * outermost array index separate.  Process the rest normally.
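+    *
+    * For example (a hypothetical shader, not from this patch), a geometry
+    * shader load of gl_in[i].gl_Position comes apart as *vertex_index = i,
+    * while the gl_Position member is folded into the offset computed below.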
+ */ + if (vertex_index != NULL) { + tail = tail->child; + assert(tail->deref_type == nir_deref_type_array); + nir_deref_array *deref_array = nir_deref_as_array(tail); + + nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset); + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1)); + } + *vertex_index = vtx; + } + + /* Just emit code and let constant-folding go to town */ + nir_ssa_def *offset = nir_imm_int(b, 0); + + while (tail->child != NULL) { + const struct glsl_type *parent_type = tail->type; + tail = tail->child; + + if (tail->deref_type == nir_deref_type_array) { + nir_deref_array *deref_array = nir_deref_as_array(tail); + unsigned size = type_size(tail->type); + + offset = nir_iadd(b, offset, + nir_imm_int(b, size * deref_array->base_offset)); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_ssa_def *mul = + nir_imul(b, nir_imm_int(b, size), + nir_ssa_for_src(b, deref_array->indirect, 1)); + + offset = nir_iadd(b, offset, mul); + } + } else if (tail->deref_type == nir_deref_type_struct) { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + + unsigned field_offset = 0; + for (unsigned i = 0; i < deref_struct->index; i++) { + field_offset += type_size(glsl_get_struct_field(parent_type, i)); + } + offset = nir_iadd(b, offset, nir_imm_int(b, field_offset)); + } + } + + return offset; +} + +static nir_intrinsic_op +load_op(struct lower_io_state *state, + nir_variable_mode mode, bool per_vertex) +{ + nir_intrinsic_op op; + switch (mode) { + case nir_var_shader_in: + op = per_vertex ? nir_intrinsic_load_per_vertex_input : + nir_intrinsic_load_input; + break; + case nir_var_shader_out: + op = per_vertex ? nir_intrinsic_load_per_vertex_output : + nir_intrinsic_load_output; + break; + case nir_var_uniform: + op = nir_intrinsic_load_uniform; + break; + default: + unreachable("Unknown variable mode"); + } + return op; +} + +static bool +nir_lower_io_block(nir_block *block, void *void_state) +{ + struct lower_io_state *state = void_state; + + nir_builder *b = &state->builder; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + if (intrin->intrinsic != nir_intrinsic_load_var && + intrin->intrinsic != nir_intrinsic_store_var) + continue; + + nir_variable_mode mode = intrin->variables[0]->var->data.mode; + + if (state->mode != nir_var_all && state->mode != mode) + continue; + + if (mode != nir_var_shader_in && + mode != nir_var_shader_out && + mode != nir_var_uniform) + continue; + + b->cursor = nir_before_instr(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: { + bool per_vertex = + is_per_vertex_input(state, intrin->variables[0]->var) || + is_per_vertex_output(state, intrin->variables[0]->var); + + nir_ssa_def *offset; + nir_ssa_def *vertex_index; + + offset = get_io_offset(b, intrin->variables[0], + per_vertex ? &vertex_index : NULL, + state->type_size); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(state->mem_ctx, + load_op(state, mode, per_vertex)); + load->num_components = intrin->num_components; + + load->const_index[0] = + intrin->variables[0]->var->data.driver_location; + + if (per_vertex) + load->src[0] = nir_src_for_ssa(vertex_index); + + load->src[per_vertex ? 
1 : 0] = nir_src_for_ssa(offset);
+
+         if (intrin->dest.is_ssa) {
+            nir_ssa_dest_init(&load->instr, &load->dest,
+                              intrin->num_components, NULL);
+            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
+                                     nir_src_for_ssa(&load->dest.ssa));
+         } else {
+            nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx);
+         }
+
+         nir_instr_insert_before(&intrin->instr, &load->instr);
+         nir_instr_remove(&intrin->instr);
+         break;
+      }
+
+      case nir_intrinsic_store_var: {
+         assert(mode == nir_var_shader_out);
+
+         nir_ssa_def *offset;
+         nir_ssa_def *vertex_index;
+
+         bool per_vertex =
+            is_per_vertex_output(state, intrin->variables[0]->var);
+
+         offset = get_io_offset(b, intrin->variables[0],
+                                per_vertex ? &vertex_index : NULL,
+                                state->type_size);
+
+         nir_intrinsic_op store_op =
+            per_vertex ? nir_intrinsic_store_per_vertex_output :
+                         nir_intrinsic_store_output;
+
+         nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx,
+                                                                 store_op);
+         store->num_components = intrin->num_components;
+
+         nir_src_copy(&store->src[0], &intrin->src[0], store);
+
+         store->const_index[0] =
+            intrin->variables[0]->var->data.driver_location;
+
+         /* Copy the writemask */
+         store->const_index[1] = intrin->const_index[0];
+
+         if (per_vertex)
+            store->src[1] = nir_src_for_ssa(vertex_index);
+
+         store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset);
+
+         nir_instr_insert_before(&intrin->instr, &store->instr);
+         nir_instr_remove(&intrin->instr);
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+
+   return true;
+}
+
+static void
+nir_lower_io_impl(nir_function_impl *impl,
+                  nir_variable_mode mode,
+                  int (*type_size)(const struct glsl_type *))
+{
+   struct lower_io_state state;
+
+   nir_builder_init(&state.builder, impl);
+   state.mem_ctx = ralloc_parent(impl);
+   state.mode = mode;
+   state.type_size = type_size;
+
+   nir_foreach_block(impl, nir_lower_io_block, &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+}
+
+void
+nir_lower_io(nir_shader *shader, nir_variable_mode mode,
+             int (*type_size)(const struct glsl_type *))
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl)
+         nir_lower_io_impl(function->impl, mode, type_size);
+   }
+}
+
+/**
+ * Return the offset source for a load/store intrinsic.
+ */
+nir_src *
+nir_get_io_offset_src(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_input:
+   case nir_intrinsic_load_output:
+   case nir_intrinsic_load_uniform:
+      return &instr->src[0];
+   case nir_intrinsic_load_per_vertex_input:
+   case nir_intrinsic_load_per_vertex_output:
+   case nir_intrinsic_store_output:
+      return &instr->src[1];
+   case nir_intrinsic_store_per_vertex_output:
+      return &instr->src[2];
+   default:
+      return NULL;
+   }
+}
+
+/**
+ * Return the vertex index source for a load/store per_vertex intrinsic.
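+ *
+ * A hedged usage sketch (hypothetical backend code, not part of this
+ * patch), pairing this helper with nir_get_io_offset_src() above:
+ *
+ *    nir_src *offset = nir_get_io_offset_src(intrin);
+ *    nir_src *vertex = nir_get_io_vertex_index_src(intrin);
+ *    if (vertex != NULL)
+ *       ... address the io slot as (vertex, offset) ...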
+ */
+nir_src *
+nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
+{
+   switch (instr->intrinsic) {
+   case nir_intrinsic_load_per_vertex_input:
+   case nir_intrinsic_load_per_vertex_output:
+      return &instr->src[0];
+   case nir_intrinsic_store_per_vertex_output:
+      return &instr->src[1];
+   default:
+      return NULL;
+   }
+}
diff --git a/src/compiler/nir/nir_lower_load_const_to_scalar.c b/src/compiler/nir/nir_lower_load_const_to_scalar.c
new file mode 100644
index 00000000000..1eeed13cbac
--- /dev/null
+++ b/src/compiler/nir/nir_lower_load_const_to_scalar.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "util/macros.h"
+#include "nir.h"
+#include "nir_builder.h"
+
+/** @file nir_lower_load_const_to_scalar.c
+ *
+ * Replaces vector nir_load_const instructions with a series of loads and a
+ * vec[234] to reconstruct the original vector (on the assumption that
+ * nir_lower_alu_to_scalar() will then be used to split it up).
+ *
+ * This gives NIR a chance to CSE more operations on a scalar shader, when the
+ * same value was used in different vector constant loads.
+ */
+
+static void
+lower_load_const_instr_scalar(nir_load_const_instr *lower)
+{
+   if (lower->def.num_components == 1)
+      return;
+
+   nir_builder b;
+   nir_builder_init(&b, nir_cf_node_get_function(&lower->instr.block->cf_node));
+   b.cursor = nir_before_instr(&lower->instr);
+
+   /* Emit the individual loads. */
+   nir_ssa_def *loads[4];
+   for (unsigned i = 0; i < lower->def.num_components; i++) {
+      nir_load_const_instr *load_comp = nir_load_const_instr_create(b.shader, 1);
+      load_comp->value.u[0] = lower->value.u[i];
+      nir_builder_instr_insert(&b, &load_comp->instr);
+      loads[i] = &load_comp->def;
+   }
+
+   /* Batch things back together into a vector. */
+   nir_ssa_def *vec = nir_vec(&b, loads, lower->def.num_components);
+
+   /* Replace the old load with a reference to our reconstructed vector.
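+    *
+    * For example (illustrative only), a vec4 load_const of
+    * (1.0, 2.0, 3.0, 4.0) becomes four scalar load_const instructions
+    * feeding a single vec4 ALU op.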
+    */
+   nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec));
+   nir_instr_remove(&lower->instr);
+}
+
+static bool
+lower_load_const_to_scalar_block(nir_block *block, void *data)
+{
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type == nir_instr_type_load_const)
+         lower_load_const_instr_scalar(nir_instr_as_load_const(instr));
+   }
+
+   return true;
+}
+
+static void
+nir_lower_load_const_to_scalar_impl(nir_function_impl *impl)
+{
+   nir_foreach_block(impl, lower_load_const_to_scalar_block, NULL);
+}
+
+void
+nir_lower_load_const_to_scalar(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl)
+         nir_lower_load_const_to_scalar_impl(function->impl);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c
new file mode 100644
index 00000000000..51b0fa733f2
--- /dev/null
+++ b/src/compiler/nir/nir_lower_locals_to_regs.c
@@ -0,0 +1,396 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+#include "nir_array.h"
+
+struct locals_to_regs_state {
+   nir_shader *shader;
+   nir_function_impl *impl;
+
+   /* A hash table mapping derefs to registers */
+   struct hash_table *regs_table;
+
+   /* A growing array of derefs that we have encountered.  There is exactly
+    * one element of this array per element in the hash table.  This is
+    * used to make adding register initialization code deterministic.
+    */
+   nir_array derefs_array;
+
+   bool progress;
+};
+
+/* The following two functions implement a hash and equality check for
+ * variable dereferences.  When the hash or equality function encounters an
+ * array, it ignores the offset and whether it is direct or indirect
+ * entirely.
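+ *
+ * For instance (an assumed example, not from this patch), v.a[0].f and
+ * v.a[i].f hash and compare as equal, so both derefs resolve to the same
+ * array register created below.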
+ */ +static uint32_t +hash_deref(const void *void_deref) +{ + uint32_t hash = _mesa_fnv32_1a_offset_bias; + + const nir_deref_var *deref_var = void_deref; + hash = _mesa_fnv32_1a_accumulate(hash, deref_var->var); + + for (const nir_deref *deref = deref_var->deref.child; + deref; deref = deref->child) { + if (deref->deref_type == nir_deref_type_struct) { + const nir_deref_struct *deref_struct = nir_deref_as_struct(deref); + hash = _mesa_fnv32_1a_accumulate(hash, deref_struct->index); + } + } + + return hash; +} + +static bool +derefs_equal(const void *void_a, const void *void_b) +{ + const nir_deref_var *a_var = void_a; + const nir_deref_var *b_var = void_b; + + if (a_var->var != b_var->var) + return false; + + for (const nir_deref *a = a_var->deref.child, *b = b_var->deref.child; + a != NULL; a = a->child, b = b->child) { + if (a->deref_type != b->deref_type) + return false; + + if (a->deref_type == nir_deref_type_struct) { + if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index) + return false; + } + /* Do nothing for arrays. They're all the same. */ + + assert((a->child == NULL) == (b->child == NULL)); + if((a->child == NULL) != (b->child == NULL)) + return false; + } + + return true; +} + +static nir_register * +get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state) +{ + uint32_t hash = hash_deref(deref); + + struct hash_entry *entry = + _mesa_hash_table_search_pre_hashed(state->regs_table, hash, deref); + if (entry) + return entry->data; + + unsigned array_size = 1; + nir_deref *tail = &deref->deref; + while (tail->child) { + if (tail->child->deref_type == nir_deref_type_array) + array_size *= glsl_get_length(tail->type); + tail = tail->child; + } + + assert(glsl_type_is_vector(tail->type) || glsl_type_is_scalar(tail->type)); + + nir_register *reg = nir_local_reg_create(state->impl); + reg->num_components = glsl_get_vector_elements(tail->type); + reg->num_array_elems = array_size > 1 ? array_size : 0; + + _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg); + nir_array_add(&state->derefs_array, nir_deref_var *, deref); + + return reg; +} + +static nir_src +get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, + struct locals_to_regs_state *state) +{ + nir_src src; + + src.is_ssa = false; + src.reg.reg = get_reg_for_deref(deref, state); + src.reg.base_offset = 0; + src.reg.indirect = NULL; + + /* It is possible for a user to create a shader that has an array with a + * single element and then proceed to access it indirectly. Indirectly + * accessing a non-array register is not allowed in NIR. In order to + * handle this case we just convert it to a direct reference. 
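+    *
+    * For example (illustrative only), "float arr[1]" read as arr[i] is
+    * rewritten as a plain read of the single-element register, dropping
+    * the indirect entirely.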
+ */ + if (src.reg.reg->num_array_elems == 0) + return src; + + nir_deref *tail = &deref->deref; + while (tail->child != NULL) { + const struct glsl_type *parent_type = tail->type; + tail = tail->child; + + if (tail->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *deref_array = nir_deref_as_array(tail); + + src.reg.base_offset *= glsl_get_length(parent_type); + src.reg.base_offset += deref_array->base_offset; + + if (src.reg.indirect) { + nir_load_const_instr *load_const = + nir_load_const_instr_create(state->shader, 1); + load_const->value.u[0] = glsl_get_length(parent_type); + nir_instr_insert_before(instr, &load_const->instr); + + nir_alu_instr *mul = nir_alu_instr_create(state->shader, nir_op_imul); + mul->src[0].src = *src.reg.indirect; + mul->src[1].src.is_ssa = true; + mul->src[1].src.ssa = &load_const->def; + mul->dest.write_mask = 1; + nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); + nir_instr_insert_before(instr, &mul->instr); + + src.reg.indirect->is_ssa = true; + src.reg.indirect->ssa = &mul->dest.dest.ssa; + } + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + if (src.reg.indirect == NULL) { + src.reg.indirect = ralloc(state->shader, nir_src); + nir_src_copy(src.reg.indirect, &deref_array->indirect, + state->shader); + } else { + nir_alu_instr *add = nir_alu_instr_create(state->shader, + nir_op_iadd); + add->src[0].src = *src.reg.indirect; + nir_src_copy(&add->src[1].src, &deref_array->indirect, add); + add->dest.write_mask = 1; + nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); + nir_instr_insert_before(instr, &add->instr); + + src.reg.indirect->is_ssa = true; + src.reg.indirect->ssa = &add->dest.dest.ssa; + } + } + } + + return src; +} + +static bool +lower_locals_to_regs_block(nir_block *block, void *void_state) +{ + struct locals_to_regs_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: { + if (intrin->variables[0]->var->data.mode != nir_var_local) + continue; + + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); + mov->src[0].src = get_deref_reg_src(intrin->variables[0], + &intrin->instr, state); + mov->dest.write_mask = (1 << intrin->num_components) - 1; + if (intrin->dest.is_ssa) { + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, + intrin->num_components, NULL); + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&mov->dest.dest.ssa)); + } else { + nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr); + } + nir_instr_insert_before(&intrin->instr, &mov->instr); + + nir_instr_remove(&intrin->instr); + state->progress = true; + break; + } + + case nir_intrinsic_store_var: { + if (intrin->variables[0]->var->data.mode != nir_var_local) + continue; + + nir_src reg_src = get_deref_reg_src(intrin->variables[0], + &intrin->instr, state); + + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); + nir_src_copy(&mov->src[0].src, &intrin->src[0], mov); + mov->dest.write_mask = intrin->const_index[0]; + mov->dest.dest.is_ssa = false; + mov->dest.dest.reg.reg = reg_src.reg.reg; + mov->dest.dest.reg.base_offset = reg_src.reg.base_offset; + mov->dest.dest.reg.indirect = reg_src.reg.indirect; + + nir_instr_insert_before(&intrin->instr, &mov->instr); + + nir_instr_remove(&intrin->instr); + state->progress = true; + break; + } + + case nir_intrinsic_copy_var: + 
unreachable("There should be no copies whatsoever at this point"); + break; + + default: + continue; + } + } + + return true; +} + +static nir_block * +compute_reg_usedef_lca(nir_register *reg) +{ + nir_block *lca = NULL; + + list_for_each_entry(nir_dest, def_dest, ®->defs, reg.def_link) + lca = nir_dominance_lca(lca, def_dest->reg.parent_instr->block); + + list_for_each_entry(nir_src, use_src, ®->uses, use_link) + lca = nir_dominance_lca(lca, use_src->parent_instr->block); + + list_for_each_entry(nir_src, use_src, ®->if_uses, use_link) { + nir_cf_node *prev_node = nir_cf_node_prev(&use_src->parent_if->cf_node); + assert(prev_node->type == nir_cf_node_block); + lca = nir_dominance_lca(lca, nir_cf_node_as_block(prev_node)); + } + + return lca; +} + +static void +insert_constant_initializer(nir_deref_var *deref_head, nir_deref *deref_tail, + nir_block *block, + struct locals_to_regs_state *state) +{ + if (deref_tail->child) { + switch (deref_tail->child->deref_type) { + case nir_deref_type_array: { + unsigned array_elems = glsl_get_length(deref_tail->type); + + nir_deref_array arr_deref; + arr_deref.deref = *deref_tail->child; + arr_deref.deref_array_type = nir_deref_array_type_direct; + + nir_deref *old_child = deref_tail->child; + deref_tail->child = &arr_deref.deref; + for (unsigned i = 0; i < array_elems; i++) { + arr_deref.base_offset = i; + insert_constant_initializer(deref_head, &arr_deref.deref, + block, state); + } + deref_tail->child = old_child; + return; + } + + case nir_deref_type_struct: + insert_constant_initializer(deref_head, deref_tail->child, + block, state); + return; + + default: + unreachable("Invalid deref child type"); + } + } + + assert(deref_tail->child == NULL); + + nir_load_const_instr *load = + nir_deref_get_const_initializer_load(state->shader, deref_head); + nir_instr_insert_before_block(block, &load->instr); + + nir_src reg_src = get_deref_reg_src(deref_head, &load->instr, state); + + nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); + mov->src[0].src = nir_src_for_ssa(&load->def); + mov->dest.write_mask = (1 << load->def.num_components) - 1; + mov->dest.dest.is_ssa = false; + mov->dest.dest.reg.reg = reg_src.reg.reg; + mov->dest.dest.reg.base_offset = reg_src.reg.base_offset; + mov->dest.dest.reg.indirect = reg_src.reg.indirect; + + nir_instr_insert_after(&load->instr, &mov->instr); + state->progress = true; +} + +static bool +nir_lower_locals_to_regs_impl(nir_function_impl *impl) +{ + struct locals_to_regs_state state; + + state.shader = impl->function->shader; + state.impl = impl; + state.progress = false; + state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal); + nir_array_init(&state.derefs_array, NULL); + + nir_metadata_require(impl, nir_metadata_dominance); + + nir_foreach_block(impl, lower_locals_to_regs_block, &state); + + nir_array_foreach(&state.derefs_array, nir_deref_var *, deref_ptr) { + nir_deref_var *deref = *deref_ptr; + struct hash_entry *deref_entry = + _mesa_hash_table_search(state.regs_table, deref); + assert(deref_entry && deref_entry->key == deref); + nir_register *reg = (nir_register *)deref_entry->data; + + if (deref->var->constant_initializer == NULL) + continue; + + nir_block *usedef_lca = compute_reg_usedef_lca(reg); + + insert_constant_initializer(deref, &deref->deref, usedef_lca, &state); + } + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + nir_array_fini(&state.derefs_array); + _mesa_hash_table_destroy(state.regs_table, NULL); + + return 
state.progress; +} + +bool +nir_lower_locals_to_regs(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = nir_lower_locals_to_regs_impl(function->impl) || progress; + } + + return progress; +} diff --git a/src/compiler/nir/nir_lower_outputs_to_temporaries.c b/src/compiler/nir/nir_lower_outputs_to_temporaries.c new file mode 100644 index 00000000000..71b06b81fcc --- /dev/null +++ b/src/compiler/nir/nir_lower_outputs_to_temporaries.c @@ -0,0 +1,133 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * Implements a pass that lowers output variables to a temporary plus an + * output variable with a single copy at each exit point of the shader. + * This way the output variable is only ever written. + * + * Because valid NIR requires that output variables are never read, this + * pass is more of a helper for NIR producers and must be run before the + * shader is ever validated. 
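+ *
+ * A hedged before/after sketch (GLSL-level view, not part of this patch):
+ *
+ *    out vec4 color;                // written throughout the shader
+ *
+ * becomes a global temporary color@out-temp that takes all of those
+ * writes, plus "color = color@out-temp;" emitted at each exit point.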
+ */
+
+#include "nir.h"
+
+struct lower_outputs_state {
+   nir_shader *shader;
+   struct exec_list old_outputs;
+};
+
+static void
+emit_output_copies(nir_cursor cursor, struct lower_outputs_state *state)
+{
+   assert(exec_list_length(&state->shader->outputs) ==
+          exec_list_length(&state->old_outputs));
+
+   foreach_two_lists(out_node, &state->shader->outputs,
+                     temp_node, &state->old_outputs) {
+      nir_variable *output = exec_node_data(nir_variable, out_node, node);
+      nir_variable *temp = exec_node_data(nir_variable, temp_node, node);
+
+      nir_intrinsic_instr *copy =
+         nir_intrinsic_instr_create(state->shader, nir_intrinsic_copy_var);
+      copy->variables[0] = nir_deref_var_create(copy, output);
+      copy->variables[1] = nir_deref_var_create(copy, temp);
+
+      nir_instr_insert(cursor, &copy->instr);
+   }
+}
+
+static bool
+emit_output_copies_block(nir_block *block, void *state)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (intrin->intrinsic == nir_intrinsic_emit_vertex)
+         emit_output_copies(nir_before_instr(&intrin->instr), state);
+   }
+
+   return true;
+}
+
+void
+nir_lower_outputs_to_temporaries(nir_shader *shader)
+{
+   struct lower_outputs_state state;
+
+   if (shader->stage == MESA_SHADER_TESS_CTRL)
+      return;
+
+   state.shader = shader;
+   exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
+
+   /* Walk over all of the outputs, turn each output into a temporary, and
+    * make a new variable for the actual output.
+    */
+   nir_foreach_variable(var, &state.old_outputs) {
+      nir_variable *output = ralloc(shader, nir_variable);
+      memcpy(output, var, sizeof *output);
+
+      /* The original is now the temporary */
+      nir_variable *temp = var;
+
+      /* Reparent the name to the new variable */
+      ralloc_steal(output, output->name);
+
+      /* Give the output a new name with @out-temp appended */
+      temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
+      temp->data.mode = nir_var_global;
+      temp->constant_initializer = NULL;
+
+      exec_list_push_tail(&shader->outputs, &output->node);
+   }
+
+   nir_foreach_function(shader, function) {
+      if (function->impl == NULL)
+         continue;
+
+      if (shader->stage == MESA_SHADER_GEOMETRY) {
+         /* For geometry shaders, we have to emit the output copies right
+          * before each EmitVertex call.
+          */
+         nir_foreach_block(function->impl, emit_output_copies_block, &state);
+      } else if (strcmp(function->name, "main") == 0) {
+         /* For all other shader types, we need to do the copies right before
+          * the jumps to the end block.
+ */ + struct set_entry *block_entry; + set_foreach(function->impl->end_block->predecessors, block_entry) { + struct nir_block *block = (void *)block_entry->key; + emit_output_copies(nir_after_block_before_jump(block), &state); + } + } + + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + exec_list_append(&shader->globals, &state.old_outputs); +} diff --git a/src/compiler/nir/nir_lower_phis_to_scalar.c b/src/compiler/nir/nir_lower_phis_to_scalar.c new file mode 100644 index 00000000000..dd2abcf72f8 --- /dev/null +++ b/src/compiler/nir/nir_lower_phis_to_scalar.c @@ -0,0 +1,293 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * Implements a pass that lowers vector phi nodes to scalar phi nodes when + * we don't think it will hurt anything. + */ + +struct lower_phis_to_scalar_state { + void *mem_ctx; + void *dead_ctx; + + /* Hash table marking which phi nodes are scalarizable. The key is + * pointers to phi instructions and the entry is either NULL for not + * scalarizable or non-null for scalarizable. + */ + struct hash_table *phi_table; +}; + +static bool +should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state); + +static bool +is_phi_src_scalarizable(nir_phi_src *src, + struct lower_phis_to_scalar_state *state) +{ + /* Don't know what to do with non-ssa sources */ + if (!src->src.is_ssa) + return false; + + nir_instr *src_instr = src->src.ssa->parent_instr; + switch (src_instr->type) { + case nir_instr_type_alu: { + nir_alu_instr *src_alu = nir_instr_as_alu(src_instr); + + /* ALU operations with output_size == 0 should be scalarized. We + * will also see a bunch of vecN operations from scalarizing ALU + * operations and, since they can easily be copy-propagated, they + * are ok too. 
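+       *
+       * For example (illustrative only), fadd and fmul have
+       * output_size == 0 and so qualify, as do the vec2/vec3/vec4 ops
+       * matched explicitly here.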
+ */ + return nir_op_infos[src_alu->op].output_size == 0 || + src_alu->op == nir_op_vec2 || + src_alu->op == nir_op_vec3 || + src_alu->op == nir_op_vec4; + } + + case nir_instr_type_phi: + /* A phi is scalarizable if we're going to lower it */ + return should_lower_phi(nir_instr_as_phi(src_instr), state); + + case nir_instr_type_load_const: + case nir_instr_type_ssa_undef: + /* These are trivially scalarizable */ + return true; + + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr); + + switch (src_intrin->intrinsic) { + case nir_intrinsic_load_var: + return src_intrin->variables[0]->var->data.mode == nir_var_shader_in || + src_intrin->variables[0]->var->data.mode == nir_var_uniform; + + case nir_intrinsic_interp_var_at_centroid: + case nir_intrinsic_interp_var_at_sample: + case nir_intrinsic_interp_var_at_offset: + case nir_intrinsic_load_uniform: + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: + case nir_intrinsic_load_input: + return true; + default: + break; + } + } + + default: + /* We can't scalarize this type of instruction */ + return false; + } +} + +/** + * Determines if the given phi node should be lowered. The only phi nodes + * we will scalarize at the moment are those where all of the sources are + * scalarizable. + * + * The reason for this comes down to coalescing. Since phi sources can't + * swizzle, swizzles on phis have to be resolved by inserting a mov right + * before the phi. The choice then becomes between movs to pick off + * components for a scalar phi or potentially movs to recombine components + * for a vector phi. The problem is that the movs generated to pick off + * the components are almost uncoalescable. We can't coalesce them in NIR + * because we need them to pick off components and we can't coalesce them + * in the backend because the source register is a vector and the + * destination is a scalar that may be used at other places in the program. + * On the other hand, if we have a bunch of scalars going into a vector + * phi, the situation is much better. In this case, if the SSA def is + * generated in the predecessor block to the corresponding phi source, the + * backend code will be an ALU op into a temporary and then a mov into the + * given vector component; this move can almost certainly be coalesced + * away. + */ +static bool +should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state) +{ + /* Already scalar */ + if (phi->dest.ssa.num_components == 1) + return false; + + struct hash_entry *entry = _mesa_hash_table_search(state->phi_table, phi); + if (entry) + return entry->data != NULL; + + /* Insert an entry and mark it as scalarizable for now. That way + * we don't recurse forever and a cycle in the dependence graph + * won't automatically make us fail to scalarize. 
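+    *
+    * For example (illustrative only), a loop induction variable produces
+    * a phi that feeds itself through the back edge; the provisional entry
+    * lets that self-reference look scalarizable rather than recursing.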
+ */ + entry = _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1); + + bool scalarizable = true; + + nir_foreach_phi_src(phi, src) { + scalarizable = is_phi_src_scalarizable(src, state); + if (!scalarizable) + break; + } + + /* The hash table entry for 'phi' may have changed while recursing the + * dependence graph, so we need to reset it */ + entry = _mesa_hash_table_search(state->phi_table, phi); + assert(entry); + + entry->data = (void *)(intptr_t)scalarizable; + + return scalarizable; +} + +static bool +lower_phis_to_scalar_block(nir_block *block, void *void_state) +{ + struct lower_phis_to_scalar_state *state = void_state; + + /* Find the last phi node in the block */ + nir_phi_instr *last_phi = NULL; + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + last_phi = nir_instr_as_phi(instr); + } + + /* We have to handle the phi nodes in their own pass due to the way + * we're modifying the linked list of instructions. + */ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + + if (!should_lower_phi(phi, state)) + continue; + + /* Create a vecN operation to combine the results. Most of these + * will be redundant, but copy propagation should clean them up for + * us. No need to add the complexity here. + */ + nir_op vec_op; + switch (phi->dest.ssa.num_components) { + case 2: vec_op = nir_op_vec2; break; + case 3: vec_op = nir_op_vec3; break; + case 4: vec_op = nir_op_vec4; break; + default: unreachable("Invalid number of components"); + } + + nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, + phi->dest.ssa.num_components, NULL); + vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; + + for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) { + nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx); + nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL); + + vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa); + + nir_foreach_phi_src(phi, src) { + /* We need to insert a mov to grab the i'th component of src */ + nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, + nir_op_imov); + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL); + mov->dest.write_mask = 1; + nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx); + mov->src[0].swizzle[0] = i; + + /* Insert at the end of the predecessor but before the jump */ + nir_instr *pred_last_instr = nir_block_last_instr(src->pred); + if (pred_last_instr && pred_last_instr->type == nir_instr_type_jump) + nir_instr_insert_before(pred_last_instr, &mov->instr); + else + nir_instr_insert_after_block(src->pred, &mov->instr); + + nir_phi_src *new_src = ralloc(new_phi, nir_phi_src); + new_src->pred = src->pred; + new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa); + + exec_list_push_tail(&new_phi->srcs, &new_src->node); + } + + nir_instr_insert_before(&phi->instr, &new_phi->instr); + } + + nir_instr_insert_after(&last_phi->instr, &vec->instr); + + nir_ssa_def_rewrite_uses(&phi->dest.ssa, + nir_src_for_ssa(&vec->dest.dest.ssa)); + + ralloc_steal(state->dead_ctx, phi); + nir_instr_remove(&phi->instr); + + /* We're using the safe iterator and inserting all the newly + * scalarized phi nodes before their non-scalarized version so that's + * ok. However, we are also inserting vec operations after all of + * the last phi node so once we get here, we can't trust even the + * safe iterator to stop properly. 
We have to break manually.
+       */
+      if (instr == &last_phi->instr)
+         break;
+   }
+
+   return true;
+}
+
+static void
+lower_phis_to_scalar_impl(nir_function_impl *impl)
+{
+   struct lower_phis_to_scalar_state state;
+
+   state.mem_ctx = ralloc_parent(impl);
+   state.dead_ctx = ralloc_context(NULL);
+   state.phi_table = _mesa_hash_table_create(state.dead_ctx, _mesa_hash_pointer,
+                                             _mesa_key_pointer_equal);
+
+   nir_foreach_block(impl, lower_phis_to_scalar_block, &state);
+
+   nir_metadata_preserve(impl, nir_metadata_block_index |
+                               nir_metadata_dominance);
+
+   ralloc_free(state.dead_ctx);
+}
+
+/** A pass that lowers vector phi nodes to scalar
+ *
+ * This pass loops through the blocks and looks for vector phi nodes it
+ * can lower to scalar phi nodes.  Not all phi nodes are lowered.  For
+ * instance, if one of the sources is a non-scalarizable vector, then we
+ * don't bother lowering because that would generate hard-to-coalesce movs.
+ */
+void
+nir_lower_phis_to_scalar(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl)
+         lower_phis_to_scalar_impl(function->impl);
+   }
+}
diff --git a/src/compiler/nir/nir_lower_samplers.c b/src/compiler/nir/nir_lower_samplers.c
new file mode 100644
index 00000000000..9c912129f09
--- /dev/null
+++ b/src/compiler/nir/nir_lower_samplers.c
@@ -0,0 +1,187 @@
+/*
+ * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
+ * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+#include "program/hash_table.h"
+#include "glsl/ir_uniform.h"
+
+#include "main/compiler.h"
+#include "main/mtypes.h"
+#include "program/prog_parameter.h"
+#include "program/program.h"
+
+/* Calculate the sampler index based on array indices and also
+ * calculate the base uniform location for struct members.
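+ *
+ * For example (a hypothetical uniform, not from this patch), with
+ * "uniform sampler2D s[3][2]", the deref s[1][i] contributes
+ * 1 * 2 = 2 to sampler_index plus an indirect term of i * 1, accumulated
+ * by the recursion below.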
+ */ +static void +calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr, + unsigned *array_elements, nir_ssa_def **indirect, + nir_builder *b, unsigned *location) +{ + if (tail->child == NULL) + return; + + switch (tail->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(tail->child); + + assert(deref_array->deref_array_type != nir_deref_array_type_wildcard); + + calc_sampler_offsets(tail->child, instr, array_elements, + indirect, b, location); + instr->sampler_index += deref_array->base_offset * *array_elements; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_ssa_def *mul = + nir_imul(b, nir_imm_int(b, *array_elements), + nir_ssa_for_src(b, deref_array->indirect, 1)); + + nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, + NIR_SRC_INIT); + + if (*indirect) { + *indirect = nir_iadd(b, *indirect, mul); + } else { + *indirect = mul; + } + } + + *array_elements *= glsl_get_length(tail->type); + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child); + *location += glsl_get_record_location_offset(tail->type, deref_struct->index); + calc_sampler_offsets(tail->child, instr, array_elements, + indirect, b, location); + break; + } + + default: + unreachable("Invalid deref type"); + break; + } +} + +static void +lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program, + gl_shader_stage stage, nir_builder *builder) +{ + if (instr->sampler == NULL) + return; + + instr->sampler_index = 0; + unsigned location = instr->sampler->var->data.location; + unsigned array_elements = 1; + nir_ssa_def *indirect = NULL; + + builder->cursor = nir_before_instr(&instr->instr); + calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements, + &indirect, builder, &location); + + if (indirect) { + /* First, we have to resize the array of texture sources */ + nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, + instr->num_srcs + 1); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + new_srcs[i].src_type = instr->src[i].src_type; + nir_instr_move_src(&instr->instr, &new_srcs[i].src, + &instr->src[i].src); + } + + ralloc_free(instr->src); + instr->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. 
+ */ + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; + instr->num_srcs++; + nir_instr_rewrite_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + nir_src_for_ssa(indirect)); + + instr->sampler_array_size = array_elements; + } + + if (location > shader_program->NumUniformStorage - 1 || + !shader_program->UniformStorage[location].opaque[stage].active) { + assert(!"cannot return a sampler"); + return; + } + + instr->sampler_index += + shader_program->UniformStorage[location].opaque[stage].index; + + instr->sampler = NULL; +} + +typedef struct { + nir_builder builder; + const struct gl_shader_program *shader_program; + gl_shader_stage stage; +} lower_state; + +static bool +lower_block_cb(nir_block *block, void *_state) +{ + lower_state *state = (lower_state *) _state; + + nir_foreach_instr(block, instr) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex_instr = nir_instr_as_tex(instr); + lower_sampler(tex_instr, state->shader_program, state->stage, + &state->builder); + } + } + + return true; +} + +static void +lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program, + gl_shader_stage stage) +{ + lower_state state; + + nir_builder_init(&state.builder, impl); + state.shader_program = shader_program; + state.stage = stage; + + nir_foreach_block(impl, lower_block_cb, &state); +} + +void +nir_lower_samplers(nir_shader *shader, + const struct gl_shader_program *shader_program) +{ + nir_foreach_function(shader, function) { + if (function->impl) + lower_impl(function->impl, shader_program, shader->stage); + } +} diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c new file mode 100644 index 00000000000..2bd787d3574 --- /dev/null +++ b/src/compiler/nir/nir_lower_system_values.c @@ -0,0 +1,98 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" +#include "nir_builder.h" + +struct lower_system_values_state { + nir_builder builder; + bool progress; +}; + +static bool +convert_block(nir_block *block, void *void_state) +{ + struct lower_system_values_state *state = void_state; + + nir_builder *b = &state->builder; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *load_var = nir_instr_as_intrinsic(instr); + + if (load_var->intrinsic != nir_intrinsic_load_var) + continue; + + nir_variable *var = load_var->variables[0]->var; + if (var->data.mode != nir_var_system_value) + continue; + + b->cursor = nir_after_instr(&load_var->instr); + + nir_intrinsic_op sysval_op = + nir_intrinsic_from_system_value(var->data.location); + nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0); + + nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval)); + nir_instr_remove(&load_var->instr); + + state->progress = true; + } + + return true; +} + +static bool +convert_impl(nir_function_impl *impl) +{ + struct lower_system_values_state state; + + state.progress = false; + nir_builder_init(&state.builder, impl); + + nir_foreach_block(impl, convert_block, &state); + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + return state.progress; +} + +bool +nir_lower_system_values(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = convert_impl(function->impl) || progress; + } + + exec_list_make_empty(&shader->system_values); + + return progress; +} diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c new file mode 100644 index 00000000000..ae24fb2e16a --- /dev/null +++ b/src/compiler/nir/nir_lower_tex.c @@ -0,0 +1,355 @@ +/* + * Copyright © 2015 Broadcom + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +/* + * This lowering pass supports (as configured via nir_lower_tex_options) + * various texture related conversions: + * + texture projector lowering: converts the coordinate division for + * texture projection to be done in ALU instructions instead of + * asking the texture operation to do so. 
+ *   + lowering RECT: converts the un-normalized RECT texture coordinates
+ *     to normalized coordinates with txs plus ALU instructions
+ *   + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
+ *     inserts instructions to clamp specified coordinates to [0.0, 1.0].
+ *     Note that this automatically triggers texture projector lowering if
+ *     needed, since clamping must happen after projector lowering.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+typedef struct {
+   nir_builder b;
+   const nir_lower_tex_options *options;
+   bool progress;
+} lower_tex_state;
+
+static void
+project_src(nir_builder *b, nir_tex_instr *tex)
+{
+   /* Find the projector in the srcs list, if present. */
+   unsigned proj_index;
+   for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
+      if (tex->src[proj_index].src_type == nir_tex_src_projector)
+         break;
+   }
+   if (proj_index == tex->num_srcs)
+      return;
+
+   b->cursor = nir_before_instr(&tex->instr);
+
+   nir_ssa_def *inv_proj =
+      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
+
+   /* Walk through the sources projecting the arguments. */
+   for (unsigned i = 0; i < tex->num_srcs; i++) {
+      switch (tex->src[i].src_type) {
+      case nir_tex_src_coord:
+      case nir_tex_src_comparitor:
+         break;
+      default:
+         continue;
+      }
+      nir_ssa_def *unprojected =
+         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
+      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
+
+      /* Array indices don't get projected, so make a new vector with the
+       * coordinate's array index untouched.
+       */
+      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
+         switch (tex->coord_components) {
+         case 4:
+            projected = nir_vec4(b,
+                                 nir_channel(b, projected, 0),
+                                 nir_channel(b, projected, 1),
+                                 nir_channel(b, projected, 2),
+                                 nir_channel(b, unprojected, 3));
+            break;
+         case 3:
+            projected = nir_vec3(b,
+                                 nir_channel(b, projected, 0),
+                                 nir_channel(b, projected, 1),
+                                 nir_channel(b, unprojected, 2));
+            break;
+         case 2:
+            projected = nir_vec2(b,
+                                 nir_channel(b, projected, 0),
+                                 nir_channel(b, unprojected, 1));
+            break;
+         default:
+            unreachable("bad texture coord count for array");
+            break;
+         }
+      }
+
+      nir_instr_rewrite_src(&tex->instr,
+                            &tex->src[i].src,
+                            nir_src_for_ssa(projected));
+   }
+
+   /* Now move the later tex sources down the array so that the projector
+ */ + nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, + NIR_SRC_INIT); + for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) { + tex->src[i-1].src_type = tex->src[i].src_type; + nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src); + } + tex->num_srcs--; +} + +static nir_ssa_def * +get_texture_size(nir_builder *b, nir_tex_instr *tex) +{ + b->cursor = nir_before_instr(&tex->instr); + + /* RECT textures should not be array: */ + assert(!tex->is_array); + + nir_tex_instr *txs; + + txs = nir_tex_instr_create(b->shader, 1); + txs->op = nir_texop_txs; + txs->sampler_dim = GLSL_SAMPLER_DIM_RECT; + txs->sampler_index = tex->sampler_index; + txs->dest_type = nir_type_int; + + /* only single src, the lod: */ + txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0)); + txs->src[0].src_type = nir_tex_src_lod; + + nir_ssa_dest_init(&txs->instr, &txs->dest, 2, NULL); + nir_builder_instr_insert(b, &txs->instr); + + return nir_i2f(b, &txs->dest.ssa); +} + +static void +lower_rect(nir_builder *b, nir_tex_instr *tex) +{ + nir_ssa_def *txs = get_texture_size(b, tex); + nir_ssa_def *scale = nir_frcp(b, txs); + + /* Walk through the sources normalizing the requested arguments. */ + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type != nir_tex_src_coord) + continue; + + nir_ssa_def *coords = + nir_ssa_for_src(b, tex->src[i].src, tex->coord_components); + nir_instr_rewrite_src(&tex->instr, + &tex->src[i].src, + nir_src_for_ssa(nir_fmul(b, coords, scale))); + } + + tex->sampler_dim = GLSL_SAMPLER_DIM_2D; +} + +static void +saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) +{ + b->cursor = nir_before_instr(&tex->instr); + + /* Walk through the sources saturating the requested arguments. */ + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type != nir_tex_src_coord) + continue; + + nir_ssa_def *src = + nir_ssa_for_src(b, tex->src[i].src, tex->coord_components); + + /* split src into components: */ + nir_ssa_def *comp[4]; + + for (unsigned j = 0; j < tex->coord_components; j++) + comp[j] = nir_channel(b, src, j); + + /* clamp requested components, array index does not get clamped: */ + unsigned ncomp = tex->coord_components; + if (tex->is_array) + ncomp--; + + for (unsigned j = 0; j < ncomp; j++) { + if ((1 << j) & sat_mask) { + if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { + /* non-normalized texture coords, so clamp to texture + * size rather than [0.0, 1.0] + */ + nir_ssa_def *txs = get_texture_size(b, tex); + comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0)); + comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j)); + } else { + comp[j] = nir_fsat(b, comp[j]); + } + } + } + + /* and move the result back into a single vecN: */ + src = nir_vec(b, comp, tex->coord_components); + + nir_instr_rewrite_src(&tex->instr, + &tex->src[i].src, + nir_src_for_ssa(src)); + } +} + +static nir_ssa_def * +get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) +{ + nir_const_value v; + + memset(&v, 0, sizeof(v)); + + if (swizzle_val == 4) { + v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0; + } else { + assert(swizzle_val == 5); + if (type == nir_type_float) + v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0; + else + v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1; + } + + return nir_build_imm(b, 4, v); +} + +static void +swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) +{ + assert(tex->dest.is_ssa); + + b->cursor = nir_after_instr(&tex->instr); + + nir_ssa_def *swizzled; + if (tex->op == nir_texop_tg4) { + if 
(swizzle[tex->component] < 4) { + /* This one's easy */ + tex->component = swizzle[tex->component]; + return; + } else { + swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]); + } + } else { + assert(nir_tex_instr_dest_size(tex) == 4); + if (swizzle[0] < 4 && swizzle[1] < 4 && + swizzle[2] < 4 && swizzle[3] < 4) { + unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] }; + /* We have no 0's or 1's, just emit a swizzling MOV */ + swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false); + } else { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < 4; i++) { + if (swizzle[i] < 4) { + srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]); + } else { + srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]); + } + } + swizzled = nir_vec(b, srcs, 4); + } + } + + nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled), + swizzled->parent_instr); +} + +static bool +nir_lower_tex_block(nir_block *block, void *void_state) +{ + lower_tex_state *state = void_state; + nir_builder *b = &state->b; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + bool lower_txp = !!(state->options->lower_txp & (1 << tex->sampler_dim)); + + /* mask of src coords to saturate (clamp): */ + unsigned sat_mask = 0; + + if ((1 << tex->sampler_index) & state->options->saturate_r) + sat_mask |= (1 << 2); /* .z */ + if ((1 << tex->sampler_index) & state->options->saturate_t) + sat_mask |= (1 << 1); /* .y */ + if ((1 << tex->sampler_index) & state->options->saturate_s) + sat_mask |= (1 << 0); /* .x */ + + /* If we are clamping any coords, we must lower projector first + * as clamping happens *after* projection: + */ + if (lower_txp || sat_mask) { + project_src(b, tex); + state->progress = true; + } + + if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && + state->options->lower_rect) { + lower_rect(b, tex); + state->progress = true; + } + + if (sat_mask) { + saturate_src(b, tex, sat_mask); + state->progress = true; + } + + if (((1 << tex->sampler_index) & state->options->swizzle_result) && + !nir_tex_instr_is_query(tex) && + !(tex->is_shadow && tex->is_new_style_shadow)) { + swizzle_result(b, tex, state->options->swizzles[tex->sampler_index]); + state->progress = true; + } + } + + return true; +} + +static void +nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state) +{ + nir_builder_init(&state->b, impl); + + nir_foreach_block(impl, nir_lower_tex_block, state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +bool +nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options) +{ + lower_tex_state state; + state.options = options; + state.progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + nir_lower_tex_impl(function->impl, &state); + } + + return state.progress; +} diff --git a/src/compiler/nir/nir_lower_to_source_mods.c b/src/compiler/nir/nir_lower_to_source_mods.c new file mode 100644 index 00000000000..6c4e1f0d3f3 --- /dev/null +++ b/src/compiler/nir/nir_lower_to_source_mods.c @@ -0,0 +1,196 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, 
and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" + +/* + * This pass lowers the neg, abs, and sat operations to source modifiers on + * ALU operations to make things nicer for the backend. It's just much + * easier to not have them when we're doing optimizations. + */ + +static bool +nir_lower_to_source_mods_block(nir_block *block, void *state) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *alu = nir_instr_as_alu(instr); + + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + if (!alu->src[i].src.is_ssa) + continue; + + if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_alu) + continue; + + nir_alu_instr *parent = nir_instr_as_alu(alu->src[i].src.ssa->parent_instr); + + if (parent->dest.saturate) + continue; + + switch (nir_op_infos[alu->op].input_types[i]) { + case nir_type_float: + if (parent->op != nir_op_fmov) + continue; + break; + case nir_type_int: + if (parent->op != nir_op_imov) + continue; + break; + default: + continue; + } + + /* We can only do a rewrite if the source we are copying is SSA. + * Otherwise, moving the read might invalidly reorder reads/writes + * on a register. + */ + if (!parent->src[0].src.is_ssa) + continue; + + nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src); + if (alu->src[i].abs) { + /* abs trumps both neg and abs, do nothing */ + } else { + alu->src[i].negate = (alu->src[i].negate != parent->src[0].negate); + alu->src[i].abs |= parent->src[0].abs; + } + + for (int j = 0; j < 4; ++j) { + if (!nir_alu_instr_channel_used(alu, i, j)) + continue; + alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]]; + } + + if (list_empty(&parent->dest.dest.ssa.uses) && + list_empty(&parent->dest.dest.ssa.if_uses)) + nir_instr_remove(&parent->instr); + } + + switch (alu->op) { + case nir_op_fsat: + alu->op = nir_op_fmov; + alu->dest.saturate = true; + break; + case nir_op_ineg: + alu->op = nir_op_imov; + alu->src[0].negate = !alu->src[0].negate; + break; + case nir_op_fneg: + alu->op = nir_op_fmov; + alu->src[0].negate = !alu->src[0].negate; + break; + case nir_op_iabs: + alu->op = nir_op_imov; + alu->src[0].abs = true; + alu->src[0].negate = false; + break; + case nir_op_fabs: + alu->op = nir_op_fmov; + alu->src[0].abs = true; + alu->src[0].negate = false; + break; + default: + break; + } + + /* We've covered sources. Now we're going to try and saturate the + * destination if we can. 
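+       * As an illustrative sketch: if every use of
+       *    ssa_2 = fadd ssa_0, ssa_1
+       * is an fsat (or a saturating fmov), we can mark the fadd itself as
+       * saturating and demote each of those uses to a plain fmov, which a
+       * later round of copy propagation can clean up.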
+ */ + + if (!alu->dest.dest.is_ssa) + continue; + + /* We can only saturate float destinations */ + if (nir_op_infos[alu->op].output_type != nir_type_float) + continue; + + if (!list_empty(&alu->dest.dest.ssa.if_uses)) + continue; + + bool all_children_are_sat = true; + nir_foreach_use(&alu->dest.dest.ssa, child_src) { + assert(child_src->is_ssa); + nir_instr *child = child_src->parent_instr; + if (child->type != nir_instr_type_alu) { + all_children_are_sat = false; + continue; + } + + nir_alu_instr *child_alu = nir_instr_as_alu(child); + if (child_alu->src[0].negate || child_alu->src[0].abs) { + all_children_are_sat = false; + continue; + } + + if (child_alu->op != nir_op_fsat && + !(child_alu->op == nir_op_fmov && child_alu->dest.saturate)) { + all_children_are_sat = false; + continue; + } + } + + if (!all_children_are_sat) + continue; + + alu->dest.saturate = true; + + nir_foreach_use(&alu->dest.dest.ssa, child_src) { + assert(child_src->is_ssa); + nir_instr *child = child_src->parent_instr; + assert(child->type == nir_instr_type_alu); + nir_alu_instr *child_alu = nir_instr_as_alu(child); + + child_alu->op = nir_op_fmov; + child_alu->dest.saturate = false; + /* We could propagate the dest of our instruction to the + * destinations of the uses here. However, one quick round of + * copy propagation will clean that all up and then we don't have + * the complexity. + */ + } + } + + return true; +} + +static void +nir_lower_to_source_mods_impl(nir_function_impl *impl) +{ + nir_foreach_block(impl, nir_lower_to_source_mods_block, NULL); +} + +void +nir_lower_to_source_mods(nir_shader *shader) +{ + nir_foreach_function(shader, function) { + if (function->impl) + nir_lower_to_source_mods_impl(function->impl); + } +} diff --git a/src/compiler/nir/nir_lower_two_sided_color.c b/src/compiler/nir/nir_lower_two_sided_color.c new file mode 100644 index 00000000000..1294cb89004 --- /dev/null +++ b/src/compiler/nir/nir_lower_two_sided_color.c @@ -0,0 +1,212 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ * Authors:
+ *    Rob Clark
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+#define MAX_COLORS 2  /* VARYING_SLOT_COL0/COL1 */
+
+typedef struct {
+   nir_builder b;
+   nir_shader *shader;
+   nir_variable *face;
+   struct {
+      nir_variable *front;  /* COLn */
+      nir_variable *back;   /* BFCn */
+   } colors[MAX_COLORS];
+   int colors_count;
+} lower_2side_state;
+
+
+/* Lowering pass for fragment shaders to emulate two-sided color.  For
+ * each COLOR input, a corresponding BCOLOR input is created, and a bcsel
+ * instruction is used to select front or back color based on FACE.
+ */
+
+static nir_variable *
+create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot)
+{
+   nir_variable *var = rzalloc(shader, nir_variable);
+
+   var->data.driver_location = drvloc;
+   var->type = glsl_vec4_type();
+   var->data.mode = nir_var_shader_in;
+   var->name = ralloc_asprintf(var, "in_%d", drvloc);
+   var->data.index = 0;
+   var->data.location = slot;
+
+   exec_list_push_tail(&shader->inputs, &var->node);
+
+   shader->num_inputs++;  /* TODO use type_size() */
+
+   return var;
+}
+
+static nir_ssa_def *
+load_input(nir_builder *b, nir_variable *in)
+{
+   nir_intrinsic_instr *load;
+
+   load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
+   load->num_components = 4;
+   load->const_index[0] = in->data.driver_location;
+   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
+   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+   nir_builder_instr_insert(b, &load->instr);
+
+   return &load->dest.ssa;
+}
+
+static int
+setup_inputs(lower_2side_state *state)
+{
+   int maxloc = -1;
+
+   /* find color/face inputs: */
+   nir_foreach_variable(var, &state->shader->inputs) {
+      int loc = var->data.driver_location;
+
+      /* keep track of last used driver-location.. we'll be
+       * appending BCLr/FACE after last existing input:
+       */
+      maxloc = MAX2(maxloc, loc);
+
+      switch (var->data.location) {
+      case VARYING_SLOT_COL0:
+      case VARYING_SLOT_COL1:
+         assert(state->colors_count < ARRAY_SIZE(state->colors));
+         state->colors[state->colors_count].front = var;
+         state->colors_count++;
+         break;
+      case VARYING_SLOT_FACE:
+         state->face = var;
+         break;
+      }
+   }
+
+   /* if we don't have any color inputs, nothing to do: */
+   if (state->colors_count == 0)
+      return -1;
+
+   /* if we don't already have one, insert a FACE input: */
+   if (!state->face) {
+      state->face = create_input(state->shader, ++maxloc, VARYING_SLOT_FACE);
+      state->face->data.interpolation = INTERP_QUALIFIER_FLAT;
+   }
+
+   /* add required back-face color inputs: */
+   for (int i = 0; i < state->colors_count; i++) {
+      gl_varying_slot slot;
+
+      if (state->colors[i].front->data.location == VARYING_SLOT_COL0)
+         slot = VARYING_SLOT_BFC0;
+      else
+         slot = VARYING_SLOT_BFC1;
+
+      state->colors[i].back = create_input(state->shader, ++maxloc, slot);
+   }
+
+   return 0;
+}
+
+static bool
+nir_lower_two_sided_color_block(nir_block *block, void *void_state)
+{
+   lower_2side_state *state = void_state;
+   nir_builder *b = &state->b;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+
+      if (intr->intrinsic != nir_intrinsic_load_input)
+         continue;
+
+      int idx;
+      for (idx = 0; idx < state->colors_count; idx++) {
+         unsigned drvloc =
+            state->colors[idx].front->data.driver_location;
+         if (intr->const_index[0] == drvloc) {
+            assert(nir_src_as_const_value(intr->src[0]));
+            break;
+         }
+      }
+
+      if (idx == state->colors_count)
+         continue;
+
+      /* replace load_input(COLn) with
+       *
bcsel(load_input(FACE), load_input(COLn), load_input(BFCn)) + */ + b->cursor = nir_before_instr(&intr->instr); + nir_ssa_def *face = nir_channel(b, load_input(b, state->face), 0); + nir_ssa_def *front = load_input(b, state->colors[idx].front); + nir_ssa_def *back = load_input(b, state->colors[idx].back); + nir_ssa_def *cond = nir_flt(b, face, nir_imm_float(b, 0.0)); + nir_ssa_def *color = nir_bcsel(b, cond, back, front); + + assert(intr->dest.is_ssa); + nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(color)); + } + + return true; +} + +static void +nir_lower_two_sided_color_impl(nir_function_impl *impl, + lower_2side_state *state) +{ + nir_builder *b = &state->b; + + nir_builder_init(b, impl); + + nir_foreach_block(impl, nir_lower_two_sided_color_block, state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_lower_two_sided_color(nir_shader *shader) +{ + lower_2side_state state = { + .shader = shader, + }; + + if (shader->stage != MESA_SHADER_FRAGMENT) + return; + + if (setup_inputs(&state) != 0) + return; + + nir_foreach_function(shader, function) { + if (function->impl) + nir_lower_two_sided_color_impl(function->impl, &state); + } + +} diff --git a/src/compiler/nir/nir_lower_var_copies.c b/src/compiler/nir/nir_lower_var_copies.c new file mode 100644 index 00000000000..8cb3edd0a84 --- /dev/null +++ b/src/compiler/nir/nir_lower_var_copies.c @@ -0,0 +1,190 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" +#include "compiler/nir_types.h" + +/* + * Lowers all copy intrinsics to sequences of load/store intrinsics. + */ + +/* Walks down the deref chain and returns the next deref in the chain whose + * child is a wildcard. In other words, given the chain a[1].foo[*].bar, + * this function will return the deref to foo. Calling it a second time + * with the [*].bar, it will return NULL. 
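+ * (Illustrative note: emit_copy_load_store() below relies on this to peel
+ * off one matched pair of wildcards per level of recursion.)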
+ */
+static nir_deref *
+deref_next_wildcard_parent(nir_deref *deref)
+{
+   for (nir_deref *tail = deref; tail->child; tail = tail->child) {
+      if (tail->child->deref_type != nir_deref_type_array)
+         continue;
+
+      nir_deref_array *arr = nir_deref_as_array(tail->child);
+
+      if (arr->deref_array_type == nir_deref_array_type_wildcard)
+         return tail;
+   }
+
+   return NULL;
+}
+
+/* This function recursively walks the given deref chain and replaces the
+ * given copy instruction with an equivalent sequence of load/store
+ * operations.
+ *
+ * @copy_instr    The copy instruction to replace; new instructions will be
+ *                inserted before this one
+ *
+ * @dest_head     The head of the destination variable deref chain
+ *
+ * @src_head      The head of the source variable deref chain
+ *
+ * @dest_tail     The current tail of the destination variable deref chain;
+ *                this is used for recursion and external callers of this
+ *                function should call it with tail == head
+ *
+ * @src_tail      The current tail of the source variable deref chain;
+ *                this is used for recursion and external callers of this
+ *                function should call it with tail == head
+ *
+ * @mem_ctx       The ralloc context used to allocate the new instructions
+ */
+static void
+emit_copy_load_store(nir_intrinsic_instr *copy_instr,
+                     nir_deref_var *dest_head, nir_deref_var *src_head,
+                     nir_deref *dest_tail, nir_deref *src_tail, void *mem_ctx)
+{
+   /* Find the next pair of wildcards */
+   nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail);
+   nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail);
+
+   if (src_arr_parent || dest_arr_parent) {
+      /* Wildcards had better come in matched pairs */
+      assert(src_arr_parent && dest_arr_parent);
+
+      nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child);
+      nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child);
+
+      unsigned length = glsl_get_length(src_arr_parent->type);
+      /* The wildcards should represent the same number of elements */
+      assert(length == glsl_get_length(dest_arr_parent->type));
+      assert(length > 0);
+
+      /* Walk over all of the elements that this wildcard refers to and
+       * call emit_copy_load_store on each one of them */
+      src_arr->deref_array_type = nir_deref_array_type_direct;
+      dest_arr->deref_array_type = nir_deref_array_type_direct;
+      for (unsigned i = 0; i < length; i++) {
+         src_arr->base_offset = i;
+         dest_arr->base_offset = i;
+         emit_copy_load_store(copy_instr, dest_head, src_head,
+                              &dest_arr->deref, &src_arr->deref, mem_ctx);
+      }
+      src_arr->deref_array_type = nir_deref_array_type_wildcard;
+      dest_arr->deref_array_type = nir_deref_array_type_wildcard;
+   } else {
+      /* In this case, we have no wildcards anymore, so all we have to do
+       * is just emit the load and store operations.
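+       * Sketching the result: the copy becomes a load_var of the whole
+       * source vector followed by a store_var to the destination with a
+       * full writemask, (1 << num_components) - 1.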
*/ + src_tail = nir_deref_tail(src_tail); + dest_tail = nir_deref_tail(dest_tail); + + assert(src_tail->type == dest_tail->type); + + unsigned num_components = glsl_get_vector_elements(src_tail->type); + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var); + load->num_components = num_components; + load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref)); + nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); + + nir_instr_insert_before(©_instr->instr, &load->instr); + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var); + store->num_components = num_components; + store->const_index[0] = (1 << num_components) - 1; + store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref)); + + store->src[0].is_ssa = true; + store->src[0].ssa = &load->dest.ssa; + + nir_instr_insert_before(©_instr->instr, &store->instr); + } +} + +/* Lowers a copy instruction to a sequence of load/store instructions + * + * The new instructions are placed before the copy instruction in the IR. + */ +void +nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx) +{ + assert(copy->intrinsic == nir_intrinsic_copy_var); + emit_copy_load_store(copy, copy->variables[0], copy->variables[1], + ©->variables[0]->deref, + ©->variables[1]->deref, mem_ctx); +} + +static bool +lower_var_copies_block(nir_block *block, void *mem_ctx) +{ + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr); + if (copy->intrinsic != nir_intrinsic_copy_var) + continue; + + nir_lower_var_copy_instr(copy, mem_ctx); + + nir_instr_remove(©->instr); + ralloc_free(copy); + } + + return true; +} + +static void +lower_var_copies_impl(nir_function_impl *impl) +{ + nir_foreach_block(impl, lower_var_copies_block, ralloc_parent(impl)); +} + +/* Lowers every copy_var instruction in the program to a sequence of + * load/store instructions. + */ +void +nir_lower_var_copies(nir_shader *shader) +{ + nir_foreach_function(shader, function) { + if (function->impl) + lower_var_copies_impl(function->impl); + } +} diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c new file mode 100644 index 00000000000..75d31ff60af --- /dev/null +++ b/src/compiler/nir/nir_lower_vars_to_ssa.c @@ -0,0 +1,973 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir.h" +#include "nir_builder.h" +#include "nir_vla.h" + + +struct deref_node { + struct deref_node *parent; + const struct glsl_type *type; + + bool lower_to_ssa; + + /* Only valid for things that end up in the direct list. + * Note that multiple nir_deref_vars may correspond to this node, but they + * will all be equivalent, so any is as good as the other. + */ + nir_deref_var *deref; + struct exec_node direct_derefs_link; + + struct set *loads; + struct set *stores; + struct set *copies; + + nir_ssa_def **def_stack; + nir_ssa_def **def_stack_tail; + + struct deref_node *wildcard; + struct deref_node *indirect; + struct deref_node *children[0]; +}; + +struct lower_variables_state { + nir_shader *shader; + void *dead_ctx; + nir_function_impl *impl; + + /* A hash table mapping variables to deref_node data */ + struct hash_table *deref_var_nodes; + + /* A hash table mapping fully-qualified direct dereferences, i.e. + * dereferences with no indirect or wildcard array dereferences, to + * deref_node data. + * + * At the moment, we only lower loads, stores, and copies that can be + * trivially lowered to loads and stores, i.e. copies with no indirects + * and no wildcards. If a part of a variable that is being loaded from + * and/or stored into is also involved in a copy operation with + * wildcards, then we lower that copy operation to loads and stores, but + * otherwise we leave copies with wildcards alone. Since the only derefs + * used in these loads, stores, and trivial copies are ones with no + * wildcards and no indirects, these are precisely the derefs that we + * can actually consider lowering. + */ + struct exec_list direct_deref_nodes; + + /* Controls whether get_deref_node will add variables to the + * direct_deref_nodes table. This is turned on when we are initially + * scanning for load/store instructions. It is then turned off so we + * don't accidentally change the direct_deref_nodes table while we're + * iterating throug it. + */ + bool add_to_direct_deref_nodes; + + /* A hash table mapping phi nodes to deref_state data */ + struct hash_table *phi_table; +}; + +static struct deref_node * +deref_node_create(struct deref_node *parent, + const struct glsl_type *type, nir_shader *shader) +{ + size_t size = sizeof(struct deref_node) + + glsl_get_length(type) * sizeof(struct deref_node *); + + struct deref_node *node = rzalloc_size(shader, size); + node->type = type; + node->parent = parent; + node->deref = NULL; + exec_node_init(&node->direct_derefs_link); + + return node; +} + +/* Returns the deref node associated with the given variable. This will be + * the root of the tree representing all of the derefs of the given variable. 
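+ * (Nodes are created lazily and cached in state->deref_var_nodes, so
+ * repeated calls for the same variable return the same tree.)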
+ */ +static struct deref_node * +get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state) +{ + struct deref_node *node; + + struct hash_entry *var_entry = + _mesa_hash_table_search(state->deref_var_nodes, var); + + if (var_entry) { + return var_entry->data; + } else { + node = deref_node_create(NULL, var->type, state->dead_ctx); + _mesa_hash_table_insert(state->deref_var_nodes, var, node); + return node; + } +} + +/* Gets the deref_node for the given deref chain and creates it if it + * doesn't yet exist. If the deref is fully-qualified and direct and + * state->add_to_direct_deref_nodes is true, it will be added to the hash + * table of of fully-qualified direct derefs. + */ +static struct deref_node * +get_deref_node(nir_deref_var *deref, struct lower_variables_state *state) +{ + bool is_direct = true; + + /* Start at the base of the chain. */ + struct deref_node *node = get_deref_node_for_var(deref->var, state); + assert(deref->deref.type == node->type); + + for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { + switch (tail->deref_type) { + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail); + + assert(deref_struct->index < glsl_get_length(node->type)); + + if (node->children[deref_struct->index] == NULL) + node->children[deref_struct->index] = + deref_node_create(node, tail->type, state->dead_ctx); + + node = node->children[deref_struct->index]; + break; + } + + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(tail); + + switch (arr->deref_array_type) { + case nir_deref_array_type_direct: + /* This is possible if a loop unrolls and generates an + * out-of-bounds offset. We need to handle this at least + * somewhat gracefully. + */ + if (arr->base_offset >= glsl_get_length(node->type)) + return NULL; + + if (node->children[arr->base_offset] == NULL) + node->children[arr->base_offset] = + deref_node_create(node, tail->type, state->dead_ctx); + + node = node->children[arr->base_offset]; + break; + + case nir_deref_array_type_indirect: + if (node->indirect == NULL) + node->indirect = deref_node_create(node, tail->type, + state->dead_ctx); + + node = node->indirect; + is_direct = false; + break; + + case nir_deref_array_type_wildcard: + if (node->wildcard == NULL) + node->wildcard = deref_node_create(node, tail->type, + state->dead_ctx); + + node = node->wildcard; + is_direct = false; + break; + + default: + unreachable("Invalid array deref type"); + } + break; + } + default: + unreachable("Invalid deref type"); + } + } + + assert(node); + + /* Only insert if it isn't already in the list. 
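+    * (membership in that list is tracked by direct_derefs_link.next being
+    * non-NULL)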
*/ + if (is_direct && state->add_to_direct_deref_nodes && + node->direct_derefs_link.next == NULL) { + node->deref = deref; + assert(deref->var != NULL); + exec_list_push_tail(&state->direct_deref_nodes, + &node->direct_derefs_link); + } + + return node; +} + +/* \sa foreach_deref_node_match */ +static bool +foreach_deref_node_worker(struct deref_node *node, nir_deref *deref, + bool (* cb)(struct deref_node *node, + struct lower_variables_state *state), + struct lower_variables_state *state) +{ + if (deref->child == NULL) { + return cb(node, state); + } else { + switch (deref->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(deref->child); + assert(arr->deref_array_type == nir_deref_array_type_direct); + if (node->children[arr->base_offset] && + !foreach_deref_node_worker(node->children[arr->base_offset], + deref->child, cb, state)) + return false; + + if (node->wildcard && + !foreach_deref_node_worker(node->wildcard, + deref->child, cb, state)) + return false; + + return true; + } + + case nir_deref_type_struct: { + nir_deref_struct *str = nir_deref_as_struct(deref->child); + return foreach_deref_node_worker(node->children[str->index], + deref->child, cb, state); + } + + default: + unreachable("Invalid deref child type"); + } + } +} + +/* Walks over every "matching" deref_node and calls the callback. A node + * is considered to "match" if either refers to that deref or matches up t + * a wildcard. In other words, the following would match a[6].foo[3].bar: + * + * a[6].foo[3].bar + * a[*].foo[3].bar + * a[6].foo[*].bar + * a[*].foo[*].bar + * + * The given deref must be a full-length and fully qualified (no wildcards + * or indirects) deref chain. + */ +static bool +foreach_deref_node_match(nir_deref_var *deref, + bool (* cb)(struct deref_node *node, + struct lower_variables_state *state), + struct lower_variables_state *state) +{ + nir_deref_var var_deref = *deref; + var_deref.deref.child = NULL; + struct deref_node *node = get_deref_node(&var_deref, state); + + if (node == NULL) + return false; + + return foreach_deref_node_worker(node, &deref->deref, cb, state); +} + +/* \sa deref_may_be_aliased */ +static bool +deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref, + struct lower_variables_state *state) +{ + if (deref->child == NULL) { + return false; + } else { + switch (deref->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *arr = nir_deref_as_array(deref->child); + if (arr->deref_array_type == nir_deref_array_type_indirect) + return true; + + /* If there is an indirect at this level, we're aliased. */ + if (node->indirect) + return true; + + assert(arr->deref_array_type == nir_deref_array_type_direct); + + if (node->children[arr->base_offset] && + deref_may_be_aliased_node(node->children[arr->base_offset], + deref->child, state)) + return true; + + if (node->wildcard && + deref_may_be_aliased_node(node->wildcard, deref->child, state)) + return true; + + return false; + } + + case nir_deref_type_struct: { + nir_deref_struct *str = nir_deref_as_struct(deref->child); + if (node->children[str->index]) { + return deref_may_be_aliased_node(node->children[str->index], + deref->child, state); + } else { + return false; + } + } + + default: + unreachable("Invalid nir_deref child type"); + } + } +} + +/* Returns true if there are no indirects that can ever touch this deref. 
+ * + * For example, if the given deref is a[6].foo, then any uses of a[i].foo + * would cause this to return false, but a[i].bar would not affect it + * because it's a different structure member. A var_copy involving of + * a[*].bar also doesn't affect it because that can be lowered to entirely + * direct load/stores. + * + * We only support asking this question about fully-qualified derefs. + * Obviously, it's pointless to ask this about indirects, but we also + * rule-out wildcards. Handling Wildcard dereferences would involve + * checking each array index to make sure that there aren't any indirect + * references. + */ +static bool +deref_may_be_aliased(nir_deref_var *deref, + struct lower_variables_state *state) +{ + return deref_may_be_aliased_node(get_deref_node_for_var(deref->var, state), + &deref->deref, state); +} + +static void +register_load_instr(nir_intrinsic_instr *load_instr, + struct lower_variables_state *state) +{ + struct deref_node *node = get_deref_node(load_instr->variables[0], state); + if (node == NULL) + return; + + if (node->loads == NULL) + node->loads = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + _mesa_set_add(node->loads, load_instr); +} + +static void +register_store_instr(nir_intrinsic_instr *store_instr, + struct lower_variables_state *state) +{ + struct deref_node *node = get_deref_node(store_instr->variables[0], state); + if (node == NULL) + return; + + if (node->stores == NULL) + node->stores = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + _mesa_set_add(node->stores, store_instr); +} + +static void +register_copy_instr(nir_intrinsic_instr *copy_instr, + struct lower_variables_state *state) +{ + for (unsigned idx = 0; idx < 2; idx++) { + struct deref_node *node = + get_deref_node(copy_instr->variables[idx], state); + + if (node == NULL) + continue; + + if (node->copies == NULL) + node->copies = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + _mesa_set_add(node->copies, copy_instr); + } +} + +/* Registers all variable uses in the given block. */ +static bool +register_variable_uses_block(nir_block *block, void *void_state) +{ + struct lower_variables_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: + register_load_instr(intrin, state); + break; + + case nir_intrinsic_store_var: + register_store_instr(intrin, state); + break; + + case nir_intrinsic_copy_var: + register_copy_instr(intrin, state); + break; + + default: + continue; + } + } + + return true; +} + +/* Walks over all of the copy instructions to or from the given deref_node + * and lowers them to load/store intrinsics. 
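+ * (Each lowered copy is also removed from the copies set of any other
+ * node it touches, so no copy instruction gets lowered twice.)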
+ */
+static bool
+lower_copies_to_load_store(struct deref_node *node,
+                           struct lower_variables_state *state)
+{
+   if (!node->copies)
+      return true;
+
+   struct set_entry *copy_entry;
+   set_foreach(node->copies, copy_entry) {
+      nir_intrinsic_instr *copy = (void *)copy_entry->key;
+
+      nir_lower_var_copy_instr(copy, state->shader);
+
+      for (unsigned i = 0; i < 2; ++i) {
+         struct deref_node *arg_node =
+            get_deref_node(copy->variables[i], state);
+
+         /* Only bother removing copy entries for other nodes */
+         if (arg_node == NULL || arg_node == node)
+            continue;
+
+         struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
+         assert(arg_entry);
+         _mesa_set_remove(arg_node->copies, arg_entry);
+      }
+
+      nir_instr_remove(&copy->instr);
+   }
+
+   node->copies = NULL;
+
+   return true;
+}
+
+/** Pushes an SSA def onto the def stack for the given node
+ *
+ * Each node is potentially associated with a stack of SSA definitions.
+ * This stack is used for determining what SSA definition reaches a given
+ * point in the program for variable renaming.  The stack is always kept in
+ * dominance-order with at most one SSA def per block.  If the SSA
+ * definition on the top of the stack is in the same block as the one being
+ * pushed, the top element is replaced.
+ */
+static void
+def_stack_push(struct deref_node *node, nir_ssa_def *def,
+               struct lower_variables_state *state)
+{
+   if (node->def_stack == NULL) {
+      node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *,
+                                     state->impl->num_blocks);
+      node->def_stack_tail = node->def_stack - 1;
+   }
+
+   if (node->def_stack_tail >= node->def_stack) {
+      nir_ssa_def *top_def = *node->def_stack_tail;
+
+      if (def->parent_instr->block == top_def->parent_instr->block) {
+         /* They're in the same block, just replace the top */
+         *node->def_stack_tail = def;
+         return;
+      }
+   }
+
+   *(++node->def_stack_tail) = def;
+}
+
+/* Pop the top of the def stack if it's in the given block */
+static void
+def_stack_pop_if_in_block(struct deref_node *node, nir_block *block)
+{
+   /* If we're popping, then we have presumably pushed at some time in the
+    * past so this should exist.
+    */
+   assert(node->def_stack != NULL);
+
+   /* The stack is already empty.  Do nothing. */
+   if (node->def_stack_tail < node->def_stack)
+      return;
+
+   nir_ssa_def *def = *node->def_stack_tail;
+   if (def->parent_instr->block == block)
+      node->def_stack_tail--;
+}
+
+/** Retrieves the SSA definition on the top of the stack for the given
+ * node, if one exists.  If the stack is empty, then we return the constant
+ * initializer (if it exists) or an SSA undef.
+ */
+static nir_ssa_def *
+get_ssa_def_for_block(struct deref_node *node, nir_block *block,
+                      struct lower_variables_state *state)
+{
+   /* If we have something on the stack, go ahead and return it.  We're
+    * assuming that the top of the stack dominates the given block.
+    */
+   if (node->def_stack && node->def_stack_tail >= node->def_stack)
+      return *node->def_stack_tail;
+
+   /* If we got here then we don't have a definition that dominates the
+    * given block.  This means that we need to add an undef and use that.
+    */
+   nir_ssa_undef_instr *undef =
+      nir_ssa_undef_instr_create(state->shader,
+                                 glsl_get_vector_elements(node->type));
+   nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr);
+   def_stack_push(node, &undef->def, state);
+   return &undef->def;
+}
+
+/* Given a block and one of its predecessors, this function fills in the
+ * sources of the phi nodes to take SSA defs from the given predecessor.
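+ * (The defs come from get_ssa_def_for_block(), so an SSA undef is created
+ * when no store to the variable dominates the predecessor.)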
+ * This function must be called exactly once per block/predecessor pair. + */ +static void +add_phi_sources(nir_block *block, nir_block *pred, + struct lower_variables_state *state) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + + struct hash_entry *entry = + _mesa_hash_table_search(state->phi_table, phi); + if (!entry) + continue; + + struct deref_node *node = entry->data; + + nir_phi_src *src = ralloc(phi, nir_phi_src); + src->pred = pred; + src->src.parent_instr = &phi->instr; + src->src.is_ssa = true; + src->src.ssa = get_ssa_def_for_block(node, pred, state); + + list_addtail(&src->src.use_link, &src->src.ssa->uses); + + exec_list_push_tail(&phi->srcs, &src->node); + } +} + +/* Performs variable renaming by doing a DFS of the dominance tree + * + * This algorithm is very similar to the one outlined in "Efficiently + * Computing Static Single Assignment Form and the Control Dependence + * Graph" by Cytron et. al. The primary difference is that we only put one + * SSA def on the stack per block. + */ +static bool +rename_variables_block(nir_block *block, struct lower_variables_state *state) +{ + nir_builder b; + nir_builder_init(&b, state->impl); + + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_phi) { + nir_phi_instr *phi = nir_instr_as_phi(instr); + + struct hash_entry *entry = + _mesa_hash_table_search(state->phi_table, phi); + + /* This can happen if we already have phi nodes in the program + * that were not created in this pass. + */ + if (!entry) + continue; + + struct deref_node *node = entry->data; + + def_stack_push(node, &phi->dest.ssa, state); + } else if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: { + struct deref_node *node = + get_deref_node(intrin->variables[0], state); + + if (node == NULL) { + /* If we hit this path then we are referencing an invalid + * value. Most likely, we unrolled something and are + * reading past the end of some array. In any case, this + * should result in an undefined value. + */ + nir_ssa_undef_instr *undef = + nir_ssa_undef_instr_create(state->shader, + intrin->num_components); + + nir_instr_insert_before(&intrin->instr, &undef->instr); + nir_instr_remove(&intrin->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&undef->def)); + continue; + } + + if (!node->lower_to_ssa) + continue; + + nir_alu_instr *mov = nir_alu_instr_create(state->shader, + nir_op_imov); + mov->src[0].src.is_ssa = true; + mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state); + for (unsigned i = intrin->num_components; i < 4; i++) + mov->src[0].swizzle[i] = 0; + + assert(intrin->dest.is_ssa); + + mov->dest.write_mask = (1 << intrin->num_components) - 1; + nir_ssa_dest_init(&mov->instr, &mov->dest.dest, + intrin->num_components, NULL); + + nir_instr_insert_before(&intrin->instr, &mov->instr); + nir_instr_remove(&intrin->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, + nir_src_for_ssa(&mov->dest.dest.ssa)); + break; + } + + case nir_intrinsic_store_var: { + struct deref_node *node = + get_deref_node(intrin->variables[0], state); + + if (node == NULL) { + /* Probably an out-of-bounds array store. That should be a + * no-op. 
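+             * (this mirrors the load case above, which substitutes an
+             * undef for an out-of-bounds load)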
*/ + nir_instr_remove(&intrin->instr); + continue; + } + + if (!node->lower_to_ssa) + continue; + + assert(intrin->num_components == + glsl_get_vector_elements(node->type)); + + assert(intrin->src[0].is_ssa); + + nir_ssa_def *new_def; + b.cursor = nir_before_instr(&intrin->instr); + + if (intrin->const_index[0] == (1 << intrin->num_components) - 1) { + /* Whole variable store - just copy the source. Note that + * intrin->num_components and intrin->src[0].ssa->num_components + * may differ. + */ + unsigned swiz[4]; + for (unsigned i = 0; i < 4; i++) + swiz[i] = i < intrin->num_components ? i : 0; + + new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, + intrin->num_components, false); + } else { + nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state); + /* For writemasked store_var intrinsics, we combine the newly + * written values with the existing contents of unwritten + * channels, creating a new SSA value for the whole vector. + */ + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < intrin->num_components; i++) { + if (intrin->const_index[0] & (1 << i)) { + srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); + } else { + srcs[i] = nir_channel(&b, old_def, i); + } + } + new_def = nir_vec(&b, srcs, intrin->num_components); + } + + assert(new_def->num_components == intrin->num_components); + + def_stack_push(node, new_def, state); + + /* We'll wait to remove the instruction until the next pass + * where we pop the node we just pushed back off the stack. + */ + break; + } + + default: + break; + } + } + } + + if (block->successors[0]) + add_phi_sources(block->successors[0], block, state); + if (block->successors[1]) + add_phi_sources(block->successors[1], block, state); + + for (unsigned i = 0; i < block->num_dom_children; ++i) + rename_variables_block(block->dom_children[i], state); + + /* Now we iterate over the instructions and pop off any SSA defs that we + * pushed in the first loop. + */ + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_phi) { + nir_phi_instr *phi = nir_instr_as_phi(instr); + + struct hash_entry *entry = + _mesa_hash_table_search(state->phi_table, phi); + + /* This can happen if we already have phi nodes in the program + * that were not created in this pass. + */ + if (!entry) + continue; + + struct deref_node *node = entry->data; + + def_stack_pop_if_in_block(node, block); + } else if (instr->type == nir_instr_type_intrinsic) { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + if (intrin->intrinsic != nir_intrinsic_store_var) + continue; + + struct deref_node *node = get_deref_node(intrin->variables[0], state); + if (!node) + continue; + + if (!node->lower_to_ssa) + continue; + + def_stack_pop_if_in_block(node, block); + nir_instr_remove(&intrin->instr); + } + } + + return true; +} + +/* Inserts phi nodes for all variables marked lower_to_ssa + * + * This is the same algorithm as presented in "Efficiently Computing Static + * Single Assignment Form and the Control Dependence Graph" by Cytron et. + * al. + */ +static void +insert_phi_nodes(struct lower_variables_state *state) +{ + NIR_VLA_ZERO(unsigned, work, state->impl->num_blocks); + NIR_VLA_ZERO(unsigned, has_already, state->impl->num_blocks); + + /* + * Since the work flags already prevent us from inserting a node that has + * ever been inserted into W, we don't need to use a set to represent W. + * Also, since no block can ever be inserted into W more than once, we know + * that the maximum size of W is the number of basic blocks in the + * function. 
So all we need to handle W is an array and a pointer to the + * next element to be inserted and the next element to be removed. + */ + NIR_VLA(nir_block *, W, state->impl->num_blocks); + + unsigned w_start, w_end; + unsigned iter_count = 0; + + foreach_list_typed(struct deref_node, node, direct_derefs_link, + &state->direct_deref_nodes) { + if (node->stores == NULL) + continue; + + if (!node->lower_to_ssa) + continue; + + w_start = w_end = 0; + iter_count++; + + struct set_entry *store_entry; + set_foreach(node->stores, store_entry) { + nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key; + if (work[store->instr.block->index] < iter_count) + W[w_end++] = store->instr.block; + work[store->instr.block->index] = iter_count; + } + + while (w_start != w_end) { + nir_block *cur = W[w_start++]; + struct set_entry *dom_entry; + set_foreach(cur->dom_frontier, dom_entry) { + nir_block *next = (nir_block *) dom_entry->key; + + /* + * If there's more than one return statement, then the end block + * can be a join point for some definitions. However, there are + * no instructions in the end block, so nothing would use those + * phi nodes. Of course, we couldn't place those phi nodes + * anyways due to the restriction of having no instructions in the + * end block... + */ + if (next == state->impl->end_block) + continue; + + if (has_already[next->index] < iter_count) { + nir_phi_instr *phi = nir_phi_instr_create(state->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + glsl_get_vector_elements(node->type), NULL); + nir_instr_insert_before_block(next, &phi->instr); + + _mesa_hash_table_insert(state->phi_table, phi, node); + + has_already[next->index] = iter_count; + if (work[next->index] < iter_count) { + work[next->index] = iter_count; + W[w_end++] = next; + } + } + } + } + } +} + + +/** Implements a pass to lower variable uses to SSA values + * + * This path walks the list of instructions and tries to lower as many + * local variable load/store operations to SSA defs and uses as it can. + * The process involves four passes: + * + * 1) Iterate over all of the instructions and mark where each local + * variable deref is used in a load, store, or copy. While we're at + * it, we keep track of all of the fully-qualified (no wildcards) and + * fully-direct references we see and store them in the + * direct_deref_nodes hash table. + * + * 2) Walk over the the list of fully-qualified direct derefs generated in + * the previous pass. For each deref, we determine if it can ever be + * aliased, i.e. if there is an indirect reference anywhere that may + * refer to it. If it cannot be aliased, we mark it for lowering to an + * SSA value. At this point, we lower any var_copy instructions that + * use the given deref to load/store operations and, if the deref has a + * constant initializer, we go ahead and add a load_const value at the + * beginning of the function with the initialized value. + * + * 3) Walk over the list of derefs we plan to lower to SSA values and + * insert phi nodes as needed. + * + * 4) Perform "variable renaming" by replacing the load/store instructions + * with SSA definitions and SSA uses. 
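+ *
+ * (Steps 3 and 4 follow the classic Cytron et al. construction, specialized
+ * here to keep at most one SSA def per block on each node's def stack.)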
+ */ +static bool +nir_lower_vars_to_ssa_impl(nir_function_impl *impl) +{ + struct lower_variables_state state; + + state.shader = impl->function->shader; + state.dead_ctx = ralloc_context(state.shader); + state.impl = impl; + + state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + exec_list_make_empty(&state.direct_deref_nodes); + state.phi_table = _mesa_hash_table_create(state.dead_ctx, + _mesa_hash_pointer, + _mesa_key_pointer_equal); + + /* Build the initial deref structures and direct_deref_nodes table */ + state.add_to_direct_deref_nodes = true; + nir_foreach_block(impl, register_variable_uses_block, &state); + + bool progress = false; + + nir_metadata_require(impl, nir_metadata_block_index); + + /* We're about to iterate through direct_deref_nodes. Don't modify it. */ + state.add_to_direct_deref_nodes = false; + + foreach_list_typed_safe(struct deref_node, node, direct_derefs_link, + &state.direct_deref_nodes) { + nir_deref_var *deref = node->deref; + + if (deref->var->data.mode != nir_var_local) { + exec_node_remove(&node->direct_derefs_link); + continue; + } + + if (deref_may_be_aliased(deref, &state)) { + exec_node_remove(&node->direct_derefs_link); + continue; + } + + node->lower_to_ssa = true; + progress = true; + + if (deref->var->constant_initializer) { + nir_load_const_instr *load = + nir_deref_get_const_initializer_load(state.shader, deref); + nir_ssa_def_init(&load->instr, &load->def, + glsl_get_vector_elements(node->type), NULL); + nir_instr_insert_before_cf_list(&impl->body, &load->instr); + def_stack_push(node, &load->def, &state); + } + + foreach_deref_node_match(deref, lower_copies_to_load_store, &state); + } + + if (!progress) + return false; + + nir_metadata_require(impl, nir_metadata_dominance); + + /* We may have lowered some copy instructions to load/store + * instructions. The uses from the copy instructions hav already been + * removed but we need to rescan to ensure that the uses from the newly + * added load/store instructions are registered. We need this + * information for phi node insertion below. + */ + nir_foreach_block(impl, register_variable_uses_block, &state); + + insert_phi_nodes(&state); + rename_variables_block(nir_start_block(impl), &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + ralloc_free(state.dead_ctx); + + return progress; +} + +void +nir_lower_vars_to_ssa(nir_shader *shader) +{ + nir_foreach_function(shader, function) { + if (function->impl) + nir_lower_vars_to_ssa_impl(function->impl); + } +} diff --git a/src/compiler/nir/nir_lower_vec_to_movs.c b/src/compiler/nir/nir_lower_vec_to_movs.c new file mode 100644 index 00000000000..06d627900c6 --- /dev/null +++ b/src/compiler/nir/nir_lower_vec_to_movs.c @@ -0,0 +1,310 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a simple pass that lowers vecN instructions to a series of
+ * moves with partial writes.
+ */
+
+struct vec_to_movs_state {
+   nir_function_impl *impl;
+   bool progress;
+};
+
+static bool
+src_matches_dest_reg(nir_dest *dest, nir_src *src)
+{
+   if (dest->is_ssa || src->is_ssa)
+      return false;
+
+   return (dest->reg.reg == src->reg.reg &&
+           dest->reg.base_offset == src->reg.base_offset &&
+           !dest->reg.indirect &&
+           !src->reg.indirect);
+}
+
+/**
+ * For a given starting writemask channel and corresponding source index in
+ * the vec instruction, insert a MOV to the vec instruction's dest of all the
+ * writemask channels that get read from the same src reg.
+ *
+ * Returns the writemask of our MOV, so the parent loop calling this knows
+ * which ones have been processed.
+ */
+static unsigned
+insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+{
+   assert(start_idx < nir_op_infos[vec->op].num_inputs);
+
+   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
+   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
+   nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
+
+   mov->dest.write_mask = (1u << start_idx);
+   mov->src[0].swizzle[start_idx] = vec->src[start_idx].swizzle[0];
+   mov->src[0].negate = vec->src[start_idx].negate;
+   mov->src[0].abs = vec->src[start_idx].abs;
+
+   for (unsigned i = start_idx + 1; i < 4; i++) {
+      if (!(vec->dest.write_mask & (1 << i)))
+         continue;
+
+      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
+          vec->src[i].negate == vec->src[start_idx].negate &&
+          vec->src[i].abs == vec->src[start_idx].abs) {
+         mov->dest.write_mask |= (1 << i);
+         mov->src[0].swizzle[i] = vec->src[i].swizzle[0];
+      }
+   }
+
+   /* In some situations (if the vecN is involved in a phi-web), we can end
+    * up with a mov from a register to itself.  Some of those channels may end
+    * up doing nothing and there's no reason to have them as part of the mov.
+    */
+   if (src_matches_dest_reg(&mov->dest.dest, &mov->src[0].src) &&
+       !mov->src[0].abs && !mov->src[0].negate) {
+      for (unsigned i = 0; i < 4; i++) {
+         if (mov->src[0].swizzle[i] == i) {
+            mov->dest.write_mask &= ~(1 << i);
+         }
+      }
+   }
+
+   /* Stash the writemask before the instruction is potentially freed. */
+   unsigned write_mask = mov->dest.write_mask;
+
+   /* Only emit the instruction if it actually does something */
+   if (write_mask) {
+      nir_instr_insert_before(&vec->instr, &mov->instr);
+   } else {
+      ralloc_free(mov);
+   }
+
+   return write_mask;
+}
+
+static bool
+has_replicated_dest(nir_alu_instr *alu)
+{
+   return alu->op == nir_op_fdot_replicated2 ||
+          alu->op == nir_op_fdot_replicated3 ||
+          alu->op == nir_op_fdot_replicated4 ||
+          alu->op == nir_op_fdph_replicated;
+}
+
+/* Attempts to coalesce the "move" from the given source of the vec to the
+ * destination of the instruction generating the value.  If, for whatever
+ * reason, we cannot coalesce the move, it does nothing and returns 0.  We
+ * can then call insert_mov as normal.
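+ *
+ * An illustrative sketch (not from the original comment): given
+ *    ssa_2 = fadd ssa_0, ssa_1
+ *    ssa_3 = vec4 ssa_2.x, ssa_2.y, ssa_4.x, ssa_4.y
+ * where the fadd has no other uses, we can rewrite the fadd to write the
+ * first two channels of the vec's destination directly and drop those vec
+ * sources entirely.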
+ */
+static unsigned
+try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
+{
+   assert(start_idx < nir_op_infos[vec->op].num_inputs);
+
+   /* We will only even try if the source is SSA */
+   if (!vec->src[start_idx].src.is_ssa)
+      return 0;
+
+   assert(vec->src[start_idx].src.ssa);
+
+   /* If we are going to do a reswizzle, then the vecN operation must be the
+    * only use of the source value.  We also can't have any source modifiers.
+    */
+   nir_foreach_use(vec->src[start_idx].src.ssa, src) {
+      if (src->parent_instr != &vec->instr)
+         return 0;
+
+      nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
+      if (alu_src->abs || alu_src->negate)
+         return 0;
+   }
+
+   if (!list_empty(&vec->src[start_idx].src.ssa->if_uses))
+      return 0;
+
+   if (vec->src[start_idx].src.ssa->parent_instr->type != nir_instr_type_alu)
+      return 0;
+
+   nir_alu_instr *src_alu =
+      nir_instr_as_alu(vec->src[start_idx].src.ssa->parent_instr);
+
+   if (has_replicated_dest(src_alu)) {
+      /* The fdot instruction is special: It replicates its result to all
+       * components.  This means that we can always rewrite its destination
+       * and we don't need to swizzle anything.
+       */
+   } else {
+      /* We only care about being able to re-swizzle the instruction if it is
+       * something that we can reswizzle.  It must be per-component.  The one
+       * exception to this is the fdotN instructions which implicitly splat
+       * their result out to all channels.
+       */
+      if (nir_op_infos[src_alu->op].output_size != 0)
+         return 0;
+
+      /* If we are going to reswizzle the instruction, we can't have any
+       * non-per-component sources either.
+       */
+      for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+         if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
+            return 0;
+   }
+
+   /* Stash off all of the ALU instruction's swizzles. */
+   uint8_t swizzles[4][4];
+   for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+      for (unsigned i = 0; i < 4; i++)
+         swizzles[j][i] = src_alu->src[j].swizzle[i];
+
+   unsigned write_mask = 0;
+   for (unsigned i = start_idx; i < 4; i++) {
+      if (!(vec->dest.write_mask & (1 << i)))
+         continue;
+
+      if (!vec->src[i].src.is_ssa ||
+          vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
+         continue;
+
+      /* At this point, the given vec source matches up with the ALU
+       * instruction so we can re-swizzle that component to match.
+       */
+      write_mask |= 1 << i;
+      if (has_replicated_dest(src_alu)) {
+         /* Since the destination is a single replicated value, we don't need
+          * to do any reswizzling
+          */
+      } else {
+         for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
+            src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
+      }
+
+      /* Clear the no longer needed vec source */
+      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, NIR_SRC_INIT);
+   }
+
+   nir_instr_rewrite_dest(&src_alu->instr, &src_alu->dest.dest, vec->dest.dest);
+   src_alu->dest.write_mask = write_mask;
+
+   return write_mask;
+}
+
+static bool
+lower_vec_to_movs_block(nir_block *block, void *void_state)
+{
+   struct vec_to_movs_state *state = void_state;
+   nir_function_impl *impl = state->impl;
+   nir_shader *shader = impl->function->shader;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *vec = nir_instr_as_alu(instr);
+
+      switch (vec->op) {
+      case nir_op_vec2:
+      case nir_op_vec3:
+      case nir_op_vec4:
+         break;
+      default:
+         continue; /* The loop */
+      }
+
+      if (vec->dest.dest.is_ssa) {
+         /* Since we insert multiple MOVs, we need a register destination.
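+          * An SSA def can only be written once, so we first rewrite the
+          * SSA destination to a freshly created local register.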
*/ + nir_register *reg = nir_local_reg_create(impl); + reg->num_components = vec->dest.dest.ssa.num_components; + + nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg)); + + nir_instr_rewrite_dest(&vec->instr, &vec->dest.dest, + nir_dest_for_reg(reg)); + } + + unsigned finished_write_mask = 0; + + /* First, emit a MOV for all the src channels that are in the + * destination reg, in case other values we're populating in the dest + * might overwrite them. + */ + for (unsigned i = 0; i < 4; i++) { + if (!(vec->dest.write_mask & (1 << i))) + continue; + + if (src_matches_dest_reg(&vec->dest.dest, &vec->src[i].src)) { + finished_write_mask |= insert_mov(vec, i, shader); + break; + } + } + + /* Now, emit MOVs for all the other src channels. */ + for (unsigned i = 0; i < 4; i++) { + if (!(vec->dest.write_mask & (1 << i))) + continue; + + if (!(finished_write_mask & (1 << i))) + finished_write_mask |= try_coalesce(vec, i, shader); + + if (!(finished_write_mask & (1 << i))) + finished_write_mask |= insert_mov(vec, i, shader); + } + + nir_instr_remove(&vec->instr); + ralloc_free(vec); + state->progress = true; + } + + return true; +} + +static bool +nir_lower_vec_to_movs_impl(nir_function_impl *impl) +{ + struct vec_to_movs_state state = { impl, false }; + + nir_foreach_block(impl, lower_vec_to_movs_block, &state); + + if (state.progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + return state.progress; +} + +bool +nir_lower_vec_to_movs(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = nir_lower_vec_to_movs_impl(function->impl) || progress; + } + + return progress; +} diff --git a/src/compiler/nir/nir_metadata.c b/src/compiler/nir/nir_metadata.c new file mode 100644 index 00000000000..61aae73221e --- /dev/null +++ b/src/compiler/nir/nir_metadata.c @@ -0,0 +1,90 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + */ + +#include "nir.h" + +/* + * Handles management of the metadata. 
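+ *
+ * Each nir_function_impl keeps a bitmask of which analyses (block indices,
+ * dominance information, live SSA defs) are currently valid.  A pass calls
+ * nir_metadata_require() up front for what it needs and
+ * nir_metadata_preserve() afterwards for what it kept intact; anything not
+ * explicitly preserved is treated as stale.  A typical pass body is
+ * roughly (a sketch, not a real pass):
+ *
+ *    nir_metadata_require(impl, nir_metadata_dominance);
+ *    ... transform the IR ...
+ *    nir_metadata_preserve(impl, nir_metadata_block_index |
+ *                                nir_metadata_dominance);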
+ */
+
+void
+nir_metadata_require(nir_function_impl *impl, nir_metadata required)
+{
+#define NEEDS_UPDATE(X) ((required & ~impl->valid_metadata) & (X))
+
+   if (NEEDS_UPDATE(nir_metadata_block_index))
+      nir_index_blocks(impl);
+   if (NEEDS_UPDATE(nir_metadata_dominance))
+      nir_calc_dominance_impl(impl);
+   if (NEEDS_UPDATE(nir_metadata_live_ssa_defs))
+      nir_live_ssa_defs_impl(impl);
+
+#undef NEEDS_UPDATE
+
+   impl->valid_metadata |= required;
+}
+
+void
+nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved)
+{
+   impl->valid_metadata &= preserved;
+}
+
+#ifdef DEBUG
+/**
+ * Make sure passes properly invalidate metadata (part 1).
+ *
+ * Call this before running a pass to set a bogus metadata flag, which will
+ * only be preserved if the pass forgets to call nir_metadata_preserve().
+ */
+void
+nir_metadata_set_validation_flag(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl) {
+         function->impl->valid_metadata |= nir_metadata_not_properly_reset;
+      }
+   }
+}
+
+/**
+ * Make sure passes properly invalidate metadata (part 2).
+ *
+ * Call this after a pass makes progress to verify that the bogus metadata
+ * set by the earlier function was properly thrown away.  Note that passes
+ * may not call nir_metadata_preserve() if they don't actually make any
+ * changes at all.
+ */
+void
+nir_metadata_check_validation_flag(nir_shader *shader)
+{
+   nir_foreach_function(shader, function) {
+      if (function->impl) {
+         assert(!(function->impl->valid_metadata &
+                  nir_metadata_not_properly_reset));
+      }
+   }
+}
+#endif
diff --git a/src/compiler/nir/nir_move_vec_src_uses_to_dest.c b/src/compiler/nir/nir_move_vec_src_uses_to_dest.c
new file mode 100644
index 00000000000..b5186e6e944
--- /dev/null
+++ b/src/compiler/nir/nir_move_vec_src_uses_to_dest.c
@@ -0,0 +1,197 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements a pass that tries to move uses of vecN sources to their
+ * destinations.  This is kind of like an inverse copy-propagation pass.
+ * For instance, if you have
+ *
+ *    ssa_1 = vec4(a, b, c, d)
+ *    ssa_2 = fadd(a, b)
+ *
+ * This will be turned into
+ *
+ *    ssa_1 = vec4(a, b, c, d)
+ *    ssa_2 = fadd(ssa_1.x, ssa_1.y)
+ *
+ * While this is "worse" because it adds a bunch of unneeded dependencies, it
+ * actually makes it much easier for vec4-based backends to coalesce the MOVs
+ * that result from the vec4 operation because it doesn't have to worry about
+ * quite as many reads.
+ */
+
+/* Returns true if the given SSA def dominates the instruction.  An SSA def is
+ * considered to *not* dominate the instruction that defines it.
+ */
+static bool
+ssa_def_dominates_instr(nir_ssa_def *def, nir_instr *instr)
+{
+   if (instr->index <= def->parent_instr->index) {
+      return false;
+   } else if (def->parent_instr->block == instr->block) {
+      return def->parent_instr->index < instr->index;
+   } else {
+      return nir_block_dominates(def->parent_instr->block, instr->block);
+   }
+}
+
+static bool
+move_vec_src_uses_to_dest_block(nir_block *block, void *shader)
+{
+   nir_foreach_instr(block, instr) {
+      if (instr->type != nir_instr_type_alu)
+         continue;
+
+      nir_alu_instr *vec = nir_instr_as_alu(instr);
+
+      switch (vec->op) {
+      case nir_op_vec2:
+      case nir_op_vec3:
+      case nir_op_vec4:
+         break;
+      default:
+         continue; /* The loop */
+      }
+
+      /* Can't handle non-SSA vec operations */
+      if (!vec->dest.dest.is_ssa)
+         continue;
+
+      /* Can't handle saturation */
+      if (vec->dest.saturate)
+         continue;
+
+      /* First, mark all of the sources we are going to consider for rewriting
+       * to the destination
+       */
+      int srcs_remaining = 0;
+      for (unsigned i = 0; i < nir_op_infos[vec->op].num_inputs; i++) {
+         /* We can't rewrite a source if it's not in SSA form */
+         if (!vec->src[i].src.is_ssa)
+            continue;
+
+         /* We can't rewrite a source if it has modifiers */
+         if (vec->src[i].abs || vec->src[i].negate)
+            continue;
+
+         srcs_remaining |= 1 << i;
+      }
+
+      /* We can't actually do anything with this instruction */
+      if (srcs_remaining == 0)
+         continue;
+
+      for (unsigned i; i = ffs(srcs_remaining) - 1, srcs_remaining;) {
+         int8_t swizzle[4] = { -1, -1, -1, -1 };
+
+         for (unsigned j = i; j < nir_op_infos[vec->op].num_inputs; j++) {
+            if (vec->src[j].src.ssa != vec->src[i].src.ssa)
+               continue;
+
+            /* Mark the given channel as having been handled */
+            srcs_remaining &= ~(1 << j);
+
+            /* Mark the appropriate channel as coming from src j */
+            swizzle[vec->src[j].swizzle[0]] = j;
+         }
+
+         nir_foreach_use_safe(vec->src[i].src.ssa, use) {
+            if (use->parent_instr == &vec->instr)
+               continue;
+
+            /* We need to dominate the use if we are going to rewrite it */
+            if (!ssa_def_dominates_instr(&vec->dest.dest.ssa, use->parent_instr))
+               continue;
+
+            /* For now, we'll just rewrite ALU instructions */
+            if (use->parent_instr->type != nir_instr_type_alu)
+               continue;
+
+            assert(use->is_ssa);
+
+            nir_alu_instr *use_alu = nir_instr_as_alu(use->parent_instr);
+
+            /* Figure out which source we're actually looking at */
+            nir_alu_src *use_alu_src = exec_node_data(nir_alu_src, use, src);
+            unsigned src_idx = use_alu_src - use_alu->src;
+            assert(src_idx < nir_op_infos[use_alu->op].num_inputs);
+
+            bool can_reswizzle = true;
+            for (unsigned j = 0; j < 4; j++) {
+               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
+                  continue;
+
+               if (swizzle[use_alu_src->swizzle[j]] == -1) {
+                  can_reswizzle = false;
+                  break;
+               }
+            }
+
+            if (!can_reswizzle)
+               continue;
+
+            /* At this point, we have determined that the given use can be
+             * reswizzled to actually use the destination of the vecN operation.
+ * Go ahead and rewrite it as needed. + */ + nir_instr_rewrite_src(use->parent_instr, use, + nir_src_for_ssa(&vec->dest.dest.ssa)); + for (unsigned j = 0; j < 4; j++) { + if (!nir_alu_instr_channel_used(use_alu, src_idx, j)) + continue; + + use_alu_src->swizzle[j] = swizzle[use_alu_src->swizzle[j]]; + } + } + } + } + + return true; +} + +static void +nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl) +{ + nir_metadata_require(impl, nir_metadata_dominance); + + nir_index_instrs(impl); + nir_foreach_block(impl, move_vec_src_uses_to_dest_block, shader); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); +} + +void +nir_move_vec_src_uses_to_dest(nir_shader *shader) +{ + nir_foreach_function(shader, function) { + if (function->impl) + nir_move_vec_src_uses_to_dest_impl(shader, function->impl); + } +} diff --git a/src/compiler/nir/nir_normalize_cubemap_coords.c b/src/compiler/nir/nir_normalize_cubemap_coords.c new file mode 100644 index 00000000000..9c15eb8c15c --- /dev/null +++ b/src/compiler/nir/nir_normalize_cubemap_coords.c @@ -0,0 +1,120 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand + */ + +#include "nir.h" +#include "nir_builder.h" + +/** + * This file implements a NIR lowering pass to perform the normalization of + * the cubemap coordinates to have the largest magnitude component be -1.0 + * or 1.0. This is based on the old GLSL IR based pass by Eric. 
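+ *
+ * Concretely, each cube coordinate (x, y, z) is multiplied by
+ * 1 / max(|x|, |y|, |z|); for example (0.5, -2.0, 1.0) becomes
+ * (0.25, -1.0, 0.5), so the major-axis component lands exactly on -1.0.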
+ */ + +struct normalize_cubemap_state { + nir_builder b; + bool progress; +}; + +static bool +normalize_cubemap_coords_block(nir_block *block, void *void_state) +{ + struct normalize_cubemap_state *state = void_state; + nir_builder *b = &state->b; + + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_tex) + continue; + + nir_tex_instr *tex = nir_instr_as_tex(instr); + if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) + continue; + + b->cursor = nir_before_instr(&tex->instr); + + for (unsigned i = 0; i < tex->num_srcs; i++) { + if (tex->src[i].src_type != nir_tex_src_coord) + continue; + + nir_ssa_def *orig_coord = + nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); + assert(orig_coord->num_components >= 3); + + nir_ssa_def *abs = nir_fabs(b, orig_coord); + nir_ssa_def *norm = nir_fmax(b, nir_channel(b, abs, 0), + nir_fmax(b, nir_channel(b, abs, 1), + nir_channel(b, abs, 2))); + + nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm)); + + /* Array indices don't have to be normalized, so make a new vector + * with the coordinate's array index untouched. + */ + if (tex->coord_components == 4) { + normalized = nir_vec4(b, + nir_channel(b, normalized, 0), + nir_channel(b, normalized, 1), + nir_channel(b, normalized, 2), + nir_channel(b, orig_coord, 3)); + } + + nir_instr_rewrite_src(&tex->instr, + &tex->src[i].src, + nir_src_for_ssa(normalized)); + + state->progress = true; + } + } + + return true; +} + +static bool +normalize_cubemap_coords_impl(nir_function_impl *impl) +{ + struct normalize_cubemap_state state; + nir_builder_init(&state.b, impl); + state.progress = false; + + nir_foreach_block(impl, normalize_cubemap_coords_block, &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return state.progress; +} + +bool +nir_normalize_cubemap_coords(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = normalize_cubemap_coords_impl(function->impl) || progress; + } + + return progress; +} diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py new file mode 100644 index 00000000000..e79810c1991 --- /dev/null +++ b/src/compiler/nir/nir_opcodes.py @@ -0,0 +1,668 @@ +#! /usr/bin/env python +# +# Copyright (C) 2014 Connor Abbott +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+#
+# Authors:
+#    Connor Abbott (cwabbott0@gmail.com)
+
+
+# Class that represents all the information we have about the opcode
+# NOTE: this must be kept in sync with nir_op_info
+
+class Opcode(object):
+   """Class that represents all the information we have about the opcode
+   NOTE: this must be kept in sync with nir_op_info
+   """
+   def __init__(self, name, output_size, output_type, input_sizes,
+                input_types, algebraic_properties, const_expr):
+      """Parameters:
+
+      - name is the name of the opcode (prepend nir_op_ for the enum name)
+      - all types are strings that get nir_type_ prepended to them
+      - input_types is a list of types
+      - algebraic_properties is a space-separated string, where nir_op_is_ is
+        prepended before each entry
+      - const_expr is an expression or series of statements that computes the
+        constant value of the opcode given the constant values of its inputs.
+
+      Constant expressions are formed from the variables src0, src1, ...,
+      src(N-1), where N is the number of arguments.  The output of the
+      expression should be stored in the dst variable.  Per-component input
+      and output variables will be scalars and non-per-component input and
+      output variables will be a struct with fields named x, y, z, and w
+      all of the correct type.  Input and output variables can be assumed
+      to already be of the correct type and need no conversion.  In
+      particular, the conversion from the C bool type to/from NIR_TRUE and
+      NIR_FALSE happens automatically.
+
+      For per-component instructions, the entire expression will be
+      executed once for each component.  For non-per-component
+      instructions, the expression is expected to store the correct values
+      in dst.x, dst.y, etc.  If "dst" does not exist anywhere in the
+      constant expression, an assignment to dst will happen automatically
+      and the result will be equivalent to "dst = <expression>" for
+      per-component instructions and "dst.x = dst.y = ... = <expression>"
+      for non-per-component instructions.
+ """ + assert isinstance(name, str) + assert isinstance(output_size, int) + assert isinstance(output_type, str) + assert isinstance(input_sizes, list) + assert isinstance(input_sizes[0], int) + assert isinstance(input_types, list) + assert isinstance(input_types[0], str) + assert isinstance(algebraic_properties, str) + assert isinstance(const_expr, str) + assert len(input_sizes) == len(input_types) + assert 0 <= output_size <= 4 + for size in input_sizes: + assert 0 <= size <= 4 + if output_size != 0: + assert size != 0 + self.name = name + self.num_inputs = len(input_sizes) + self.output_size = output_size + self.output_type = output_type + self.input_sizes = input_sizes + self.input_types = input_types + self.algebraic_properties = algebraic_properties + self.const_expr = const_expr + +# helper variables for strings +tfloat = "float" +tint = "int" +tbool = "bool" +tuint = "uint" + +commutative = "commutative " +associative = "associative " + +# global dictionary of opcodes +opcodes = {} + +def opcode(name, output_size, output_type, input_sizes, input_types, + algebraic_properties, const_expr): + assert name not in opcodes + opcodes[name] = Opcode(name, output_size, output_type, input_sizes, + input_types, algebraic_properties, const_expr) + +def unop_convert(name, in_type, out_type, const_expr): + opcode(name, 0, out_type, [0], [in_type], "", const_expr) + +def unop(name, ty, const_expr): + opcode(name, 0, ty, [0], [ty], "", const_expr) + +def unop_horiz(name, output_size, output_type, input_size, input_type, + const_expr): + opcode(name, output_size, output_type, [input_size], [input_type], "", + const_expr) + +def unop_reduce(name, output_size, output_type, input_type, prereduce_expr, + reduce_expr, final_expr): + def prereduce(src): + return "(" + prereduce_expr.format(src=src) + ")" + def final(src): + return final_expr.format(src="(" + src + ")") + def reduce_(src0, src1): + return reduce_expr.format(src0=src0, src1=src1) + src0 = prereduce("src0.x") + src1 = prereduce("src0.y") + src2 = prereduce("src0.z") + src3 = prereduce("src0.w") + unop_horiz(name + "2", output_size, output_type, 2, input_type, + final(reduce_(src0, src1))) + unop_horiz(name + "3", output_size, output_type, 3, input_type, + final(reduce_(reduce_(src0, src1), src2))) + unop_horiz(name + "4", output_size, output_type, 4, input_type, + final(reduce_(reduce_(src0, src1), reduce_(src2, src3)))) + + +# These two move instructions differ in what modifiers they support and what +# the negate modifier means. Otherwise, they are identical. +unop("fmov", tfloat, "src0") +unop("imov", tint, "src0") + +unop("ineg", tint, "-src0") +unop("fneg", tfloat, "-src0") +unop("inot", tint, "~src0") # invert every bit of the integer +unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f") +unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)") +unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)") +unop("iabs", tint, "(src0 < 0) ? -src0 : src0") +unop("fabs", tfloat, "fabsf(src0)") +unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)") +unop("frcp", tfloat, "1.0f / src0") +unop("frsq", tfloat, "1.0f / sqrtf(src0)") +unop("fsqrt", tfloat, "sqrtf(src0)") +unop("fexp2", tfloat, "exp2f(src0)") +unop("flog2", tfloat, "log2f(src0)") +unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. +unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion +unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. 
+# Float-to-boolean conversion
+unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
+# Boolean-to-float conversion
+unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
+# Int-to-boolean conversion
+unop_convert("i2b", tint, tbool, "src0 != 0")
+unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
+unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion.
+
+# Unary floating-point rounding operations.
+
+
+unop("ftrunc", tfloat, "truncf(src0)")
+unop("fceil", tfloat, "ceilf(src0)")
+unop("ffloor", tfloat, "floorf(src0)")
+unop("ffract", tfloat, "src0 - floorf(src0)")
+unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+
+
+# Trigonometric operations.
+
+
+unop("fsin", tfloat, "sinf(src0)")
+unop("fcos", tfloat, "cosf(src0)")
+
+
+# Partial derivatives.
+
+
+unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
+unop("fddy", tfloat, "0.0f")
+unop("fddx_fine", tfloat, "0.0f")
+unop("fddy_fine", tfloat, "0.0f")
+unop("fddx_coarse", tfloat, "0.0f")
+unop("fddy_coarse", tfloat, "0.0f")
+
+
+# Floating point pack and unpack operations.
+
+def pack_2x16(fmt):
+   unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
+dst.x = (uint32_t) pack_fmt_1x16(src0.x);
+dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
+""".replace("fmt", fmt))
+
+def pack_4x8(fmt):
+   unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
+dst.x = (uint32_t) pack_fmt_1x8(src0.x);
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
+dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
+""".replace("fmt", fmt))
+
+def unpack_2x16(fmt):
+   unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
+dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
+dst.y = unpack_fmt_1x16((uint16_t)(src0.x >> 16));
+""".replace("fmt", fmt))
+
+def unpack_4x8(fmt):
+   unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
+dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
+dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
+dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
+dst.w = unpack_fmt_1x8((uint8_t)(src0.x >> 24));
+""".replace("fmt", fmt))
+
+
+pack_2x16("snorm")
+pack_4x8("snorm")
+pack_2x16("unorm")
+pack_4x8("unorm")
+pack_2x16("half")
+unpack_2x16("snorm")
+unpack_4x8("snorm")
+unpack_2x16("unorm")
+unpack_4x8("unorm")
+unpack_2x16("half")
+
+
+# Lowered floating point unpacking operations.
+
+
+unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
+           "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
+unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
+           "unpack_half_1x16((uint16_t)(src0.x >> 16))")
+
+
+# Bit operations, part of ARB_gpu_shader5.
+
+
+unop("bitfield_reverse", tuint, """
+/* we're not winning any awards for speed here, but that's ok */
+dst = 0;
+for (unsigned bit = 0; bit < 32; bit++)
+   dst |= ((src0 >> bit) & 1) << (31 - bit);
+""")
+unop("bit_count", tuint, """
+dst = 0;
+for (unsigned bit = 0; bit < 32; bit++) {
+   if ((src0 >> bit) & 1)
+      dst++;
+}
+""")
+
+unop_convert("ufind_msb", tuint, tint, """
+dst = -1;
+for (int bit = 31; bit >= 0; bit--) {
+   if ((src0 >> bit) & 1) {
+      dst = bit;
+      break;
+   }
+}
+""")
+
+unop("ifind_msb", tint, """
+dst = -1;
+for (int bit = 31; bit >= 0; bit--) {
+   /* If src0 < 0, we're looking for the first 0 bit.
+    * if src0 >= 0, we're looking for the first 1 bit.
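+    * For example, for src0 = -2 = 0xfffffffe the highest 0 bit is bit 0,
+    * so dst = 0, matching GLSL findMSB() for negative inputs.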
+ */ + if ((((src0 >> bit) & 1) && (src0 >= 0)) || + (!((src0 >> bit) & 1) && (src0 < 0))) { + dst = bit; + break; + } +} +""") + +unop("find_lsb", tint, """ +dst = -1; +for (unsigned bit = 0; bit < 32; bit++) { + if ((src0 >> bit) & 1) { + dst = bit; + break; + } +} +""") + + +for i in xrange(1, 5): + for j in xrange(1, 5): + unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f") + +def binop_convert(name, out_type, in_type, alg_props, const_expr): + opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr) + +def binop(name, ty, alg_props, const_expr): + binop_convert(name, ty, ty, alg_props, const_expr) + +def binop_compare(name, ty, alg_props, const_expr): + binop_convert(name, tbool, ty, alg_props, const_expr) + +def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size, + src2_type, const_expr): + opcode(name, out_size, out_type, [src1_size, src2_size], [src1_type, src2_type], + "", const_expr) + +def binop_reduce(name, output_size, output_type, src_type, prereduce_expr, + reduce_expr, final_expr): + def final(src): + return final_expr.format(src= "(" + src + ")") + def reduce_(src0, src1): + return reduce_expr.format(src0=src0, src1=src1) + def prereduce(src0, src1): + return "(" + prereduce_expr.format(src0=src0, src1=src1) + ")" + src0 = prereduce("src0.x", "src1.x") + src1 = prereduce("src0.y", "src1.y") + src2 = prereduce("src0.z", "src1.z") + src3 = prereduce("src0.w", "src1.w") + opcode(name + "2", output_size, output_type, + [2, 2], [src_type, src_type], commutative, + final(reduce_(src0, src1))) + opcode(name + "3", output_size, output_type, + [3, 3], [src_type, src_type], commutative, + final(reduce_(reduce_(src0, src1), src2))) + opcode(name + "4", output_size, output_type, + [4, 4], [src_type, src_type], commutative, + final(reduce_(reduce_(src0, src1), reduce_(src2, src3)))) + +binop("fadd", tfloat, commutative + associative, "src0 + src1") +binop("iadd", tint, commutative + associative, "src0 + src1") +binop("fsub", tfloat, "", "src0 - src1") +binop("isub", tint, "", "src0 - src1") + +binop("fmul", tfloat, commutative + associative, "src0 * src1") +# low 32-bits of signed/unsigned integer multiply +binop("imul", tint, commutative + associative, "src0 * src1") +# high 32-bits of signed integer multiply +binop("imul_high", tint, commutative, + "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)") +# high 32-bits of unsigned integer multiply +binop("umul_high", tuint, commutative, + "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)") + +binop("fdiv", tfloat, "", "src0 / src1") +binop("idiv", tint, "", "src0 / src1") +binop("udiv", tuint, "", "src0 / src1") + +# returns a boolean representing the carry resulting from the addition of +# the two unsigned arguments. + +binop_convert("uadd_carry", tuint, tuint, commutative, "src0 + src1 < src0") + +# returns a boolean representing the borrow resulting from the subtraction +# of the two unsigned arguments. + +binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1") + +binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") +binop("umod", tuint, "", "src1 == 0 ? 
0 : src0 % src1") + +# +# Comparisons +# + + +# these integer-aware comparisons return a boolean (0 or ~0) + +binop_compare("flt", tfloat, "", "src0 < src1") +binop_compare("fge", tfloat, "", "src0 >= src1") +binop_compare("feq", tfloat, commutative, "src0 == src1") +binop_compare("fne", tfloat, commutative, "src0 != src1") +binop_compare("ilt", tint, "", "src0 < src1") +binop_compare("ige", tint, "", "src0 >= src1") +binop_compare("ieq", tint, commutative, "src0 == src1") +binop_compare("ine", tint, commutative, "src0 != src1") +binop_compare("ult", tuint, "", "src0 < src1") +binop_compare("uge", tuint, "", "src0 >= src1") + +# integer-aware GLSL-style comparisons that compare floats and ints + +binop_reduce("ball_fequal", 1, tbool, tfloat, "{src0} == {src1}", + "{src0} && {src1}", "{src}") +binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}", + "{src0} || {src1}", "{src}") +binop_reduce("ball_iequal", 1, tbool, tint, "{src0} == {src1}", + "{src0} && {src1}", "{src}") +binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}", + "{src0} || {src1}", "{src}") + +# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0 + +binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}", + "{src0} && {src1}", "{src} ? 1.0f : 0.0f") +binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}", + "{src0} || {src1}", "{src} ? 1.0f : 0.0f") + +# These comparisons for integer-less hardware return 1.0 and 0.0 for true +# and false respectively + +binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than +binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal +binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal +binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal + + +binop("ishl", tint, "", "src0 << src1") +binop("ishr", tint, "", "src0 >> src1") +binop("ushr", tuint, "", "src0 >> src1") + +# bitwise logic operators +# +# These are also used as boolean and, or, xor for hardware supporting +# integers. + + +binop("iand", tuint, commutative + associative, "src0 & src1") +binop("ior", tuint, commutative + associative, "src0 | src1") +binop("ixor", tuint, commutative + associative, "src0 ^ src1") + + +# floating point logic operators +# +# These use (src != 0.0) for testing the truth of the input, and output 1.0 +# for true and 0.0 for false + +binop("fand", tfloat, commutative, + "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f") +binop("for", tfloat, commutative, + "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f") +binop("fxor", tfloat, commutative, + "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f") + +binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}", + "{src}") + +binop_reduce("fdot_replicated", 4, tfloat, tfloat, + "{src0} * {src1}", "{src0} + {src1}", "{src}") + +opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], "", + "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w") +opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "", + "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w") + +binop("fmin", tfloat, "", "fminf(src0, src1)") +binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1") +binop("umin", tuint, commutative + associative, "src1 > src0 ? src0 : src1") +binop("fmax", tfloat, "", "fmaxf(src0, src1)") +binop("imax", tint, commutative + associative, "src1 > src0 ? 
src1 : src0") +binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0") + +# Saturated vector add for 4 8bit ints. +binop("usadd_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i; +} +""") + +# Saturated vector subtract for 4 8bit ints. +binop("ussub_4x8", tint, "", """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + int src0_chan = (src0 >> i) & 0xff; + int src1_chan = (src1 >> i) & 0xff; + if (src0_chan > src1_chan) + dst |= (src0_chan - src1_chan) << i; +} +""") + +# vector min for 4 8bit ints. +binop("umin_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; +} +""") + +# vector max for 4 8bit ints. +binop("umax_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; +} +""") + +# unorm multiply: (a * b) / 255. +binop("umul_unorm_4x8", tint, commutative + associative, """ +dst = 0; +for (int i = 0; i < 32; i += 8) { + int src0_chan = (src0 >> i) & 0xff; + int src1_chan = (src1 >> i) & 0xff; + dst |= ((src0_chan * src1_chan) / 255) << i; +} +""") + +binop("fpow", tfloat, "", "powf(src0, src1)") + +binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat, + "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)") + +# bfm implements the behavior of the first operation of the SM5 "bfi" assembly +# and that of the "bfi1" i965 instruction. That is, it has undefined behavior +# if either of its arguments are 32. +binop_convert("bfm", tuint, tint, "", """ +int bits = src0, offset = src1; +if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32) + dst = 0; /* undefined */ +else + dst = ((1u << bits) - 1) << offset; +""") + +opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """ +dst = ldexpf(src0, src1); +/* flush denormals to zero. */ +if (!isnormal(dst)) + dst = copysignf(0.0f, src0); +""") + +# Combines the first component of each input to make a 2-component vector. + +binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """ +dst.x = src0.x; +dst.y = src1.x; +""") + +def triop(name, ty, const_expr): + opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr) +def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr): + opcode(name, output_size, tuint, + [src1_size, src2_size, src3_size], + [tuint, tuint, tuint], "", const_expr) + +triop("ffma", tfloat, "src0 * src1 + src2") + +triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2") + +# Conditional Select +# +# A vector conditional select instruction (like ?:, but operating per- +# component on vectors). There are two versions, one for floating point +# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0). + + +triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2") +opcode("bcsel", 0, tuint, [0, 0, 0], + [tbool, tuint, tuint], "", "src0 ? 
src1 : src2") + +# SM5 bfi assembly +triop("bfi", tuint, """ +unsigned mask = src0, insert = src1, base = src2; +if (mask == 0) { + dst = base; +} else { + unsigned tmp = mask; + while (!(tmp & 1)) { + tmp >>= 1; + insert <<= 1; + } + dst = (base & ~mask) | (insert & mask); +} +""") + +# SM5 ubfe/ibfe assembly +opcode("ubfe", 0, tuint, + [0, 0, 0], [tuint, tint, tint], "", """ +unsigned base = src0; +int offset = src1, bits = src2; +if (bits == 0) { + dst = 0; +} else if (bits < 0 || offset < 0) { + dst = 0; /* undefined */ +} else if (offset + bits < 32) { + dst = (base << (32 - bits - offset)) >> (32 - bits); +} else { + dst = base >> offset; +} +""") +opcode("ibfe", 0, tint, + [0, 0, 0], [tint, tint, tint], "", """ +int base = src0; +int offset = src1, bits = src2; +if (bits == 0) { + dst = 0; +} else if (bits < 0 || offset < 0) { + dst = 0; /* undefined */ +} else if (offset + bits < 32) { + dst = (base << (32 - bits - offset)) >> (32 - bits); +} else { + dst = base >> offset; +} +""") + +# GLSL bitfieldExtract() +opcode("ubitfield_extract", 0, tuint, + [0, 0, 0], [tuint, tint, tint], "", """ +unsigned base = src0; +int offset = src1, bits = src2; +if (bits == 0) { + dst = 0; +} else if (bits < 0 || offset < 0 || offset + bits > 32) { + dst = 0; /* undefined per the spec */ +} else { + dst = (base >> offset) & ((1ull << bits) - 1); +} +""") +opcode("ibitfield_extract", 0, tint, + [0, 0, 0], [tint, tint, tint], "", """ +int base = src0; +int offset = src1, bits = src2; +if (bits == 0) { + dst = 0; +} else if (offset < 0 || bits < 0 || offset + bits > 32) { + dst = 0; +} else { + dst = (base << (32 - offset - bits)) >> offset; /* use sign-extending shift */ +} +""") + +# Combines the first component of each input to make a 3-component vector. + +triop_horiz("vec3", 3, 1, 1, 1, """ +dst.x = src0.x; +dst.y = src1.x; +dst.z = src2.x; +""") + +def quadop_horiz(name, output_size, src1_size, src2_size, src3_size, + src4_size, const_expr): + opcode(name, output_size, tuint, + [src1_size, src2_size, src3_size, src4_size], + [tuint, tuint, tuint, tuint], + "", const_expr) + +opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0], + [tuint, tuint, tint, tint], "", """ +unsigned base = src0, insert = src1; +int offset = src2, bits = src3; +if (bits == 0) { + dst = 0; +} else if (offset < 0 || bits < 0 || bits + offset > 32) { + dst = 0; +} else { + unsigned mask = ((1ull << bits) - 1) << offset; + dst = (base & ~mask) | ((insert << bits) & mask); +} +""") + +quadop_horiz("vec4", 4, 1, 1, 1, 1, """ +dst.x = src0.x; +dst.y = src1.x; +dst.z = src2.x; +dst.w = src3.x; +""") + + diff --git a/src/compiler/nir/nir_opcodes_c.py b/src/compiler/nir/nir_opcodes_c.py new file mode 100644 index 00000000000..7049c5be676 --- /dev/null +++ b/src/compiler/nir/nir_opcodes_c.py @@ -0,0 +1,55 @@ +#! /usr/bin/env python +# +# Copyright (C) 2014 Connor Abbott +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. 
+# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. +# +# Authors: +# Connor Abbott (cwabbott0@gmail.com) + +from nir_opcodes import opcodes +from mako.template import Template + +template = Template(""" +#include "nir.h" + +const nir_op_info nir_op_infos[nir_num_opcodes] = { +% for name, opcode in sorted(opcodes.iteritems()): +{ + .name = "${name}", + .num_inputs = ${opcode.num_inputs}, + .output_size = ${opcode.output_size}, + .output_type = ${"nir_type_" + opcode.output_type}, + .input_sizes = { + ${ ", ".join(str(size) for size in opcode.input_sizes) } + }, + .input_types = { + ${ ", ".join("nir_type_" + type for type in opcode.input_types) } + }, + .algebraic_properties = + ${ "0" if opcode.algebraic_properties == "" else " | ".join( + "NIR_OP_IS_" + prop.upper() for prop in + opcode.algebraic_properties.strip().split(" ")) } +}, +% endfor +}; +""") + +print template.render(opcodes=opcodes) diff --git a/src/compiler/nir/nir_opcodes_h.py b/src/compiler/nir/nir_opcodes_h.py new file mode 100644 index 00000000000..be15a96d236 --- /dev/null +++ b/src/compiler/nir/nir_opcodes_h.py @@ -0,0 +1,47 @@ +#! /usr/bin/env python + +template = """\ +/* Copyright (C) 2014 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + */ + +#ifndef _NIR_OPCODES_ +#define _NIR_OPCODES_ + +<% opcode_names = sorted(opcodes.iterkeys()) %> + +typedef enum { +% for name in opcode_names: + nir_op_${name}, +% endfor + nir_last_opcode = nir_op_${opcode_names[-1]}, + nir_num_opcodes = nir_last_opcode + 1 +} nir_op; + +#endif /* _NIR_OPCODES_ */""" + +from nir_opcodes import opcodes +from mako.template import Template + +print Template(template).render(opcodes=opcodes) diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py new file mode 100644 index 00000000000..7745b76f7ce --- /dev/null +++ b/src/compiler/nir/nir_opt_algebraic.py @@ -0,0 +1,285 @@ +#! 
/usr/bin/env python
+#
+# Copyright (C) 2014 Intel Corporation
+#
+# Permission is hereby granted, free of charge, to any person obtaining a
+# copy of this software and associated documentation files (the "Software"),
+# to deal in the Software without restriction, including without limitation
+# the rights to use, copy, modify, merge, publish, distribute, sublicense,
+# and/or sell copies of the Software, and to permit persons to whom the
+# Software is furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice (including the next
+# paragraph) shall be included in all copies or substantial portions of the
+# Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+# IN THE SOFTWARE.
+#
+# Authors:
+#    Jason Ekstrand (jason@jlekstrand.net)
+
+import nir_algebraic
+
+# Convenience variables
+a = 'a'
+b = 'b'
+c = 'c'
+d = 'd'
+
+# Written in the form (<search>, <replace>) where <search> is an expression
+# and <replace> is either an expression or a value.  An expression is
+# defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>)
+# where each source is either an expression or a value.  A value can be
+# either a numeric constant or a string representing a variable name.
+#
+# Variable names are specified as "[#]name[@type]" where "#" indicates that
+# the given variable will only match constants and the type indicates that
+# the given variable will only match values from ALU instructions with the
+# given output type.
+#
+# For constants, you have to be careful to make sure that it is the right
+# type because python is unaware of the source and destination types of the
+# opcodes.
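+#
+# As a small worked example of this syntax: the rule
+#
+#    (('fadd', a, 0.0), a)
+#
+# matches an fadd whose second source is the constant 0.0 and replaces the
+# whole expression with its first source.  A variable written '#b' would
+# additionally require the matched operand to be a constant, and 'a@bool'
+# restricts matches to values produced by boolean-typed ALU instructions.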
+ +optimizations = [ + (('fneg', ('fneg', a)), a), + (('ineg', ('ineg', a)), a), + (('fabs', ('fabs', a)), ('fabs', a)), + (('fabs', ('fneg', a)), ('fabs', a)), + (('iabs', ('iabs', a)), ('iabs', a)), + (('iabs', ('ineg', a)), ('iabs', a)), + (('fadd', a, 0.0), a), + (('iadd', a, 0), a), + (('usadd_4x8', a, 0), a), + (('usadd_4x8', a, ~0), ~0), + (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), + (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))), + (('fadd', ('fneg', a), a), 0.0), + (('iadd', ('ineg', a), a), 0), + (('iadd', ('ineg', a), ('iadd', a, b)), b), + (('iadd', a, ('iadd', ('ineg', a), b)), b), + (('fadd', ('fneg', a), ('fadd', a, b)), b), + (('fadd', a, ('fadd', ('fneg', a), b)), b), + (('fmul', a, 0.0), 0.0), + (('imul', a, 0), 0), + (('umul_unorm_4x8', a, 0), 0), + (('umul_unorm_4x8', a, ~0), a), + (('fmul', a, 1.0), a), + (('imul', a, 1), a), + (('fmul', a, -1.0), ('fneg', a)), + (('imul', a, -1), ('ineg', a)), + (('ffma', 0.0, a, b), b), + (('ffma', a, 0.0, b), b), + (('ffma', a, b, 0.0), ('fmul', a, b)), + (('ffma', a, 1.0, b), ('fadd', a, b)), + (('ffma', 1.0, a, b), ('fadd', a, b)), + (('flrp', a, b, 0.0), a), + (('flrp', a, b, 1.0), b), + (('flrp', a, a, b), a), + (('flrp', 0.0, a, b), ('fmul', a, b)), + (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'), + (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'), + (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'), + (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'), + (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), + (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'), + # Comparison simplifications + (('inot', ('flt', a, b)), ('fge', a, b)), + (('inot', ('fge', a, b)), ('flt', a, b)), + (('inot', ('feq', a, b)), ('fne', a, b)), + (('inot', ('fne', a, b)), ('feq', a, b)), + (('inot', ('ilt', a, b)), ('ige', a, b)), + (('inot', ('ige', a, b)), ('ilt', a, b)), + (('inot', ('ieq', a, b)), ('ine', a, b)), + (('inot', ('ine', a, b)), ('ieq', a, b)), + (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)), + (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)), + (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)), + (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)), + (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)), + (('fmin', a, a), a), + (('fmax', a, a), a), + (('imin', a, a), a), + (('imax', a, a), a), + (('umin', a, a), a), + (('umax', a, a), a), + (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'), + (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'), + (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), + (('fsat', ('fsat', a)), ('fsat', a)), + (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)), + (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))), + (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)), + (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))), + (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)), + (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'), + (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'), + (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'), + (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'), + (('fne', ('fneg', a), a), 
('fne', a, 0.0)), + (('feq', ('fneg', a), a), ('feq', a, 0.0)), + # Emulating booleans + (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))), + (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))), + (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), + (('iand', 'a@bool', 1.0), ('b2f', a)), + (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. + (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. + # Comparison with the same args. Note that these are not done for + # the float versions because NaN always returns false on float + # inequalities. + (('ilt', a, a), False), + (('ige', a, a), True), + (('ieq', a, a), True), + (('ine', a, a), False), + (('ult', a, a), False), + (('uge', a, a), True), + # Logical and bit operations + (('fand', a, 0.0), 0.0), + (('iand', a, a), a), + (('iand', a, ~0), a), + (('iand', a, 0), 0), + (('ior', a, a), a), + (('ior', a, 0), a), + (('fxor', a, a), 0.0), + (('ixor', a, a), 0), + (('inot', ('inot', a)), a), + # DeMorgan's Laws + (('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))), + (('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))), + # Shift optimizations + (('ishl', 0, a), 0), + (('ishl', a, 0), a), + (('ishr', 0, a), 0), + (('ishr', a, 0), a), + (('ushr', 0, a), 0), + (('ushr', a, 0), a), + # Exponential/logarithmic identities + (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a + (('flog2', ('fexp2', a)), a), # lg2(2^a) = a + (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b) + (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b + (('fpow', a, 1.0), a), + (('fpow', a, 2.0), ('fmul', a, a)), + (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), + (('fpow', 2.0, a), ('fexp2', a)), + (('fpow', ('fpow', a, 2.2), 0.454545), a), + (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)), + (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), + (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), + (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), + (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))), + (('flog2', ('frcp', a)), ('fneg', ('flog2', a))), + (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), + (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), + (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), + (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), + (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), + # Division and reciprocal + (('fdiv', 1.0, a), ('frcp', a)), + (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), + (('frcp', ('frcp', a)), a), + (('frcp', ('fsqrt', a)), ('frsq', a)), + (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'), + (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), + # Boolean simplifications + (('ieq', 'a@bool', True), a), + (('ine', 'a@bool', True), ('inot', a)), + (('ine', 'a@bool', False), a), + (('ieq', 'a@bool', False), ('inot', 'a')), + (('bcsel', a, True, False), ('ine', a, 0)), + (('bcsel', a, False, True), ('ieq', a, 0)), + (('bcsel', True, b, c), b), + (('bcsel', False, b, c), c), + # The result of this should be hit by constant propagation and, in the + # next round of opt_algebraic, get picked up by one of the above two. 
+ (('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)), + + (('bcsel', a, b, b), b), + (('fcsel', a, b, b), b), + + # Conversions + (('i2b', ('b2i', a)), a), + (('f2i', ('ftrunc', a)), ('f2i', a)), + (('f2u', ('ftrunc', a)), ('f2u', a)), + + # Subtracts + (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), + (('isub', a, ('isub', 0, b)), ('iadd', a, b)), + (('ussub_4x8', a, 0), a), + (('ussub_4x8', a, ~0), 0), + (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'), + (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'), + (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), + (('ineg', a), ('isub', 0, a), 'options->lower_negate'), + (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)), + (('iadd', a, ('isub', 0, b)), ('isub', a, b)), + (('fabs', ('fsub', 0.0, a)), ('fabs', a)), + (('iabs', ('isub', 0, a)), ('iabs', a)), + + # Misc. lowering + (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'), + (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'), + (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'), + + (('bitfield_insert', 'base', 'insert', 'offset', 'bits'), + ('bcsel', ('ilt', 31, 'bits'), 'insert', + ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')), + 'options->lower_bitfield_insert'), + + (('ibitfield_extract', 'value', 'offset', 'bits'), + ('bcsel', ('ilt', 31, 'bits'), 'value', + ('ibfe', 'value', 'offset', 'bits')), + 'options->lower_bitfield_extract'), + + (('ubitfield_extract', 'value', 'offset', 'bits'), + ('bcsel', ('ult', 31, 'bits'), 'value', + ('ubfe', 'value', 'offset', 'bits')), + 'options->lower_bitfield_extract'), +] + +# Add optimizations to handle the case where the result of a ternary is +# compared to a constant. This way we can take things like +# +# (a ? 0 : 1) > 0 +# +# and turn it into +# +# a ? (0 > 0) : (1 > 0) +# +# which constant folding will eat for lunch. The resulting ternary will +# further get cleaned up by the boolean reductions above and we will be +# left with just the original variable "a". +for op in ['flt', 'fge', 'feq', 'fne', + 'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']: + optimizations += [ + ((op, ('bcsel', 'a', '#b', '#c'), '#d'), + ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))), + ((op, '#d', ('bcsel', a, '#b', '#c')), + ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))), + ] + +# This section contains "late" optimizations that should be run after the +# regular optimizations have finished. Optimizations should go here if +# they help code generation but do not necessarily produce code that is +# more easily optimizable. 
+late_optimizations = [
+   (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
+   (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+   (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+   (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+   (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
+   (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
+   (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
+   (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'),
+]
+
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render()
+print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late",
+                                  late_optimizations).render()
diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
new file mode 100644
index 00000000000..28a73f86f95
--- /dev/null
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir_constant_expressions.h"
+#include <math.h>
+
+/*
+ * Implements SSA-based constant folding.
+ */
+
+struct constant_fold_state {
+   void *mem_ctx;
+   nir_function_impl *impl;
+   bool progress;
+};
+
+static bool
+constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
+{
+   nir_const_value src[4];
+
+   if (!instr->dest.dest.is_ssa)
+      return false;
+
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      if (!instr->src[i].src.is_ssa)
+         return false;
+
+      nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;
+
+      if (src_instr->type != nir_instr_type_load_const)
+         return false;
+      nir_load_const_instr* load_const = nir_instr_as_load_const(src_instr);
+
+      for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i);
+           j++) {
+         src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]];
+      }
+
+      /* We shouldn't have any source modifiers in the optimization loop. */
+      assert(!instr->src[i].abs && !instr->src[i].negate);
+   }
+
+   /* We shouldn't have any saturate modifiers in the optimization loop.
*/ + assert(!instr->dest.saturate); + + nir_const_value dest = + nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components, + src); + + nir_load_const_instr *new_instr = + nir_load_const_instr_create(mem_ctx, + instr->dest.dest.ssa.num_components); + + new_instr->value = dest; + + nir_instr_insert_before(&instr->instr, &new_instr->instr); + + nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, + nir_src_for_ssa(&new_instr->def)); + + nir_instr_remove(&instr->instr); + ralloc_free(instr); + + return true; +} + +static bool +constant_fold_deref(nir_instr *instr, nir_deref_var *deref) +{ + bool progress = false; + + for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { + if (tail->deref_type != nir_deref_type_array) + continue; + + nir_deref_array *arr = nir_deref_as_array(tail); + + if (arr->deref_array_type == nir_deref_array_type_indirect && + arr->indirect.is_ssa && + arr->indirect.ssa->parent_instr->type == nir_instr_type_load_const) { + nir_load_const_instr *indirect = + nir_instr_as_load_const(arr->indirect.ssa->parent_instr); + + arr->base_offset += indirect->value.u[0]; + + /* Clear out the source */ + nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL)); + + arr->deref_array_type = nir_deref_array_type_direct; + + progress = true; + } + } + + return progress; +} + +static bool +constant_fold_intrinsic_instr(nir_intrinsic_instr *instr) +{ + bool progress = false; + + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) { + progress |= constant_fold_deref(&instr->instr, instr->variables[i]); + } + + return progress; +} + +static bool +constant_fold_tex_instr(nir_tex_instr *instr) +{ + if (instr->sampler) + return constant_fold_deref(&instr->instr, instr->sampler); + else + return false; +} + +static bool +constant_fold_block(nir_block *block, void *void_state) +{ + struct constant_fold_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + switch (instr->type) { + case nir_instr_type_alu: + state->progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), + state->mem_ctx); + break; + case nir_instr_type_intrinsic: + state->progress |= + constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr)); + break; + case nir_instr_type_tex: + state->progress |= constant_fold_tex_instr(nir_instr_as_tex(instr)); + break; + default: + /* Don't know how to constant fold */ + break; + } + } + + return true; +} + +static bool +nir_opt_constant_folding_impl(nir_function_impl *impl) +{ + struct constant_fold_state state; + + state.mem_ctx = ralloc_parent(impl); + state.impl = impl; + state.progress = false; + + nir_foreach_block(impl, constant_fold_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + return state.progress; +} + +bool +nir_opt_constant_folding(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress |= nir_opt_constant_folding_impl(function->impl); + } + + return progress; +} diff --git a/src/compiler/nir/nir_opt_copy_propagate.c b/src/compiler/nir/nir_opt_copy_propagate.c new file mode 100644 index 00000000000..d99f78ddb36 --- /dev/null +++ b/src/compiler/nir/nir_opt_copy_propagate.c @@ -0,0 +1,290 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the 
Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <main/imports.h>
+
+/**
+ * SSA-based copy propagation
+ */
+
+static bool is_move(nir_alu_instr *instr)
+{
+   if (instr->op != nir_op_fmov &&
+       instr->op != nir_op_imov)
+      return false;
+
+   if (instr->dest.saturate)
+      return false;
+
+   /* we handle modifiers in a separate pass */
+
+   if (instr->src[0].abs || instr->src[0].negate)
+      return false;
+
+   if (!instr->src[0].src.is_ssa)
+      return false;
+
+   return true;
+}
+
+static bool is_vec(nir_alu_instr *instr)
+{
+   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+      if (!instr->src[i].src.is_ssa)
+         return false;
+
+      /* we handle modifiers in a separate pass */
+      if (instr->src[i].abs || instr->src[i].negate)
+         return false;
+   }
+
+   return instr->op == nir_op_vec2 ||
+          instr->op == nir_op_vec3 ||
+          instr->op == nir_op_vec4;
+}
+
+static bool
+is_swizzleless_move(nir_alu_instr *instr)
+{
+   if (is_move(instr)) {
+      for (unsigned i = 0; i < 4; i++) {
+         if (!((instr->dest.write_mask >> i) & 1))
+            break;
+         if (instr->src[0].swizzle[i] != i)
+            return false;
+      }
+      return true;
+   } else if (is_vec(instr)) {
+      nir_ssa_def *def = NULL;
+      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
+         if (instr->src[i].swizzle[0] != i)
+            return false;
+
+         if (def == NULL) {
+            def = instr->src[i].src.ssa;
+         } else if (instr->src[i].src.ssa != def) {
+            return false;
+         }
+      }
+      return true;
+   } else {
+      return false;
+   }
+}
+
+static bool
+copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
+{
+   if (!src->is_ssa) {
+      if (src->reg.indirect)
+         return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
+      return false;
+   }
+
+   nir_instr *src_instr = src->ssa->parent_instr;
+   if (src_instr->type != nir_instr_type_alu)
+      return false;
+
+   nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
+   if (!is_swizzleless_move(alu_instr))
+      return false;
+
+   /* Don't let copy propagation land us with a phi that has more
+    * components in its source than it has in its destination. That badly
+    * messes up out-of-ssa.
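+    *
+    * Schematically (a hypothetical example, in NIR-style notation):
+    *
+    *    vec4 ssa_1 = ...
+    *    vec1 ssa_2 = imov ssa_1      (swizzleless: reads only channel 0)
+    *    vec1 ssa_3 = phi block_1: ssa_2
+    *
+    * Rewriting the phi source from ssa_2 to the vec4 ssa_1 would leave
+    * the phi with more source components than its vec1 destination.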
+ */ + if (parent_instr && parent_instr->type == nir_instr_type_phi) { + nir_phi_instr *phi = nir_instr_as_phi(parent_instr); + assert(phi->dest.is_ssa); + if (phi->dest.ssa.num_components != + alu_instr->src[0].src.ssa->num_components) + return false; + } + + if (parent_instr) { + nir_instr_rewrite_src(parent_instr, src, + nir_src_for_ssa(alu_instr->src[0].src.ssa)); + } else { + assert(src == &parent_if->condition); + nir_if_rewrite_condition(parent_if, + nir_src_for_ssa(alu_instr->src[0].src.ssa)); + } + + return true; +} + +static bool +copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) +{ + nir_alu_src *src = &parent_alu_instr->src[index]; + if (!src->src.is_ssa) { + if (src->src.reg.indirect) + return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr, + NULL); + return false; + } + + nir_instr *src_instr = src->src.ssa->parent_instr; + if (src_instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr); + if (!is_move(alu_instr) && !is_vec(alu_instr)) + return false; + + nir_ssa_def *def; + unsigned new_swizzle[4] = {0, 0, 0, 0}; + + if (alu_instr->op == nir_op_fmov || + alu_instr->op == nir_op_imov) { + for (unsigned i = 0; i < 4; i++) + new_swizzle[i] = alu_instr->src[0].swizzle[src->swizzle[i]]; + def = alu_instr->src[0].src.ssa; + } else { + def = NULL; + + for (unsigned i = 0; i < 4; i++) { + if (!nir_alu_instr_channel_used(parent_alu_instr, index, i)) + continue; + + nir_ssa_def *new_def = alu_instr->src[src->swizzle[i]].src.ssa; + if (def == NULL) + def = new_def; + else { + if (def != new_def) + return false; + } + new_swizzle[i] = alu_instr->src[src->swizzle[i]].swizzle[0]; + } + } + + for (unsigned i = 0; i < 4; i++) + src->swizzle[i] = new_swizzle[i]; + + nir_instr_rewrite_src(&parent_alu_instr->instr, &src->src, + nir_src_for_ssa(def)); + + return true; +} + +typedef struct { + nir_instr *parent_instr; + bool progress; +} copy_prop_state; + +static bool +copy_prop_src_cb(nir_src *src, void *_state) +{ + copy_prop_state *state = (copy_prop_state *) _state; + while (copy_prop_src(src, state->parent_instr, NULL)) + state->progress = true; + + return true; +} + +static bool +copy_prop_instr(nir_instr *instr) +{ + if (instr->type == nir_instr_type_alu) { + nir_alu_instr *alu_instr = nir_instr_as_alu(instr); + bool progress = false; + + for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) + while (copy_prop_alu_src(alu_instr, i)) + progress = true; + + if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect) + while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL)) + progress = true; + + return progress; + } + + copy_prop_state state; + state.parent_instr = instr; + state.progress = false; + nir_foreach_src(instr, copy_prop_src_cb, &state); + + return state.progress; +} + +static bool +copy_prop_if(nir_if *if_stmt) +{ + return copy_prop_src(&if_stmt->condition, NULL, if_stmt); +} + +static bool +copy_prop_block(nir_block *block, void *_state) +{ + bool *progress = (bool *) _state; + + nir_foreach_instr(block, instr) { + if (copy_prop_instr(instr)) + *progress = true; + } + + if (block->cf_node.node.next != NULL && /* check that we aren't the end node */ + !nir_cf_node_is_last(&block->cf_node) && + nir_cf_node_next(&block->cf_node)->type == nir_cf_node_if) { + nir_if *if_stmt = nir_cf_node_as_if(nir_cf_node_next(&block->cf_node)); + if (copy_prop_if(if_stmt)) + *progress = true; + } + + return true; +} + +static bool +nir_copy_prop_impl(nir_function_impl 
*impl) +{ + bool progress = false; + + nir_foreach_block(impl, copy_prop_block, &progress); + + if (progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + return progress; +} + +bool +nir_copy_prop(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl && nir_copy_prop_impl(function->impl)) + progress = true; + } + + return progress; +} diff --git a/src/compiler/nir/nir_opt_cse.c b/src/compiler/nir/nir_opt_cse.c new file mode 100644 index 00000000000..364fb023dce --- /dev/null +++ b/src/compiler/nir/nir_opt_cse.c @@ -0,0 +1,93 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir_instr_set.h" + +/* + * Implements common subexpression elimination + */ + +/* + * Visits and CSE's the given block and all its descendants in the dominance + * tree recursively. Note that the instr_set is guaranteed to only ever + * contain instructions that dominate the current block. 
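+ *
+ * For example (schematically):
+ *
+ *    vec1 ssa_2 = fadd ssa_0, ssa_1
+ *    vec1 ssa_3 = fadd ssa_0, ssa_1
+ *
+ * The second fadd matches an instruction already in the set, so its uses
+ * are rewritten to point at ssa_2 and it is removed.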
+ */ + +static bool +cse_block(nir_block *block, struct set *instr_set) +{ + bool progress = false; + + nir_foreach_instr_safe(block, instr) { + if (nir_instr_set_add_or_rewrite(instr_set, instr)) { + progress = true; + nir_instr_remove(instr); + } + } + + for (unsigned i = 0; i < block->num_dom_children; i++) { + nir_block *child = block->dom_children[i]; + progress |= cse_block(child, instr_set); + } + + nir_foreach_instr(block, instr) + nir_instr_set_remove(instr_set, instr); + + return progress; +} + +static bool +nir_opt_cse_impl(nir_function_impl *impl) +{ + struct set *instr_set = nir_instr_set_create(NULL); + + nir_metadata_require(impl, nir_metadata_dominance); + + bool progress = cse_block(nir_start_block(impl), instr_set); + + if (progress) + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + nir_instr_set_destroy(instr_set); + return progress; +} + +bool +nir_opt_cse(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress |= nir_opt_cse_impl(function->impl); + } + + return progress; +} + diff --git a/src/compiler/nir/nir_opt_dce.c b/src/compiler/nir/nir_opt_dce.c new file mode 100644 index 00000000000..32436c18b60 --- /dev/null +++ b/src/compiler/nir/nir_opt_dce.c @@ -0,0 +1,183 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/* SSA-based mark-and-sweep dead code elimination */
+
+typedef struct {
+   struct exec_node node;
+   nir_instr *instr;
+} worklist_elem;
+
+static void
+worklist_push(struct exec_list *worklist, nir_instr *instr)
+{
+   worklist_elem *elem = ralloc(worklist, worklist_elem);
+   elem->instr = instr;
+   instr->pass_flags = 1;
+   exec_list_push_tail(worklist, &elem->node);
+}
+
+static nir_instr *
+worklist_pop(struct exec_list *worklist)
+{
+   struct exec_node *node = exec_list_pop_head(worklist);
+   worklist_elem *elem = exec_node_data(worklist_elem, node, node);
+   return elem->instr;
+}
+
+static bool
+mark_live_cb(nir_src *src, void *_state)
+{
+   struct exec_list *worklist = (struct exec_list *) _state;
+
+   if (src->is_ssa && !src->ssa->parent_instr->pass_flags) {
+      worklist_push(worklist, src->ssa->parent_instr);
+   }
+
+   return true;
+}
+
+static void
+init_instr(nir_instr *instr, struct exec_list *worklist)
+{
+   nir_alu_instr *alu_instr;
+   nir_intrinsic_instr *intrin_instr;
+   nir_tex_instr *tex_instr;
+
+   /* We use the pass_flags to store the live/dead information. In DCE, we
+    * just treat it as a zero/non-zero boolean for whether or not the
+    * instruction is live.
+    */
+   instr->pass_flags = 0;
+
+   switch (instr->type) {
+   case nir_instr_type_call:
+   case nir_instr_type_jump:
+      worklist_push(worklist, instr);
+      break;
+
+   case nir_instr_type_alu:
+      alu_instr = nir_instr_as_alu(instr);
+      if (!alu_instr->dest.dest.is_ssa)
+         worklist_push(worklist, instr);
+      break;
+
+   case nir_instr_type_intrinsic:
+      intrin_instr = nir_instr_as_intrinsic(instr);
+      if (nir_intrinsic_infos[intrin_instr->intrinsic].flags &
+          NIR_INTRINSIC_CAN_ELIMINATE) {
+         if (nir_intrinsic_infos[intrin_instr->intrinsic].has_dest &&
+             !intrin_instr->dest.is_ssa) {
+            worklist_push(worklist, instr);
+         }
+      } else {
+         worklist_push(worklist, instr);
+      }
+      break;
+
+   case nir_instr_type_tex:
+      tex_instr = nir_instr_as_tex(instr);
+      if (!tex_instr->dest.is_ssa)
+         worklist_push(worklist, instr);
+      break;
+
+   default:
+      break;
+   }
+}
+
+static bool
+init_block_cb(nir_block *block, void *_state)
+{
+   struct exec_list *worklist = (struct exec_list *) _state;
+
+   nir_foreach_instr(block, instr)
+      init_instr(instr, worklist);
+
+   nir_if *following_if = nir_block_get_following_if(block);
+   if (following_if) {
+      if (following_if->condition.is_ssa &&
+          !following_if->condition.ssa->parent_instr->pass_flags)
+         worklist_push(worklist, following_if->condition.ssa->parent_instr);
+   }
+
+   return true;
+}
+
+static bool
+delete_block_cb(nir_block *block, void *_state)
+{
+   bool *progress = (bool *) _state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (!instr->pass_flags) {
+         nir_instr_remove(instr);
+         *progress = true;
+      }
+   }
+
+   return true;
+}
+
+static bool
+nir_opt_dce_impl(nir_function_impl *impl)
+{
+   struct exec_list *worklist = ralloc(NULL, struct exec_list);
+   exec_list_make_empty(worklist);
+
+   nir_foreach_block(impl, init_block_cb, worklist);
+
+   while (!exec_list_is_empty(worklist)) {
+      nir_instr *instr = worklist_pop(worklist);
+      nir_foreach_src(instr, mark_live_cb, worklist);
+   }
+
+   ralloc_free(worklist);
+
+   bool progress = false;
+   nir_foreach_block(impl, delete_block_cb, &progress);
+
+   if (progress)
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+
+   return progress;
+}
+
+bool
+nir_opt_dce(nir_shader *shader)
+{
+   bool progress = false;
+   nir_foreach_function(shader, function) {
+      if (function->impl &&
nir_opt_dce_impl(function->impl))
+         progress = true;
+   }
+
+   return progress;
+}
diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c
new file mode 100644
index 00000000000..4cc6798702b
--- /dev/null
+++ b/src/compiler/nir/nir_opt_dead_cf.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright © 2014 Connor Abbott
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "nir_control_flow.h"
+
+/*
+ * This file implements an optimization that deletes statically
+ * unreachable/dead code. In NIR, one way this can happen is if an if
+ * statement has a constant condition:
+ *
+ * if (true) {
+ *    ...
+ * }
+ *
+ * We delete the if statement and paste the contents of the always-executed
+ * branch into the surrounding control flow, possibly removing more code if
+ * the branch had a jump at the end.
+ *
+ * Another way is that control flow can end in a jump so that code after it
+ * never gets executed. In particular, this can happen after optimizing
+ * something like:
+ *
+ * if (true) {
+ *    ...
+ *    break;
+ * }
+ * ...
+ *
+ * We also consider the case where both branches of an if end in a jump, e.g.:
+ *
+ * if (...) {
+ *    break;
+ * } else {
+ *    continue;
+ * }
+ * ...
+ *
+ * Finally, we also handle removing useless loops, i.e. loops with no side
+ * effects and without any definitions that are used elsewhere. This case is a
+ * little different from the first two in that the code is actually run (it
+ * just never does anything), but there are similar issues with needing to
+ * be careful with restarting after deleting the cf_node (see dead_cf_list())
+ * so this is a convenient place to remove them.
+ */
+
+static void
+remove_after_cf_node(nir_cf_node *node)
+{
+   nir_cf_node *end = node;
+   while (!nir_cf_node_is_last(end))
+      end = nir_cf_node_next(end);
+
+   nir_cf_list list;
+   nir_cf_extract(&list, nir_after_cf_node(node), nir_after_cf_node(end));
+   nir_cf_delete(&list);
+}
+
+static void
+opt_constant_if(nir_if *if_stmt, bool condition)
+{
+   /* First, we need to remove any phi nodes after the if by rewriting uses to
+    * point to the correct source.
+    */
+   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node));
+   nir_block *last_block =
+      nir_cf_node_as_block(condition ?
nir_if_last_then_node(if_stmt)
+                           : nir_if_last_else_node(if_stmt));
+
+   nir_foreach_instr_safe(after, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+      nir_ssa_def *def = NULL;
+      nir_foreach_phi_src(phi, phi_src) {
+         if (phi_src->pred != last_block)
+            continue;
+
+         assert(phi_src->src.is_ssa);
+         def = phi_src->src.ssa;
+      }
+
+      assert(def);
+      assert(phi->dest.is_ssa);
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
+      nir_instr_remove(instr);
+   }
+
+   /* The control flow list we're about to paste in may include a jump at the
+    * end, and in that case we have to delete the rest of the control flow
+    * list after the if since it's unreachable and the validator will balk if
+    * we don't.
+    */
+
+   if (!exec_list_is_empty(&last_block->instr_list)) {
+      nir_instr *last_instr = nir_block_last_instr(last_block);
+      if (last_instr->type == nir_instr_type_jump)
+         remove_after_cf_node(&if_stmt->cf_node);
+   }
+
+   /* Finally, actually paste in the then or else branch and delete the if. */
+   struct exec_list *cf_list = condition ? &if_stmt->then_list
+                                         : &if_stmt->else_list;
+
+   nir_cf_list list;
+   nir_cf_extract(&list, nir_before_cf_list(cf_list),
+                  nir_after_cf_list(cf_list));
+   nir_cf_reinsert(&list, nir_after_cf_node(&if_stmt->cf_node));
+   nir_cf_node_remove(&if_stmt->cf_node);
+}
+
+static bool
+block_has_no_side_effects(nir_block *block, void *state)
+{
+   (void) state;
+
+   nir_foreach_instr(block, instr) {
+      if (instr->type == nir_instr_type_call)
+         return false;
+
+      /* Return instructions can cause us to skip over other side-effecting
+       * instructions after the loop, so consider them to have side effects
+       * here.
+       */
+
+      if (instr->type == nir_instr_type_jump &&
+          nir_instr_as_jump(instr)->type == nir_jump_return)
+         return false;
+
+      if (instr->type != nir_instr_type_intrinsic)
+         continue;
+
+      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+      if (!(nir_intrinsic_infos[intrin->intrinsic].flags &
+            NIR_INTRINSIC_CAN_ELIMINATE))
+         return false;
+   }
+
+   return true;
+}
+
+static bool
+def_not_live_out(nir_ssa_def *def, void *state)
+{
+   nir_block *after = state;
+
+   return !BITSET_TEST(after->live_in, def->live_index);
+}
+
+/*
+ * Test if a loop is dead. A loop is dead if:
+ *
+ * 1) It has no side effects (i.e. intrinsics which could possibly affect the
+ * state of the program aside from producing an SSA value, indicated by a lack
+ * of NIR_INTRINSIC_CAN_ELIMINATE).
+ *
+ * 2) It has no phi nodes after it, since those indicate values inside the
+ * loop being used after the loop.
+ *
+ * 3) If there are no phi nodes after the loop, then the only way a value
+ * defined inside the loop can be used outside the loop is if its definition
+ * dominates the block after the loop. If none of the definitions that
+ * dominate the loop exit are used outside the loop, then the loop is dead
+ * and it can be deleted.
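+ *
+ * A hypothetical example of a loop that is dead under these rules:
+ *
+ *    loop {
+ *       vec1 ssa_2 = fadd ssa_0, ssa_1
+ *       ...
+ *    }
+ *
+ * where no phi follows the loop, nothing after the loop reads ssa_2, and
+ * the loop body contains only CAN_ELIMINATE intrinsics.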
+ */ + +static bool +loop_is_dead(nir_loop *loop) +{ + nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); + nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); + + if (!exec_list_is_empty(&after->instr_list) && + nir_block_first_instr(after)->type == nir_instr_type_phi) + return false; + + if (!nir_foreach_block_in_cf_node(&loop->cf_node, block_has_no_side_effects, + NULL)) + return false; + + nir_function_impl *impl = nir_cf_node_get_function(&loop->cf_node); + nir_metadata_require(impl, nir_metadata_live_ssa_defs | + nir_metadata_dominance); + + for (nir_block *cur = after->imm_dom; cur != before; cur = cur->imm_dom) { + nir_foreach_instr(cur, instr) { + if (!nir_foreach_ssa_def(instr, def_not_live_out, after)) + return false; + } + } + + return true; +} + +static bool +dead_cf_block(nir_block *block) +{ + nir_if *following_if = nir_block_get_following_if(block); + if (following_if) { + nir_const_value *const_value = + nir_src_as_const_value(following_if->condition); + + if (!const_value) + return false; + + opt_constant_if(following_if, const_value->u[0] != 0); + return true; + } + + nir_loop *following_loop = nir_block_get_following_loop(block); + if (!following_loop) + return false; + + if (!loop_is_dead(following_loop)) + return false; + + nir_cf_node_remove(&following_loop->cf_node); + return true; +} + +static bool +ends_in_jump(nir_block *block) +{ + if (exec_list_is_empty(&block->instr_list)) + return false; + + nir_instr *instr = nir_block_last_instr(block); + return instr->type == nir_instr_type_jump; +} + +static bool +dead_cf_list(struct exec_list *list, bool *list_ends_in_jump) +{ + bool progress = false; + *list_ends_in_jump = false; + + nir_cf_node *prev = NULL; + + foreach_list_typed(nir_cf_node, cur, node, list) { + switch (cur->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(cur); + if (dead_cf_block(block)) { + /* We just deleted the if or loop after this block, so we may have + * deleted the block before or after it -- which one is an + * implementation detail. Therefore, to recover the place we were + * at, we have to use the previous cf_node. 
+          */
+
+         if (prev) {
+            cur = nir_cf_node_next(prev);
+         } else {
+            cur = exec_node_data(nir_cf_node, exec_list_get_head(list),
+                                 node);
+         }
+
+         block = nir_cf_node_as_block(cur);
+
+         progress = true;
+      }
+
+      if (ends_in_jump(block)) {
+         *list_ends_in_jump = true;
+
+         if (!exec_node_is_tail_sentinel(cur->node.next)) {
+            remove_after_cf_node(cur);
+            return true;
+         }
+      }
+
+      break;
+   }
+
+   case nir_cf_node_if: {
+      nir_if *if_stmt = nir_cf_node_as_if(cur);
+      bool then_ends_in_jump, else_ends_in_jump;
+      progress |= dead_cf_list(&if_stmt->then_list, &then_ends_in_jump);
+      progress |= dead_cf_list(&if_stmt->else_list, &else_ends_in_jump);
+
+      if (then_ends_in_jump && else_ends_in_jump) {
+         *list_ends_in_jump = true;
+         nir_block *next = nir_cf_node_as_block(nir_cf_node_next(cur));
+         if (!exec_list_is_empty(&next->instr_list) ||
+             !exec_node_is_tail_sentinel(next->cf_node.node.next)) {
+            remove_after_cf_node(cur);
+            return true;
+         }
+      }
+
+      break;
+   }
+
+   case nir_cf_node_loop: {
+      nir_loop *loop = nir_cf_node_as_loop(cur);
+      bool dummy;
+      progress |= dead_cf_list(&loop->body, &dummy);
+
+      break;
+   }
+
+   default:
+      unreachable("unknown cf node type");
+   }
+
+   prev = cur;
+   }
+
+   return progress;
+}
+
+static bool
+opt_dead_cf_impl(nir_function_impl *impl)
+{
+   bool dummy;
+   bool progress = dead_cf_list(&impl->body, &dummy);
+
+   if (progress)
+      nir_metadata_preserve(impl, nir_metadata_none);
+
+   return progress;
+}
+
+bool
+nir_opt_dead_cf(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function(shader, function)
+      if (function->impl)
+         progress |= opt_dead_cf_impl(function->impl);
+
+   return progress;
+}
diff --git a/src/compiler/nir/nir_opt_gcm.c b/src/compiler/nir/nir_opt_gcm.c
new file mode 100644
index 00000000000..a8779ce5b84
--- /dev/null
+++ b/src/compiler/nir/nir_opt_gcm.c
@@ -0,0 +1,494 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements Global Code Motion. A description of GCM can be found in
+ * "Global Code Motion / Global Value Numbering" by Cliff Click.
+ * Unfortunately, the algorithm presented in the paper is broken in a
+ * number of ways. The algorithm used here differs substantially from the
+ * one in the paper but it is, in my opinion, much easier to read and
+ * verify correctness.
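+ *
+ * As a schematic example of the effect: an fmul inside a loop whose
+ * sources are defined before the loop is scheduled early (just below its
+ * sources) and then late (at the LCA of its uses, nudged to the lowest
+ * loop depth), so loop-invariant computations end up hoisted out of the
+ * loop body.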
+ */ + +struct gcm_block_info { + /* Number of loops this block is inside */ + unsigned loop_depth; + + /* The last instruction inserted into this block. This is used as we + * traverse the instructions and insert them back into the program to + * put them in the right order. + */ + nir_instr *last_instr; +}; + +/* Flags used in the instr->pass_flags field for various instruction states */ +enum { + GCM_INSTR_PINNED = (1 << 0), + GCM_INSTR_SCHEDULED_EARLY = (1 << 1), + GCM_INSTR_SCHEDULED_LATE = (1 << 2), + GCM_INSTR_PLACED = (1 << 3), +}; + +struct gcm_state { + nir_function_impl *impl; + nir_instr *instr; + + /* The list of non-pinned instructions. As we do the late scheduling, + * we pull non-pinned instructions out of their blocks and place them in + * this list. This saves us from having linked-list problems when we go + * to put instructions back in their blocks. + */ + struct exec_list instrs; + + struct gcm_block_info *blocks; +}; + +/* Recursively walks the CFG and builds the block_info structure */ +static void +gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state, + unsigned loop_depth) +{ + foreach_list_typed(nir_cf_node, node, node, cf_list) { + switch (node->type) { + case nir_cf_node_block: { + nir_block *block = nir_cf_node_as_block(node); + state->blocks[block->index].loop_depth = loop_depth; + break; + } + case nir_cf_node_if: { + nir_if *if_stmt = nir_cf_node_as_if(node); + gcm_build_block_info(&if_stmt->then_list, state, loop_depth); + gcm_build_block_info(&if_stmt->else_list, state, loop_depth); + break; + } + case nir_cf_node_loop: { + nir_loop *loop = nir_cf_node_as_loop(node); + gcm_build_block_info(&loop->body, state, loop_depth + 1); + break; + } + default: + unreachable("Invalid CF node type"); + } + } +} + +/* Walks the instruction list and marks immovable instructions as pinned + * + * This function also serves to initialize the instr->pass_flags field. + * After this is completed, all instructions' pass_flags fields will be set + * to either GCM_INSTR_PINNED or 0. 
+ */
+static bool
+gcm_pin_instructions_block(nir_block *block, void *void_state)
+{
+   struct gcm_state *state = void_state;
+
+   nir_foreach_instr_safe(block, instr) {
+      switch (instr->type) {
+      case nir_instr_type_alu:
+         switch (nir_instr_as_alu(instr)->op) {
+         case nir_op_fddx:
+         case nir_op_fddy:
+         case nir_op_fddx_fine:
+         case nir_op_fddy_fine:
+         case nir_op_fddx_coarse:
+         case nir_op_fddy_coarse:
+            /* These can only go in uniform control flow; pin them for now */
+            instr->pass_flags = GCM_INSTR_PINNED;
+            break;
+
+         default:
+            instr->pass_flags = 0;
+            break;
+         }
+         break;
+
+      case nir_instr_type_tex:
+         switch (nir_instr_as_tex(instr)->op) {
+         case nir_texop_tex:
+         case nir_texop_txb:
+         case nir_texop_lod:
+            /* These take implicit derivatives so they need to be pinned */
+            instr->pass_flags = GCM_INSTR_PINNED;
+            break;
+
+         default:
+            instr->pass_flags = 0;
+            break;
+         }
+         break;
+
+      case nir_instr_type_load_const:
+         instr->pass_flags = 0;
+         break;
+
+      case nir_instr_type_intrinsic: {
+         const nir_intrinsic_info *info =
+            &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
+
+         if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
+             (info->flags & NIR_INTRINSIC_CAN_REORDER)) {
+            instr->pass_flags = 0;
+         } else {
+            instr->pass_flags = GCM_INSTR_PINNED;
+         }
+         break;
+      }
+
+      case nir_instr_type_jump:
+      case nir_instr_type_ssa_undef:
+      case nir_instr_type_phi:
+         instr->pass_flags = GCM_INSTR_PINNED;
+         break;
+
+      default:
+         unreachable("Invalid instruction type in GCM");
+      }
+
+      if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
+         /* If this is an unpinned instruction, go ahead and pull it out of
+          * the program and put it on the instrs list. This has a couple
+          * of benefits. First, it makes the scheduling algorithm more
+          * efficient because we can avoid walking over basic blocks and
+          * pinned instructions. Second, it keeps us from causing linked
+          * list confusion when we're trying to put everything in its
+          * proper place at the end of the pass.
+          *
+          * Note that we don't use nir_instr_remove here because that also
+          * cleans up uses and defs and we want to keep that information.
+          */
+         exec_node_remove(&instr->node);
+         exec_list_push_tail(&state->instrs, &instr->node);
+      }
+   }
+
+   return true;
+}
+
+static void
+gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state);
+
+/** Update an instruction's schedule for the given source
+ *
+ * This function is called iteratively as we walk the sources of an
+ * instruction. It ensures that the given source instruction has been
+ * scheduled and then updates this instruction's block if the source
+ * instruction is lower down the tree.
+ */
+static bool
+gcm_schedule_early_src(nir_src *src, void *void_state)
+{
+   struct gcm_state *state = void_state;
+   nir_instr *instr = state->instr;
+
+   assert(src->is_ssa);
+
+   gcm_schedule_early_instr(src->ssa->parent_instr, void_state);
+
+   /* While the index isn't a proper dominance depth, it does have the
+    * property that if A dominates B then A->index <= B->index. Since we
+    * know that this instruction must have been dominated by all of its
+    * sources at some point (even if it's gone through value-numbering),
+    * all of the sources must lie on the same branch of the dominance tree.
+    * Therefore, we can just compare indices.
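+    *
+    * For example, if this instruction currently sits in the block with
+    * index 2 and one of its sources is defined in the block with index 5,
+    * the source lies deeper in the dominance tree, so this instruction is
+    * moved down into the source's block.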
+    */
+   if (instr->block->index < src->ssa->parent_instr->block->index)
+      instr->block = src->ssa->parent_instr->block;
+
+   /* We need to restore the state instruction because it may have been
+    * changed through the gcm_schedule_early_instr call above. We may
+    * still be iterating through sources, and future calls to
+    * gcm_schedule_early_src for the same instruction will still need it.
+    */
+   state->instr = instr;
+
+   return true;
+}
+
+/** Schedules an instruction early
+ *
+ * This function performs a recursive depth-first search starting at the
+ * given instruction and proceeding through the sources to schedule
+ * instructions as early as they can possibly go in the dominance tree.
+ * The instructions are "scheduled" by updating their instr->block field.
+ */
+static void
+gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state)
+{
+   if (instr->pass_flags & GCM_INSTR_SCHEDULED_EARLY)
+      return;
+
+   instr->pass_flags |= GCM_INSTR_SCHEDULED_EARLY;
+
+   /* Pinned instructions are already scheduled so we don't need to do
+    * anything. Also, bailing here keeps us from ever following the
+    * sources of phi nodes which can be back-edges.
+    */
+   if (instr->pass_flags & GCM_INSTR_PINNED)
+      return;
+
+   /* Start with the instruction at the top. As we iterate over the
+    * sources, it will get moved down as needed.
+    */
+   instr->block = nir_start_block(state->impl);
+   state->instr = instr;
+
+   nir_foreach_src(instr, gcm_schedule_early_src, state);
+}
+
+static void
+gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state);
+
+/** Schedules the instruction associated with the given SSA def late
+ *
+ * This function works by first walking all of the uses of the given SSA
+ * definition, ensuring that they are scheduled, and then computing the LCA
+ * (least common ancestor) of its uses. It then schedules this instruction
+ * as close to the LCA as possible while trying to stay out of loops.
+ */
+static bool
+gcm_schedule_late_def(nir_ssa_def *def, void *void_state)
+{
+   struct gcm_state *state = void_state;
+
+   nir_block *lca = NULL;
+
+   nir_foreach_use(def, use_src) {
+      nir_instr *use_instr = use_src->parent_instr;
+
+      gcm_schedule_late_instr(use_instr, state);
+
+      /* Phi instructions are a bit special. SSA definitions don't have to
+       * dominate the sources of the phi nodes that use them; instead, they
+       * have to dominate the predecessor block corresponding to the phi
+       * source. We handle this by looking through the sources, finding
+       * any that are using this SSA def, and using those blocks instead
+       * of the one the phi lives in.
+       */
+      if (use_instr->type == nir_instr_type_phi) {
+         nir_phi_instr *phi = nir_instr_as_phi(use_instr);
+
+         nir_foreach_phi_src(phi, phi_src) {
+            if (phi_src->src.ssa == def)
+               lca = nir_dominance_lca(lca, phi_src->pred);
+         }
+      } else {
+         lca = nir_dominance_lca(lca, use_instr->block);
+      }
+   }
+
+   nir_foreach_if_use(def, use_src) {
+      nir_if *if_stmt = use_src->parent_if;
+
+      /* For if statements, we consider the block to be the one immediately
+       * preceding the if CF node.
+       */
+      nir_block *pred_block =
+         nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));
+
+      lca = nir_dominance_lca(lca, pred_block);
+   }
+
+   /* Some instructions may never be used. We'll just leave them scheduled
+    * early and let dead code clean them up.
+    */
+   if (lca == NULL)
+      return true;
+
+   /* We now have the LCA of all of the uses. If our invariants hold,
+    * this is dominated by the block that we chose when scheduling early.
+    * We now walk up the dominance tree and pick the lowest block that is
+    * as far outside loops as we can get.
+    */
+   nir_block *best = lca;
+   while (lca != def->parent_instr->block) {
+      assert(lca);
+      if (state->blocks[lca->index].loop_depth <
+          state->blocks[best->index].loop_depth)
+         best = lca;
+      lca = lca->imm_dom;
+   }
+   def->parent_instr->block = best;
+
+   return true;
+}
+
+/** Schedules an instruction late
+ *
+ * This function performs a depth-first search starting at the given
+ * instruction and proceeding through its uses to schedule instructions as
+ * late as they can reasonably go in the dominance tree. The instructions
+ * are "scheduled" by updating their instr->block field.
+ *
+ * The name of this function is actually a bit of a misnomer as it doesn't
+ * schedule them "as late as possible" as the paper implies. Instead, it
+ * first finds the latest possible place it can schedule the instruction and
+ * then possibly schedules it earlier than that. The actual location is as
+ * far down the tree as we can go while trying to stay out of loops.
+ */
+static void
+gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state)
+{
+   if (instr->pass_flags & GCM_INSTR_SCHEDULED_LATE)
+      return;
+
+   instr->pass_flags |= GCM_INSTR_SCHEDULED_LATE;
+
+   /* Pinned instructions are already scheduled so we don't need to do
+    * anything. Also, bailing here keeps us from ever following phi nodes
+    * which can be back-edges.
+    */
+   if (instr->pass_flags & GCM_INSTR_PINNED)
+      return;
+
+   nir_foreach_ssa_def(instr, gcm_schedule_late_def, state);
+}
+
+static void
+gcm_place_instr(nir_instr *instr, struct gcm_state *state);
+
+static bool
+gcm_place_instr_def(nir_ssa_def *def, void *state)
+{
+   nir_foreach_use(def, use_src)
+      gcm_place_instr(use_src->parent_instr, state);
+
+   return false;
+}
+
+/** Places an instruction back into the program
+ *
+ * The earlier passes of GCM simply choose blocks for each instruction and
+ * otherwise leave them alone. This pass actually places the instructions
+ * into their chosen blocks.
+ *
+ * To do so, we use a standard post-order depth-first search linearization
+ * algorithm. We walk over the uses of the given instruction and ensure
+ * that they are placed and then place this instruction. Because we are
+ * working on multiple blocks at a time, we keep track of the last inserted
+ * instruction per-block in the state structure's block_info array. When
+ * we insert an instruction in a block we insert it before the last
+ * instruction inserted in that block rather than the last instruction
+ * inserted globally.
+ */
+static void
+gcm_place_instr(nir_instr *instr, struct gcm_state *state)
+{
+   if (instr->pass_flags & GCM_INSTR_PLACED)
+      return;
+
+   instr->pass_flags |= GCM_INSTR_PLACED;
+
+   /* Phi nodes are our one source of back-edges. Since right now we are
+    * only doing scheduling within blocks, we don't need to worry about
+    * them since they are always at the top. Just skip them completely.
+    */
+   if (instr->type == nir_instr_type_phi) {
+      assert(instr->pass_flags & GCM_INSTR_PINNED);
+      return;
+   }
+
+   nir_foreach_ssa_def(instr, gcm_place_instr_def, state);
+
+   if (instr->pass_flags & GCM_INSTR_PINNED) {
+      /* Pinned instructions have an implicit dependence on the pinned
+       * instructions that come after them in the block. Since the pinned
+       * instructions will naturally "chain" together, we only need to
+       * explicitly visit one of them.
+ */ + for (nir_instr *after = nir_instr_next(instr); + after; + after = nir_instr_next(after)) { + if (after->pass_flags & GCM_INSTR_PINNED) { + gcm_place_instr(after, state); + break; + } + } + } + + struct gcm_block_info *block_info = &state->blocks[instr->block->index]; + if (!(instr->pass_flags & GCM_INSTR_PINNED)) { + exec_node_remove(&instr->node); + + if (block_info->last_instr) { + exec_node_insert_node_before(&block_info->last_instr->node, + &instr->node); + } else { + /* Schedule it at the end of the block */ + nir_instr *jump_instr = nir_block_last_instr(instr->block); + if (jump_instr && jump_instr->type == nir_instr_type_jump) { + exec_node_insert_node_before(&jump_instr->node, &instr->node); + } else { + exec_list_push_tail(&instr->block->instr_list, &instr->node); + } + } + } + + block_info->last_instr = instr; +} + +static void +opt_gcm_impl(nir_function_impl *impl) +{ + struct gcm_state state; + + state.impl = impl; + state.instr = NULL; + exec_list_make_empty(&state.instrs); + state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks); + + nir_metadata_require(impl, nir_metadata_block_index | + nir_metadata_dominance); + + gcm_build_block_info(&impl->body, &state, 0); + nir_foreach_block(impl, gcm_pin_instructions_block, &state); + + foreach_list_typed(nir_instr, instr, node, &state.instrs) + gcm_schedule_early_instr(instr, &state); + + foreach_list_typed(nir_instr, instr, node, &state.instrs) + gcm_schedule_late_instr(instr, &state); + + while (!exec_list_is_empty(&state.instrs)) { + nir_instr *instr = exec_node_data(nir_instr, + state.instrs.tail_pred, node); + gcm_place_instr(instr, &state); + } + + ralloc_free(state.blocks); +} + +void +nir_opt_gcm(nir_shader *shader) +{ + nir_foreach_function(shader, function) { + if (function->impl) + opt_gcm_impl(function->impl); + } +} diff --git a/src/compiler/nir/nir_opt_global_to_local.c b/src/compiler/nir/nir_opt_global_to_local.c new file mode 100644 index 00000000000..bccb45b6237 --- /dev/null +++ b/src/compiler/nir/nir_opt_global_to_local.c @@ -0,0 +1,102 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+static bool
+global_to_local(nir_register *reg)
+{
+   nir_function_impl *impl = NULL;
+
+   assert(reg->is_global);
+
+   nir_foreach_def(reg, def_dest) {
+      nir_instr *instr = def_dest->reg.parent_instr;
+      nir_function_impl *instr_impl =
+         nir_cf_node_get_function(&instr->block->cf_node);
+      if (impl != NULL) {
+         if (impl != instr_impl)
+            return false;
+      } else {
+         impl = instr_impl;
+      }
+   }
+
+   nir_foreach_use(reg, use_src) {
+      nir_instr *instr = use_src->parent_instr;
+      nir_function_impl *instr_impl =
+         nir_cf_node_get_function(&instr->block->cf_node);
+      if (impl != NULL) {
+         if (impl != instr_impl)
+            return false;
+      } else {
+         impl = instr_impl;
+      }
+   }
+
+   nir_foreach_if_use(reg, use_src) {
+      nir_if *if_stmt = use_src->parent_if;
+      nir_function_impl *if_impl = nir_cf_node_get_function(&if_stmt->cf_node);
+      if (impl != NULL) {
+         if (impl != if_impl)
+            return false;
+      } else {
+         impl = if_impl;
+      }
+   }
+
+   if (impl == NULL) {
+      /* this register is never used/defined, delete it */
+      nir_reg_remove(reg);
+      return true;
+   }
+
+   /*
+    * if we've gotten to this point, the register is always used/defined in
+    * the same implementation so we can move it to be local to that
+    * implementation.
+    */
+
+   exec_node_remove(&reg->node);
+   exec_list_push_tail(&impl->registers, &reg->node);
+   reg->index = impl->reg_alloc++;
+   reg->is_global = false;
+   return true;
+}
+
+bool
+nir_opt_global_to_local(nir_shader *shader)
+{
+   bool progress = false;
+
+   foreach_list_typed_safe(nir_register, reg, node, &shader->registers) {
+      if (global_to_local(reg))
+         progress = true;
+   }
+
+   return progress;
+}
diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c
new file mode 100644
index 00000000000..0fc658df861
--- /dev/null
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+#include "nir_control_flow.h"
+
+/*
+ * Implements a small peephole optimization that looks for
+ *
+ * if (cond) {
+ *
+ * } else {
+ *
+ * }
+ * phi
+ * ...
+ * phi
+ *
+ * and replaces it with a series of selects. It can also handle the case
+ * where, instead of being empty, the if may contain some move operations
+ * whose only use is one of the following phi nodes.
This happens all the + * time when the SSA form comes from a conditional assignment with a + * swizzle. + */ + +struct peephole_select_state { + void *mem_ctx; + bool progress; +}; + +static bool +block_check_for_allowed_instrs(nir_block *block) +{ + nir_foreach_instr(block, instr) { + switch (instr->type) { + case nir_instr_type_intrinsic: { + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + switch (intrin->intrinsic) { + case nir_intrinsic_load_var: + switch (intrin->variables[0]->var->data.mode) { + case nir_var_shader_in: + case nir_var_uniform: + break; + + default: + return false; + } + break; + + default: + return false; + } + + break; + } + + case nir_instr_type_load_const: + break; + + case nir_instr_type_alu: { + nir_alu_instr *mov = nir_instr_as_alu(instr); + switch (mov->op) { + case nir_op_fmov: + case nir_op_imov: + case nir_op_fneg: + case nir_op_ineg: + case nir_op_fabs: + case nir_op_iabs: + case nir_op_vec2: + case nir_op_vec3: + case nir_op_vec4: + /* It must be a move-like operation. */ + break; + default: + return false; + } + + /* Can't handle saturate */ + if (mov->dest.saturate) + return false; + + /* It must be SSA */ + if (!mov->dest.dest.is_ssa) + return false; + + /* It cannot have any if-uses */ + if (!list_empty(&mov->dest.dest.ssa.if_uses)) + return false; + + /* The only uses of this definition must be phi's in the successor */ + nir_foreach_use(&mov->dest.dest.ssa, use) { + if (use->parent_instr->type != nir_instr_type_phi || + use->parent_instr->block != block->successors[0]) + return false; + } + break; + } + + default: + return false; + } + } + + return true; +} + +static bool +nir_opt_peephole_select_block(nir_block *block, void *void_state) +{ + struct peephole_select_state *state = void_state; + + /* If the block is empty, then it certainly doesn't have any phi nodes, + * so we can skip it. This also ensures that we do an early skip on the + * end block of the function which isn't actually attached to the CFG. + */ + if (exec_list_is_empty(&block->instr_list)) + return true; + + if (nir_cf_node_is_first(&block->cf_node)) + return true; + + nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node); + if (prev_node->type != nir_cf_node_if) + return true; + + nir_if *if_stmt = nir_cf_node_as_if(prev_node); + nir_cf_node *then_node = nir_if_first_then_node(if_stmt); + nir_cf_node *else_node = nir_if_first_else_node(if_stmt); + + /* We can only have one block in each side ... */ + if (nir_if_last_then_node(if_stmt) != then_node || + nir_if_last_else_node(if_stmt) != else_node) + return true; + + nir_block *then_block = nir_cf_node_as_block(then_node); + nir_block *else_block = nir_cf_node_as_block(else_node); + + /* ... and those blocks must only contain "allowed" instructions. */ + if (!block_check_for_allowed_instrs(then_block) || + !block_check_for_allowed_instrs(else_block)) + return true; + + /* At this point, we know that the previous CFG node is an if-then + * statement containing only moves to phi nodes in this block. We can + * just remove that entire CF node and replace all of the phi nodes with + * selects. + */ + + nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node)); + assert(prev_block->cf_node.type == nir_cf_node_block); + + /* First, we move the remaining instructions from the blocks to the + * block before. 
We have already guaranteed that this is safe by + * calling block_check_for_allowed_instrs() + */ + nir_foreach_instr_safe(then_block, instr) { + exec_node_remove(&instr->node); + instr->block = prev_block; + exec_list_push_tail(&prev_block->instr_list, &instr->node); + } + + nir_foreach_instr_safe(else_block, instr) { + exec_node_remove(&instr->node); + instr->block = prev_block; + exec_list_push_tail(&prev_block->instr_list, &instr->node); + } + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi = nir_instr_as_phi(instr); + nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel); + nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel); + /* Splat the condition to all channels */ + memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle); + + assert(exec_list_length(&phi->srcs) == 2); + nir_foreach_phi_src(phi, src) { + assert(src->pred == then_block || src->pred == else_block); + assert(src->src.is_ssa); + + unsigned idx = src->pred == then_block ? 1 : 2; + nir_src_copy(&sel->src[idx].src, &src->src, sel); + } + + nir_ssa_dest_init(&sel->instr, &sel->dest.dest, + phi->dest.ssa.num_components, phi->dest.ssa.name); + sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; + + nir_ssa_def_rewrite_uses(&phi->dest.ssa, + nir_src_for_ssa(&sel->dest.dest.ssa)); + + nir_instr_insert_before(&phi->instr, &sel->instr); + nir_instr_remove(&phi->instr); + } + + nir_cf_node_remove(&if_stmt->cf_node); + state->progress = true; + + return true; +} + +static bool +nir_opt_peephole_select_impl(nir_function_impl *impl) +{ + struct peephole_select_state state; + + state.mem_ctx = ralloc_parent(impl); + state.progress = false; + + nir_foreach_block(impl, nir_opt_peephole_select_block, &state); + + if (state.progress) + nir_metadata_preserve(impl, nir_metadata_none); + + return state.progress; +} + +bool +nir_opt_peephole_select(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress |= nir_opt_peephole_select_impl(function->impl); + } + + return progress; +} diff --git a/src/compiler/nir/nir_opt_remove_phis.c b/src/compiler/nir/nir_opt_remove_phis.c new file mode 100644 index 00000000000..646183707bd --- /dev/null +++ b/src/compiler/nir/nir_opt_remove_phis.c @@ -0,0 +1,130 @@ +/* + * Copyright © 2015 Connor Abbott + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * This is a pass for removing phi nodes that look like:
+ * a = phi(b, b, b, ...)
+ *
+ * Note that we can't ignore undef sources here, or else we may create a
+ * situation where a use of b isn't dominated by its definition. We're
+ * allowed to do this since the definition of b must dominate all of the
+ * phi node's predecessors, which means it must dominate the phi node as well
+ * as all of the phi node's uses. In essence, the phi node acts as a copy
+ * instruction. b can't be another phi node in the same block, since the only
+ * time when phi nodes can source other phi nodes defined in the same block is
+ * at the loop header, and in that case one of the sources of the phi has to
+ * be from before the loop and that source can't be b.
+ */
+
+static bool
+remove_phis_block(nir_block *block, void *state)
+{
+   bool *progress = state;
+
+   nir_foreach_instr_safe(block, instr) {
+      if (instr->type != nir_instr_type_phi)
+         break;
+
+      nir_phi_instr *phi = nir_instr_as_phi(instr);
+
+      nir_ssa_def *def = NULL;
+      bool srcs_same = true;
+
+      nir_foreach_phi_src(phi, src) {
+         assert(src->src.is_ssa);
+
+         /* For phi nodes at the beginning of loops, we may encounter some
+          * sources from backedges that point back to the destination of the
+          * same phi, i.e. something like:
+          *
+          * a = phi(a, b, ...)
+          *
+          * We can safely ignore these sources, since if all of the normal
+          * sources point to the same definition, then that definition must
+          * still dominate the phi node, and the phi will still always take
+          * the value of that definition.
+          */
+         if (src->src.ssa == &phi->dest.ssa)
+            continue;
+
+         if (def == NULL) {
+            def = src->src.ssa;
+         } else {
+            if (src->src.ssa != def) {
+               srcs_same = false;
+               break;
+            }
+         }
+      }
+
+      if (!srcs_same)
+         continue;
+
+      /* We must have found at least one definition, since there must be at
+       * least one forward edge.
+       */
+      assert(def != NULL);
+
+      assert(phi->dest.is_ssa);
+      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
+      nir_instr_remove(instr);
+
+      *progress = true;
+   }
+
+   return true;
+}
+
+static bool
+remove_phis_impl(nir_function_impl *impl)
+{
+   bool progress = false;
+
+   nir_foreach_block(impl, remove_phis_block, &progress);
+
+   if (progress) {
+      nir_metadata_preserve(impl, nir_metadata_block_index |
+                                  nir_metadata_dominance);
+   }
+
+   return progress;
+}
+
+bool
+nir_opt_remove_phis(nir_shader *shader)
+{
+   bool progress = false;
+
+   nir_foreach_function(shader, function)
+      if (function->impl)
+         progress = remove_phis_impl(function->impl) || progress;
+
+   return progress;
+}
+
diff --git a/src/compiler/nir/nir_opt_undef.c b/src/compiler/nir/nir_opt_undef.c
new file mode 100644
index 00000000000..374564d34c5
--- /dev/null
+++ b/src/compiler/nir/nir_opt_undef.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright © 2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" + +/** @file nir_opt_undef.c + * + * Handles optimization of operations involving ssa_undef. For now, we just + * make sure that csels between undef and some other value just give the other + * value (on the assumption that the condition's going to be choosing the + * defined value). This reduces work after if flattening when each side of + * the if is defining a variable. + * + * Some day, we may find some use for making other operations consuming an + * undef arg output undef, but I don't know of any cases currently. + */ + +static bool +opt_undef_alu(nir_alu_instr *instr) +{ + if (instr->op != nir_op_bcsel && instr->op != nir_op_fcsel) + return false; + + assert(instr->dest.dest.is_ssa); + + for (int i = 1; i <= 2; i++) { + if (!instr->src[i].src.is_ssa) + continue; + + nir_instr *parent = instr->src[i].src.ssa->parent_instr; + if (parent->type != nir_instr_type_ssa_undef) + continue; + + /* We can't just use nir_alu_src_copy, because we need the def/use + * updated. + */ + nir_instr_rewrite_src(&instr->instr, &instr->src[0].src, + instr->src[i == 1 ? 2 : 1].src); + nir_alu_src_copy(&instr->src[0], &instr->src[i == 1 ? 2 : 1], + ralloc_parent(instr)); + + nir_src empty_src; + memset(&empty_src, 0, sizeof(empty_src)); + nir_instr_rewrite_src(&instr->instr, &instr->src[1].src, empty_src); + nir_instr_rewrite_src(&instr->instr, &instr->src[2].src, empty_src); + instr->op = nir_op_imov; + + return true; + } + + return false; +} + +static bool +opt_undef_block(nir_block *block, void *data) +{ + bool *progress = data; + + nir_foreach_instr_safe(block, instr) { + if (instr->type == nir_instr_type_alu) + if (opt_undef_alu(nir_instr_as_alu(instr))) + (*progress) = true; + } + + return true; +} + +bool +nir_opt_undef(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) { + nir_foreach_block(function->impl, opt_undef_block, &progress); + if (progress) + nir_metadata_preserve(function->impl, + nir_metadata_block_index | + nir_metadata_dominance); + } + } + + return progress; +} diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c new file mode 100644 index 00000000000..48ecb48a620 --- /dev/null +++ b/src/compiler/nir/nir_print.c @@ -0,0 +1,1069 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include "compiler/shader_enums.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+static void
+print_tabs(unsigned num_tabs, FILE *fp)
+{
+   for (unsigned i = 0; i < num_tabs; i++)
+      fprintf(fp, "\t");
+}
+
+typedef struct {
+   FILE *fp;
+   nir_shader *shader;
+   /** map from nir_variable -> printable name */
+   struct hash_table *ht;
+
+   /** set of names used so far for nir_variables */
+   struct set *syms;
+
+   /* an index used to make new non-conflicting names */
+   unsigned index;
+} print_state;
+
+static void
+print_register(nir_register *reg, print_state *state)
+{
+   FILE *fp = state->fp;
+   if (reg->name != NULL)
+      fprintf(fp, "/* %s */ ", reg->name);
+   if (reg->is_global)
+      fprintf(fp, "gr%u", reg->index);
+   else
+      fprintf(fp, "r%u", reg->index);
+}
+
+static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
+
+static void
+print_register_decl(nir_register *reg, print_state *state)
+{
+   FILE *fp = state->fp;
+   fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
+   if (reg->is_packed)
+      fprintf(fp, "(packed) ");
+   print_register(reg, state);
+   if (reg->num_array_elems != 0)
+      fprintf(fp, "[%u]", reg->num_array_elems);
+   fprintf(fp, "\n");
+}
+
+static void
+print_ssa_def(nir_ssa_def *def, print_state *state)
+{
+   FILE *fp = state->fp;
+   if (def->name != NULL)
+      fprintf(fp, "/* %s */ ", def->name);
+   fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
+}
+
+static void
+print_ssa_use(nir_ssa_def *def, print_state *state)
+{
+   FILE *fp = state->fp;
+   if (def->name != NULL)
+      fprintf(fp, "/* %s */ ", def->name);
+   fprintf(fp, "ssa_%u", def->index);
+}
+
+static void print_src(nir_src *src, print_state *state);
+
+static void
+print_reg_src(nir_reg_src *src, print_state *state)
+{
+   FILE *fp = state->fp;
+   print_register(src->reg, state);
+   if (src->reg->num_array_elems != 0) {
+      fprintf(fp, "[%u", src->base_offset);
+      if (src->indirect != NULL) {
+         fprintf(fp, " + ");
+         print_src(src->indirect, state);
+      }
+      fprintf(fp, "]");
+   }
+}
+
+static void
+print_reg_dest(nir_reg_dest *dest, print_state *state)
+{
+   FILE *fp = state->fp;
+   print_register(dest->reg, state);
+   if (dest->reg->num_array_elems != 0) {
+      fprintf(fp, "[%u", dest->base_offset);
+      if (dest->indirect != NULL) {
+         fprintf(fp, " + ");
+         print_src(dest->indirect, state);
+      }
+      fprintf(fp, "]");
+   }
+}
+
+static void
+print_src(nir_src *src, print_state *state)
+{
+   if (src->is_ssa)
+      print_ssa_use(src->ssa, state);
+   else
+      print_reg_src(&src->reg, state);
+}
+
+static void
+print_dest(nir_dest *dest, print_state *state)
+{
+   if (dest->is_ssa)
+      print_ssa_def(&dest->ssa, state);
+   else
+      print_reg_dest(&dest->reg, state);
+}
+
+static void
+print_alu_src(nir_alu_instr *instr, unsigned src, print_state *state)
+{
+   FILE *fp = state->fp;
+
+   if (instr->src[src].negate)
+      fprintf(fp, "-");
+   if (instr->src[src].abs)
+      fprintf(fp, "abs(");
+
+   print_src(&instr->src[src].src, state);
+
+   bool print_swizzle = false;
+   for (unsigned i = 0; i < 4; i++) {
+      if
(!nir_alu_instr_channel_used(instr, src, i)) + continue; + + if (instr->src[src].swizzle[i] != i) { + print_swizzle = true; + break; + } + } + + if (print_swizzle) { + fprintf(fp, "."); + for (unsigned i = 0; i < 4; i++) { + if (!nir_alu_instr_channel_used(instr, src, i)) + continue; + + fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]); + } + } + + if (instr->src[src].abs) + fprintf(fp, ")"); +} + +static void +print_alu_dest(nir_alu_dest *dest, print_state *state) +{ + FILE *fp = state->fp; + /* we're going to print the saturate modifier later, after the opcode */ + + print_dest(&dest->dest, state); + + if (!dest->dest.is_ssa && + dest->write_mask != (1 << dest->dest.reg.reg->num_components) - 1) { + fprintf(fp, "."); + for (unsigned i = 0; i < 4; i++) + if ((dest->write_mask >> i) & 1) + fprintf(fp, "%c", "xyzw"[i]); + } +} + +static void +print_alu_instr(nir_alu_instr *instr, print_state *state) +{ + FILE *fp = state->fp; + + print_alu_dest(&instr->dest, state); + + fprintf(fp, " = %s", nir_op_infos[instr->op].name); + if (instr->dest.saturate) + fprintf(fp, ".sat"); + fprintf(fp, " "); + + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_alu_src(instr, i, state); + } +} + +static void +print_constant(nir_constant *c, const struct glsl_type *type, print_state *state) +{ + FILE *fp = state->fp; + unsigned total_elems = glsl_get_components(type); + unsigned i; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_BOOL: + for (i = 0; i < total_elems; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "0x%08x", c->value.u[i]); + } + break; + + case GLSL_TYPE_FLOAT: + for (i = 0; i < total_elems; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "%f", c->value.f[i]); + } + break; + + case GLSL_TYPE_STRUCT: + for (i = 0; i < c->num_elements; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "{ "); + print_constant(c->elements[i], glsl_get_struct_field(type, i), state); + fprintf(fp, " }"); + } + break; + + case GLSL_TYPE_ARRAY: + for (i = 0; i < c->num_elements; i++) { + if (i > 0) fprintf(fp, ", "); + fprintf(fp, "{ "); + print_constant(c->elements[i], glsl_get_array_element(type), state); + fprintf(fp, " }"); + } + break; + + default: + unreachable("not reached"); + } +} + +static void +print_var_decl(nir_variable *var, print_state *state) +{ + FILE *fp = state->fp; + + fprintf(fp, "decl_var "); + + const char *const cent = (var->data.centroid) ? "centroid " : ""; + const char *const samp = (var->data.sample) ? "sample " : ""; + const char *const patch = (var->data.patch) ? "patch " : ""; + const char *const inv = (var->data.invariant) ? 
"invariant " : ""; + const char *const mode[] = { "shader_in ", "shader_out ", "", "", + "uniform ", "shader_storage", "system " }; + + fprintf(fp, "%s%s%s%s%s%s ", + cent, samp, patch, inv, mode[var->data.mode], + glsl_interp_qualifier_name(var->data.interpolation)); + + glsl_print_type(var->type, fp); + + struct set_entry *entry = NULL; + if (state->syms) + entry = _mesa_set_search(state->syms, var->name); + + char *name; + + if (entry != NULL) { + /* we have a collision with another name, append an @ + a unique index */ + name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++); + } else { + name = var->name; + } + + fprintf(fp, " %s", name); + + if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out || + var->data.mode == nir_var_uniform || + var->data.mode == nir_var_shader_storage) { + const char *loc = NULL; + char buf[4]; + + switch (state->shader->stage) { + case MESA_SHADER_VERTEX: + if (var->data.mode == nir_var_shader_in) + loc = gl_vert_attrib_name(var->data.location); + else if (var->data.mode == nir_var_shader_out) + loc = gl_varying_slot_name(var->data.location); + break; + case MESA_SHADER_GEOMETRY: + if ((var->data.mode == nir_var_shader_in) || + (var->data.mode == nir_var_shader_out)) + loc = gl_varying_slot_name(var->data.location); + break; + case MESA_SHADER_FRAGMENT: + if (var->data.mode == nir_var_shader_in) + loc = gl_varying_slot_name(var->data.location); + else if (var->data.mode == nir_var_shader_out) + loc = gl_frag_result_name(var->data.location); + break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_COMPUTE: + default: + /* TODO */ + break; + } + + if (!loc) { + snprintf(buf, sizeof(buf), "%u", var->data.location); + loc = buf; + } + + fprintf(fp, " (%s, %u)", loc, var->data.driver_location); + } + + if (var->constant_initializer) { + fprintf(fp, " = { "); + print_constant(var->constant_initializer, var->type, state); + fprintf(fp, " }"); + } + + fprintf(fp, "\n"); + + if (state->syms) { + _mesa_set_add(state->syms, name); + _mesa_hash_table_insert(state->ht, var, name); + } +} + +static void +print_var(nir_variable *var, print_state *state) +{ + FILE *fp = state->fp; + const char *name; + if (state->ht) { + struct hash_entry *entry = _mesa_hash_table_search(state->ht, var); + + assert(entry != NULL); + name = entry->data; + } else { + name = var->name; + } + + fprintf(fp, "%s", name); +} + +static void +print_deref_var(nir_deref_var *deref, print_state *state) +{ + print_var(deref->var, state); +} + +static void +print_deref_array(nir_deref_array *deref, print_state *state) +{ + FILE *fp = state->fp; + fprintf(fp, "["); + switch (deref->deref_array_type) { + case nir_deref_array_type_direct: + fprintf(fp, "%u", deref->base_offset); + break; + case nir_deref_array_type_indirect: + if (deref->base_offset != 0) + fprintf(fp, "%u + ", deref->base_offset); + print_src(&deref->indirect, state); + break; + case nir_deref_array_type_wildcard: + fprintf(fp, "*"); + break; + } + fprintf(fp, "]"); +} + +static void +print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type, + print_state *state) +{ + FILE *fp = state->fp; + fprintf(fp, ".%s", glsl_get_struct_elem_name(parent_type, deref->index)); +} + +static void +print_deref(nir_deref_var *deref, print_state *state) +{ + nir_deref *tail = &deref->deref; + nir_deref *pretail = NULL; + while (tail != NULL) { + switch (tail->deref_type) { + case nir_deref_type_var: + assert(pretail == NULL); + assert(tail == &deref->deref); 
+ print_deref_var(deref, state); + break; + + case nir_deref_type_array: + assert(pretail != NULL); + print_deref_array(nir_deref_as_array(tail), state); + break; + + case nir_deref_type_struct: + assert(pretail != NULL); + print_deref_struct(nir_deref_as_struct(tail), + pretail->type, state); + break; + + default: + unreachable("Invalid deref type"); + } + + pretail = tail; + tail = pretail->child; + } +} + +static void +print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) +{ + unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; + FILE *fp = state->fp; + + if (nir_intrinsic_infos[instr->intrinsic].has_dest) { + print_dest(&instr->dest, state); + fprintf(fp, " = "); + } + + fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name); + + for (unsigned i = 0; i < num_srcs; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_src(&instr->src[i], state); + } + + fprintf(fp, ") ("); + + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + + for (unsigned i = 0; i < num_vars; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_deref(instr->variables[i], state); + } + + fprintf(fp, ") ("); + + unsigned num_indices = nir_intrinsic_infos[instr->intrinsic].num_indices; + + for (unsigned i = 0; i < num_indices; i++) { + if (i != 0) + fprintf(fp, ", "); + + fprintf(fp, "%d", instr->const_index[i]); + } + + fprintf(fp, ")"); + + if (!state->shader) + return; + + struct exec_list *var_list = NULL; + + switch (instr->intrinsic) { + case nir_intrinsic_load_uniform: + var_list = &state->shader->uniforms; + break; + case nir_intrinsic_load_input: + case nir_intrinsic_load_per_vertex_input: + var_list = &state->shader->inputs; + break; + case nir_intrinsic_load_output: + case nir_intrinsic_store_output: + case nir_intrinsic_store_per_vertex_output: + var_list = &state->shader->outputs; + break; + default: + return; + } + + nir_foreach_variable(var, var_list) { + if ((var->data.driver_location == instr->const_index[0]) && + var->name) { + fprintf(fp, "\t/* %s */", var->name); + break; + } + } +} + +static void +print_tex_instr(nir_tex_instr *instr, print_state *state) +{ + FILE *fp = state->fp; + + print_dest(&instr->dest, state); + + fprintf(fp, " = "); + + switch (instr->op) { + case nir_texop_tex: + fprintf(fp, "tex "); + break; + case nir_texop_txb: + fprintf(fp, "txb "); + break; + case nir_texop_txl: + fprintf(fp, "txl "); + break; + case nir_texop_txd: + fprintf(fp, "txd "); + break; + case nir_texop_txf: + fprintf(fp, "txf "); + break; + case nir_texop_txf_ms: + fprintf(fp, "txf_ms "); + break; + case nir_texop_txs: + fprintf(fp, "txs "); + break; + case nir_texop_lod: + fprintf(fp, "lod "); + break; + case nir_texop_tg4: + fprintf(fp, "tg4 "); + break; + case nir_texop_query_levels: + fprintf(fp, "query_levels "); + break; + case nir_texop_texture_samples: + fprintf(fp, "texture_samples "); + break; + case nir_texop_samples_identical: + fprintf(fp, "samples_identical "); + break; + default: + unreachable("Invalid texture operation"); + break; + } + + for (unsigned i = 0; i < instr->num_srcs; i++) { + print_src(&instr->src[i].src, state); + + fprintf(fp, " "); + + switch(instr->src[i].src_type) { + case nir_tex_src_coord: + fprintf(fp, "(coord)"); + break; + case nir_tex_src_projector: + fprintf(fp, "(projector)"); + break; + case nir_tex_src_comparitor: + fprintf(fp, "(comparitor)"); + break; + case nir_tex_src_offset: + fprintf(fp, "(offset)"); + break; + case nir_tex_src_bias: + fprintf(fp, "(bias)"); + break; + case 
nir_tex_src_lod: + fprintf(fp, "(lod)"); + break; + case nir_tex_src_ms_index: + fprintf(fp, "(ms_index)"); + break; + case nir_tex_src_ddx: + fprintf(fp, "(ddx)"); + break; + case nir_tex_src_ddy: + fprintf(fp, "(ddy)"); + break; + case nir_tex_src_sampler_offset: + fprintf(fp, "(sampler_offset)"); + break; + + default: + unreachable("Invalid texture source type"); + break; + } + + fprintf(fp, ", "); + } + + bool has_nonzero_offset = false; + for (unsigned i = 0; i < 4; i++) { + if (instr->const_offset[i] != 0) { + has_nonzero_offset = true; + break; + } + } + + if (has_nonzero_offset) { + fprintf(fp, "[%i %i %i %i] (offset), ", + instr->const_offset[0], instr->const_offset[1], + instr->const_offset[2], instr->const_offset[3]); + } + + if (instr->op == nir_texop_tg4) { + fprintf(fp, "%u (gather_component), ", instr->component); + } + + if (instr->sampler) { + print_deref(instr->sampler, state); + } else { + fprintf(fp, "%u", instr->sampler_index); + } + + fprintf(fp, " (sampler)"); +} + +static void +print_call_instr(nir_call_instr *instr, print_state *state) +{ + FILE *fp = state->fp; + + fprintf(fp, "call %s ", instr->callee->name); + + for (unsigned i = 0; i < instr->num_params; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_deref(instr->params[i], state); + } + + if (instr->return_deref != NULL) { + if (instr->num_params != 0) + fprintf(fp, ", "); + fprintf(fp, "returning "); + print_deref(instr->return_deref, state); + } +} + +static void +print_load_const_instr(nir_load_const_instr *instr, print_state *state) +{ + FILE *fp = state->fp; + + print_ssa_def(&instr->def, state); + + fprintf(fp, " = load_const ("); + + for (unsigned i = 0; i < instr->def.num_components; i++) { + if (i != 0) + fprintf(fp, ", "); + + /* + * we don't really know the type of the constant (if it will be used as a + * float or an int), so just print the raw constant in hex for fidelity + * and then print the float in a comment for readability. 
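+       * For example, the constant 1.0f comes out as 0x3f800000 with
+       * 1.000000 printed alongside it.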
+ */ + + fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]); + } + + fprintf(fp, ")"); +} + +static void +print_jump_instr(nir_jump_instr *instr, print_state *state) +{ + FILE *fp = state->fp; + + switch (instr->type) { + case nir_jump_break: + fprintf(fp, "break"); + break; + + case nir_jump_continue: + fprintf(fp, "continue"); + break; + + case nir_jump_return: + fprintf(fp, "return"); + break; + } +} + +static void +print_ssa_undef_instr(nir_ssa_undef_instr* instr, print_state *state) +{ + FILE *fp = state->fp; + print_ssa_def(&instr->def, state); + fprintf(fp, " = undefined"); +} + +static void +print_phi_instr(nir_phi_instr *instr, print_state *state) +{ + FILE *fp = state->fp; + print_dest(&instr->dest, state); + fprintf(fp, " = phi "); + nir_foreach_phi_src(instr, src) { + if (&src->node != exec_list_get_head(&instr->srcs)) + fprintf(fp, ", "); + + fprintf(fp, "block_%u: ", src->pred->index); + print_src(&src->src, state); + } +} + +static void +print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state) +{ + FILE *fp = state->fp; + nir_foreach_parallel_copy_entry(instr, entry) { + if (&entry->node != exec_list_get_head(&instr->entries)) + fprintf(fp, "; "); + + print_dest(&entry->dest, state); + fprintf(fp, " = "); + print_src(&entry->src, state); + } +} + +static void +print_instr(const nir_instr *instr, print_state *state, unsigned tabs) +{ + FILE *fp = state->fp; + print_tabs(tabs, fp); + + switch (instr->type) { + case nir_instr_type_alu: + print_alu_instr(nir_instr_as_alu(instr), state); + break; + + case nir_instr_type_call: + print_call_instr(nir_instr_as_call(instr), state); + break; + + case nir_instr_type_intrinsic: + print_intrinsic_instr(nir_instr_as_intrinsic(instr), state); + break; + + case nir_instr_type_tex: + print_tex_instr(nir_instr_as_tex(instr), state); + break; + + case nir_instr_type_load_const: + print_load_const_instr(nir_instr_as_load_const(instr), state); + break; + + case nir_instr_type_jump: + print_jump_instr(nir_instr_as_jump(instr), state); + break; + + case nir_instr_type_ssa_undef: + print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state); + break; + + case nir_instr_type_phi: + print_phi_instr(nir_instr_as_phi(instr), state); + break; + + case nir_instr_type_parallel_copy: + print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), state); + break; + + default: + unreachable("Invalid instruction type"); + break; + } +} + +static int +compare_block_index(const void *p1, const void *p2) +{ + const nir_block *block1 = *((const nir_block **) p1); + const nir_block *block2 = *((const nir_block **) p2); + + return (int) block1->index - (int) block2->index; +} + +static void print_cf_node(nir_cf_node *node, print_state *state, + unsigned tabs); + +static void +print_block(nir_block *block, print_state *state, unsigned tabs) +{ + FILE *fp = state->fp; + + print_tabs(tabs, fp); + fprintf(fp, "block block_%u:\n", block->index); + + /* sort the predecessors by index so we consistently print the same thing */ + + nir_block **preds = + malloc(block->predecessors->entries * sizeof(nir_block *)); + + struct set_entry *entry; + unsigned i = 0; + set_foreach(block->predecessors, entry) { + preds[i++] = (nir_block *) entry->key; + } + + qsort(preds, block->predecessors->entries, sizeof(nir_block *), + compare_block_index); + + print_tabs(tabs, fp); + fprintf(fp, "/* preds: "); + for (unsigned i = 0; i < block->predecessors->entries; i++) { + fprintf(fp, "block_%u ", preds[i]->index); + } + fprintf(fp, "*/\n"); + + 
free(preds); + + nir_foreach_instr(block, instr) { + print_instr(instr, state, tabs); + fprintf(fp, "\n"); + } + + print_tabs(tabs, fp); + fprintf(fp, "/* succs: "); + for (unsigned i = 0; i < 2; i++) + if (block->successors[i]) { + fprintf(fp, "block_%u ", block->successors[i]->index); + } + fprintf(fp, "*/\n"); +} + +static void +print_if(nir_if *if_stmt, print_state *state, unsigned tabs) +{ + FILE *fp = state->fp; + + print_tabs(tabs, fp); + fprintf(fp, "if "); + print_src(&if_stmt->condition, state); + fprintf(fp, " {\n"); + foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list) { + print_cf_node(node, state, tabs + 1); + } + print_tabs(tabs, fp); + fprintf(fp, "} else {\n"); + foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list) { + print_cf_node(node, state, tabs + 1); + } + print_tabs(tabs, fp); + fprintf(fp, "}\n"); +} + +static void +print_loop(nir_loop *loop, print_state *state, unsigned tabs) +{ + FILE *fp = state->fp; + + print_tabs(tabs, fp); + fprintf(fp, "loop {\n"); + foreach_list_typed(nir_cf_node, node, node, &loop->body) { + print_cf_node(node, state, tabs + 1); + } + print_tabs(tabs, fp); + fprintf(fp, "}\n"); +} + +static void +print_cf_node(nir_cf_node *node, print_state *state, unsigned int tabs) +{ + switch (node->type) { + case nir_cf_node_block: + print_block(nir_cf_node_as_block(node), state, tabs); + break; + + case nir_cf_node_if: + print_if(nir_cf_node_as_if(node), state, tabs); + break; + + case nir_cf_node_loop: + print_loop(nir_cf_node_as_loop(node), state, tabs); + break; + + default: + unreachable("Invalid CFG node type"); + } +} + +static void +print_function_impl(nir_function_impl *impl, print_state *state) +{ + FILE *fp = state->fp; + + fprintf(fp, "\nimpl %s ", impl->function->name); + + for (unsigned i = 0; i < impl->num_params; i++) { + if (i != 0) + fprintf(fp, ", "); + + print_var(impl->params[i], state); + } + + if (impl->return_var != NULL) { + if (impl->num_params != 0) + fprintf(fp, ", "); + fprintf(fp, "returning "); + print_var(impl->return_var, state); + } + + fprintf(fp, "{\n"); + + nir_foreach_variable(var, &impl->locals) { + fprintf(fp, "\t"); + print_var_decl(var, state); + } + + foreach_list_typed(nir_register, reg, node, &impl->registers) { + fprintf(fp, "\t"); + print_register_decl(reg, state); + } + + nir_index_blocks(impl); + + foreach_list_typed(nir_cf_node, node, node, &impl->body) { + print_cf_node(node, state, 1); + } + + fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index); +} + +static void +print_function(nir_function *function, print_state *state) +{ + FILE *fp = state->fp; + + fprintf(fp, "decl_function %s ", function->name); + + for (unsigned i = 0; i < function->num_params; i++) { + if (i != 0) + fprintf(fp, ", "); + + switch (function->params[i].param_type) { + case nir_parameter_in: + fprintf(fp, "in "); + break; + case nir_parameter_out: + fprintf(fp, "out "); + break; + case nir_parameter_inout: + fprintf(fp, "inout "); + break; + default: + unreachable("Invalid parameter type"); + } + + glsl_print_type(function->params[i].type, fp); + } + + if (function->return_type != NULL) { + if (function->num_params != 0) + fprintf(fp, ", "); + fprintf(fp, "returning "); + glsl_print_type(function->return_type, fp); + } + + fprintf(fp, "\n"); + + if (function->impl != NULL) { + print_function_impl(function->impl, state); + return; + } +} + +static void +init_print_state(print_state *state, nir_shader *shader, FILE *fp) +{ + state->fp = fp; + state->shader = shader; + state->ht = 
_mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + state->syms = _mesa_set_create(NULL, _mesa_key_hash_string, + _mesa_key_string_equal); + state->index = 0; +} + +static void +destroy_print_state(print_state *state) +{ + _mesa_hash_table_destroy(state->ht, NULL); + _mesa_set_destroy(state->syms, NULL); +} + +void +nir_print_shader(nir_shader *shader, FILE *fp) +{ + print_state state; + init_print_state(&state, shader, fp); + + fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage)); + + if (shader->info.name) + fprintf(fp, "name: %s\n", shader->info.name); + + if (shader->info.label) + fprintf(fp, "label: %s\n", shader->info.label); + + fprintf(fp, "inputs: %u\n", shader->num_inputs); + fprintf(fp, "outputs: %u\n", shader->num_outputs); + fprintf(fp, "uniforms: %u\n", shader->num_uniforms); + + nir_foreach_variable(var, &shader->uniforms) { + print_var_decl(var, &state); + } + + nir_foreach_variable(var, &shader->inputs) { + print_var_decl(var, &state); + } + + nir_foreach_variable(var, &shader->outputs) { + print_var_decl(var, &state); + } + + nir_foreach_variable(var, &shader->globals) { + print_var_decl(var, &state); + } + + nir_foreach_variable(var, &shader->system_values) { + print_var_decl(var, &state); + } + + foreach_list_typed(nir_register, reg, node, &shader->registers) { + print_register_decl(reg, &state); + } + + foreach_list_typed(nir_function, func, node, &shader->functions) { + print_function(func, &state); + } + + destroy_print_state(&state); +} + +void +nir_print_instr(const nir_instr *instr, FILE *fp) +{ + print_state state = { + .fp = fp, + }; + print_instr(instr, &state, 0); + +} diff --git a/src/compiler/nir/nir_remove_dead_variables.c b/src/compiler/nir/nir_remove_dead_variables.c new file mode 100644 index 00000000000..db754e56b1c --- /dev/null +++ b/src/compiler/nir/nir_remove_dead_variables.c @@ -0,0 +1,141 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Connor Abbott (cwabbott0@gmail.com) + * + */ + +#include "nir.h" + +static void +add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live) +{ + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) { + nir_variable *var = instr->variables[i]->var; + _mesa_set_add(live, var); + } +} + +static void +add_var_use_call(nir_call_instr *instr, struct set *live) +{ + if (instr->return_deref != NULL) { + nir_variable *var = instr->return_deref->var; + _mesa_set_add(live, var); + } + + for (unsigned i = 0; i < instr->num_params; i++) { + nir_variable *var = instr->params[i]->var; + _mesa_set_add(live, var); + } +} + +static void +add_var_use_tex(nir_tex_instr *instr, struct set *live) +{ + if (instr->sampler != NULL) { + nir_variable *var = instr->sampler->var; + _mesa_set_add(live, var); + } +} + +static bool +add_var_use_block(nir_block *block, void *state) +{ + struct set *live = state; + + nir_foreach_instr(block, instr) { + switch(instr->type) { + case nir_instr_type_intrinsic: + add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live); + break; + + case nir_instr_type_call: + add_var_use_call(nir_instr_as_call(instr), live); + break; + + case nir_instr_type_tex: + add_var_use_tex(nir_instr_as_tex(instr), live); + break; + + default: + break; + } + } + + return true; +} + +static void +add_var_use_shader(nir_shader *shader, struct set *live) +{ + nir_foreach_function(shader, function) { + if (function->impl) { + nir_foreach_block(function->impl, add_var_use_block, live); + } + } +} + +static bool +remove_dead_vars(struct exec_list *var_list, struct set *live) +{ + bool progress = false; + + foreach_list_typed_safe(nir_variable, var, node, var_list) { + struct set_entry *entry = _mesa_set_search(live, var); + if (entry == NULL) { + exec_node_remove(&var->node); + ralloc_free(var); + progress = true; + } + } + + return progress; +} + +bool +nir_remove_dead_variables(nir_shader *shader) +{ + bool progress = false; + struct set *live = + _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); + + add_var_use_shader(shader, live); + + progress = remove_dead_vars(&shader->globals, live) || progress; + + nir_foreach_function(shader, function) { + if (function->impl) { + if (remove_dead_vars(&function->impl->locals, live)) { + nir_metadata_preserve(function->impl, nir_metadata_block_index | + nir_metadata_dominance | + nir_metadata_live_ssa_defs); + progress = true; + } + } + } + + _mesa_set_destroy(live, NULL); + return progress; +} diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c new file mode 100644 index 00000000000..56d7e8162f3 --- /dev/null +++ b/src/compiler/nir/nir_search.c @@ -0,0 +1,379 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir_search.h" + +struct match_state { + unsigned variables_seen; + nir_alu_src variables[NIR_SEARCH_MAX_VARIABLES]; +}; + +static bool +match_expression(const nir_search_expression *expr, nir_alu_instr *instr, + unsigned num_components, const uint8_t *swizzle, + struct match_state *state); + +static const uint8_t identity_swizzle[] = { 0, 1, 2, 3 }; + +static bool alu_instr_is_bool(nir_alu_instr *instr); + +static bool +src_is_bool(nir_src src) +{ + if (!src.is_ssa) + return false; + if (src.ssa->parent_instr->type != nir_instr_type_alu) + return false; + return alu_instr_is_bool(nir_instr_as_alu(src.ssa->parent_instr)); +} + +static bool +alu_instr_is_bool(nir_alu_instr *instr) +{ + switch (instr->op) { + case nir_op_iand: + case nir_op_ior: + case nir_op_ixor: + return src_is_bool(instr->src[0].src) && src_is_bool(instr->src[1].src); + case nir_op_inot: + return src_is_bool(instr->src[0].src); + default: + return nir_op_infos[instr->op].output_type == nir_type_bool; + } +} + +static bool +match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, + unsigned num_components, const uint8_t *swizzle, + struct match_state *state) +{ + uint8_t new_swizzle[4]; + + /* If the source is an explicitly sized source, then we need to reset + * both the number of components and the swizzle. 
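+    * For example, fdot3 always takes vec3 sources no matter how many
+    * components its destination has, so matching must restart from the
+    * identity swizzle there.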
+ */ + if (nir_op_infos[instr->op].input_sizes[src] != 0) { + num_components = nir_op_infos[instr->op].input_sizes[src]; + swizzle = identity_swizzle; + } + + for (unsigned i = 0; i < num_components; ++i) + new_swizzle[i] = instr->src[src].swizzle[swizzle[i]]; + + switch (value->type) { + case nir_search_value_expression: + if (!instr->src[src].src.is_ssa) + return false; + + if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu) + return false; + + return match_expression(nir_search_value_as_expression(value), + nir_instr_as_alu(instr->src[src].src.ssa->parent_instr), + num_components, new_swizzle, state); + + case nir_search_value_variable: { + nir_search_variable *var = nir_search_value_as_variable(value); + assert(var->variable < NIR_SEARCH_MAX_VARIABLES); + + if (state->variables_seen & (1 << var->variable)) { + if (!nir_srcs_equal(state->variables[var->variable].src, + instr->src[src].src)) + return false; + + assert(!instr->src[src].abs && !instr->src[src].negate); + + for (unsigned i = 0; i < num_components; ++i) { + if (state->variables[var->variable].swizzle[i] != new_swizzle[i]) + return false; + } + + return true; + } else { + if (var->is_constant && + instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const) + return false; + + if (var->type != nir_type_invalid) { + if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu) + return false; + + nir_alu_instr *src_alu = + nir_instr_as_alu(instr->src[src].src.ssa->parent_instr); + + if (nir_op_infos[src_alu->op].output_type != var->type && + !(var->type == nir_type_bool && alu_instr_is_bool(src_alu))) + return false; + } + + state->variables_seen |= (1 << var->variable); + state->variables[var->variable].src = instr->src[src].src; + state->variables[var->variable].abs = false; + state->variables[var->variable].negate = false; + + for (unsigned i = 0; i < 4; ++i) { + if (i < num_components) + state->variables[var->variable].swizzle[i] = new_swizzle[i]; + else + state->variables[var->variable].swizzle[i] = 0; + } + + return true; + } + } + + case nir_search_value_constant: { + nir_search_constant *const_val = nir_search_value_as_constant(value); + + if (!instr->src[src].src.is_ssa) + return false; + + if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const) + return false; + + nir_load_const_instr *load = + nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr); + + switch (nir_op_infos[instr->op].input_types[src]) { + case nir_type_float: + for (unsigned i = 0; i < num_components; ++i) { + if (load->value.f[new_swizzle[i]] != const_val->data.f) + return false; + } + return true; + case nir_type_int: + case nir_type_uint: + case nir_type_bool: + for (unsigned i = 0; i < num_components; ++i) { + if (load->value.i[new_swizzle[i]] != const_val->data.i) + return false; + } + return true; + default: + unreachable("Invalid alu source type"); + } + } + + default: + unreachable("Invalid search value type"); + } +} + +static bool +match_expression(const nir_search_expression *expr, nir_alu_instr *instr, + unsigned num_components, const uint8_t *swizzle, + struct match_state *state) +{ + if (instr->op != expr->opcode) + return false; + + assert(!instr->dest.saturate); + assert(nir_op_infos[instr->op].num_inputs > 0); + + /* If we have an explicitly sized destination, we can only handle the + * identity swizzle. While dot(vec3(a, b, c).zxy) is a valid + * expression, we don't have the information right now to propagate that + * swizzle through. 
We can only properly propagate swizzles if the + * instruction is vectorized. + */ + if (nir_op_infos[instr->op].output_size != 0) { + for (unsigned i = 0; i < num_components; i++) { + if (swizzle[i] != i) + return false; + } + } + + /* Stash off the current variables_seen bitmask. This way we can + * restore it prior to matching in the commutative case below. + */ + unsigned variables_seen_stash = state->variables_seen; + + bool matched = true; + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + if (!match_value(expr->srcs[i], instr, i, num_components, + swizzle, state)) { + matched = false; + break; + } + } + + if (matched) + return true; + + if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { + assert(nir_op_infos[instr->op].num_inputs == 2); + + /* Restore the variables_seen bitmask. If we don't do this, then we + * could end up with an erroneous failure due to variables found in the + * first match attempt above not matching those in the second. + */ + state->variables_seen = variables_seen_stash; + + if (!match_value(expr->srcs[0], instr, 1, num_components, + swizzle, state)) + return false; + + return match_value(expr->srcs[1], instr, 0, num_components, + swizzle, state); + } else { + return false; + } +} + +static nir_alu_src +construct_value(const nir_search_value *value, nir_alu_type type, + unsigned num_components, struct match_state *state, + nir_instr *instr, void *mem_ctx) +{ + switch (value->type) { + case nir_search_value_expression: { + const nir_search_expression *expr = nir_search_value_as_expression(value); + + if (nir_op_infos[expr->opcode].output_size != 0) + num_components = nir_op_infos[expr->opcode].output_size; + + nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode); + nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL); + alu->dest.write_mask = (1 << num_components) - 1; + alu->dest.saturate = false; + + for (unsigned i = 0; i < nir_op_infos[expr->opcode].num_inputs; i++) { + /* If the source is an explicitly sized source, then we need to reset + * the number of components to match. 
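+          * This mirrors the explicitly sized source handling in
+          * match_value above.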
+          */
+         if (nir_op_infos[alu->op].input_sizes[i] != 0)
+            num_components = nir_op_infos[alu->op].input_sizes[i];
+
+         alu->src[i] = construct_value(expr->srcs[i],
+                                       nir_op_infos[alu->op].input_types[i],
+                                       num_components,
+                                       state, instr, mem_ctx);
+      }
+
+      nir_instr_insert_before(instr, &alu->instr);
+
+      nir_alu_src val;
+      val.src = nir_src_for_ssa(&alu->dest.dest.ssa);
+      val.negate = false;
+      val.abs = false;
+      memcpy(val.swizzle, identity_swizzle, sizeof val.swizzle);
+
+      return val;
+   }
+
+   case nir_search_value_variable: {
+      const nir_search_variable *var = nir_search_value_as_variable(value);
+      assert(state->variables_seen & (1 << var->variable));
+
+      nir_alu_src val = { NIR_SRC_INIT };
+      nir_alu_src_copy(&val, &state->variables[var->variable], mem_ctx);
+
+      assert(!var->is_constant);
+
+      return val;
+   }
+
+   case nir_search_value_constant: {
+      const nir_search_constant *c = nir_search_value_as_constant(value);
+      nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
+
+      switch (type) {
+      case nir_type_float:
+         load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
+         load->value.f[0] = c->data.f;
+         break;
+      case nir_type_int:
+         load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
+         load->value.i[0] = c->data.i;
+         break;
+      case nir_type_uint:
+      case nir_type_bool:
+         load->value.u[0] = c->data.u;
+         break;
+      default:
+         unreachable("Invalid alu source type");
+      }
+
+      nir_instr_insert_before(instr, &load->instr);
+
+      nir_alu_src val;
+      val.src = nir_src_for_ssa(&load->def);
+      val.negate = false;
+      val.abs = false;
+      memset(val.swizzle, 0, sizeof val.swizzle);
+
+      return val;
+   }
+
+   default:
+      unreachable("Invalid search value type");
+   }
+}
+
+nir_alu_instr *
+nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
+                  const nir_search_value *replace, void *mem_ctx)
+{
+   uint8_t swizzle[4] = { 0, 0, 0, 0 };
+
+   for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; ++i)
+      swizzle[i] = i;
+
+   assert(instr->dest.dest.is_ssa);
+
+   struct match_state state;
+   state.variables_seen = 0;
+
+   if (!match_expression(search, instr, instr->dest.dest.ssa.num_components,
+                         swizzle, &state))
+      return NULL;
+
+   /* Inserting a mov may be unnecessary.  However, it's much easier to
+    * simply let copy propagation clean this up than to try to go through
+    * and rewrite swizzles ourselves.
+    */
+   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
+   mov->dest.write_mask = instr->dest.write_mask;
+   nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
+                     instr->dest.dest.ssa.num_components, NULL);
+
+   mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type,
+                                 instr->dest.dest.ssa.num_components, &state,
+                                 &instr->instr, mem_ctx);
+   nir_instr_insert_before(&instr->instr, &mov->instr);
+
+   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
+                            nir_src_for_ssa(&mov->dest.dest.ssa));
+
+   /* We know this one has no more uses because we just rewrote them all,
+    * so we can remove it.  The rest of the matched expression, however, we
+    * don't know so much about.  We'll just let dead code clean them up.
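+    * Callers typically follow a round of replacements with a dead code
+    * pass such as nir_opt_dce for exactly this reason.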
+    */
+   nir_instr_remove(&instr->instr);
+
+   return mov;
+}
diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h
new file mode 100644
index 00000000000..7d47792945e
--- /dev/null
+++ b/src/compiler/nir/nir_search.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#ifndef _NIR_SEARCH_
+#define _NIR_SEARCH_
+
+#include "nir.h"
+
+#define NIR_SEARCH_MAX_VARIABLES 16
+
+typedef enum {
+   nir_search_value_expression,
+   nir_search_value_variable,
+   nir_search_value_constant,
+} nir_search_value_type;
+
+typedef struct {
+   nir_search_value_type type;
+} nir_search_value;
+
+typedef struct {
+   nir_search_value value;
+
+   /** The variable index; must be less than NIR_SEARCH_MAX_VARIABLES */
+   unsigned variable;
+
+   /** Indicates that the given variable must be a constant
+    *
+    * This is only allowed in search expressions and indicates that the
+    * given variable is only allowed to match constant values.
+    */
+   bool is_constant;
+
+   /** Indicates that the given variable must have a certain type
+    *
+    * This is only allowed in search expressions and indicates that the
+    * given variable is only allowed to match values that come from an ALU
+    * instruction with the given output type.  A type of nir_type_void
+    * means it can match any type.
+    *
+    * Note: A variable that is both constant and has a non-void type will
+    * never match anything.
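+    * That is because a constant comes from a load_const instruction while
+    * a typed variable must come from an ALU instruction, and no value can
+    * satisfy both requirements at once.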
+    */
+   nir_alu_type type;
+} nir_search_variable;
+
+typedef struct {
+   nir_search_value value;
+
+   union {
+      uint32_t u;
+      int32_t i;
+      float f;
+   } data;
+} nir_search_constant;
+
+typedef struct {
+   nir_search_value value;
+
+   nir_op opcode;
+   const nir_search_value *srcs[4];
+} nir_search_expression;
+
+NIR_DEFINE_CAST(nir_search_value_as_variable, nir_search_value,
+                nir_search_variable, value)
+NIR_DEFINE_CAST(nir_search_value_as_constant, nir_search_value,
+                nir_search_constant, value)
+NIR_DEFINE_CAST(nir_search_value_as_expression, nir_search_value,
+                nir_search_expression, value)
+
+nir_alu_instr *
+nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
+                  const nir_search_value *replace, void *mem_ctx);
+
+#endif /* _NIR_SEARCH_ */
diff --git a/src/compiler/nir/nir_split_var_copies.c b/src/compiler/nir/nir_split_var_copies.c
new file mode 100644
index 00000000000..6fdaefa32c8
--- /dev/null
+++ b/src/compiler/nir/nir_split_var_copies.c
@@ -0,0 +1,285 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#include "nir.h"
+
+/*
+ * Implements "copy splitting" which is similar to structure splitting only
+ * it works on copy operations rather than the datatypes themselves.  The
+ * GLSL language allows you to copy an entire structure (which may contain
+ * arrays or other structures) from one variable to another in a single
+ * assignment.  Normally, in a language such as C this would be handled by
+ * a "structure splitting" pass that breaks up the structures.
+ * Unfortunately for us, structures used in inputs or outputs can't be
+ * split.  Therefore, regardless of what we do, we have to be able to copy
+ * to/from structures.
+ *
+ * The primary purpose of structure splitting is to allow you to better
+ * optimize variable access and lower things to registers where you can.
+ * The primary issue here is that, if you lower the copy to a bunch of
+ * loads and stores, you lose a lot of information about the copy
+ * operation that you would like to keep around.  To solve this problem, we
+ * have a "copy splitting" pass that, instead of splitting the structures
+ * or lowering the copy into loads and stores, splits the copy operation
+ * into a bunch of copy operations one for each leaf of the structure tree.
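+ * For example, a copy of a struct holding a vec4 and a float array is
+ * split into one copy per member.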
+ * If an intermediate array is encountered, it is referenced with a + * wildcard reference to indicate that the entire array is to be copied. + * + * As things become direct, array copies may be able to be losslessly + * lowered to having fewer and fewer wildcards. However, until that + * happens we want to keep the information about the arrays intact. + * + * Prior to the copy splitting pass, there are no wildcard references but + * there may be incomplete references where the tail of the deref chain is + * an array or a structure and not a specific element. After the copy + * splitting pass has completed, every variable deref will be a full-length + * dereference pointing to a single leaf in the structure type tree with + * possibly a few wildcard array dereferences. + */ + +struct split_var_copies_state { + void *mem_ctx; + void *dead_ctx; + bool progress; +}; + +/* Recursively constructs deref chains to split a copy instruction into + * multiple (if needed) copy instructions with full-length deref chains. + * External callers of this function should pass the tail and head of the + * deref chains found as the source and destination of the copy instruction + * into this function. + * + * \param old_copy The copy instruction we are splitting + * \param dest_head The head of the destination deref chain we are building + * \param src_head The head of the source deref chain we are building + * \param dest_tail The tail of the destination deref chain we are building + * \param src_tail The tail of the source deref chain we are building + * \param state The current split_var_copies_state object + */ +static void +split_var_copy_instr(nir_intrinsic_instr *old_copy, + nir_deref *dest_head, nir_deref *src_head, + nir_deref *dest_tail, nir_deref *src_tail, + struct split_var_copies_state *state) +{ + assert(src_tail->type == dest_tail->type); + + /* Make sure these really are the tails of the deref chains */ + assert(dest_tail->child == NULL); + assert(src_tail->child == NULL); + + switch (glsl_get_base_type(src_tail->type)) { + case GLSL_TYPE_ARRAY: { + /* Make a wildcard dereference */ + nir_deref_array *deref = nir_deref_array_create(state->dead_ctx); + deref->deref.type = glsl_get_array_element(src_tail->type); + deref->deref_array_type = nir_deref_array_type_wildcard; + + /* Set the tail of both as the newly created wildcard deref. It is + * safe to use the same wildcard in both places because a) we will be + * copying it before we put it in an actual instruction and b) + * everything that will potentially add another link in the deref + * chain will also add the same thing to both chains. + */ + src_tail->child = &deref->deref; + dest_tail->child = &deref->deref; + + split_var_copy_instr(old_copy, dest_head, src_head, + dest_tail->child, src_tail->child, state); + + /* Set it back to the way we found it */ + src_tail->child = NULL; + dest_tail->child = NULL; + break; + } + + case GLSL_TYPE_STRUCT: + /* This is the only part that actually does any interesting + * splitting. For array types, we just use wildcards and resolve + * them later. For structure types, we need to emit one copy + * instruction for every structure element. Because we may have + * structs inside structs, we just recurse and let the next level + * take care of any additional structures. 
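+       * For example, struct { struct { float x; } inner; } ends up
+       * producing a single copy of .inner.x one level of recursion down.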
+       */
+      for (unsigned i = 0; i < glsl_get_length(src_tail->type); i++) {
+         nir_deref_struct *deref = nir_deref_struct_create(state->dead_ctx, i);
+         deref->deref.type = glsl_get_struct_field(src_tail->type, i);
+
+         /* Set the tail of both as the newly created structure deref.  It
+          * is safe to use the same deref in both places because a) we
+          * will be copying it before we put it in an actual instruction
+          * and b) everything that will potentially add another link in the
+          * deref chain will also add the same thing to both chains.
+          */
+         src_tail->child = &deref->deref;
+         dest_tail->child = &deref->deref;
+
+         split_var_copy_instr(old_copy, dest_head, src_head,
+                              dest_tail->child, src_tail->child, state);
+      }
+      /* Set it back to the way we found it */
+      src_tail->child = NULL;
+      dest_tail->child = NULL;
+      break;
+
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_BOOL:
+      if (glsl_type_is_matrix(src_tail->type)) {
+         nir_deref_array *deref = nir_deref_array_create(state->dead_ctx);
+         deref->deref.type = glsl_get_column_type(src_tail->type);
+         deref->deref_array_type = nir_deref_array_type_wildcard;
+
+         /* Set the tail of both as the newly created wildcard deref.  It
+          * is safe to use the same wildcard in both places because a) we
+          * will be copying it before we put it in an actual instruction
+          * and b) everything that will potentially add another link in the
+          * deref chain will also add the same thing to both chains.
+          */
+         src_tail->child = &deref->deref;
+         dest_tail->child = &deref->deref;
+
+         split_var_copy_instr(old_copy, dest_head, src_head,
+                              dest_tail->child, src_tail->child, state);
+
+         /* Set it back to the way we found it */
+         src_tail->child = NULL;
+         dest_tail->child = NULL;
+      } else {
+         /* At this point, we have fully built our deref chains and can
+          * actually add the new copy instruction.
+          */
+         nir_intrinsic_instr *new_copy =
+            nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_copy_var);
+
+         /* We need to make copies because a) this deref chain actually
+          * belongs to the copy instruction and b) the deref chains may
+          * have some of the same links due to the way we constructed them
+          */
+         nir_deref *src = nir_copy_deref(new_copy, src_head);
+         nir_deref *dest = nir_copy_deref(new_copy, dest_head);
+
+         new_copy->variables[0] = nir_deref_as_var(dest);
+         new_copy->variables[1] = nir_deref_as_var(src);
+
+         /* Emit the copy instruction after the old instruction.  We'll
+          * remove the old one later.
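+          * The removal happens back in split_var_copies_block once the
+          * whole copy has been split.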
+ */ + nir_instr_insert_after(&old_copy->instr, &new_copy->instr); + state->progress = true; + } + break; + + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_INTERFACE: + default: + unreachable("Cannot copy these types"); + } +} + +static bool +split_var_copies_block(nir_block *block, void *void_state) +{ + struct split_var_copies_state *state = void_state; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); + if (intrinsic->intrinsic != nir_intrinsic_copy_var) + continue; + + nir_deref *dest_head = &intrinsic->variables[0]->deref; + nir_deref *src_head = &intrinsic->variables[1]->deref; + nir_deref *dest_tail = nir_deref_tail(dest_head); + nir_deref *src_tail = nir_deref_tail(src_head); + + switch (glsl_get_base_type(src_tail->type)) { + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: + split_var_copy_instr(intrinsic, dest_head, src_head, + dest_tail, src_tail, state); + nir_instr_remove(&intrinsic->instr); + ralloc_steal(state->dead_ctx, instr); + break; + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(src_tail->type)) { + split_var_copy_instr(intrinsic, dest_head, src_head, + dest_tail, src_tail, state); + nir_instr_remove(&intrinsic->instr); + ralloc_steal(state->dead_ctx, instr); + } + break; + default: + unreachable("Invalid type"); + break; + } + } + + return true; +} + +static bool +split_var_copies_impl(nir_function_impl *impl) +{ + struct split_var_copies_state state; + + state.mem_ctx = ralloc_parent(impl); + state.dead_ctx = ralloc_context(NULL); + state.progress = false; + + nir_foreach_block(impl, split_var_copies_block, &state); + + ralloc_free(state.dead_ctx); + + if (state.progress) { + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + } + + return state.progress; +} + +bool +nir_split_var_copies(nir_shader *shader) +{ + bool progress = false; + + nir_foreach_function(shader, function) { + if (function->impl) + progress = split_var_copies_impl(function->impl) || progress; + } + + return progress; +} diff --git a/src/compiler/nir/nir_sweep.c b/src/compiler/nir/nir_sweep.c new file mode 100644 index 00000000000..0710bdba7c7 --- /dev/null +++ b/src/compiler/nir/nir_sweep.c @@ -0,0 +1,173 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "nir.h" + +/** + * \file nir_sweep.c + * + * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated + * memory - anything still connected to the program will be kept, and any dead memory + * we dropped on the floor will be freed. + * + * The expectation is that drivers should call this when finished compiling the shader + * (after any optimization, lowering, and so on). However, it's also fine to call it + * earlier, and even many times, trading CPU cycles for memory savings. + */ + +#define steal_list(mem_ctx, type, list) \ + foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); } + +static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node); + +static bool +sweep_src_indirect(nir_src *src, void *nir) +{ + if (!src->is_ssa && src->reg.indirect) + ralloc_steal(nir, src->reg.indirect); + + return true; +} + +static bool +sweep_dest_indirect(nir_dest *dest, void *nir) +{ + if (!dest->is_ssa && dest->reg.indirect) + ralloc_steal(nir, dest->reg.indirect); + + return true; +} + +static void +sweep_block(nir_shader *nir, nir_block *block) +{ + ralloc_steal(nir, block); + + nir_foreach_instr(block, instr) { + ralloc_steal(nir, instr); + + nir_foreach_src(instr, sweep_src_indirect, nir); + nir_foreach_dest(instr, sweep_dest_indirect, nir); + } +} + +static void +sweep_if(nir_shader *nir, nir_if *iff) +{ + ralloc_steal(nir, iff); + + foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) { + sweep_cf_node(nir, cf_node); + } + + foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) { + sweep_cf_node(nir, cf_node); + } +} + +static void +sweep_loop(nir_shader *nir, nir_loop *loop) +{ + ralloc_steal(nir, loop); + + foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { + sweep_cf_node(nir, cf_node); + } +} + +static void +sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node) +{ + switch (cf_node->type) { + case nir_cf_node_block: + sweep_block(nir, nir_cf_node_as_block(cf_node)); + break; + case nir_cf_node_if: + sweep_if(nir, nir_cf_node_as_if(cf_node)); + break; + case nir_cf_node_loop: + sweep_loop(nir, nir_cf_node_as_loop(cf_node)); + break; + default: + unreachable("Invalid CF node type"); + } +} + +static void +sweep_impl(nir_shader *nir, nir_function_impl *impl) +{ + ralloc_steal(nir, impl); + + ralloc_steal(nir, impl->params); + ralloc_steal(nir, impl->return_var); + steal_list(nir, nir_variable, &impl->locals); + steal_list(nir, nir_register, &impl->registers); + + foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) { + sweep_cf_node(nir, cf_node); + } + + sweep_block(nir, impl->end_block); + + /* Wipe out all the metadata, if any. */ + nir_metadata_preserve(impl, nir_metadata_none); +} + +static void +sweep_function(nir_shader *nir, nir_function *f) +{ + ralloc_steal(nir, f); + ralloc_steal(nir, f->params); + + if (f->impl) + sweep_impl(nir, f->impl); +} + +void +nir_sweep(nir_shader *nir) +{ + void *rubbish = ralloc_context(NULL); + + /* First, move ownership of all the memory to a temporary context; assume dead. */ + ralloc_adopt(rubbish, nir); + + ralloc_steal(nir, (char *)nir->info.name); + if (nir->info.label) + ralloc_steal(nir, (char *)nir->info.label); + + /* Variables and registers are not dead. Steal them back. 
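+    * Anything else that is still reachable gets stolen back transitively
+    * when we recurse into the functions below.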
*/
+   steal_list(nir, nir_variable, &nir->uniforms);
+   steal_list(nir, nir_variable, &nir->inputs);
+   steal_list(nir, nir_variable, &nir->outputs);
+   steal_list(nir, nir_variable, &nir->globals);
+   steal_list(nir, nir_variable, &nir->system_values);
+   steal_list(nir, nir_register, &nir->registers);
+
+   /* Recurse into functions, stealing their contents back. */
+   foreach_list_typed(nir_function, func, node, &nir->functions) {
+      sweep_function(nir, func);
+   }
+
+   /* Free everything we didn't steal back. */
+   ralloc_free(rubbish);
+}
diff --git a/src/compiler/nir/nir_to_ssa.c b/src/compiler/nir/nir_to_ssa.c
new file mode 100644
index 00000000000..44a50547738
--- /dev/null
+++ b/src/compiler/nir/nir_to_ssa.c
@@ -0,0 +1,536 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <stdlib.h>
+#include <assert.h>
+
+/*
+ * Implements the classic to-SSA algorithm described by Cytron et al. in
+ * "Efficiently Computing Static Single Assignment Form and the Control
+ * Dependence Graph."
+ */
+
+/* inserts a phi node of the form reg = phi(reg, reg, reg, ...) */
+
+static void
+insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
+{
+   nir_phi_instr *instr = nir_phi_instr_create(mem_ctx);
+
+   instr->dest.reg.reg = reg;
+   struct set_entry *entry;
+   set_foreach(block->predecessors, entry) {
+      nir_block *pred = (nir_block *) entry->key;
+
+      nir_phi_src *src = ralloc(instr, nir_phi_src);
+      src->pred = pred;
+      src->src.is_ssa = false;
+      src->src.reg.base_offset = 0;
+      src->src.reg.indirect = NULL;
+      src->src.reg.reg = reg;
+      exec_list_push_tail(&instr->srcs, &src->node);
+   }
+
+   nir_instr_insert_before_block(block, &instr->instr);
+}
+
+static void
+insert_phi_nodes(nir_function_impl *impl)
+{
+   void *mem_ctx = ralloc_parent(impl);
+
+   unsigned *work = calloc(impl->num_blocks, sizeof(unsigned));
+   unsigned *has_already = calloc(impl->num_blocks, sizeof(unsigned));
+
+   /*
+    * Since the work flags already prevent us from inserting a node that has
+    * ever been inserted into W, we don't need to use a set to represent W.
+    * Also, since no block can ever be inserted into W more than once, we know
+    * that the maximum size of W is the number of basic blocks in the
+    * function. So all we need to handle W is an array and a pointer to the
+    * next element to be inserted and the next element to be removed.
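+    *
+    * In isolation, the discipline looks like this (a sketch; the names are
+    * illustrative, not part of the pass):
+    *
+    *    nir_block **W = malloc(impl->num_blocks * sizeof(nir_block *));
+    *    unsigned w_start = 0, w_end = 0;
+    *    W[w_end++] = def_block;            // push, guarded by work[]
+    *    while (w_start != w_end) {
+    *       nir_block *cur = W[w_start++];  // pop in insertion order
+    *       ...
+    *    }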
+ */ + nir_block **W = malloc(impl->num_blocks * sizeof(nir_block *)); + unsigned w_start, w_end; + + unsigned iter_count = 0; + + nir_index_blocks(impl); + + foreach_list_typed(nir_register, reg, node, &impl->registers) { + if (reg->num_array_elems != 0) + continue; + + w_start = w_end = 0; + iter_count++; + + nir_foreach_def(reg, dest) { + nir_instr *def = dest->reg.parent_instr; + if (work[def->block->index] < iter_count) + W[w_end++] = def->block; + work[def->block->index] = iter_count; + } + + while (w_start != w_end) { + nir_block *cur = W[w_start++]; + struct set_entry *entry; + set_foreach(cur->dom_frontier, entry) { + nir_block *next = (nir_block *) entry->key; + + /* + * If there's more than one return statement, then the end block + * can be a join point for some definitions. However, there are + * no instructions in the end block, so nothing would use those + * phi nodes. Of course, we couldn't place those phi nodes + * anyways due to the restriction of having no instructions in the + * end block... + */ + if (next == impl->end_block) + continue; + + if (has_already[next->index] < iter_count) { + insert_trivial_phi(reg, next, mem_ctx); + has_already[next->index] = iter_count; + if (work[next->index] < iter_count) { + work[next->index] = iter_count; + W[w_end++] = next; + } + } + } + } + } + + free(work); + free(has_already); + free(W); +} + +typedef struct { + nir_ssa_def **stack; + int index; + unsigned num_defs; /** < used to add indices to debug names */ +#ifndef NDEBUG + unsigned stack_size; +#endif +} reg_state; + +typedef struct { + reg_state *states; + void *mem_ctx; + nir_instr *parent_instr; + nir_if *parent_if; + nir_function_impl *impl; + + /* map from SSA value -> original register */ + struct hash_table *ssa_map; +} rewrite_state; + +static nir_ssa_def *get_ssa_src(nir_register *reg, rewrite_state *state) +{ + unsigned index = reg->index; + + if (state->states[index].index == -1) { + /* + * We're using an undefined register, create a new undefined SSA value + * to preserve the information that this source is undefined + */ + nir_ssa_undef_instr *instr = + nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components); + + /* + * We could just insert the undefined instruction before the instruction + * we're rewriting, but we could be rewriting a phi source in which case + * we can't do that, so do the next easiest thing - insert it at the + * beginning of the program. In the end, it doesn't really matter where + * the undefined instructions are because they're going to be ignored + * in the backend. 
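+       *
+       * For reference, the per-register rename stack this function reads
+       * behaves roughly like the following (a sketch, not literal code
+       * from this file):
+       *
+       *    stack[++index] = new_def;   // rewrite_def_forwards: push
+       *    def = stack[index];         // get_ssa_src: current reaching def
+       *    index--;                    // rewrite_def_backwards: pop
+       *
+       * so index == -1 here simply means no definition reaches this use.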
+ */ + nir_instr_insert_before_cf_list(&state->impl->body, &instr->instr); + return &instr->def; + } + + return state->states[index].stack[state->states[index].index]; +} + +static bool +rewrite_use(nir_src *src, void *_state) +{ + rewrite_state *state = (rewrite_state *) _state; + + if (src->is_ssa) + return true; + + unsigned index = src->reg.reg->index; + + if (state->states[index].stack == NULL) + return true; + + nir_ssa_def *def = get_ssa_src(src->reg.reg, state); + if (state->parent_instr) + nir_instr_rewrite_src(state->parent_instr, src, nir_src_for_ssa(def)); + else + nir_if_rewrite_condition(state->parent_if, nir_src_for_ssa(def)); + + return true; +} + +static bool +rewrite_def_forwards(nir_dest *dest, void *_state) +{ + rewrite_state *state = (rewrite_state *) _state; + + if (dest->is_ssa) + return true; + + nir_register *reg = dest->reg.reg; + unsigned index = reg->index; + + if (state->states[index].stack == NULL) + return true; + + char *name = NULL; + if (dest->reg.reg->name) + name = ralloc_asprintf(state->mem_ctx, "%s_%u", dest->reg.reg->name, + state->states[index].num_defs); + + list_del(&dest->reg.def_link); + nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name); + + /* push our SSA destination on the stack */ + state->states[index].index++; + assert(state->states[index].index < state->states[index].stack_size); + state->states[index].stack[state->states[index].index] = &dest->ssa; + state->states[index].num_defs++; + + _mesa_hash_table_insert(state->ssa_map, &dest->ssa, reg); + + return true; +} + +static void +rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state) +{ + state->parent_instr = &instr->instr; + + nir_foreach_src(&instr->instr, rewrite_use, state); + + if (instr->dest.dest.is_ssa) + return; + + nir_register *reg = instr->dest.dest.reg.reg; + unsigned index = reg->index; + + if (state->states[index].stack == NULL) + return; + + unsigned write_mask = instr->dest.write_mask; + if (write_mask != (1 << instr->dest.dest.reg.reg->num_components) - 1) { + /* + * Calculate the number of components the final instruction, which for + * per-component things is the number of output components of the + * instruction and non-per-component things is the number of enabled + * channels in the write mask. + */ + unsigned num_components; + if (nir_op_infos[instr->op].output_size == 0) { + unsigned temp = (write_mask & 0x5) + ((write_mask >> 1) & 0x5); + num_components = (temp & 0x3) + ((temp >> 2) & 0x3); + } else { + num_components = nir_op_infos[instr->op].output_size; + } + + char *name = NULL; + if (instr->dest.dest.reg.reg->name) + name = ralloc_asprintf(state->mem_ctx, "%s_%u", + reg->name, state->states[index].num_defs); + + instr->dest.write_mask = (1 << num_components) - 1; + list_del(&instr->dest.dest.reg.def_link); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name); + + if (nir_op_infos[instr->op].output_size == 0) { + /* + * When we change the output writemask, we need to change the + * swizzles for per-component inputs too + */ + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + if (nir_op_infos[instr->op].input_sizes[i] != 0) + continue; + + unsigned new_swizzle[4] = {0, 0, 0, 0}; + + /* + * We keep two indices: + * 1. The index of the original (non-SSA) component + * 2. The index of the post-SSA, compacted, component + * + * We need to map the swizzle component at index 1 to the swizzle + * component at index 2. 
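+             *
+             * A concrete example (illustrative only): with write_mask =
+             * 0x5 (components x and z live), components 0 and 2 compact
+             * to SSA channels 0 and 1, so the loop below produces:
+             *
+             *    new_swizzle[0] = swizzle[0];   // old x feeds channel 0
+             *    new_swizzle[1] = swizzle[2];   // old z feeds channel 1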
+ */ + + unsigned ssa_index = 0; + for (unsigned index = 0; index < 4; index++) { + if (!((write_mask >> index) & 1)) + continue; + + new_swizzle[ssa_index] = instr->src[i].swizzle[index]; + ssa_index++; + } + + for (unsigned j = 0; j < 4; j++) + instr->src[i].swizzle[j] = new_swizzle[j]; + } + } + + nir_op op; + switch (reg->num_components) { + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("not reached"); + } + + nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, op); + + vec->dest.dest.reg.reg = reg; + vec->dest.write_mask = (1 << reg->num_components) - 1; + + nir_ssa_def *old_src = get_ssa_src(reg, state); + nir_ssa_def *new_src = &instr->dest.dest.ssa; + + unsigned ssa_index = 0; + for (unsigned i = 0; i < reg->num_components; i++) { + vec->src[i].src.is_ssa = true; + if ((write_mask >> i) & 1) { + vec->src[i].src.ssa = new_src; + if (nir_op_infos[instr->op].output_size == 0) + vec->src[i].swizzle[0] = ssa_index; + else + vec->src[i].swizzle[0] = i; + ssa_index++; + } else { + vec->src[i].src.ssa = old_src; + vec->src[i].swizzle[0] = i; + } + } + + nir_instr_insert_after(&instr->instr, &vec->instr); + + state->parent_instr = &vec->instr; + rewrite_def_forwards(&vec->dest.dest, state); + } else { + rewrite_def_forwards(&instr->dest.dest, state); + } +} + +static void +rewrite_phi_instr(nir_phi_instr *instr, rewrite_state *state) +{ + state->parent_instr = &instr->instr; + rewrite_def_forwards(&instr->dest, state); +} + +static void +rewrite_instr_forward(nir_instr *instr, rewrite_state *state) +{ + if (instr->type == nir_instr_type_alu) { + rewrite_alu_instr_forward(nir_instr_as_alu(instr), state); + return; + } + + if (instr->type == nir_instr_type_phi) { + rewrite_phi_instr(nir_instr_as_phi(instr), state); + return; + } + + state->parent_instr = instr; + + nir_foreach_src(instr, rewrite_use, state); + nir_foreach_dest(instr, rewrite_def_forwards, state); +} + +static void +rewrite_phi_sources(nir_block *block, nir_block *pred, rewrite_state *state) +{ + nir_foreach_instr(block, instr) { + if (instr->type != nir_instr_type_phi) + break; + + nir_phi_instr *phi_instr = nir_instr_as_phi(instr); + + state->parent_instr = instr; + + nir_foreach_phi_src(phi_instr, src) { + if (src->pred == pred) { + rewrite_use(&src->src, state); + break; + } + } + } +} + +static bool +rewrite_def_backwards(nir_dest *dest, void *_state) +{ + rewrite_state *state = (rewrite_state *) _state; + + if (!dest->is_ssa) + return true; + + struct hash_entry *entry = + _mesa_hash_table_search(state->ssa_map, &dest->ssa); + + if (!entry) + return true; + + nir_register *reg = (nir_register *) entry->data; + unsigned index = reg->index; + + state->states[index].index--; + assert(state->states[index].index >= -1); + + return true; +} + +static void +rewrite_instr_backwards(nir_instr *instr, rewrite_state *state) +{ + nir_foreach_dest(instr, rewrite_def_backwards, state); +} + +static void +rewrite_block(nir_block *block, rewrite_state *state) +{ + /* This will skip over any instructions after the current one, which is + * what we want because those instructions (vector gather, conditional + * select) will already be in SSA form. 
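+    *
+    * The overall shape is the standard dominance-tree renaming walk
+    * (paraphrased from the code below):
+    *
+    *    rewrite_block(B):
+    *       rewrite each instruction forwards, pushing def stacks
+    *       rewrite the condition of a trailing if, if any
+    *       rewrite matching phi sources in B's CFG successors
+    *       recurse into B's dominator-tree children
+    *       walk B backwards, popping the def stacks again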
+ */ + nir_foreach_instr_safe(block, instr) { + rewrite_instr_forward(instr, state); + } + + if (block != state->impl->end_block && + !nir_cf_node_is_last(&block->cf_node) && + nir_cf_node_next(&block->cf_node)->type == nir_cf_node_if) { + nir_if *if_stmt = nir_cf_node_as_if(nir_cf_node_next(&block->cf_node)); + state->parent_instr = NULL; + state->parent_if = if_stmt; + rewrite_use(&if_stmt->condition, state); + } + + if (block->successors[0]) + rewrite_phi_sources(block->successors[0], block, state); + if (block->successors[1]) + rewrite_phi_sources(block->successors[1], block, state); + + for (unsigned i = 0; i < block->num_dom_children; i++) + rewrite_block(block->dom_children[i], state); + + nir_foreach_instr_reverse(block, instr) { + rewrite_instr_backwards(instr, state); + } +} + +static void +remove_unused_regs(nir_function_impl *impl, rewrite_state *state) +{ + foreach_list_typed_safe(nir_register, reg, node, &impl->registers) { + if (state->states[reg->index].stack != NULL) + exec_node_remove(®->node); + } +} + +static void +init_rewrite_state(nir_function_impl *impl, rewrite_state *state) +{ + state->impl = impl; + state->mem_ctx = ralloc_parent(impl); + state->ssa_map = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + state->states = ralloc_array(NULL, reg_state, impl->reg_alloc); + + foreach_list_typed(nir_register, reg, node, &impl->registers) { + assert(reg->index < impl->reg_alloc); + if (reg->num_array_elems > 0) { + state->states[reg->index].stack = NULL; + } else { + /* + * Calculate a conservative estimate of the stack size based on the + * number of definitions there are. Note that this function *must* be + * called after phi nodes are inserted so we can count phi node + * definitions too. + */ + unsigned stack_size = list_length(®->defs); + + state->states[reg->index].stack = ralloc_array(state->states, + nir_ssa_def *, + stack_size); +#ifndef NDEBUG + state->states[reg->index].stack_size = stack_size; +#endif + state->states[reg->index].index = -1; + state->states[reg->index].num_defs = 0; + } + } +} + +static void +destroy_rewrite_state(rewrite_state *state) +{ + _mesa_hash_table_destroy(state->ssa_map, NULL); + ralloc_free(state->states); +} + +void +nir_convert_to_ssa_impl(nir_function_impl *impl) +{ + nir_metadata_require(impl, nir_metadata_dominance); + + insert_phi_nodes(impl); + + rewrite_state state; + init_rewrite_state(impl, &state); + + rewrite_block(nir_start_block(impl), &state); + + remove_unused_regs(impl, &state); + + nir_metadata_preserve(impl, nir_metadata_block_index | + nir_metadata_dominance); + + destroy_rewrite_state(&state); +} + +void +nir_convert_to_ssa(nir_shader *shader) +{ + nir_foreach_function(shader, function) { + if (function->impl) + nir_convert_to_ssa_impl(function->impl); + } +} diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c new file mode 100644 index 00000000000..e4db68db3c0 --- /dev/null +++ b/src/compiler/nir/nir_validate.c @@ -0,0 +1,1071 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright 
notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Connor Abbott (cwabbott0@gmail.com)
+ *
+ */
+
+#include "nir.h"
+#include <assert.h>
+
+/*
+ * This file checks for invalid IR indicating a bug somewhere in the compiler.
+ */
+
+/* Since this file is just a pile of asserts, don't bother compiling it if
+ * we're not building a debug build.
+ */
+#ifdef DEBUG
+
+/*
+ * Per-register validation state.
+ */
+
+typedef struct {
+   /*
+    * equivalent to the uses and defs in nir_register, but built up by the
+    * validator. At the end, we verify that the sets have the same entries.
+    */
+   struct set *uses, *if_uses, *defs;
+   nir_function_impl *where_defined; /* NULL for global registers */
+} reg_validate_state;
+
+typedef struct {
+   /*
+    * equivalent to the uses in nir_ssa_def, but built up by the validator.
+    * At the end, we verify that the sets have the same entries.
+    */
+   struct set *uses, *if_uses;
+   nir_function_impl *where_defined;
+} ssa_def_validate_state;
+
+typedef struct {
+   /* map of register -> validation state (struct above) */
+   struct hash_table *regs;
+
+   /* the current shader being validated */
+   nir_shader *shader;
+
+   /* the current instruction being validated */
+   nir_instr *instr;
+
+   /* the current basic block being validated */
+   nir_block *block;
+
+   /* the current if statement being validated */
+   nir_if *if_stmt;
+
+   /* the current loop being visited */
+   nir_loop *loop;
+
+   /* the parent of the current cf node being visited */
+   nir_cf_node *parent_node;
+
+   /* the current function implementation being validated */
+   nir_function_impl *impl;
+
+   /* map of SSA value -> function implementation where it is defined */
+   struct hash_table *ssa_defs;
+
+   /* bitset of ssa definitions we have found; used to check uniqueness */
+   BITSET_WORD *ssa_defs_found;
+
+   /* bitset of registers we have currently found; used to check uniqueness */
+   BITSET_WORD *regs_found;
+
+   /* map of local variable -> function implementation where it is defined */
+   struct hash_table *var_defs;
+} validate_state;
+
+static void validate_src(nir_src *src, validate_state *state);
+
+static void
+validate_reg_src(nir_src *src, validate_state *state)
+{
+   assert(src->reg.reg != NULL);
+
+   struct hash_entry *entry;
+   entry = _mesa_hash_table_search(state->regs, src->reg.reg);
+   assert(entry);
+
+   reg_validate_state *reg_state = (reg_validate_state *) entry->data;
+
+   if (state->instr) {
+      _mesa_set_add(reg_state->uses, src);
+   } else {
+      assert(state->if_stmt);
+      _mesa_set_add(reg_state->if_uses, src);
+   }
+
+   if (!src->reg.reg->is_global) {
+      assert(reg_state->where_defined == state->impl &&
+             "using a register declared in a different function");
+   }
+
+   assert((src->reg.reg->num_array_elems == 0 ||
+           src->reg.base_offset < src->reg.reg->num_array_elems) &&
+          "definitely out-of-bounds array access");
+
+   if (src->reg.indirect) {
+      assert(src->reg.reg->num_array_elems != 0);
+      assert((src->reg.indirect->is_ssa ||
+             
src->reg.indirect->reg.indirect == NULL) && + "only one level of indirection allowed"); + validate_src(src->reg.indirect, state); + } +} + +static void +validate_ssa_src(nir_src *src, validate_state *state) +{ + assert(src->ssa != NULL); + + struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, src->ssa); + + assert(entry); + + ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; + + assert(def_state->where_defined == state->impl && + "using an SSA value defined in a different function"); + + if (state->instr) { + _mesa_set_add(def_state->uses, src); + } else { + assert(state->if_stmt); + _mesa_set_add(def_state->if_uses, src); + } + + /* TODO validate that the use is dominated by the definition */ +} + +static void +validate_src(nir_src *src, validate_state *state) +{ + if (state->instr) + assert(src->parent_instr == state->instr); + else + assert(src->parent_if == state->if_stmt); + + if (src->is_ssa) + validate_ssa_src(src, state); + else + validate_reg_src(src, state); +} + +static void +validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state) +{ + nir_alu_src *src = &instr->src[index]; + + unsigned num_components; + if (src->src.is_ssa) + num_components = src->src.ssa->num_components; + else { + if (src->src.reg.reg->is_packed) + num_components = 4; /* can't check anything */ + else + num_components = src->src.reg.reg->num_components; + } + for (unsigned i = 0; i < 4; i++) { + assert(src->swizzle[i] < 4); + + if (nir_alu_instr_channel_used(instr, index, i)) + assert(src->swizzle[i] < num_components); + } + + validate_src(&src->src, state); +} + +static void +validate_reg_dest(nir_reg_dest *dest, validate_state *state) +{ + assert(dest->reg != NULL); + + assert(dest->parent_instr == state->instr); + + struct hash_entry *entry2; + entry2 = _mesa_hash_table_search(state->regs, dest->reg); + + assert(entry2); + + reg_validate_state *reg_state = (reg_validate_state *) entry2->data; + _mesa_set_add(reg_state->defs, dest); + + if (!dest->reg->is_global) { + assert(reg_state->where_defined == state->impl && + "writing to a register declared in a different function"); + } + + assert((dest->reg->num_array_elems == 0 || + dest->base_offset < dest->reg->num_array_elems) && + "definitely out-of-bounds array access"); + + if (dest->indirect) { + assert(dest->reg->num_array_elems != 0); + assert((dest->indirect->is_ssa || dest->indirect->reg.indirect == NULL) && + "only one level of indirection allowed"); + validate_src(dest->indirect, state); + } +} + +static void +validate_ssa_def(nir_ssa_def *def, validate_state *state) +{ + assert(def->index < state->impl->ssa_alloc); + assert(!BITSET_TEST(state->ssa_defs_found, def->index)); + BITSET_SET(state->ssa_defs_found, def->index); + + assert(def->parent_instr == state->instr); + + assert(def->num_components <= 4); + + list_validate(&def->uses); + list_validate(&def->if_uses); + + ssa_def_validate_state *def_state = ralloc(state->ssa_defs, + ssa_def_validate_state); + def_state->where_defined = state->impl; + def_state->uses = _mesa_set_create(def_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + def_state->if_uses = _mesa_set_create(def_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + _mesa_hash_table_insert(state->ssa_defs, def, def_state); +} + +static void +validate_dest(nir_dest *dest, validate_state *state) +{ + if (dest->is_ssa) + validate_ssa_def(&dest->ssa, state); + else + validate_reg_dest(&dest->reg, state); +} + +static void +validate_alu_dest(nir_alu_dest *dest, 
validate_state *state) +{ + unsigned dest_size = + dest->dest.is_ssa ? dest->dest.ssa.num_components + : dest->dest.reg.reg->num_components; + bool is_packed = !dest->dest.is_ssa && dest->dest.reg.reg->is_packed; + /* + * validate that the instruction doesn't write to components not in the + * register/SSA value + */ + assert(is_packed || !(dest->write_mask & ~((1 << dest_size) - 1))); + + /* validate that saturate is only ever used on instructions with + * destinations of type float + */ + nir_alu_instr *alu = nir_instr_as_alu(state->instr); + assert(nir_op_infos[alu->op].output_type == nir_type_float || + !dest->saturate); + + validate_dest(&dest->dest, state); +} + +static void +validate_alu_instr(nir_alu_instr *instr, validate_state *state) +{ + assert(instr->op < nir_num_opcodes); + + for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { + validate_alu_src(instr, i, state); + } + + validate_alu_dest(&instr->dest, state); +} + +static void +validate_deref_chain(nir_deref *deref, validate_state *state) +{ + assert(deref->child == NULL || ralloc_parent(deref->child) == deref); + + nir_deref *parent = NULL; + while (deref != NULL) { + switch (deref->deref_type) { + case nir_deref_type_array: + assert(deref->type == glsl_get_array_element(parent->type)); + if (nir_deref_as_array(deref)->deref_array_type == + nir_deref_array_type_indirect) + validate_src(&nir_deref_as_array(deref)->indirect, state); + break; + + case nir_deref_type_struct: + assert(deref->type == + glsl_get_struct_field(parent->type, + nir_deref_as_struct(deref)->index)); + break; + + case nir_deref_type_var: + break; + + default: + assert(!"Invalid deref type"); + break; + } + + parent = deref; + deref = deref->child; + } +} + +static void +validate_var_use(nir_variable *var, validate_state *state) +{ + if (var->data.mode == nir_var_local) { + struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var); + + assert(entry); + assert((nir_function_impl *) entry->data == state->impl); + } +} + +static void +validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state) +{ + assert(deref != NULL); + assert(ralloc_parent(deref) == parent_mem_ctx); + assert(deref->deref.type == deref->var->type); + + validate_var_use(deref->var, state); + + validate_deref_chain(&deref->deref, state); +} + +static void +validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) +{ + unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; + for (unsigned i = 0; i < num_srcs; i++) { + unsigned components_read = + nir_intrinsic_infos[instr->intrinsic].src_components[i]; + if (components_read == 0) + components_read = instr->num_components; + + assert(components_read > 0); + + if (instr->src[i].is_ssa) { + assert(components_read <= instr->src[i].ssa->num_components); + } else if (!instr->src[i].reg.reg->is_packed) { + assert(components_read <= instr->src[i].reg.reg->num_components); + } + + validate_src(&instr->src[i], state); + } + + unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; + for (unsigned i = 0; i < num_vars; i++) { + validate_deref_var(instr, instr->variables[i], state); + } + + if (nir_intrinsic_infos[instr->intrinsic].has_dest) { + unsigned components_written = + nir_intrinsic_infos[instr->intrinsic].dest_components; + if (components_written == 0) + components_written = instr->num_components; + + assert(components_written > 0); + + if (instr->dest.is_ssa) { + assert(components_written <= instr->dest.ssa.num_components); + } else if 
(!instr->dest.reg.reg->is_packed) { + assert(components_written <= instr->dest.reg.reg->num_components); + } + + validate_dest(&instr->dest, state); + } + + switch (instr->intrinsic) { + case nir_intrinsic_load_var: { + const struct glsl_type *type = + nir_deref_tail(&instr->variables[0]->deref)->type; + assert(glsl_type_is_vector_or_scalar(type) || + (instr->variables[0]->var->data.mode == nir_var_uniform && + glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE)); + assert(instr->num_components == glsl_get_vector_elements(type)); + break; + } + case nir_intrinsic_store_var: { + const struct glsl_type *type = + nir_deref_tail(&instr->variables[0]->deref)->type; + assert(glsl_type_is_vector_or_scalar(type) || + (instr->variables[0]->var->data.mode == nir_var_uniform && + glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE)); + assert(instr->num_components == glsl_get_vector_elements(type)); + assert(instr->variables[0]->var->data.mode != nir_var_shader_in && + instr->variables[0]->var->data.mode != nir_var_uniform && + instr->variables[0]->var->data.mode != nir_var_shader_storage); + assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0); + break; + } + case nir_intrinsic_copy_var: + assert(nir_deref_tail(&instr->variables[0]->deref)->type == + nir_deref_tail(&instr->variables[1]->deref)->type); + assert(instr->variables[0]->var->data.mode != nir_var_shader_in && + instr->variables[0]->var->data.mode != nir_var_uniform && + instr->variables[0]->var->data.mode != nir_var_shader_storage); + break; + default: + break; + } +} + +static void +validate_tex_instr(nir_tex_instr *instr, validate_state *state) +{ + bool src_type_seen[nir_num_tex_src_types]; + for (unsigned i = 0; i < nir_num_tex_src_types; i++) + src_type_seen[i] = false; + + for (unsigned i = 0; i < instr->num_srcs; i++) { + assert(!src_type_seen[instr->src[i].src_type]); + src_type_seen[instr->src[i].src_type] = true; + validate_src(&instr->src[i].src, state); + } + + if (instr->sampler != NULL) + validate_deref_var(instr, instr->sampler, state); + + validate_dest(&instr->dest, state); +} + +static void +validate_call_instr(nir_call_instr *instr, validate_state *state) +{ + if (instr->return_deref == NULL) + assert(glsl_type_is_void(instr->callee->return_type)); + else + assert(instr->return_deref->deref.type == instr->callee->return_type); + + assert(instr->num_params == instr->callee->num_params); + + for (unsigned i = 0; i < instr->num_params; i++) { + assert(instr->callee->params[i].type == instr->params[i]->deref.type); + validate_deref_var(instr, instr->params[i], state); + } + + validate_deref_var(instr, instr->return_deref, state); +} + +static void +validate_load_const_instr(nir_load_const_instr *instr, validate_state *state) +{ + validate_ssa_def(&instr->def, state); +} + +static void +validate_ssa_undef_instr(nir_ssa_undef_instr *instr, validate_state *state) +{ + validate_ssa_def(&instr->def, state); +} + +static void +validate_phi_instr(nir_phi_instr *instr, validate_state *state) +{ + /* + * don't validate the sources until we get to them from their predecessor + * basic blocks, to avoid validating an SSA use before its definition. 
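+    *
+    * Instead they are checked from the predecessor side; roughly:
+    *
+    *    validate_block(pred):
+    *       for each successor S of pred:
+    *          validate_phi_srcs(pred, S)   // validates only the source
+    *                                       // whose src->pred == pred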
+ */ + + validate_dest(&instr->dest, state); + + exec_list_validate(&instr->srcs); + assert(exec_list_length(&instr->srcs) == + state->block->predecessors->entries); +} + +static void +validate_instr(nir_instr *instr, validate_state *state) +{ + assert(instr->block == state->block); + + state->instr = instr; + + switch (instr->type) { + case nir_instr_type_alu: + validate_alu_instr(nir_instr_as_alu(instr), state); + break; + + case nir_instr_type_call: + validate_call_instr(nir_instr_as_call(instr), state); + break; + + case nir_instr_type_intrinsic: + validate_intrinsic_instr(nir_instr_as_intrinsic(instr), state); + break; + + case nir_instr_type_tex: + validate_tex_instr(nir_instr_as_tex(instr), state); + break; + + case nir_instr_type_load_const: + validate_load_const_instr(nir_instr_as_load_const(instr), state); + break; + + case nir_instr_type_phi: + validate_phi_instr(nir_instr_as_phi(instr), state); + break; + + case nir_instr_type_ssa_undef: + validate_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state); + break; + + case nir_instr_type_jump: + break; + + default: + assert(!"Invalid ALU instruction type"); + break; + } + + state->instr = NULL; +} + +static void +validate_phi_src(nir_phi_instr *instr, nir_block *pred, validate_state *state) +{ + state->instr = &instr->instr; + + assert(instr->dest.is_ssa); + + exec_list_validate(&instr->srcs); + nir_foreach_phi_src(instr, src) { + if (src->pred == pred) { + assert(src->src.is_ssa); + assert(src->src.ssa->num_components == + instr->dest.ssa.num_components); + + validate_src(&src->src, state); + state->instr = NULL; + return; + } + } + + abort(); +} + +static void +validate_phi_srcs(nir_block *block, nir_block *succ, validate_state *state) +{ + nir_foreach_instr(succ, instr) { + if (instr->type != nir_instr_type_phi) + break; + + validate_phi_src(nir_instr_as_phi(instr), block, state); + } +} + +static void validate_cf_node(nir_cf_node *node, validate_state *state); + +static void +validate_block(nir_block *block, validate_state *state) +{ + assert(block->cf_node.parent == state->parent_node); + + state->block = block; + + exec_list_validate(&block->instr_list); + nir_foreach_instr(block, instr) { + if (instr->type == nir_instr_type_phi) { + assert(instr == nir_block_first_instr(block) || + nir_instr_prev(instr)->type == nir_instr_type_phi); + } + + if (instr->type == nir_instr_type_jump) { + assert(instr == nir_block_last_instr(block)); + } + + validate_instr(instr, state); + } + + assert(block->successors[0] != NULL); + assert(block->successors[0] != block->successors[1]); + + for (unsigned i = 0; i < 2; i++) { + if (block->successors[i] != NULL) { + struct set_entry *entry = + _mesa_set_search(block->successors[i]->predecessors, block); + assert(entry); + + validate_phi_srcs(block, block->successors[i], state); + } + } + + struct set_entry *entry; + set_foreach(block->predecessors, entry) { + const nir_block *pred = entry->key; + assert(pred->successors[0] == block || + pred->successors[1] == block); + } + + if (!exec_list_is_empty(&block->instr_list) && + nir_block_last_instr(block)->type == nir_instr_type_jump) { + assert(block->successors[1] == NULL); + nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block)); + switch (jump->type) { + case nir_jump_break: { + nir_block *after = + nir_cf_node_as_block(nir_cf_node_next(&state->loop->cf_node)); + assert(block->successors[0] == after); + break; + } + + case nir_jump_continue: { + nir_block *first = + nir_cf_node_as_block(nir_loop_first_cf_node(state->loop)); + 
assert(block->successors[0] == first); + break; + } + + case nir_jump_return: + assert(block->successors[0] == state->impl->end_block); + break; + + default: + unreachable("bad jump type"); + } + } else { + nir_cf_node *next = nir_cf_node_next(&block->cf_node); + if (next == NULL) { + switch (state->parent_node->type) { + case nir_cf_node_loop: { + nir_block *first = + nir_cf_node_as_block(nir_loop_first_cf_node(state->loop)); + assert(block->successors[0] == first); + /* due to the hack for infinite loops, block->successors[1] may + * point to the block after the loop. + */ + break; + } + + case nir_cf_node_if: { + nir_block *after = + nir_cf_node_as_block(nir_cf_node_next(state->parent_node)); + assert(block->successors[0] == after); + assert(block->successors[1] == NULL); + break; + } + + case nir_cf_node_function: + assert(block->successors[0] == state->impl->end_block); + assert(block->successors[1] == NULL); + break; + + default: + unreachable("unknown control flow node type"); + } + } else { + if (next->type == nir_cf_node_if) { + nir_if *if_stmt = nir_cf_node_as_if(next); + assert(&block->successors[0]->cf_node == + nir_if_first_then_node(if_stmt)); + assert(&block->successors[1]->cf_node == + nir_if_first_else_node(if_stmt)); + } else { + assert(next->type == nir_cf_node_loop); + nir_loop *loop = nir_cf_node_as_loop(next); + assert(&block->successors[0]->cf_node == + nir_loop_first_cf_node(loop)); + assert(block->successors[1] == NULL); + } + } + } +} + +static void +validate_if(nir_if *if_stmt, validate_state *state) +{ + state->if_stmt = if_stmt; + + assert(!exec_node_is_head_sentinel(if_stmt->cf_node.node.prev)); + nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node); + assert(prev_node->type == nir_cf_node_block); + + assert(!exec_node_is_tail_sentinel(if_stmt->cf_node.node.next)); + nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node); + assert(next_node->type == nir_cf_node_block); + + validate_src(&if_stmt->condition, state); + + assert(!exec_list_is_empty(&if_stmt->then_list)); + assert(!exec_list_is_empty(&if_stmt->else_list)); + + nir_cf_node *old_parent = state->parent_node; + state->parent_node = &if_stmt->cf_node; + + exec_list_validate(&if_stmt->then_list); + foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->then_list) { + validate_cf_node(cf_node, state); + } + + exec_list_validate(&if_stmt->else_list); + foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->else_list) { + validate_cf_node(cf_node, state); + } + + state->parent_node = old_parent; + state->if_stmt = NULL; +} + +static void +validate_loop(nir_loop *loop, validate_state *state) +{ + assert(!exec_node_is_head_sentinel(loop->cf_node.node.prev)); + nir_cf_node *prev_node = nir_cf_node_prev(&loop->cf_node); + assert(prev_node->type == nir_cf_node_block); + + assert(!exec_node_is_tail_sentinel(loop->cf_node.node.next)); + nir_cf_node *next_node = nir_cf_node_next(&loop->cf_node); + assert(next_node->type == nir_cf_node_block); + + assert(!exec_list_is_empty(&loop->body)); + + nir_cf_node *old_parent = state->parent_node; + state->parent_node = &loop->cf_node; + nir_loop *old_loop = state->loop; + state->loop = loop; + + exec_list_validate(&loop->body); + foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { + validate_cf_node(cf_node, state); + } + + state->parent_node = old_parent; + state->loop = old_loop; +} + +static void +validate_cf_node(nir_cf_node *node, validate_state *state) +{ + assert(node->parent == state->parent_node); + + switch (node->type) { + case 
nir_cf_node_block: + validate_block(nir_cf_node_as_block(node), state); + break; + + case nir_cf_node_if: + validate_if(nir_cf_node_as_if(node), state); + break; + + case nir_cf_node_loop: + validate_loop(nir_cf_node_as_loop(node), state); + break; + + default: + unreachable("Invalid CF node type"); + } +} + +static void +prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state) +{ + assert(reg->is_global == is_global); + + if (is_global) + assert(reg->index < state->shader->reg_alloc); + else + assert(reg->index < state->impl->reg_alloc); + assert(!BITSET_TEST(state->regs_found, reg->index)); + BITSET_SET(state->regs_found, reg->index); + + list_validate(®->uses); + list_validate(®->defs); + list_validate(®->if_uses); + + reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state); + reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + reg_state->if_uses = _mesa_set_create(reg_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + reg_state->defs = _mesa_set_create(reg_state, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + reg_state->where_defined = is_global ? NULL : state->impl; + + _mesa_hash_table_insert(state->regs, reg, reg_state); +} + +static void +postvalidate_reg_decl(nir_register *reg, validate_state *state) +{ + struct hash_entry *entry = _mesa_hash_table_search(state->regs, reg); + + reg_validate_state *reg_state = (reg_validate_state *) entry->data; + + nir_foreach_use(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->uses, src); + assert(entry); + _mesa_set_remove(reg_state->uses, entry); + } + + if (reg_state->uses->entries != 0) { + printf("extra entries in register uses:\n"); + struct set_entry *entry; + set_foreach(reg_state->uses, entry) + printf("%p\n", entry->key); + + abort(); + } + + nir_foreach_if_use(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->if_uses, src); + assert(entry); + _mesa_set_remove(reg_state->if_uses, entry); + } + + if (reg_state->if_uses->entries != 0) { + printf("extra entries in register if_uses:\n"); + struct set_entry *entry; + set_foreach(reg_state->if_uses, entry) + printf("%p\n", entry->key); + + abort(); + } + + nir_foreach_def(reg, src) { + struct set_entry *entry = _mesa_set_search(reg_state->defs, src); + assert(entry); + _mesa_set_remove(reg_state->defs, entry); + } + + if (reg_state->defs->entries != 0) { + printf("extra entries in register defs:\n"); + struct set_entry *entry; + set_foreach(reg_state->defs, entry) + printf("%p\n", entry->key); + + abort(); + } +} + +static void +validate_var_decl(nir_variable *var, bool is_global, validate_state *state) +{ + assert(is_global != (var->data.mode == nir_var_local)); + + /* + * TODO validate some things ir_validate.cpp does (requires more GLSL type + * support) + */ + + if (!is_global) { + _mesa_hash_table_insert(state->var_defs, var, state->impl); + } +} + +static bool +postvalidate_ssa_def(nir_ssa_def *def, void *void_state) +{ + validate_state *state = void_state; + + struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def); + ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; + + nir_foreach_use(def, src) { + struct set_entry *entry = _mesa_set_search(def_state->uses, src); + assert(entry); + _mesa_set_remove(def_state->uses, entry); + } + + if (def_state->uses->entries != 0) { + printf("extra entries in register uses:\n"); + struct set_entry *entry; + set_foreach(def_state->uses, entry) + printf("%p\n", entry->key); + + 
abort(); + } + + nir_foreach_if_use(def, src) { + struct set_entry *entry = _mesa_set_search(def_state->if_uses, src); + assert(entry); + _mesa_set_remove(def_state->if_uses, entry); + } + + if (def_state->if_uses->entries != 0) { + printf("extra entries in register uses:\n"); + struct set_entry *entry; + set_foreach(def_state->if_uses, entry) + printf("%p\n", entry->key); + + abort(); + } + + return true; +} + +static bool +postvalidate_ssa_defs_block(nir_block *block, void *state) +{ + nir_foreach_instr(block, instr) + nir_foreach_ssa_def(instr, postvalidate_ssa_def, state); + + return true; +} + +static void +validate_function_impl(nir_function_impl *impl, validate_state *state) +{ + assert(impl->function->impl == impl); + assert(impl->cf_node.parent == NULL); + + assert(impl->num_params == impl->function->num_params); + for (unsigned i = 0; i < impl->num_params; i++) + assert(impl->params[i]->type == impl->function->params[i].type); + + if (glsl_type_is_void(impl->function->return_type)) + assert(impl->return_var == NULL); + else + assert(impl->return_var->type == impl->function->return_type); + + assert(exec_list_is_empty(&impl->end_block->instr_list)); + assert(impl->end_block->successors[0] == NULL); + assert(impl->end_block->successors[1] == NULL); + + state->impl = impl; + state->parent_node = &impl->cf_node; + + exec_list_validate(&impl->locals); + nir_foreach_variable(var, &impl->locals) { + validate_var_decl(var, false, state); + } + + state->regs_found = realloc(state->regs_found, + BITSET_WORDS(impl->reg_alloc) * + sizeof(BITSET_WORD)); + memset(state->regs_found, 0, BITSET_WORDS(impl->reg_alloc) * + sizeof(BITSET_WORD)); + exec_list_validate(&impl->registers); + foreach_list_typed(nir_register, reg, node, &impl->registers) { + prevalidate_reg_decl(reg, false, state); + } + + state->ssa_defs_found = realloc(state->ssa_defs_found, + BITSET_WORDS(impl->ssa_alloc) * + sizeof(BITSET_WORD)); + memset(state->ssa_defs_found, 0, BITSET_WORDS(impl->ssa_alloc) * + sizeof(BITSET_WORD)); + exec_list_validate(&impl->body); + foreach_list_typed(nir_cf_node, node, node, &impl->body) { + validate_cf_node(node, state); + } + + foreach_list_typed(nir_register, reg, node, &impl->registers) { + postvalidate_reg_decl(reg, state); + } + + nir_foreach_block(impl, postvalidate_ssa_defs_block, state); +} + +static void +validate_function(nir_function *func, validate_state *state) +{ + if (func->impl != NULL) { + assert(func->impl->function == func); + validate_function_impl(func->impl, state); + } +} + +static void +init_validate_state(validate_state *state) +{ + state->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + state->ssa_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + state->ssa_defs_found = NULL; + state->regs_found = NULL; + state->var_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + state->loop = NULL; +} + +static void +destroy_validate_state(validate_state *state) +{ + _mesa_hash_table_destroy(state->regs, NULL); + _mesa_hash_table_destroy(state->ssa_defs, NULL); + free(state->ssa_defs_found); + free(state->regs_found); + _mesa_hash_table_destroy(state->var_defs, NULL); +} + +void +nir_validate_shader(nir_shader *shader) +{ + validate_state state; + init_validate_state(&state); + + state.shader = shader; + + exec_list_validate(&shader->uniforms); + nir_foreach_variable(var, &shader->uniforms) { + validate_var_decl(var, true, &state); + } + + 
exec_list_validate(&shader->inputs); + nir_foreach_variable(var, &shader->inputs) { + validate_var_decl(var, true, &state); + } + + exec_list_validate(&shader->outputs); + nir_foreach_variable(var, &shader->outputs) { + validate_var_decl(var, true, &state); + } + + exec_list_validate(&shader->globals); + nir_foreach_variable(var, &shader->globals) { + validate_var_decl(var, true, &state); + } + + exec_list_validate(&shader->system_values); + nir_foreach_variable(var, &shader->system_values) { + validate_var_decl(var, true, &state); + } + + state.regs_found = realloc(state.regs_found, + BITSET_WORDS(shader->reg_alloc) * + sizeof(BITSET_WORD)); + memset(state.regs_found, 0, BITSET_WORDS(shader->reg_alloc) * + sizeof(BITSET_WORD)); + exec_list_validate(&shader->registers); + foreach_list_typed(nir_register, reg, node, &shader->registers) { + prevalidate_reg_decl(reg, true, &state); + } + + exec_list_validate(&shader->functions); + foreach_list_typed(nir_function, func, node, &shader->functions) { + validate_function(func, &state); + } + + foreach_list_typed(nir_register, reg, node, &shader->registers) { + postvalidate_reg_decl(reg, &state); + } + + destroy_validate_state(&state); +} + +#endif /* NDEBUG */ diff --git a/src/compiler/nir/nir_vla.h b/src/compiler/nir/nir_vla.h new file mode 100644 index 00000000000..753783316a2 --- /dev/null +++ b/src/compiler/nir/nir_vla.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2015 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#pragma once + + +#include "c99_alloca.h" + + +/* Declare a variable length array, with no initialization */ +#define NIR_VLA(_type, _name, _length) \ + _type *_name = alloca((_length) * sizeof *_name) + + +/* Declare a variable length array, and initialize it with the given byte. + * + * _length is evaluated twice, so expressions with side-effects must be + * avoided. + */ +#define NIR_VLA_FILL(_type, _name, _length, _byte) \ + _type *_name = memset(alloca((_length) * sizeof *_name), _byte, (_length) * sizeof *_name) + + +/* Declare a variable length array, and zero it. + * + * Just like NIR_VLA_FILL, _length is evaluated twice, so expressions with + * side-effects must be avoided. 
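+ *
+ * Typical usage (an illustrative example; `impl` stands for any
+ * nir_function_impl in scope):
+ *
+ *    NIR_VLA_ZERO(nir_ssa_def *, defs, impl->ssa_alloc);
+ *
+ * which declares `defs` as a zero-filled, stack-allocated array of
+ * impl->ssa_alloc pointers scoped to the enclosing function.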
+ */ +#define NIR_VLA_ZERO(_type, _name, _length) \ + NIR_VLA_FILL(_type, _name, _length, 0) diff --git a/src/compiler/nir/nir_worklist.c b/src/compiler/nir/nir_worklist.c new file mode 100644 index 00000000000..3087a1d2354 --- /dev/null +++ b/src/compiler/nir/nir_worklist.c @@ -0,0 +1,144 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "nir_worklist.h" + +void +nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks, + void *mem_ctx) +{ + w->size = num_blocks; + w->count = 0; + w->start = 0; + + w->blocks_present = rzalloc_array(mem_ctx, BITSET_WORD, + BITSET_WORDS(num_blocks)); + w->blocks = ralloc_array(mem_ctx, nir_block *, num_blocks); +} + +void +nir_block_worklist_fini(nir_block_worklist *w) +{ + ralloc_free(w->blocks_present); + ralloc_free(w->blocks); +} + +static bool +worklist_add_block(nir_block *block, void *w) +{ + nir_block_worklist_push_tail(w, block); + + return true; +} + +void +nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl) +{ + nir_foreach_block(impl, worklist_add_block, w); +} + +void +nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block) +{ + /* Pushing a block we already have is a no-op */ + if (BITSET_TEST(w->blocks_present, block->index)) + return; + + assert(w->count < w->size); + + if (w->start == 0) + w->start = w->size - 1; + else + w->start--; + + w->count++; + + w->blocks[w->start] = block; + BITSET_SET(w->blocks_present, block->index); +} + +nir_block * +nir_block_worklist_peek_head(const nir_block_worklist *w) +{ + assert(w->count > 0); + + return w->blocks[w->start]; +} + +nir_block * +nir_block_worklist_pop_head(nir_block_worklist *w) +{ + assert(w->count > 0); + + unsigned head = w->start; + + w->start = (w->start + 1) % w->size; + w->count--; + + BITSET_CLEAR(w->blocks_present, w->blocks[head]->index); + return w->blocks[head]; +} + +void +nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block) +{ + /* Pushing a block we already have is a no-op */ + if (BITSET_TEST(w->blocks_present, block->index)) + return; + + assert(w->count < w->size); + + w->count++; + + unsigned tail = (w->start + w->count - 1) % w->size; + + w->blocks[tail] = block; + BITSET_SET(w->blocks_present, block->index); +} + +nir_block * +nir_block_worklist_peek_tail(const nir_block_worklist *w) +{ + assert(w->count > 0); + + unsigned 
tail = (w->start + w->count - 1) % w->size;
+
+   return w->blocks[tail];
+}
+
+nir_block *
+nir_block_worklist_pop_tail(nir_block_worklist *w)
+{
+   assert(w->count > 0);
+
+   unsigned tail = (w->start + w->count - 1) % w->size;
+
+   w->count--;
+
+   BITSET_CLEAR(w->blocks_present, w->blocks[tail]->index);
+   return w->blocks[tail];
+}
diff --git a/src/compiler/nir/nir_worklist.h b/src/compiler/nir/nir_worklist.h
new file mode 100644
index 00000000000..829bff24a55
--- /dev/null
+++ b/src/compiler/nir/nir_worklist.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
+#pragma once
+
+#ifndef _NIR_WORKLIST_
+#define _NIR_WORKLIST_
+
+#include "nir.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** Represents a double-ended queue of unique blocks
+ *
+ * The worklist data structure guarantees that each block is in the queue at
+ * most once. Pushing a block onto either end of the queue is a no-op if
+ * the block is already in the queue. In order for this to work, the
+ * caller must ensure that the blocks are properly indexed.
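+ *
+ * A typical traversal looks like this (a hypothetical caller; any
+ * nir_function_impl with valid block indices works):
+ *
+ *    nir_block_worklist w;
+ *    nir_block_worklist_init(&w, impl->num_blocks, mem_ctx);
+ *    nir_block_worklist_add_all(&w, impl);
+ *    while (!nir_block_worklist_is_empty(&w)) {
+ *       nir_block *block = nir_block_worklist_pop_head(&w);
+ *       // process block; push_tail() re-queues a block at most once
+ *    }
+ *    nir_block_worklist_fini(&w);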
+ */
+typedef struct {
+   /* The total size of the worklist */
+   unsigned size;
+
+   /* The number of blocks currently in the worklist */
+   unsigned count;
+
+   /* The offset in the array of blocks at which the list starts */
+   unsigned start;
+
+   /* A bitset of all of the blocks currently present in the worklist */
+   BITSET_WORD *blocks_present;
+
+   /* The actual worklist */
+   nir_block **blocks;
+} nir_block_worklist;
+
+void nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks,
+                             void *mem_ctx);
+void nir_block_worklist_fini(nir_block_worklist *w);
+
+void nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl);
+
+static inline bool
+nir_block_worklist_is_empty(const nir_block_worklist *w)
+{
+   return w->count == 0;
+}
+
+void nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block);
+
+nir_block *nir_block_worklist_peek_head(const nir_block_worklist *w);
+
+nir_block *nir_block_worklist_pop_head(nir_block_worklist *w);
+
+void nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block);
+
+nir_block *nir_block_worklist_peek_tail(const nir_block_worklist *w);
+
+nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* _NIR_WORKLIST_ */
diff --git a/src/compiler/nir/tests/control_flow_tests.cpp b/src/compiler/nir/tests/control_flow_tests.cpp
new file mode 100644
index 00000000000..b9379ef3b06
--- /dev/null
+++ b/src/compiler/nir/tests/control_flow_tests.cpp
@@ -0,0 +1,148 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <gtest/gtest.h>
+#include "nir.h"
+#include "nir_builder.h"
+
+class nir_cf_test : public ::testing::Test {
+protected:
+   nir_cf_test();
+   ~nir_cf_test();
+
+   nir_builder b;
+};
+
+nir_cf_test::nir_cf_test()
+{
+   static const nir_shader_compiler_options options = { };
+   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, &options);
+}
+
+nir_cf_test::~nir_cf_test()
+{
+   ralloc_free(b.shader);
+}
+
+TEST_F(nir_cf_test, delete_break_in_loop)
+{
+   /* Create IR:
+    *
+    * while (...) 
{ break; } + */ + nir_loop *loop = nir_loop_create(b.shader); + nir_cf_node_insert(nir_after_cf_list(&b.impl->body), &loop->cf_node); + + b.cursor = nir_after_cf_list(&loop->body); + + nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break); + nir_builder_instr_insert(&b, &jump->instr); + + /* At this point, we should have: + * + * impl main { + * block block_0: + * // preds: + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 + * break + * // succs: block_2 + * } + * block block_2: + * // preds: block_1 + * // succs: block_3 + * block block_3: + * } + */ + nir_block *block_0 = nir_start_block(b.impl); + nir_block *block_1 = nir_cf_node_as_block(nir_loop_first_cf_node(loop)); + nir_block *block_2 = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); + nir_block *block_3 = b.impl->end_block; + ASSERT_EQ(nir_cf_node_block, block_0->cf_node.type); + ASSERT_EQ(nir_cf_node_block, block_1->cf_node.type); + ASSERT_EQ(nir_cf_node_block, block_2->cf_node.type); + ASSERT_EQ(nir_cf_node_block, block_3->cf_node.type); + + /* Verify the successors and predecessors. */ + EXPECT_EQ(block_1, block_0->successors[0]); + EXPECT_EQ(NULL, block_0->successors[1]); + EXPECT_EQ(block_2, block_1->successors[0]); + EXPECT_EQ(NULL, block_1->successors[1]); + EXPECT_EQ(block_3, block_2->successors[0]); + EXPECT_EQ(NULL, block_2->successors[1]); + EXPECT_EQ(NULL, block_3->successors[0]); + EXPECT_EQ(NULL, block_3->successors[1]); + EXPECT_EQ(0, block_0->predecessors->entries); + EXPECT_EQ(1, block_1->predecessors->entries); + EXPECT_EQ(1, block_2->predecessors->entries); + EXPECT_EQ(1, block_3->predecessors->entries); + EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0)); + EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1)); + EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2)); + + nir_print_shader(b.shader, stderr); + + /* Now remove the break. */ + nir_instr_remove(&jump->instr); + + nir_print_shader(b.shader, stderr); + + /* At this point, we should have: + * + * impl main { + * block block_0: + * // preds: + * // succs: block_1 + * loop { + * block block_1: + * // preds: block_0 block_1 + * // succs: block_1 + * } + * block block_2: + * // preds: block_1 + * // succs: block_3 + * block block_3: + * } + * + * Re-verify the predecessors and successors. 
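+    *
+    * (With the break gone the loop is infinite; per the infinite-loop hack
+    * noted in nir_validate.c, block_1 keeps a "fake" successors[1] edge to
+    * block_2, which is exactly what the expectations below rely on.)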
+ */ + EXPECT_EQ(block_1, block_0->successors[0]); + EXPECT_EQ(NULL, block_0->successors[1]); + EXPECT_EQ(block_1, block_1->successors[0]); /* back to itself */ + EXPECT_EQ(block_2, block_1->successors[1]); /* fake successor */ + EXPECT_EQ(block_3, block_2->successors[0]); + EXPECT_EQ(NULL, block_2->successors[1]); + EXPECT_EQ(NULL, block_3->successors[0]); + EXPECT_EQ(NULL, block_3->successors[1]); + EXPECT_EQ(0, block_0->predecessors->entries); + EXPECT_EQ(2, block_1->predecessors->entries); + EXPECT_EQ(1, block_2->predecessors->entries); + EXPECT_EQ(1, block_3->predecessors->entries); + EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0)); + EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_1)); + EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1)); + EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2)); + + nir_metadata_require(b.impl, nir_metadata_dominance); +} diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am index bcdf297030f..0ac5c9802f2 100644 --- a/src/gallium/auxiliary/Makefile.am +++ b/src/gallium/auxiliary/Makefile.am @@ -18,7 +18,7 @@ libgallium_nir_la_SOURCES = \ $(NIR_SOURCES) libgallium_nir_la_CFLAGS = \ - -I$(top_builddir)/src/glsl/nir \ + -I$(top_builddir)/src/compiler/nir \ $(GALLIUM_CFLAGS) \ $(VISIBILITY_CFLAGS) \ $(MSVC2013_COMPAT_CFLAGS) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 0d6eb76eabd..6649e403d08 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -23,13 +23,13 @@ */ #include "util/ralloc.h" -#include "glsl/nir/nir.h" -#include "glsl/nir/nir_control_flow.h" -#include "glsl/nir/nir_builder.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_control_flow.h" +#include "compiler/nir/nir_builder.h" #include "glsl/list.h" #include "compiler/shader_enums.h" -#include "nir/tgsi_to_nir.h" +#include "tgsi_to_nir.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_info.h" diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.h b/src/gallium/auxiliary/nir/tgsi_to_nir.h index 1a185a83219..0651870ea80 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.h +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.h @@ -21,7 +21,7 @@ * IN THE SOFTWARE. 
*/ -#include "glsl/nir/nir.h" +#include "compiler/nir/nir.h" struct nir_shader_compiler_options *options; diff --git a/src/gallium/drivers/freedreno/Makefile.am b/src/gallium/drivers/freedreno/Makefile.am index 3de8e0fd5ad..329a4204c3a 100644 --- a/src/gallium/drivers/freedreno/Makefile.am +++ b/src/gallium/drivers/freedreno/Makefile.am @@ -4,7 +4,7 @@ include $(top_srcdir)/src/gallium/Automake.inc AM_CFLAGS = \ -Wno-packed-bitfield-compat \ -I$(top_srcdir)/src/gallium/drivers/freedreno/ir3 \ - -I$(top_builddir)/src/glsl/nir \ + -I$(top_builddir)/src/compiler/nir \ $(GALLIUM_DRIVER_CFLAGS) \ $(FREEDRENO_CFLAGS) @@ -27,7 +27,7 @@ ir3_compiler_SOURCES = \ ir3_compiler_LDADD = \ libfreedreno.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ - $(top_builddir)/src/glsl/libnir.la \ + $(top_builddir)/src/compiler/nir/libnir.la \ $(top_builddir)/src/util/libmesautil.la \ $(GALLIUM_COMMON_LIB_DEPS) \ $(FREEDRENO_LIBS) diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir.h b/src/gallium/drivers/freedreno/ir3/ir3_nir.h index 311a40338e5..e2d88596094 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir.h +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir.h @@ -29,7 +29,7 @@ #ifndef IR3_NIR_H_ #define IR3_NIR_H_ -#include "glsl/nir/nir.h" +#include "compiler/nir/nir.h" #include "compiler/shader_enums.h" #include "ir3_shader.h" diff --git a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c index 6eee2ebbab6..8815ac981eb 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_nir_lower_if_else.c @@ -28,8 +28,8 @@ */ #include "ir3_nir.h" -#include "glsl/nir/nir_builder.h" -#include "glsl/nir/nir_control_flow.h" +#include "compiler/nir/nir_builder.h" +#include "compiler/nir/nir_control_flow.h" /* Based on nir_opt_peephole_select, and hacked up to more aggressively * flatten anything that can be flattened diff --git a/src/gallium/drivers/vc4/Makefile.am b/src/gallium/drivers/vc4/Makefile.am index a3bf72fc72a..caa5059fbd3 100644 --- a/src/gallium/drivers/vc4/Makefile.am +++ b/src/gallium/drivers/vc4/Makefile.am @@ -27,7 +27,7 @@ SIM_LDFLAGS = -lsimpenrose endif AM_CFLAGS = \ - -I$(top_builddir)/src/glsl/nir \ + -I$(top_builddir)/src/compiler/nir \ $(LIBDRM_CFLAGS) \ $(GALLIUM_DRIVER_CFLAGS) \ $(SIM_CFLAGS) \ diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c index 6d9a624c9b0..a13e309985a 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_blend.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_blend.c @@ -41,7 +41,7 @@ */ #include "util/u_format.h" #include "vc4_qir.h" -#include "glsl/nir/nir_builder.h" +#include "compiler/nir/nir_builder.h" #include "vc4_context.h" static bool diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_io.c b/src/gallium/drivers/vc4/vc4_nir_lower_io.c index bf6631e944e..d47e3bf52b0 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_io.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_io.c @@ -22,7 +22,7 @@ */ #include "vc4_qir.h" -#include "glsl/nir/nir_builder.h" +#include "compiler/nir/nir_builder.h" #include "util/u_format.h" /** diff --git a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c index 2490819c297..6a952c62d5f 100644 --- a/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c +++ b/src/gallium/drivers/vc4/vc4_nir_lower_txf_ms.c @@ -24,7 +24,7 @@ #include "vc4_qir.h" #include "kernel/vc4_packet.h" #include "tgsi/tgsi_info.h" -#include 
"glsl/nir/nir_builder.h" +#include "compiler/nir/nir_builder.h" /** @file vc4_nir_lower_txf_ms.c * Walks the NIR generated by TGSI-to-NIR to lower its nir_texop_txf_ms diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index 3e402d048ba..7db1b03cbc1 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -32,8 +32,8 @@ #include "tgsi/tgsi_dump.h" #include "tgsi/tgsi_lowering.h" #include "tgsi/tgsi_parse.h" -#include "glsl/nir/nir.h" -#include "glsl/nir/nir_builder.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" #include "nir/tgsi_to_nir.h" #include "vc4_context.h" #include "vc4_qpu.h" diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h index 4ab4d35d0ca..bae31768bd8 100644 --- a/src/gallium/drivers/vc4/vc4_qir.h +++ b/src/gallium/drivers/vc4/vc4_qir.h @@ -32,7 +32,7 @@ #include #include "util/macros.h" -#include "glsl/nir/nir.h" +#include "compiler/nir/nir.h" #include "util/list.h" #include "util/u_math.h" diff --git a/src/gallium/targets/pipe-loader/Makefile.am b/src/gallium/targets/pipe-loader/Makefile.am index 4bc3b55f26b..0b516de0b5b 100644 --- a/src/gallium/targets/pipe-loader/Makefile.am +++ b/src/gallium/targets/pipe-loader/Makefile.am @@ -53,7 +53,7 @@ endif PIPE_LIBS += \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ - $(top_builddir)/src/glsl/libnir.la \ + $(top_builddir)/src/compiler/nir/libnir.la \ $(top_builddir)/src/util/libmesautil.la \ $(top_builddir)/src/gallium/drivers/rbug/librbug.la \ $(top_builddir)/src/gallium/drivers/trace/libtrace.la \ diff --git a/src/gallium/targets/xa/Makefile.am b/src/gallium/targets/xa/Makefile.am index a63fd6903a4..cdd9a862e4a 100644 --- a/src/gallium/targets/xa/Makefile.am +++ b/src/gallium/targets/xa/Makefile.am @@ -37,7 +37,7 @@ libxatracker_la_LIBADD = \ $(top_builddir)/src/gallium/state_trackers/xa/libxatracker.la \ $(top_builddir)/src/gallium/auxiliary/libgalliumvl_stub.la \ $(top_builddir)/src/gallium/auxiliary/libgallium.la \ - $(top_builddir)/src/glsl/libnir.la \ + $(top_builddir)/src/compiler/nir/libnir.la \ $(top_builddir)/src/util/libmesautil.la \ $(LIBDRM_LIBS) \ $(GALLIUM_COMMON_LIB_DEPS) diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk index 59cc8577a6e..c5741b40bc5 100644 --- a/src/glsl/Android.gen.mk +++ b/src/glsl/Android.gen.mk @@ -33,17 +33,10 @@ LOCAL_SRC_FILES := $(LOCAL_SRC_FILES) LOCAL_C_INCLUDES += \ $(intermediates)/glcpp \ - $(intermediates)/nir \ $(MESA_TOP)/src/glsl/glcpp \ - $(MESA_TOP)/src/glsl/nir - -LOCAL_EXPORT_C_INCLUDE_DIRS += \ - $(intermediates)/nir \ - $(MESA_TOP)/src/glsl/nir LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ $(LIBGLCPP_GENERATED_FILES) \ - $(NIR_GENERATED_FILES) \ $(LIBGLSL_GENERATED_CXX_FILES)) define local-l-or-ll-to-c-or-cpp @@ -81,50 +74,3 @@ $(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l $(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y $(call glsl_local-y-to-c-and-h) - -nir_builder_opcodes_gen := $(LOCAL_PATH)/nir/nir_builder_opcodes_h.py -nir_builder_opcodes_deps := \ - $(LOCAL_PATH)/nir/nir_opcodes.py \ - $(LOCAL_PATH)/nir/nir_builder_opcodes_h.py - -$(intermediates)/nir/nir_builder_opcodes.h: $(nir_builder_opcodes_deps) - @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $(nir_builder_opcodes_gen) $< > $@ - -nir_constant_expressions_gen := $(LOCAL_PATH)/nir/nir_constant_expressions.py -nir_constant_expressions_deps := \ - $(LOCAL_PATH)/nir/nir_opcodes.py \ - 
$(LOCAL_PATH)/nir/nir_constant_expressions.py - -$(intermediates)/nir/nir_constant_expressions.c: $(nir_constant_expressions_deps) - @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $(nir_constant_expressions_gen) $< > $@ - -nir_opcodes_h_gen := $(LOCAL_PATH)/nir/nir_opcodes_h.py -nir_opcodes_h_deps := \ - $(LOCAL_PATH)/nir/nir_opcodes.py \ - $(LOCAL_PATH)/nir/nir_opcodes_h.py - -$(intermediates)/nir/nir_opcodes.h: $(nir_opcodes_h_deps) - @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $(nir_opcodes_h_gen) $< > $@ - -$(LOCAL_PATH)/nir/nir.h: $(intermediates)/nir/nir_opcodes.h - -nir_opcodes_c_gen := $(LOCAL_PATH)/nir/nir_opcodes_c.py -nir_opcodes_c_deps := \ - $(LOCAL_PATH)/nir/nir_opcodes.py \ - $(LOCAL_PATH)/nir/nir_opcodes_c.py - -$(intermediates)/nir/nir_opcodes.c: $(nir_opcodes_c_deps) - @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $(nir_opcodes_c_gen) $< > $@ - -nir_opt_algebraic_gen := $(LOCAL_PATH)/nir/nir_opt_algebraic.py -nir_opt_algebraic_deps := \ - $(LOCAL_PATH)/nir/nir_opt_algebraic.py \ - $(LOCAL_PATH)/nir/nir_algebraic.py - -$(intermediates)/nir/nir_opt_algebraic.c: $(nir_opt_algebraic_deps) - @mkdir -p $(dir $@) - $(hide) $(MESA_PYTHON2) $(nir_opt_algebraic_gen) $< > $@ diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am index 07d5267df5a..9954b812403 100644 --- a/src/glsl/Makefile.am +++ b/src/glsl/Makefile.am @@ -27,9 +27,7 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src/gallium/include \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/glsl/glcpp \ - -I$(top_srcdir)/src/glsl/nir \ -I$(top_srcdir)/src/gtest/include \ - -I$(top_builddir)/src/glsl/nir \ $(DEFINES) AM_CFLAGS = \ $(VISIBILITY_CFLAGS) \ @@ -43,21 +41,12 @@ EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \ glsl_parser.yy \ glcpp/glcpp-lex.l \ glcpp/glcpp-parse.y \ - nir/nir_algebraic.py \ - nir/nir_builder_opcodes_h.py \ - nir/nir_constant_expressions.py \ - nir/nir_opcodes.py \ - nir/nir_opcodes_c.py \ - nir/nir_opcodes_h.py \ - nir/nir_opt_algebraic.py \ - nir/tests \ SConscript include Makefile.sources TESTS = glcpp/tests/glcpp-test \ glcpp/tests/glcpp-test-cr-lf \ - nir/tests/control_flow_tests \ tests/blob-test \ tests/general-ir-test \ tests/optimization-test \ @@ -68,11 +57,10 @@ TESTS_ENVIRONMENT= \ export PYTHON2=$(PYTHON2); \ export PYTHON_FLAGS=$(PYTHON_FLAGS); -noinst_LTLIBRARIES = libnir.la libglsl.la libglcpp.la +noinst_LTLIBRARIES = libglsl.la libglcpp.la check_PROGRAMS = \ glcpp/glcpp \ glsl_test \ - nir/tests/control_flow_tests \ tests/blob-test \ tests/general-ir-test \ tests/sampler-types-test \ @@ -138,24 +126,15 @@ glcpp_glcpp_LDADD = \ -lm libglsl_la_LIBADD = \ - $(top_builddir)/src/compiler/libcompiler.la \ + $(top_builddir)/src/compiler/nir/libnir.la \ libglcpp.la libglsl_la_SOURCES = \ glsl_lexer.cpp \ glsl_parser.cpp \ glsl_parser.h \ - $(LIBGLSL_FILES) \ - $(NIR_FILES) \ - $(NIR_GENERATED_FILES) \ - $(GLSL_TO_NIR_FILES) + $(LIBGLSL_FILES) -libnir_la_LIBADD = \ - $(top_builddir)/src/compiler/libcompiler.la - -libnir_la_SOURCES = \ - $(NIR_FILES) \ - $(NIR_GENERATED_FILES) glsl_compiler_SOURCES = \ $(GLSL_COMPILER_CXX_FILES) @@ -235,8 +214,7 @@ BUILT_SOURCES = \ glsl_parser.cpp \ glsl_lexer.cpp \ glcpp/glcpp-parse.c \ - glcpp/glcpp-lex.c \ - $(NIR_GENERATED_FILES) + glcpp/glcpp-lex.c CLEANFILES = \ glcpp/glcpp-parse.h \ glsl_parser.h \ @@ -248,35 +226,3 @@ clean-local: dist-hook: $(RM) glcpp/tests/*.out $(RM) glcpp/tests/subtest*/*.out - -PYTHON_GEN = $(AM_V_GEN)$(PYTHON2) $(PYTHON_FLAGS) - -nir/nir_builder_opcodes.h: nir/nir_opcodes.py nir/nir_builder_opcodes_h.py - 
$(MKDIR_GEN) - $(PYTHON_GEN) $(srcdir)/nir/nir_builder_opcodes_h.py > $@ || ($(RM) $@; false) - -nir/nir_constant_expressions.c: nir/nir_opcodes.py nir/nir_constant_expressions.py - $(MKDIR_GEN) - $(PYTHON_GEN) $(srcdir)/nir/nir_constant_expressions.py > $@ || ($(RM) $@; false) - -nir/nir_opcodes.h: nir/nir_opcodes.py nir/nir_opcodes_h.py - $(MKDIR_GEN) - $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_h.py > $@ || ($(RM) $@; false) - -nir/nir_opcodes.c: nir/nir_opcodes.py nir/nir_opcodes_c.py - $(MKDIR_GEN) - $(PYTHON_GEN) $(srcdir)/nir/nir_opcodes_c.py > $@ || ($(RM) $@; false) - -nir/nir_opt_algebraic.c: nir/nir_opt_algebraic.py nir/nir_algebraic.py - $(MKDIR_GEN) - $(PYTHON_GEN) $(srcdir)/nir/nir_opt_algebraic.py > $@ || ($(RM) $@; false) - -nir_tests_control_flow_tests_SOURCES = \ - nir/tests/control_flow_tests.cpp -nir_tests_control_flow_tests_CFLAGS = \ - $(PTHREAD_CFLAGS) -nir_tests_control_flow_tests_LDADD = \ - $(top_builddir)/src/gtest/libgtest.la \ - $(top_builddir)/src/glsl/libnir.la \ - $(top_builddir)/src/util/libmesautil.la \ - $(PTHREAD_LIBS) diff --git a/src/glsl/nir/.gitignore b/src/glsl/nir/.gitignore deleted file mode 100644 index 64828eba6d3..00000000000 --- a/src/glsl/nir/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -nir_builder_opcodes.h -nir_opt_algebraic.c -nir_opcodes.c -nir_opcodes.h -nir_constant_expressions.c diff --git a/src/glsl/nir/README b/src/glsl/nir/README deleted file mode 100644 index 2c81db9db61..00000000000 --- a/src/glsl/nir/README +++ /dev/null @@ -1,118 +0,0 @@ -New IR, or NIR, is an IR for Mesa intended to sit below GLSL IR and Mesa IR. -Its design inherits from the various IR's that Mesa has used in the past, as -well as Direct3D assembly, and it includes a few new ideas as well. It is a -flat (in terms of using instructions instead of expressions), typeless IR, -similar to TGSI and Mesa IR. It also supports SSA (although it doesn't require -it). - -Variables -========= - -NIR includes support for source-level GLSL variables through a structure mostly -copied from GLSL IR. These will be used for linking and conversion from GLSL IR -(and later, from an AST), but for the most part, they will be lowered to -registers (see below) and loads/stores. - -Registers -========= - -Registers are light-weight; they consist of a structure that only contains its -size, its index for liveness analysis, and an optional name for debugging. In -addition, registers can be local to a function or global to the entire shader; -the latter will be used in ARB_shader_subroutine for passing parameters and -getting return values from subroutines. Registers can also be an array, in which -case they can be accessed indirectly. Each ALU instruction (add, subtract, etc.) -works directly with registers or SSA values (see below). - -SSA -======== - -Everywhere a register can be loaded/stored, an SSA value can be used instead. -The only exception is that arrays/indirect addressing are not supported with -SSA; although research has been done on extensions of SSA to arrays before, it's -usually for the purpose of parallelization (which we're not interested in), and -adds some overhead in the form of adding copies or extra arrays (which is much -more expensive than introducing copies between non-array registers). SSA uses -point directly to their corresponding definition, which in turn points to the -instruction it is part of. This creates an implicit use-def chain and avoids the -need for an external structure for each SSA register. 
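Analyses over SSA form like the ones described above are typically run to a
fixed point over the control flow graph, and the nir_block_worklist declared
at the top of this section exists for exactly that. Below is a minimal sketch
of that usage pattern, not code from this patch: it uses only the worklist
declarations shown earlier, analyze_block() is a hypothetical callback, and
impl->num_blocks is assumed to be current (i.e. block-index metadata valid).

#include "nir.h"
#include "nir_worklist.h"

/* Hypothetical per-block analysis; returns true when the block's dataflow
 * state changed, so its successors need to be revisited. */
static bool
analyze_block(nir_block *block)
{
   (void)block;
   return false;
}

static void
run_to_fixed_point(nir_function_impl *impl, void *mem_ctx)
{
   nir_block_worklist worklist;

   /* Size the internal blocks_present bitset for every block in the
    * function, then seed the list with all of them. */
   nir_block_worklist_init(&worklist, impl->num_blocks, mem_ctx);
   nir_block_worklist_add_all(&worklist, impl);

   while (!nir_block_worklist_is_empty(&worklist)) {
      nir_block *block = nir_block_worklist_pop_head(&worklist);

      if (analyze_block(block)) {
         /* A block has at most two successors; re-queue the ones that
          * exist. The bitset keeps duplicates out of the list. */
         for (unsigned i = 0; i < 2; i++) {
            if (block->successors[i])
               nir_block_worklist_push_tail(&worklist, block->successors[i]);
         }
      }
   }

   nir_block_worklist_fini(&worklist);
}

Because the worklist exposes both head and tail operations, the same
structure can serve FIFO orders for forward dataflow and LIFO orders for
depth-first traversals.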
- -Functions -========= - -Support for function calls is mostly similar to GLSL IR. Each shader contains a -list of functions, and each function has a list of overloads. Each overload -contains a list of parameters, and may contain an implementation which specifies -the variables that correspond to the parameters and return value. Inlining a -function, assuming it has a single return point, is as simple as copying its -instructions, registers, and local variables into the target function and then -inserting copies to and from the new parameters as appropriate. After functions -are inlined and any non-subroutine functions are deleted, parameters and return -variables will be converted to global variables and then global registers. We -don't do this lowering earlier (i.e. the fortranizer idea) for a few reasons: - -- If we want to do optimizations before link time, we need to have the function -signature available during link-time. - -- If we do any inlining before link time, then we might wind up with the -inlined function and the non-inlined function using the same global -variables/registers which would preclude optimization. - -Intrinsics -========= - -Any operation (other than function calls and textures) which touches a variable -or is not referentially transparent is represented by an intrinsic. Intrinsics -are similar to the idea of a "builtin function," i.e. a function declaration -whose implementation is provided by the backend, except they are more powerful -in the following ways: - -- They can also load and store registers when appropriate, which limits the -number of variables needed in later stages of the IR while obviating the need -for a separate load/store variable instruction. - -- Intrinsics can be marked as side-effect free, which permits them to be -treated like any other instruction when it comes to optimizations. This allows -load intrinsics to be represented as intrinsics while still being optimized -away by dead code elimination, common subexpression elimination, etc. - -Intrinsics are used for: - -- Atomic operations -- Memory barriers -- Subroutine calls -- Geometry shader emitVertex and endPrimitive -- Loading and storing variables (before lowering) -- Loading and storing uniforms, shader inputs and outputs, etc (after lowering) -- Copying variables (cases where in GLSL the destination is a structure or -array) -- The kitchen sink -- ... - -Textures -========= - -Unfortunately, there are far too many texture operations to represent each one -of them with an intrinsic, so there's a special texture instruction similar to -the GLSL IR one. The biggest difference is that, while the texture instruction -has a sampler dereference field used just like in GLSL IR, this gets lowered to -a texture unit index (with a possible indirect offset) while the type -information of the original sampler is kept around for backends. Also, all the -non-constant sources are stored in a single array to make it easier for -optimization passes to iterate over all the sources. - -Control Flow -========= - -Like in GLSL IR, control flow consists of a tree of "control flow nodes", which -include if statements and loops, and jump instructions (break, continue, and -return). Unlike GLSL IR, though, the leaves of the tree aren't statements but -basic blocks. 
Each basic block also keeps track of its successors and -predecessors, and function implementations keep track of the beginning basic -block (the first basic block of the function) and the ending basic block (a fake -basic block that every return statement points to). Together, these elements -make up the control flow graph, in this case a redundant piece of information on -top of the control flow tree that will be used by almost all the optimizations. -There are helper functions to add and remove control flow nodes that also update -the control flow graph, and so usually it doesn't need to be touched by passes -that modify control flow nodes. diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp deleted file mode 100644 index c7399ebba0b..00000000000 --- a/src/glsl/nir/glsl_to_nir.cpp +++ /dev/null @@ -1,2031 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "glsl_to_nir.h" -#include "nir_control_flow.h" -#include "nir_builder.h" -#include "ir_visitor.h" -#include "ir_hierarchical_visitor.h" -#include "ir.h" -#include "main/imports.h" - -/* - * pass to lower GLSL IR to NIR - * - * This will lower variable dereferences to loads/stores of corresponding - * variables in NIR - the variables will be converted to registers in a later - * pass. 
- */ - -namespace { - -class nir_visitor : public ir_visitor -{ -public: - nir_visitor(nir_shader *shader); - ~nir_visitor(); - - virtual void visit(ir_variable *); - virtual void visit(ir_function *); - virtual void visit(ir_function_signature *); - virtual void visit(ir_loop *); - virtual void visit(ir_if *); - virtual void visit(ir_discard *); - virtual void visit(ir_loop_jump *); - virtual void visit(ir_return *); - virtual void visit(ir_call *); - virtual void visit(ir_assignment *); - virtual void visit(ir_emit_vertex *); - virtual void visit(ir_end_primitive *); - virtual void visit(ir_expression *); - virtual void visit(ir_swizzle *); - virtual void visit(ir_texture *); - virtual void visit(ir_constant *); - virtual void visit(ir_dereference_variable *); - virtual void visit(ir_dereference_record *); - virtual void visit(ir_dereference_array *); - virtual void visit(ir_barrier *); - - void create_function(ir_function_signature *ir); - -private: - void add_instr(nir_instr *instr, unsigned num_components); - nir_ssa_def *evaluate_rvalue(ir_rvalue *ir); - - nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def **srcs); - nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1); - nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1, - nir_ssa_def *src2); - nir_alu_instr *emit(nir_op op, unsigned dest_size, nir_ssa_def *src1, - nir_ssa_def *src2, nir_ssa_def *src3); - - bool supports_ints; - - nir_shader *shader; - nir_function_impl *impl; - nir_builder b; - nir_ssa_def *result; /* result of the expression tree last visited */ - - nir_deref_var *evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir); - - /* the head of the dereference chain we're creating */ - nir_deref_var *deref_head; - /* the tail of the dereference chain we're creating */ - nir_deref *deref_tail; - - nir_variable *var; /* variable created by ir_variable visitor */ - - /* whether the IR we're operating on is per-function or global */ - bool is_global; - - /* map of ir_variable -> nir_variable */ - struct hash_table *var_table; - - /* map of ir_function_signature -> nir_function_overload */ - struct hash_table *overload_table; -}; - -/* - * This visitor runs before the main visitor, calling create_function() for - * each function so that the main visitor can resolve forward references in - * calls. 
- */ - -class nir_function_visitor : public ir_hierarchical_visitor -{ -public: - nir_function_visitor(nir_visitor *v) : visitor(v) - { - } - virtual ir_visitor_status visit_enter(ir_function *); - -private: - nir_visitor *visitor; -}; - -}; /* end of anonymous namespace */ - -nir_shader * -glsl_to_nir(const struct gl_shader_program *shader_prog, - gl_shader_stage stage, - const nir_shader_compiler_options *options) -{ - struct gl_shader *sh = shader_prog->_LinkedShaders[stage]; - - nir_shader *shader = nir_shader_create(NULL, stage, options); - - nir_visitor v1(shader); - nir_function_visitor v2(&v1); - v2.run(sh->ir); - visit_exec_list(sh->ir, &v1); - - nir_lower_outputs_to_temporaries(shader); - - shader->info.name = ralloc_asprintf(shader, "GLSL%d", shader_prog->Name); - if (shader_prog->Label) - shader->info.label = ralloc_strdup(shader, shader_prog->Label); - shader->info.num_textures = _mesa_fls(sh->Program->SamplersUsed); - shader->info.num_ubos = sh->NumUniformBlocks; - shader->info.num_abos = shader_prog->NumAtomicBuffers; - shader->info.num_ssbos = sh->NumShaderStorageBlocks; - shader->info.num_images = sh->NumImages; - shader->info.inputs_read = sh->Program->InputsRead; - shader->info.outputs_written = sh->Program->OutputsWritten; - shader->info.patch_inputs_read = sh->Program->PatchInputsRead; - shader->info.patch_outputs_written = sh->Program->PatchOutputsWritten; - shader->info.system_values_read = sh->Program->SystemValuesRead; - shader->info.uses_texture_gather = sh->Program->UsesGather; - shader->info.uses_clip_distance_out = - sh->Program->ClipDistanceArraySize != 0; - shader->info.separate_shader = shader_prog->SeparateShader; - shader->info.has_transform_feedback_varyings = - shader_prog->TransformFeedback.NumVarying > 0; - - switch (stage) { - case MESA_SHADER_TESS_CTRL: - shader->info.tcs.vertices_out = shader_prog->TessCtrl.VerticesOut; - break; - - case MESA_SHADER_GEOMETRY: - shader->info.gs.vertices_in = shader_prog->Geom.VerticesIn; - shader->info.gs.output_primitive = sh->Geom.OutputType; - shader->info.gs.vertices_out = sh->Geom.VerticesOut; - shader->info.gs.invocations = sh->Geom.Invocations; - shader->info.gs.uses_end_primitive = shader_prog->Geom.UsesEndPrimitive; - shader->info.gs.uses_streams = shader_prog->Geom.UsesStreams; - break; - - case MESA_SHADER_FRAGMENT: { - struct gl_fragment_program *fp = - (struct gl_fragment_program *)sh->Program; - - shader->info.fs.uses_discard = fp->UsesKill; - shader->info.fs.early_fragment_tests = sh->EarlyFragmentTests; - shader->info.fs.depth_layout = fp->FragDepthLayout; - break; - } - - case MESA_SHADER_COMPUTE: { - struct gl_compute_program *cp = (struct gl_compute_program *)sh->Program; - shader->info.cs.local_size[0] = cp->LocalSize[0]; - shader->info.cs.local_size[1] = cp->LocalSize[1]; - shader->info.cs.local_size[2] = cp->LocalSize[2]; - break; - } - - default: - break; /* No stage-specific info */ - } - - return shader; -} - -nir_visitor::nir_visitor(nir_shader *shader) -{ - this->supports_ints = shader->options->native_integers; - this->shader = shader; - this->is_global = true; - this->var_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - this->overload_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); -} - -nir_visitor::~nir_visitor() -{ - _mesa_hash_table_destroy(this->var_table, NULL); - _mesa_hash_table_destroy(this->overload_table, NULL); -} - -nir_deref_var * -nir_visitor::evaluate_deref(nir_instr *mem_ctx, ir_instruction *ir) 
-{ - ir->accept(this); - ralloc_steal(mem_ctx, this->deref_head); - return this->deref_head; -} - -static nir_constant * -constant_copy(ir_constant *ir, void *mem_ctx) -{ - if (ir == NULL) - return NULL; - - nir_constant *ret = ralloc(mem_ctx, nir_constant); - - unsigned total_elems = ir->type->components(); - unsigned i; - - ret->num_elements = 0; - switch (ir->type->base_type) { - case GLSL_TYPE_UINT: - for (i = 0; i < total_elems; i++) - ret->value.u[i] = ir->value.u[i]; - break; - - case GLSL_TYPE_INT: - for (i = 0; i < total_elems; i++) - ret->value.i[i] = ir->value.i[i]; - break; - - case GLSL_TYPE_FLOAT: - for (i = 0; i < total_elems; i++) - ret->value.f[i] = ir->value.f[i]; - break; - - case GLSL_TYPE_BOOL: - for (i = 0; i < total_elems; i++) - ret->value.b[i] = ir->value.b[i]; - break; - - case GLSL_TYPE_STRUCT: - ret->elements = ralloc_array(mem_ctx, nir_constant *, - ir->type->length); - ret->num_elements = ir->type->length; - - i = 0; - foreach_in_list(ir_constant, field, &ir->components) { - ret->elements[i] = constant_copy(field, mem_ctx); - i++; - } - break; - - case GLSL_TYPE_ARRAY: - ret->elements = ralloc_array(mem_ctx, nir_constant *, - ir->type->length); - ret->num_elements = ir->type->length; - - for (i = 0; i < ir->type->length; i++) - ret->elements[i] = constant_copy(ir->array_elements[i], mem_ctx); - break; - - default: - unreachable("not reached"); - } - - return ret; -} - -void -nir_visitor::visit(ir_variable *ir) -{ - nir_variable *var = ralloc(shader, nir_variable); - var->type = ir->type; - var->name = ralloc_strdup(var, ir->name); - - var->data.read_only = ir->data.read_only; - var->data.centroid = ir->data.centroid; - var->data.sample = ir->data.sample; - var->data.patch = ir->data.patch; - var->data.invariant = ir->data.invariant; - var->data.location = ir->data.location; - - switch(ir->data.mode) { - case ir_var_auto: - case ir_var_temporary: - if (is_global) - var->data.mode = nir_var_global; - else - var->data.mode = nir_var_local; - break; - - case ir_var_function_in: - case ir_var_function_out: - case ir_var_function_inout: - case ir_var_const_in: - var->data.mode = nir_var_local; - break; - - case ir_var_shader_in: - if (shader->stage == MESA_SHADER_FRAGMENT && - ir->data.location == VARYING_SLOT_FACE) { - /* For whatever reason, GLSL IR makes gl_FrontFacing an input */ - var->data.location = SYSTEM_VALUE_FRONT_FACE; - var->data.mode = nir_var_system_value; - } else if (shader->stage == MESA_SHADER_GEOMETRY && - ir->data.location == VARYING_SLOT_PRIMITIVE_ID) { - /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */ - var->data.location = SYSTEM_VALUE_PRIMITIVE_ID; - var->data.mode = nir_var_system_value; - } else { - var->data.mode = nir_var_shader_in; - } - break; - - case ir_var_shader_out: - var->data.mode = nir_var_shader_out; - break; - - case ir_var_uniform: - var->data.mode = nir_var_uniform; - break; - - case ir_var_shader_storage: - var->data.mode = nir_var_shader_storage; - break; - - case ir_var_system_value: - var->data.mode = nir_var_system_value; - break; - - default: - unreachable("not reached"); - } - - var->data.interpolation = ir->data.interpolation; - var->data.origin_upper_left = ir->data.origin_upper_left; - var->data.pixel_center_integer = ir->data.pixel_center_integer; - var->data.explicit_location = ir->data.explicit_location; - var->data.explicit_index = ir->data.explicit_index; - var->data.explicit_binding = ir->data.explicit_binding; - var->data.has_initializer = ir->data.has_initializer; - 
var->data.location_frac = ir->data.location_frac; - var->data.from_named_ifc_block_array = ir->data.from_named_ifc_block_array; - var->data.from_named_ifc_block_nonarray = ir->data.from_named_ifc_block_nonarray; - - switch (ir->data.depth_layout) { - case ir_depth_layout_none: - var->data.depth_layout = nir_depth_layout_none; - break; - case ir_depth_layout_any: - var->data.depth_layout = nir_depth_layout_any; - break; - case ir_depth_layout_greater: - var->data.depth_layout = nir_depth_layout_greater; - break; - case ir_depth_layout_less: - var->data.depth_layout = nir_depth_layout_less; - break; - case ir_depth_layout_unchanged: - var->data.depth_layout = nir_depth_layout_unchanged; - break; - default: - unreachable("not reached"); - } - - var->data.index = ir->data.index; - var->data.binding = ir->data.binding; - var->data.offset = ir->data.offset; - var->data.image.read_only = ir->data.image_read_only; - var->data.image.write_only = ir->data.image_write_only; - var->data.image.coherent = ir->data.image_coherent; - var->data.image._volatile = ir->data.image_volatile; - var->data.image.restrict_flag = ir->data.image_restrict; - var->data.image.format = ir->data.image_format; - var->data.max_array_access = ir->data.max_array_access; - - var->num_state_slots = ir->get_num_state_slots(); - if (var->num_state_slots > 0) { - var->state_slots = ralloc_array(var, nir_state_slot, - var->num_state_slots); - - ir_state_slot *state_slots = ir->get_state_slots(); - for (unsigned i = 0; i < var->num_state_slots; i++) { - for (unsigned j = 0; j < 5; j++) - var->state_slots[i].tokens[j] = state_slots[i].tokens[j]; - var->state_slots[i].swizzle = state_slots[i].swizzle; - } - } else { - var->state_slots = NULL; - } - - var->constant_initializer = constant_copy(ir->constant_initializer, var); - - var->interface_type = ir->get_interface_type(); - - if (var->data.mode == nir_var_local) - nir_function_impl_add_variable(impl, var); - else - nir_shader_add_variable(shader, var); - - _mesa_hash_table_insert(var_table, ir, var); - this->var = var; -} - -ir_visitor_status -nir_function_visitor::visit_enter(ir_function *ir) -{ - foreach_in_list(ir_function_signature, sig, &ir->signatures) { - visitor->create_function(sig); - } - return visit_continue_with_parent; -} - -void -nir_visitor::create_function(ir_function_signature *ir) -{ - if (ir->is_intrinsic) - return; - - nir_function *func = nir_function_create(shader, ir->function_name()); - - unsigned num_params = ir->parameters.length(); - func->num_params = num_params; - func->params = ralloc_array(shader, nir_parameter, num_params); - - unsigned i = 0; - foreach_in_list(ir_variable, param, &ir->parameters) { - switch (param->data.mode) { - case ir_var_function_in: - func->params[i].param_type = nir_parameter_in; - break; - - case ir_var_function_out: - func->params[i].param_type = nir_parameter_out; - break; - - case ir_var_function_inout: - func->params[i].param_type = nir_parameter_inout; - break; - - default: - unreachable("not reached"); - } - - func->params[i].type = param->type; - i++; - } - - func->return_type = ir->return_type; - - _mesa_hash_table_insert(this->overload_table, ir, func); -} - -void -nir_visitor::visit(ir_function *ir) -{ - foreach_in_list(ir_function_signature, sig, &ir->signatures) - sig->accept(this); -} - -void -nir_visitor::visit(ir_function_signature *ir) -{ - if (ir->is_intrinsic) - return; - - struct hash_entry *entry = - _mesa_hash_table_search(this->overload_table, ir); - - assert(entry); - nir_function *func = (nir_function 
*) entry->data; - - if (ir->is_defined) { - nir_function_impl *impl = nir_function_impl_create(func); - this->impl = impl; - - unsigned num_params = func->num_params; - impl->num_params = num_params; - impl->params = ralloc_array(this->shader, nir_variable *, num_params); - unsigned i = 0; - foreach_in_list(ir_variable, param, &ir->parameters) { - param->accept(this); - impl->params[i] = this->var; - i++; - } - - if (func->return_type == glsl_type::void_type) { - impl->return_var = NULL; - } else { - impl->return_var = ralloc(this->shader, nir_variable); - impl->return_var->name = ralloc_strdup(impl->return_var, - "return_var"); - impl->return_var->type = func->return_type; - } - - this->is_global = false; - - nir_builder_init(&b, impl); - b.cursor = nir_after_cf_list(&impl->body); - visit_exec_list(&ir->body, this); - - this->is_global = true; - } else { - func->impl = NULL; - } -} - -void -nir_visitor::visit(ir_loop *ir) -{ - nir_loop *loop = nir_loop_create(this->shader); - nir_builder_cf_insert(&b, &loop->cf_node); - - b.cursor = nir_after_cf_list(&loop->body); - visit_exec_list(&ir->body_instructions, this); - b.cursor = nir_after_cf_node(&loop->cf_node); -} - -void -nir_visitor::visit(ir_if *ir) -{ - nir_src condition = - nir_src_for_ssa(evaluate_rvalue(ir->condition)); - - nir_if *if_stmt = nir_if_create(this->shader); - if_stmt->condition = condition; - nir_builder_cf_insert(&b, &if_stmt->cf_node); - - b.cursor = nir_after_cf_list(&if_stmt->then_list); - visit_exec_list(&ir->then_instructions, this); - - b.cursor = nir_after_cf_list(&if_stmt->else_list); - visit_exec_list(&ir->else_instructions, this); - - b.cursor = nir_after_cf_node(&if_stmt->cf_node); -} - -void -nir_visitor::visit(ir_discard *ir) -{ - /* - * discards aren't treated as control flow, because before we lower them - * they can appear anywhere in the shader and the stuff after them may still - * be executed (yay, crazy GLSL rules!). However, after lowering, all the - * discards will be immediately followed by a return. 
- */ - - nir_intrinsic_instr *discard; - if (ir->condition) { - discard = nir_intrinsic_instr_create(this->shader, - nir_intrinsic_discard_if); - discard->src[0] = - nir_src_for_ssa(evaluate_rvalue(ir->condition)); - } else { - discard = nir_intrinsic_instr_create(this->shader, nir_intrinsic_discard); - } - - nir_builder_instr_insert(&b, &discard->instr); -} - -void -nir_visitor::visit(ir_emit_vertex *ir) -{ - nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_emit_vertex); - instr->const_index[0] = ir->stream_id(); - nir_builder_instr_insert(&b, &instr->instr); -} - -void -nir_visitor::visit(ir_end_primitive *ir) -{ - nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_end_primitive); - instr->const_index[0] = ir->stream_id(); - nir_builder_instr_insert(&b, &instr->instr); -} - -void -nir_visitor::visit(ir_loop_jump *ir) -{ - nir_jump_type type; - switch (ir->mode) { - case ir_loop_jump::jump_break: - type = nir_jump_break; - break; - case ir_loop_jump::jump_continue: - type = nir_jump_continue; - break; - default: - unreachable("not reached"); - } - - nir_jump_instr *instr = nir_jump_instr_create(this->shader, type); - nir_builder_instr_insert(&b, &instr->instr); -} - -void -nir_visitor::visit(ir_return *ir) -{ - if (ir->value != NULL) { - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); - - copy->variables[0] = nir_deref_var_create(copy, this->impl->return_var); - copy->variables[1] = evaluate_deref(©->instr, ir->value); - } - - nir_jump_instr *instr = nir_jump_instr_create(this->shader, nir_jump_return); - nir_builder_instr_insert(&b, &instr->instr); -} - -void -nir_visitor::visit(ir_call *ir) -{ - if (ir->callee->is_intrinsic) { - nir_intrinsic_op op; - if (strcmp(ir->callee_name(), "__intrinsic_atomic_read") == 0) { - op = nir_intrinsic_atomic_counter_read_var; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_increment") == 0) { - op = nir_intrinsic_atomic_counter_inc_var; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_predecrement") == 0) { - op = nir_intrinsic_atomic_counter_dec_var; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_load") == 0) { - op = nir_intrinsic_image_load; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_store") == 0) { - op = nir_intrinsic_image_store; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_add") == 0) { - op = nir_intrinsic_image_atomic_add; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_min") == 0) { - op = nir_intrinsic_image_atomic_min; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_max") == 0) { - op = nir_intrinsic_image_atomic_max; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_and") == 0) { - op = nir_intrinsic_image_atomic_and; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_or") == 0) { - op = nir_intrinsic_image_atomic_or; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_xor") == 0) { - op = nir_intrinsic_image_atomic_xor; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_exchange") == 0) { - op = nir_intrinsic_image_atomic_exchange; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_atomic_comp_swap") == 0) { - op = nir_intrinsic_image_atomic_comp_swap; - } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier") == 0) { - op = nir_intrinsic_memory_barrier; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_size") == 0) { - op = 
nir_intrinsic_image_size; - } else if (strcmp(ir->callee_name(), "__intrinsic_image_samples") == 0) { - op = nir_intrinsic_image_samples; - } else if (strcmp(ir->callee_name(), "__intrinsic_store_ssbo") == 0) { - op = nir_intrinsic_store_ssbo; - } else if (strcmp(ir->callee_name(), "__intrinsic_load_ssbo") == 0) { - op = nir_intrinsic_load_ssbo; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_ssbo") == 0) { - op = nir_intrinsic_ssbo_atomic_add; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_ssbo") == 0) { - op = nir_intrinsic_ssbo_atomic_and; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_ssbo") == 0) { - op = nir_intrinsic_ssbo_atomic_or; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_ssbo") == 0) { - op = nir_intrinsic_ssbo_atomic_xor; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_ssbo") == 0) { - assert(ir->return_deref); - if (ir->return_deref->type == glsl_type::int_type) - op = nir_intrinsic_ssbo_atomic_imin; - else if (ir->return_deref->type == glsl_type::uint_type) - op = nir_intrinsic_ssbo_atomic_umin; - else - unreachable("Invalid type"); - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_max_ssbo") == 0) { - assert(ir->return_deref); - if (ir->return_deref->type == glsl_type::int_type) - op = nir_intrinsic_ssbo_atomic_imax; - else if (ir->return_deref->type == glsl_type::uint_type) - op = nir_intrinsic_ssbo_atomic_umax; - else - unreachable("Invalid type"); - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_ssbo") == 0) { - op = nir_intrinsic_ssbo_atomic_exchange; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_ssbo") == 0) { - op = nir_intrinsic_ssbo_atomic_comp_swap; - } else if (strcmp(ir->callee_name(), "__intrinsic_shader_clock") == 0) { - op = nir_intrinsic_shader_clock; - } else if (strcmp(ir->callee_name(), "__intrinsic_group_memory_barrier") == 0) { - op = nir_intrinsic_group_memory_barrier; - } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_atomic_counter") == 0) { - op = nir_intrinsic_memory_barrier_atomic_counter; - } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_buffer") == 0) { - op = nir_intrinsic_memory_barrier_buffer; - } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_image") == 0) { - op = nir_intrinsic_memory_barrier_image; - } else if (strcmp(ir->callee_name(), "__intrinsic_memory_barrier_shared") == 0) { - op = nir_intrinsic_memory_barrier_shared; - } else if (strcmp(ir->callee_name(), "__intrinsic_load_shared") == 0) { - op = nir_intrinsic_load_shared; - } else if (strcmp(ir->callee_name(), "__intrinsic_store_shared") == 0) { - op = nir_intrinsic_store_shared; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_add_shared") == 0) { - op = nir_intrinsic_shared_atomic_add; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_and_shared") == 0) { - op = nir_intrinsic_shared_atomic_and; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_or_shared") == 0) { - op = nir_intrinsic_shared_atomic_or; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_xor_shared") == 0) { - op = nir_intrinsic_shared_atomic_xor; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_min_shared") == 0) { - assert(ir->return_deref); - if (ir->return_deref->type == glsl_type::int_type) - op = nir_intrinsic_shared_atomic_imin; - else if (ir->return_deref->type == glsl_type::uint_type) - op = nir_intrinsic_shared_atomic_umin; - else - unreachable("Invalid type"); - } else if 
(strcmp(ir->callee_name(), "__intrinsic_atomic_max_shared") == 0) { - assert(ir->return_deref); - if (ir->return_deref->type == glsl_type::int_type) - op = nir_intrinsic_shared_atomic_imax; - else if (ir->return_deref->type == glsl_type::uint_type) - op = nir_intrinsic_shared_atomic_umax; - else - unreachable("Invalid type"); - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_exchange_shared") == 0) { - op = nir_intrinsic_shared_atomic_exchange; - } else if (strcmp(ir->callee_name(), "__intrinsic_atomic_comp_swap_shared") == 0) { - op = nir_intrinsic_shared_atomic_comp_swap; - } else { - unreachable("not reached"); - } - - nir_intrinsic_instr *instr = nir_intrinsic_instr_create(shader, op); - nir_dest *dest = &instr->dest; - - switch (op) { - case nir_intrinsic_atomic_counter_read_var: - case nir_intrinsic_atomic_counter_inc_var: - case nir_intrinsic_atomic_counter_dec_var: { - ir_dereference *param = - (ir_dereference *) ir->actual_parameters.get_head(); - instr->variables[0] = evaluate_deref(&instr->instr, param); - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); - nir_builder_instr_insert(&b, &instr->instr); - break; - } - case nir_intrinsic_image_load: - case nir_intrinsic_image_store: - case nir_intrinsic_image_atomic_add: - case nir_intrinsic_image_atomic_min: - case nir_intrinsic_image_atomic_max: - case nir_intrinsic_image_atomic_and: - case nir_intrinsic_image_atomic_or: - case nir_intrinsic_image_atomic_xor: - case nir_intrinsic_image_atomic_exchange: - case nir_intrinsic_image_atomic_comp_swap: - case nir_intrinsic_image_samples: - case nir_intrinsic_image_size: { - nir_ssa_undef_instr *instr_undef = - nir_ssa_undef_instr_create(shader, 1); - nir_builder_instr_insert(&b, &instr_undef->instr); - - /* Set the image variable dereference. */ - exec_node *param = ir->actual_parameters.get_head(); - ir_dereference *image = (ir_dereference *)param; - const glsl_type *type = - image->variable_referenced()->type->without_array(); - - instr->variables[0] = evaluate_deref(&instr->instr, image); - param = param->get_next(); - - /* Set the intrinsic destination. */ - if (ir->return_deref) { - const nir_intrinsic_info *info = - &nir_intrinsic_infos[instr->intrinsic]; - nir_ssa_dest_init(&instr->instr, &instr->dest, - info->dest_components, NULL); - } - - if (op == nir_intrinsic_image_size || - op == nir_intrinsic_image_samples) { - nir_builder_instr_insert(&b, &instr->instr); - break; - } - - /* Set the address argument, extending the coordinate vector to four - * components. - */ - nir_ssa_def *src_addr = - evaluate_rvalue((ir_dereference *)param); - nir_ssa_def *srcs[4]; - - for (int i = 0; i < 4; i++) { - if (i < type->coordinate_components()) - srcs[i] = nir_channel(&b, src_addr, i); - else - srcs[i] = &instr_undef->def; - } - - instr->src[0] = nir_src_for_ssa(nir_vec(&b, srcs, 4)); - param = param->get_next(); - - /* Set the sample argument, which is undefined for single-sample - * images. - */ - if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { - instr->src[1] = - nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); - param = param->get_next(); - } else { - instr->src[1] = nir_src_for_ssa(&instr_undef->def); - } - - /* Set the intrinsic parameters. 
*/ - if (!param->is_tail_sentinel()) { - instr->src[2] = - nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); - param = param->get_next(); - } - - if (!param->is_tail_sentinel()) { - instr->src[3] = - nir_src_for_ssa(evaluate_rvalue((ir_dereference *)param)); - param = param->get_next(); - } - nir_builder_instr_insert(&b, &instr->instr); - break; - } - case nir_intrinsic_memory_barrier: - case nir_intrinsic_group_memory_barrier: - case nir_intrinsic_memory_barrier_atomic_counter: - case nir_intrinsic_memory_barrier_buffer: - case nir_intrinsic_memory_barrier_image: - case nir_intrinsic_memory_barrier_shared: - nir_builder_instr_insert(&b, &instr->instr); - break; - case nir_intrinsic_shader_clock: - nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); - nir_builder_instr_insert(&b, &instr->instr); - break; - case nir_intrinsic_store_ssbo: { - exec_node *param = ir->actual_parameters.get_head(); - ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); - - param = param->get_next(); - ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - - param = param->get_next(); - ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); - - param = param->get_next(); - ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); - assert(write_mask); - - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(block)); - instr->src[2] = nir_src_for_ssa(evaluate_rvalue(offset)); - instr->const_index[0] = write_mask->value.u[0]; - instr->num_components = val->type->vector_elements; - - nir_builder_instr_insert(&b, &instr->instr); - break; - } - case nir_intrinsic_load_ssbo: { - exec_node *param = ir->actual_parameters.get_head(); - ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); - - param = param->get_next(); - ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(block)); - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); - - const glsl_type *type = ir->return_deref->var->type; - instr->num_components = type->vector_elements; - - /* Setup destination register */ - nir_ssa_dest_init(&instr->instr, &instr->dest, - type->vector_elements, NULL); - - /* Insert the created nir instruction now since in the case of boolean - * result we will need to emit another instruction after it - */ - nir_builder_instr_insert(&b, &instr->instr); - - /* - * In SSBO/UBO's, a true boolean value is any non-zero value, but we - * consider a true boolean to be ~0. Fix this up with a != 0 - * comparison. 
- */ - if (type->base_type == GLSL_TYPE_BOOL) { - nir_alu_instr *load_ssbo_compare = - nir_alu_instr_create(shader, nir_op_ine); - load_ssbo_compare->src[0].src.is_ssa = true; - load_ssbo_compare->src[0].src.ssa = &instr->dest.ssa; - load_ssbo_compare->src[1].src = - nir_src_for_ssa(nir_imm_int(&b, 0)); - for (unsigned i = 0; i < type->vector_elements; i++) - load_ssbo_compare->src[1].swizzle[i] = 0; - nir_ssa_dest_init(&load_ssbo_compare->instr, - &load_ssbo_compare->dest.dest, - type->vector_elements, NULL); - load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1; - nir_builder_instr_insert(&b, &load_ssbo_compare->instr); - dest = &load_ssbo_compare->dest.dest; - } - break; - } - case nir_intrinsic_ssbo_atomic_add: - case nir_intrinsic_ssbo_atomic_imin: - case nir_intrinsic_ssbo_atomic_umin: - case nir_intrinsic_ssbo_atomic_imax: - case nir_intrinsic_ssbo_atomic_umax: - case nir_intrinsic_ssbo_atomic_and: - case nir_intrinsic_ssbo_atomic_or: - case nir_intrinsic_ssbo_atomic_xor: - case nir_intrinsic_ssbo_atomic_exchange: - case nir_intrinsic_ssbo_atomic_comp_swap: { - int param_count = ir->actual_parameters.length(); - assert(param_count == 3 || param_count == 4); - - /* Block index */ - exec_node *param = ir->actual_parameters.get_head(); - ir_instruction *inst = (ir_instruction *) param; - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - - /* Offset */ - param = param->get_next(); - inst = (ir_instruction *) param; - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - - /* data1 parameter (this is always present) */ - param = param->get_next(); - inst = (ir_instruction *) param; - instr->src[2] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - - /* data2 parameter (only with atomic_comp_swap) */ - if (param_count == 4) { - assert(op == nir_intrinsic_ssbo_atomic_comp_swap); - param = param->get_next(); - inst = (ir_instruction *) param; - instr->src[3] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - } - - /* Atomic result */ - assert(ir->return_deref); - nir_ssa_dest_init(&instr->instr, &instr->dest, - ir->return_deref->type->vector_elements, NULL); - nir_builder_instr_insert(&b, &instr->instr); - break; - } - case nir_intrinsic_load_shared: { - exec_node *param = ir->actual_parameters.get_head(); - ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - - instr->const_index[0] = 0; - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(offset)); - - const glsl_type *type = ir->return_deref->var->type; - instr->num_components = type->vector_elements; - - /* Setup destination register */ - nir_ssa_dest_init(&instr->instr, &instr->dest, - type->vector_elements, NULL); - - nir_builder_instr_insert(&b, &instr->instr); - break; - } - case nir_intrinsic_store_shared: { - exec_node *param = ir->actual_parameters.get_head(); - ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); - - param = param->get_next(); - ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); - - param = param->get_next(); - ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); - assert(write_mask); - - instr->const_index[0] = 0; - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(offset)); - - instr->const_index[1] = write_mask->value.u[0]; - - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(val)); - instr->num_components = val->type->vector_elements; - - nir_builder_instr_insert(&b, &instr->instr); - break; - } - case nir_intrinsic_shared_atomic_add: - case nir_intrinsic_shared_atomic_imin: - case 
nir_intrinsic_shared_atomic_umin: - case nir_intrinsic_shared_atomic_imax: - case nir_intrinsic_shared_atomic_umax: - case nir_intrinsic_shared_atomic_and: - case nir_intrinsic_shared_atomic_or: - case nir_intrinsic_shared_atomic_xor: - case nir_intrinsic_shared_atomic_exchange: - case nir_intrinsic_shared_atomic_comp_swap: { - int param_count = ir->actual_parameters.length(); - assert(param_count == 2 || param_count == 3); - - /* Offset */ - exec_node *param = ir->actual_parameters.get_head(); - ir_instruction *inst = (ir_instruction *) param; - instr->src[0] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - - /* data1 parameter (this is always present) */ - param = param->get_next(); - inst = (ir_instruction *) param; - instr->src[1] = nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - - /* data2 parameter (only with atomic_comp_swap) */ - if (param_count == 3) { - assert(op == nir_intrinsic_shared_atomic_comp_swap); - param = param->get_next(); - inst = (ir_instruction *) param; - instr->src[2] = - nir_src_for_ssa(evaluate_rvalue(inst->as_rvalue())); - } - - /* Atomic result */ - assert(ir->return_deref); - nir_ssa_dest_init(&instr->instr, &instr->dest, - ir->return_deref->type->vector_elements, NULL); - nir_builder_instr_insert(&b, &instr->instr); - break; - } - default: - unreachable("not reached"); - } - - if (ir->return_deref) { - nir_intrinsic_instr *store_instr = - nir_intrinsic_instr_create(shader, nir_intrinsic_store_var); - store_instr->num_components = ir->return_deref->type->vector_elements; - store_instr->const_index[0] = (1 << store_instr->num_components) - 1; - - store_instr->variables[0] = - evaluate_deref(&store_instr->instr, ir->return_deref); - store_instr->src[0] = nir_src_for_ssa(&dest->ssa); - - nir_builder_instr_insert(&b, &store_instr->instr); - } - - return; - } - - struct hash_entry *entry = - _mesa_hash_table_search(this->overload_table, ir->callee); - assert(entry); - nir_function *callee = (nir_function *) entry->data; - - nir_call_instr *instr = nir_call_instr_create(this->shader, callee); - - unsigned i = 0; - foreach_in_list(ir_dereference, param, &ir->actual_parameters) { - instr->params[i] = evaluate_deref(&instr->instr, param); - i++; - } - - instr->return_deref = evaluate_deref(&instr->instr, ir->return_deref); - nir_builder_instr_insert(&b, &instr->instr); -} - -void -nir_visitor::visit(ir_assignment *ir) -{ - unsigned num_components = ir->lhs->type->vector_elements; - - if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) && - (ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) { - /* We're doing a plain-as-can-be copy, so emit a copy_var */ - nir_intrinsic_instr *copy = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_copy_var); - - copy->variables[0] = evaluate_deref(©->instr, ir->lhs); - copy->variables[1] = evaluate_deref(©->instr, ir->rhs); - - if (ir->condition) { - nir_if *if_stmt = nir_if_create(this->shader); - if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition)); - nir_builder_cf_insert(&b, &if_stmt->cf_node); - nir_instr_insert_after_cf_list(&if_stmt->then_list, ©->instr); - b.cursor = nir_after_cf_node(&if_stmt->cf_node); - } else { - nir_builder_instr_insert(&b, ©->instr); - } - return; - } - - assert(ir->rhs->type->is_scalar() || ir->rhs->type->is_vector()); - - ir->lhs->accept(this); - nir_deref_var *lhs_deref = this->deref_head; - nir_ssa_def *src = evaluate_rvalue(ir->rhs); - - if (ir->write_mask != (1 << num_components) - 1 && ir->write_mask != 0) { - /* GLSL IR will give us 
the input to the write-masked assignment in a - * single packed vector. So, for example, if the writemask is xzw, then - * we have to swizzle x -> x, y -> z, and z -> w and get the y component - * from the load. - */ - unsigned swiz[4]; - unsigned component = 0; - for (unsigned i = 0; i < 4; i++) { - swiz[i] = ir->write_mask & (1 << i) ? component++ : 0; - } - src = nir_swizzle(&b, src, swiz, num_components, !supports_ints); - } - - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_store_var); - store->num_components = ir->lhs->type->vector_elements; - store->const_index[0] = ir->write_mask; - nir_deref *store_deref = nir_copy_deref(store, &lhs_deref->deref); - store->variables[0] = nir_deref_as_var(store_deref); - store->src[0] = nir_src_for_ssa(src); - - if (ir->condition) { - nir_if *if_stmt = nir_if_create(this->shader); - if_stmt->condition = nir_src_for_ssa(evaluate_rvalue(ir->condition)); - nir_builder_cf_insert(&b, &if_stmt->cf_node); - nir_instr_insert_after_cf_list(&if_stmt->then_list, &store->instr); - b.cursor = nir_after_cf_node(&if_stmt->cf_node); - } else { - nir_builder_instr_insert(&b, &store->instr); - } -} - -/* - * Given an instruction, returns a pointer to its destination or NULL if there - * is no destination. - * - * Note that this only handles instructions we generate at this level. - */ -static nir_dest * -get_instr_dest(nir_instr *instr) -{ - nir_alu_instr *alu_instr; - nir_intrinsic_instr *intrinsic_instr; - nir_tex_instr *tex_instr; - - switch (instr->type) { - case nir_instr_type_alu: - alu_instr = nir_instr_as_alu(instr); - return &alu_instr->dest.dest; - - case nir_instr_type_intrinsic: - intrinsic_instr = nir_instr_as_intrinsic(instr); - if (nir_intrinsic_infos[intrinsic_instr->intrinsic].has_dest) - return &intrinsic_instr->dest; - else - return NULL; - - case nir_instr_type_tex: - tex_instr = nir_instr_as_tex(instr); - return &tex_instr->dest; - - default: - unreachable("not reached"); - } - - return NULL; -} - -void -nir_visitor::add_instr(nir_instr *instr, unsigned num_components) -{ - nir_dest *dest = get_instr_dest(instr); - - if (dest) - nir_ssa_dest_init(instr, dest, num_components, NULL); - - nir_builder_instr_insert(&b, instr); - - if (dest) { - assert(dest->is_ssa); - this->result = &dest->ssa; - } -} - -nir_ssa_def * -nir_visitor::evaluate_rvalue(ir_rvalue* ir) -{ - ir->accept(this); - if (ir->as_dereference() || ir->as_constant()) { - /* - * A dereference is being used on the right hand side, which means we - * must emit a variable load. - */ - - nir_intrinsic_instr *load_instr = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_var); - load_instr->num_components = ir->type->vector_elements; - load_instr->variables[0] = this->deref_head; - ralloc_steal(load_instr, load_instr->variables[0]); - add_instr(&load_instr->instr, ir->type->vector_elements); - } - - return this->result; -} - -void -nir_visitor::visit(ir_expression *ir) -{ - /* Some special cases */ - switch (ir->operation) { - case ir_binop_ubo_load: { - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo); - load->num_components = ir->type->vector_elements; - load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); - load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); - add_instr(&load->instr, ir->type->vector_elements); - - /* - * In UBOs, a true boolean value is any non-zero value, but we consider - * a true boolean to be ~0. Fix this up with a != 0 comparison. 
- */ - - if (ir->type->base_type == GLSL_TYPE_BOOL) - this->result = nir_ine(&b, &load->dest.ssa, nir_imm_int(&b, 0)); - - return; - } - - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: { - ir_dereference *deref = ir->operands[0]->as_dereference(); - ir_swizzle *swizzle = NULL; - if (!deref) { - /* the api does not allow a swizzle here, but the varying packing code - * may have pushed one into here. - */ - swizzle = ir->operands[0]->as_swizzle(); - assert(swizzle); - deref = swizzle->val->as_dereference(); - assert(deref); - } - - deref->accept(this); - - nir_intrinsic_op op; - if (this->deref_head->var->data.mode == nir_var_shader_in) { - switch (ir->operation) { - case ir_unop_interpolate_at_centroid: - op = nir_intrinsic_interp_var_at_centroid; - break; - case ir_binop_interpolate_at_offset: - op = nir_intrinsic_interp_var_at_offset; - break; - case ir_binop_interpolate_at_sample: - op = nir_intrinsic_interp_var_at_sample; - break; - default: - unreachable("Invalid interpolation intrinsic"); - } - } else { - /* This case can happen if the vertex shader does not write the - * given varying. In this case, the linker will lower it to a - * global variable. Since interpolating a variable makes no - * sense, we'll just turn it into a load which will probably - * eventually end up as an SSA definition. - */ - assert(this->deref_head->var->data.mode == nir_var_global); - op = nir_intrinsic_load_var; - } - - nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(shader, op); - intrin->num_components = deref->type->vector_elements; - intrin->variables[0] = this->deref_head; - ralloc_steal(intrin, intrin->variables[0]); - - if (intrin->intrinsic == nir_intrinsic_interp_var_at_offset || - intrin->intrinsic == nir_intrinsic_interp_var_at_sample) - intrin->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1])); - - add_instr(&intrin->instr, deref->type->vector_elements); - - if (swizzle) { - unsigned swiz[4] = { - swizzle->mask.x, swizzle->mask.y, swizzle->mask.z, swizzle->mask.w - }; - - result = nir_swizzle(&b, result, swiz, - swizzle->type->vector_elements, false); - } - - return; - } - - default: - break; - } - - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < ir->get_num_operands(); i++) - srcs[i] = evaluate_rvalue(ir->operands[i]); - - glsl_base_type types[4]; - for (unsigned i = 0; i < ir->get_num_operands(); i++) - if (supports_ints) - types[i] = ir->operands[i]->type->base_type; - else - types[i] = GLSL_TYPE_FLOAT; - - glsl_base_type out_type; - if (supports_ints) - out_type = ir->type->base_type; - else - out_type = GLSL_TYPE_FLOAT; - - switch (ir->operation) { - case ir_unop_bit_not: result = nir_inot(&b, srcs[0]); break; - case ir_unop_logic_not: - result = supports_ints ? nir_inot(&b, srcs[0]) : nir_fnot(&b, srcs[0]); - break; - case ir_unop_neg: - result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fneg(&b, srcs[0]) - : nir_ineg(&b, srcs[0]); - break; - case ir_unop_abs: - result = (types[0] == GLSL_TYPE_FLOAT) ? nir_fabs(&b, srcs[0]) - : nir_iabs(&b, srcs[0]); - break; - case ir_unop_saturate: - assert(types[0] == GLSL_TYPE_FLOAT); - result = nir_fsat(&b, srcs[0]); - break; - case ir_unop_sign: - result = (types[0] == GLSL_TYPE_FLOAT) ? 
nir_fsign(&b, srcs[0]) - : nir_isign(&b, srcs[0]); - break; - case ir_unop_rcp: result = nir_frcp(&b, srcs[0]); break; - case ir_unop_rsq: result = nir_frsq(&b, srcs[0]); break; - case ir_unop_sqrt: result = nir_fsqrt(&b, srcs[0]); break; - case ir_unop_exp: unreachable("ir_unop_exp should have been lowered"); - case ir_unop_log: unreachable("ir_unop_log should have been lowered"); - case ir_unop_exp2: result = nir_fexp2(&b, srcs[0]); break; - case ir_unop_log2: result = nir_flog2(&b, srcs[0]); break; - case ir_unop_i2f: - result = supports_ints ? nir_i2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); - break; - case ir_unop_u2f: - result = supports_ints ? nir_u2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); - break; - case ir_unop_b2f: - result = supports_ints ? nir_b2f(&b, srcs[0]) : nir_fmov(&b, srcs[0]); - break; - case ir_unop_f2i: result = nir_f2i(&b, srcs[0]); break; - case ir_unop_f2u: result = nir_f2u(&b, srcs[0]); break; - case ir_unop_f2b: result = nir_f2b(&b, srcs[0]); break; - case ir_unop_i2b: result = nir_i2b(&b, srcs[0]); break; - case ir_unop_b2i: result = nir_b2i(&b, srcs[0]); break; - case ir_unop_i2u: - case ir_unop_u2i: - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_f2i: - case ir_unop_bitcast_u2f: - case ir_unop_bitcast_f2u: - case ir_unop_subroutine_to_int: - /* no-op */ - result = nir_imov(&b, srcs[0]); - break; - case ir_unop_trunc: result = nir_ftrunc(&b, srcs[0]); break; - case ir_unop_ceil: result = nir_fceil(&b, srcs[0]); break; - case ir_unop_floor: result = nir_ffloor(&b, srcs[0]); break; - case ir_unop_fract: result = nir_ffract(&b, srcs[0]); break; - case ir_unop_round_even: result = nir_fround_even(&b, srcs[0]); break; - case ir_unop_sin: result = nir_fsin(&b, srcs[0]); break; - case ir_unop_cos: result = nir_fcos(&b, srcs[0]); break; - case ir_unop_dFdx: result = nir_fddx(&b, srcs[0]); break; - case ir_unop_dFdy: result = nir_fddy(&b, srcs[0]); break; - case ir_unop_dFdx_fine: result = nir_fddx_fine(&b, srcs[0]); break; - case ir_unop_dFdy_fine: result = nir_fddy_fine(&b, srcs[0]); break; - case ir_unop_dFdx_coarse: result = nir_fddx_coarse(&b, srcs[0]); break; - case ir_unop_dFdy_coarse: result = nir_fddy_coarse(&b, srcs[0]); break; - case ir_unop_pack_snorm_2x16: - result = nir_pack_snorm_2x16(&b, srcs[0]); - break; - case ir_unop_pack_snorm_4x8: - result = nir_pack_snorm_4x8(&b, srcs[0]); - break; - case ir_unop_pack_unorm_2x16: - result = nir_pack_unorm_2x16(&b, srcs[0]); - break; - case ir_unop_pack_unorm_4x8: - result = nir_pack_unorm_4x8(&b, srcs[0]); - break; - case ir_unop_pack_half_2x16: - result = nir_pack_half_2x16(&b, srcs[0]); - break; - case ir_unop_unpack_snorm_2x16: - result = nir_unpack_snorm_2x16(&b, srcs[0]); - break; - case ir_unop_unpack_snorm_4x8: - result = nir_unpack_snorm_4x8(&b, srcs[0]); - break; - case ir_unop_unpack_unorm_2x16: - result = nir_unpack_unorm_2x16(&b, srcs[0]); - break; - case ir_unop_unpack_unorm_4x8: - result = nir_unpack_unorm_4x8(&b, srcs[0]); - break; - case ir_unop_unpack_half_2x16: - result = nir_unpack_half_2x16(&b, srcs[0]); - break; - case ir_unop_unpack_half_2x16_split_x: - result = nir_unpack_half_2x16_split_x(&b, srcs[0]); - break; - case ir_unop_unpack_half_2x16_split_y: - result = nir_unpack_half_2x16_split_y(&b, srcs[0]); - break; - case ir_unop_bitfield_reverse: - result = nir_bitfield_reverse(&b, srcs[0]); - break; - case ir_unop_bit_count: - result = nir_bit_count(&b, srcs[0]); - break; - case ir_unop_find_msb: - switch (types[0]) { - case GLSL_TYPE_UINT: - result = nir_ufind_msb(&b, srcs[0]); - break; - 
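/* For reference, per GLSL findMSB() semantics: ufind_msb returns the index of the highest set bit (e.g. ufind_msb(0x00008000) == 15), while ifind_msb returns the highest bit that differs from the sign bit, so ifind_msb(-1) == -1. */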
case GLSL_TYPE_INT: - result = nir_ifind_msb(&b, srcs[0]); - break; - default: - unreachable("Invalid type for findMSB()"); - } - break; - case ir_unop_find_lsb: - result = nir_find_lsb(&b, srcs[0]); - break; - - case ir_unop_noise: - switch (ir->type->vector_elements) { - case 1: - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_fnoise1_1(&b, srcs[0]); break; - case 2: result = nir_fnoise1_2(&b, srcs[0]); break; - case 3: result = nir_fnoise1_3(&b, srcs[0]); break; - case 4: result = nir_fnoise1_4(&b, srcs[0]); break; - default: unreachable("not reached"); - } - break; - case 2: - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_fnoise2_1(&b, srcs[0]); break; - case 2: result = nir_fnoise2_2(&b, srcs[0]); break; - case 3: result = nir_fnoise2_3(&b, srcs[0]); break; - case 4: result = nir_fnoise2_4(&b, srcs[0]); break; - default: unreachable("not reached"); - } - break; - case 3: - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_fnoise3_1(&b, srcs[0]); break; - case 2: result = nir_fnoise3_2(&b, srcs[0]); break; - case 3: result = nir_fnoise3_3(&b, srcs[0]); break; - case 4: result = nir_fnoise3_4(&b, srcs[0]); break; - default: unreachable("not reached"); - } - break; - case 4: - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_fnoise4_1(&b, srcs[0]); break; - case 2: result = nir_fnoise4_2(&b, srcs[0]); break; - case 3: result = nir_fnoise4_3(&b, srcs[0]); break; - case 4: result = nir_fnoise4_4(&b, srcs[0]); break; - default: unreachable("not reached"); - } - break; - default: - unreachable("not reached"); - } - break; - case ir_unop_get_buffer_size: { - nir_intrinsic_instr *load = nir_intrinsic_instr_create( - this->shader, - nir_intrinsic_get_buffer_size); - load->num_components = ir->type->vector_elements; - load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0])); - add_instr(&load->instr, ir->type->vector_elements); - return; - } - - case ir_binop_add: - result = (out_type == GLSL_TYPE_FLOAT) ? nir_fadd(&b, srcs[0], srcs[1]) - : nir_iadd(&b, srcs[0], srcs[1]); - break; - case ir_binop_sub: - result = (out_type == GLSL_TYPE_FLOAT) ? nir_fsub(&b, srcs[0], srcs[1]) - : nir_isub(&b, srcs[0], srcs[1]); - break; - case ir_binop_mul: - result = (out_type == GLSL_TYPE_FLOAT) ? nir_fmul(&b, srcs[0], srcs[1]) - : nir_imul(&b, srcs[0], srcs[1]); - break; - case ir_binop_div: - if (out_type == GLSL_TYPE_FLOAT) - result = nir_fdiv(&b, srcs[0], srcs[1]); - else if (out_type == GLSL_TYPE_INT) - result = nir_idiv(&b, srcs[0], srcs[1]); - else - result = nir_udiv(&b, srcs[0], srcs[1]); - break; - case ir_binop_mod: - result = (out_type == GLSL_TYPE_FLOAT) ? 
nir_fmod(&b, srcs[0], srcs[1]) - : nir_umod(&b, srcs[0], srcs[1]); - break; - case ir_binop_min: - if (out_type == GLSL_TYPE_FLOAT) - result = nir_fmin(&b, srcs[0], srcs[1]); - else if (out_type == GLSL_TYPE_INT) - result = nir_imin(&b, srcs[0], srcs[1]); - else - result = nir_umin(&b, srcs[0], srcs[1]); - break; - case ir_binop_max: - if (out_type == GLSL_TYPE_FLOAT) - result = nir_fmax(&b, srcs[0], srcs[1]); - else if (out_type == GLSL_TYPE_INT) - result = nir_imax(&b, srcs[0], srcs[1]); - else - result = nir_umax(&b, srcs[0], srcs[1]); - break; - case ir_binop_pow: result = nir_fpow(&b, srcs[0], srcs[1]); break; - case ir_binop_bit_and: result = nir_iand(&b, srcs[0], srcs[1]); break; - case ir_binop_bit_or: result = nir_ior(&b, srcs[0], srcs[1]); break; - case ir_binop_bit_xor: result = nir_ixor(&b, srcs[0], srcs[1]); break; - case ir_binop_logic_and: - result = supports_ints ? nir_iand(&b, srcs[0], srcs[1]) - : nir_fand(&b, srcs[0], srcs[1]); - break; - case ir_binop_logic_or: - result = supports_ints ? nir_ior(&b, srcs[0], srcs[1]) - : nir_for(&b, srcs[0], srcs[1]); - break; - case ir_binop_logic_xor: - result = supports_ints ? nir_ixor(&b, srcs[0], srcs[1]) - : nir_fxor(&b, srcs[0], srcs[1]); - break; - case ir_binop_lshift: result = nir_ishl(&b, srcs[0], srcs[1]); break; - case ir_binop_rshift: - result = (out_type == GLSL_TYPE_INT) ? nir_ishr(&b, srcs[0], srcs[1]) - : nir_ushr(&b, srcs[0], srcs[1]); - break; - case ir_binop_imul_high: - result = (out_type == GLSL_TYPE_INT) ? nir_imul_high(&b, srcs[0], srcs[1]) - : nir_umul_high(&b, srcs[0], srcs[1]); - break; - case ir_binop_carry: result = nir_uadd_carry(&b, srcs[0], srcs[1]); break; - case ir_binop_borrow: result = nir_usub_borrow(&b, srcs[0], srcs[1]); break; - case ir_binop_less: - if (supports_ints) { - if (types[0] == GLSL_TYPE_FLOAT) - result = nir_flt(&b, srcs[0], srcs[1]); - else if (types[0] == GLSL_TYPE_INT) - result = nir_ilt(&b, srcs[0], srcs[1]); - else - result = nir_ult(&b, srcs[0], srcs[1]); - } else { - result = nir_slt(&b, srcs[0], srcs[1]); - } - break; - case ir_binop_greater: - if (supports_ints) { - if (types[0] == GLSL_TYPE_FLOAT) - result = nir_flt(&b, srcs[1], srcs[0]); - else if (types[0] == GLSL_TYPE_INT) - result = nir_ilt(&b, srcs[1], srcs[0]); - else - result = nir_ult(&b, srcs[1], srcs[0]); - } else { - result = nir_slt(&b, srcs[1], srcs[0]); - } - break; - case ir_binop_lequal: - if (supports_ints) { - if (types[0] == GLSL_TYPE_FLOAT) - result = nir_fge(&b, srcs[1], srcs[0]); - else if (types[0] == GLSL_TYPE_INT) - result = nir_ige(&b, srcs[1], srcs[0]); - else - result = nir_uge(&b, srcs[1], srcs[0]); - } else { - result = nir_sge(&b, srcs[1], srcs[0]); - } - break; - case ir_binop_gequal: - if (supports_ints) { - if (types[0] == GLSL_TYPE_FLOAT) - result = nir_fge(&b, srcs[0], srcs[1]); - else if (types[0] == GLSL_TYPE_INT) - result = nir_ige(&b, srcs[0], srcs[1]); - else - result = nir_uge(&b, srcs[0], srcs[1]); - } else { - result = nir_sge(&b, srcs[0], srcs[1]); - } - break; - case ir_binop_equal: - if (supports_ints) { - if (types[0] == GLSL_TYPE_FLOAT) - result = nir_feq(&b, srcs[0], srcs[1]); - else - result = nir_ieq(&b, srcs[0], srcs[1]); - } else { - result = nir_seq(&b, srcs[0], srcs[1]); - } - break; - case ir_binop_nequal: - if (supports_ints) { - if (types[0] == GLSL_TYPE_FLOAT) - result = nir_fne(&b, srcs[0], srcs[1]); - else - result = nir_ine(&b, srcs[0], srcs[1]); - } else { - result = nir_sne(&b, srcs[0], srcs[1]); - } - break; - case ir_binop_all_equal: - if (supports_ints) { - 
if (types[0] == GLSL_TYPE_FLOAT) { - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_feq(&b, srcs[0], srcs[1]); break; - case 2: result = nir_ball_fequal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_ball_fequal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_ball_fequal4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } - } else { - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_ieq(&b, srcs[0], srcs[1]); break; - case 2: result = nir_ball_iequal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_ball_iequal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_ball_iequal4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } - } - } else { - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_seq(&b, srcs[0], srcs[1]); break; - case 2: result = nir_fall_equal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_fall_equal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_fall_equal4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } - } - break; - case ir_binop_any_nequal: - if (supports_ints) { - if (types[0] == GLSL_TYPE_FLOAT) { - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_fne(&b, srcs[0], srcs[1]); break; - case 2: result = nir_bany_fnequal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_bany_fnequal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_bany_fnequal4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } - } else { - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_ine(&b, srcs[0], srcs[1]); break; - case 2: result = nir_bany_inequal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_bany_inequal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_bany_inequal4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } - } - } else { - switch (ir->operands[0]->type->vector_elements) { - case 1: result = nir_sne(&b, srcs[0], srcs[1]); break; - case 2: result = nir_fany_nequal2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_fany_nequal3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_fany_nequal4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } - } - break; - case ir_binop_dot: - switch (ir->operands[0]->type->vector_elements) { - case 2: result = nir_fdot2(&b, srcs[0], srcs[1]); break; - case 3: result = nir_fdot3(&b, srcs[0], srcs[1]); break; - case 4: result = nir_fdot4(&b, srcs[0], srcs[1]); break; - default: - unreachable("not reached"); - } - break; - - case ir_binop_pack_half_2x16_split: - result = nir_pack_half_2x16_split(&b, srcs[0], srcs[1]); - break; - case ir_binop_ldexp: result = nir_ldexp(&b, srcs[0], srcs[1]); break; - case ir_triop_fma: - result = nir_ffma(&b, srcs[0], srcs[1], srcs[2]); - break; - case ir_triop_lrp: - result = nir_flrp(&b, srcs[0], srcs[1], srcs[2]); - break; - case ir_triop_csel: - if (supports_ints) - result = nir_bcsel(&b, srcs[0], srcs[1], srcs[2]); - else - result = nir_fcsel(&b, srcs[0], srcs[1], srcs[2]); - break; - case ir_triop_bitfield_extract: - result = (out_type == GLSL_TYPE_INT) ? 
- nir_ibitfield_extract(&b, srcs[0], srcs[1], srcs[2]) : - nir_ubitfield_extract(&b, srcs[0], srcs[1], srcs[2]); - break; - case ir_quadop_bitfield_insert: - result = nir_bitfield_insert(&b, srcs[0], srcs[1], srcs[2], srcs[3]); - break; - case ir_quadop_vector: - result = nir_vec(&b, srcs, ir->type->vector_elements); - break; - - default: - unreachable("not reached"); - } -} - -void -nir_visitor::visit(ir_swizzle *ir) -{ - unsigned swizzle[4] = { ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w }; - result = nir_swizzle(&b, evaluate_rvalue(ir->val), swizzle, - ir->type->vector_elements, !supports_ints); -} - -void -nir_visitor::visit(ir_texture *ir) -{ - unsigned num_srcs; - nir_texop op; - switch (ir->op) { - case ir_tex: - op = nir_texop_tex; - num_srcs = 1; /* coordinate */ - break; - - case ir_txb: - case ir_txl: - op = (ir->op == ir_txb) ? nir_texop_txb : nir_texop_txl; - num_srcs = 2; /* coordinate, bias/lod */ - break; - - case ir_txd: - op = nir_texop_txd; /* coordinate, dPdx, dPdy */ - num_srcs = 3; - break; - - case ir_txf: - op = nir_texop_txf; - if (ir->lod_info.lod != NULL) - num_srcs = 2; /* coordinate, lod */ - else - num_srcs = 1; /* coordinate */ - break; - - case ir_txf_ms: - op = nir_texop_txf_ms; - num_srcs = 2; /* coordinate, sample_index */ - break; - - case ir_txs: - op = nir_texop_txs; - if (ir->lod_info.lod != NULL) - num_srcs = 1; /* lod */ - else - num_srcs = 0; - break; - - case ir_lod: - op = nir_texop_lod; - num_srcs = 1; /* coordinate */ - break; - - case ir_tg4: - op = nir_texop_tg4; - num_srcs = 1; /* coordinate */ - break; - - case ir_query_levels: - op = nir_texop_query_levels; - num_srcs = 0; - break; - - case ir_texture_samples: - op = nir_texop_texture_samples; - num_srcs = 0; - break; - - case ir_samples_identical: - op = nir_texop_samples_identical; - num_srcs = 1; /* coordinate */ - break; - - default: - unreachable("not reached"); - } - - if (ir->projector != NULL) - num_srcs++; - if (ir->shadow_comparitor != NULL) - num_srcs++; - if (ir->offset != NULL && ir->offset->as_constant() == NULL) - num_srcs++; - - nir_tex_instr *instr = nir_tex_instr_create(this->shader, num_srcs); - - instr->op = op; - instr->sampler_dim = - (glsl_sampler_dim) ir->sampler->type->sampler_dimensionality; - instr->is_array = ir->sampler->type->sampler_array; - instr->is_shadow = ir->sampler->type->sampler_shadow; - if (instr->is_shadow) - instr->is_new_style_shadow = (ir->type->vector_elements == 1); - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - instr->dest_type = nir_type_float; - break; - case GLSL_TYPE_INT: - instr->dest_type = nir_type_int; - break; - case GLSL_TYPE_BOOL: - case GLSL_TYPE_UINT: - instr->dest_type = nir_type_uint; - break; - default: - unreachable("not reached"); - } - - instr->sampler = evaluate_deref(&instr->instr, ir->sampler); - - unsigned src_number = 0; - - if (ir->coordinate != NULL) { - instr->coord_components = ir->coordinate->type->vector_elements; - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->coordinate)); - instr->src[src_number].src_type = nir_tex_src_coord; - src_number++; - } - - if (ir->projector != NULL) { - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->projector)); - instr->src[src_number].src_type = nir_tex_src_projector; - src_number++; - } - - if (ir->shadow_comparitor != NULL) { - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->shadow_comparitor)); - instr->src[src_number].src_type = nir_tex_src_comparitor; - src_number++; - } - - if (ir->offset != NULL) { - 
/* we don't support multiple offsets yet */ - assert(ir->offset->type->is_vector() || ir->offset->type->is_scalar()); - - ir_constant *const_offset = ir->offset->as_constant(); - if (const_offset != NULL) { - for (unsigned i = 0; i < const_offset->type->vector_elements; i++) - instr->const_offset[i] = const_offset->value.i[i]; - } else { - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->offset)); - instr->src[src_number].src_type = nir_tex_src_offset; - src_number++; - } - } - - switch (ir->op) { - case ir_txb: - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->lod_info.bias)); - instr->src[src_number].src_type = nir_tex_src_bias; - src_number++; - break; - - case ir_txl: - case ir_txf: - case ir_txs: - if (ir->lod_info.lod != NULL) { - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->lod_info.lod)); - instr->src[src_number].src_type = nir_tex_src_lod; - src_number++; - } - break; - - case ir_txd: - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdx)); - instr->src[src_number].src_type = nir_tex_src_ddx; - src_number++; - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->lod_info.grad.dPdy)); - instr->src[src_number].src_type = nir_tex_src_ddy; - src_number++; - break; - - case ir_txf_ms: - instr->src[src_number].src = - nir_src_for_ssa(evaluate_rvalue(ir->lod_info.sample_index)); - instr->src[src_number].src_type = nir_tex_src_ms_index; - src_number++; - break; - - case ir_tg4: - instr->component = ir->lod_info.component->as_constant()->value.u[0]; - break; - - default: - break; - } - - assert(src_number == num_srcs); - - add_instr(&instr->instr, nir_tex_instr_dest_size(instr)); -} - -void -nir_visitor::visit(ir_constant *ir) -{ - /* - * We don't know if this variable is an array or struct that gets - * dereferenced, so do the safe thing and make it a variable with a - * constant initializer and return a dereference. 
- */ - - nir_variable *var = - nir_local_variable_create(this->impl, ir->type, "const_temp"); - var->data.read_only = true; - var->constant_initializer = constant_copy(ir, var); - - this->deref_head = nir_deref_var_create(this->shader, var); - this->deref_tail = &this->deref_head->deref; -} - -void -nir_visitor::visit(ir_dereference_variable *ir) -{ - struct hash_entry *entry = - _mesa_hash_table_search(this->var_table, ir->var); - assert(entry); - nir_variable *var = (nir_variable *) entry->data; - - nir_deref_var *deref = nir_deref_var_create(this->shader, var); - this->deref_head = deref; - this->deref_tail = &deref->deref; -} - -void -nir_visitor::visit(ir_dereference_record *ir) -{ - ir->record->accept(this); - - int field_index = this->deref_tail->type->field_index(ir->field); - assert(field_index >= 0); - - nir_deref_struct *deref = nir_deref_struct_create(this->deref_tail, field_index); - deref->deref.type = ir->type; - this->deref_tail->child = &deref->deref; - this->deref_tail = &deref->deref; -} - -void -nir_visitor::visit(ir_dereference_array *ir) -{ - nir_deref_array *deref = nir_deref_array_create(this->shader); - deref->deref.type = ir->type; - - ir_constant *const_index = ir->array_index->as_constant(); - if (const_index != NULL) { - deref->deref_array_type = nir_deref_array_type_direct; - deref->base_offset = const_index->value.u[0]; - } else { - deref->deref_array_type = nir_deref_array_type_indirect; - deref->indirect = - nir_src_for_ssa(evaluate_rvalue(ir->array_index)); - } - - ir->array->accept(this); - - this->deref_tail->child = &deref->deref; - ralloc_steal(this->deref_tail, deref); - this->deref_tail = &deref->deref; -} - -void -nir_visitor::visit(ir_barrier *ir) -{ - nir_intrinsic_instr *instr = - nir_intrinsic_instr_create(this->shader, nir_intrinsic_barrier); - nir_builder_instr_insert(&b, &instr->instr); -} diff --git a/src/glsl/nir/glsl_to_nir.h b/src/glsl/nir/glsl_to_nir.h deleted file mode 100644 index 29badcda08d..00000000000 --- a/src/glsl/nir/glsl_to_nir.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir.h" -#include "../glsl_parser_extras.h" - -#ifdef __cplusplus -extern "C" { -#endif - -nir_shader *glsl_to_nir(const struct gl_shader_program *shader_prog, - gl_shader_stage stage, - const nir_shader_compiler_options *options); - -#ifdef __cplusplus -} -#endif diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c deleted file mode 100644 index 21bf678c04e..00000000000 --- a/src/glsl/nir/nir.c +++ /dev/null @@ -1,1665 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir.h" -#include "nir_control_flow_private.h" -#include <assert.h> - -nir_shader * -nir_shader_create(void *mem_ctx, - gl_shader_stage stage, - const nir_shader_compiler_options *options) -{ - nir_shader *shader = ralloc(mem_ctx, nir_shader); - - exec_list_make_empty(&shader->uniforms); - exec_list_make_empty(&shader->inputs); - exec_list_make_empty(&shader->outputs); - - shader->options = options; - memset(&shader->info, 0, sizeof(shader->info)); - - exec_list_make_empty(&shader->functions); - exec_list_make_empty(&shader->registers); - exec_list_make_empty(&shader->globals); - exec_list_make_empty(&shader->system_values); - shader->reg_alloc = 0; - - shader->num_inputs = 0; - shader->num_outputs = 0; - shader->num_uniforms = 0; - - shader->stage = stage; - - return shader; -} - -static nir_register * -reg_create(void *mem_ctx, struct exec_list *list) -{ - nir_register *reg = ralloc(mem_ctx, nir_register); - - list_inithead(&reg->uses); - list_inithead(&reg->defs); - list_inithead(&reg->if_uses); - - reg->num_components = 0; - reg->num_array_elems = 0; - reg->is_packed = false; - reg->name = NULL; - - exec_list_push_tail(list, &reg->node); - - return reg; -} - -nir_register * -nir_global_reg_create(nir_shader *shader) -{ - nir_register *reg = reg_create(shader, &shader->registers); - reg->index = shader->reg_alloc++; - reg->is_global = true; - - return reg; -} - -nir_register * -nir_local_reg_create(nir_function_impl *impl) -{ - nir_register *reg = reg_create(ralloc_parent(impl), &impl->registers); - reg->index = impl->reg_alloc++; - reg->is_global = false; - - return reg; -} - -void -nir_reg_remove(nir_register *reg) -{ - exec_node_remove(&reg->node); -} - -void -nir_shader_add_variable(nir_shader *shader, nir_variable *var) -{ - switch (var->data.mode) { - case nir_var_all: - 
assert(!"invalid mode"); - break; - - case nir_var_local: - assert(!"nir_shader_add_variable cannot be used for local variables"); - break; - - case nir_var_global: - exec_list_push_tail(&shader->globals, &var->node); - break; - - case nir_var_shader_in: - exec_list_push_tail(&shader->inputs, &var->node); - break; - - case nir_var_shader_out: - exec_list_push_tail(&shader->outputs, &var->node); - break; - - case nir_var_uniform: - case nir_var_shader_storage: - exec_list_push_tail(&shader->uniforms, &var->node); - break; - - case nir_var_system_value: - exec_list_push_tail(&shader->system_values, &var->node); - break; - } -} - -nir_variable * -nir_variable_create(nir_shader *shader, nir_variable_mode mode, - const struct glsl_type *type, const char *name) -{ - nir_variable *var = rzalloc(shader, nir_variable); - var->name = ralloc_strdup(var, name); - var->type = type; - var->data.mode = mode; - - if ((mode == nir_var_shader_in && shader->stage != MESA_SHADER_VERTEX) || - (mode == nir_var_shader_out && shader->stage != MESA_SHADER_FRAGMENT)) - var->data.interpolation = INTERP_QUALIFIER_SMOOTH; - - if (mode == nir_var_shader_in || mode == nir_var_uniform) - var->data.read_only = true; - - nir_shader_add_variable(shader, var); - - return var; -} - -nir_variable * -nir_local_variable_create(nir_function_impl *impl, - const struct glsl_type *type, const char *name) -{ - nir_variable *var = rzalloc(impl->function->shader, nir_variable); - var->name = ralloc_strdup(var, name); - var->type = type; - var->data.mode = nir_var_local; - - nir_function_impl_add_variable(impl, var); - - return var; -} - -nir_function * -nir_function_create(nir_shader *shader, const char *name) -{ - nir_function *func = ralloc(shader, nir_function); - - exec_list_push_tail(&shader->functions, &func->node); - - func->name = ralloc_strdup(func, name); - func->shader = shader; - func->num_params = 0; - func->params = NULL; - func->return_type = glsl_void_type(); - func->impl = NULL; - - return func; -} - -void nir_src_copy(nir_src *dest, const nir_src *src, void *mem_ctx) -{ - dest->is_ssa = src->is_ssa; - if (src->is_ssa) { - dest->ssa = src->ssa; - } else { - dest->reg.base_offset = src->reg.base_offset; - dest->reg.reg = src->reg.reg; - if (src->reg.indirect) { - dest->reg.indirect = ralloc(mem_ctx, nir_src); - nir_src_copy(dest->reg.indirect, src->reg.indirect, mem_ctx); - } else { - dest->reg.indirect = NULL; - } - } -} - -void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr) -{ - /* Copying an SSA definition makes no sense whatsoever. 
*/ - assert(!src->is_ssa); - - dest->is_ssa = false; - - dest->reg.base_offset = src->reg.base_offset; - dest->reg.reg = src->reg.reg; - if (src->reg.indirect) { - dest->reg.indirect = ralloc(instr, nir_src); - nir_src_copy(dest->reg.indirect, src->reg.indirect, instr); - } else { - dest->reg.indirect = NULL; - } -} - -void -nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, - nir_alu_instr *instr) -{ - nir_src_copy(&dest->src, &src->src, &instr->instr); - dest->abs = src->abs; - dest->negate = src->negate; - for (unsigned i = 0; i < 4; i++) - dest->swizzle[i] = src->swizzle[i]; -} - -void -nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, - nir_alu_instr *instr) -{ - nir_dest_copy(&dest->dest, &src->dest, &instr->instr); - dest->write_mask = src->write_mask; - dest->saturate = src->saturate; -} - - -static void -cf_init(nir_cf_node *node, nir_cf_node_type type) -{ - exec_node_init(&node->node); - node->parent = NULL; - node->type = type; -} - -nir_function_impl * -nir_function_impl_create(nir_function *function) -{ - assert(function->impl == NULL); - - void *mem_ctx = ralloc_parent(function); - - nir_function_impl *impl = ralloc(mem_ctx, nir_function_impl); - - function->impl = impl; - impl->function = function; - - cf_init(&impl->cf_node, nir_cf_node_function); - - exec_list_make_empty(&impl->body); - exec_list_make_empty(&impl->registers); - exec_list_make_empty(&impl->locals); - impl->num_params = 0; - impl->params = NULL; - impl->return_var = NULL; - impl->reg_alloc = 0; - impl->ssa_alloc = 0; - impl->valid_metadata = nir_metadata_none; - - /* create start & end blocks */ - nir_block *start_block = nir_block_create(mem_ctx); - nir_block *end_block = nir_block_create(mem_ctx); - start_block->cf_node.parent = &impl->cf_node; - end_block->cf_node.parent = &impl->cf_node; - impl->end_block = end_block; - - exec_list_push_tail(&impl->body, &start_block->cf_node.node); - - start_block->successors[0] = end_block; - _mesa_set_add(end_block->predecessors, start_block); - return impl; -} - -nir_block * -nir_block_create(nir_shader *shader) -{ - nir_block *block = ralloc(shader, nir_block); - - cf_init(&block->cf_node, nir_cf_node_block); - - block->successors[0] = block->successors[1] = NULL; - block->predecessors = _mesa_set_create(block, _mesa_hash_pointer, - _mesa_key_pointer_equal); - block->imm_dom = NULL; - /* XXX maybe it would be worth it to defer allocation? This - * way it doesn't get allocated for shader refs that never run - * nir_calc_dominance? For example, state-tracker creates an - * initial IR, clones that, runs appropriate lowering pass, passes - * to driver which does common lowering/opt, and then stores ref - * which is later used to do state specific lowering and further - * opt. Do any of the references not need dominance metadata? 
- */ - block->dom_frontier = _mesa_set_create(block, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - exec_list_make_empty(&block->instr_list); - - return block; -} - -static inline void -src_init(nir_src *src) -{ - src->is_ssa = false; - src->reg.reg = NULL; - src->reg.indirect = NULL; - src->reg.base_offset = 0; -} - -nir_if * -nir_if_create(nir_shader *shader) -{ - nir_if *if_stmt = ralloc(shader, nir_if); - - cf_init(&if_stmt->cf_node, nir_cf_node_if); - src_init(&if_stmt->condition); - - nir_block *then = nir_block_create(shader); - exec_list_make_empty(&if_stmt->then_list); - exec_list_push_tail(&if_stmt->then_list, &then->cf_node.node); - then->cf_node.parent = &if_stmt->cf_node; - - nir_block *else_stmt = nir_block_create(shader); - exec_list_make_empty(&if_stmt->else_list); - exec_list_push_tail(&if_stmt->else_list, &else_stmt->cf_node.node); - else_stmt->cf_node.parent = &if_stmt->cf_node; - - return if_stmt; -} - -nir_loop * -nir_loop_create(nir_shader *shader) -{ - nir_loop *loop = ralloc(shader, nir_loop); - - cf_init(&loop->cf_node, nir_cf_node_loop); - - nir_block *body = nir_block_create(shader); - exec_list_make_empty(&loop->body); - exec_list_push_tail(&loop->body, &body->cf_node.node); - body->cf_node.parent = &loop->cf_node; - - body->successors[0] = body; - _mesa_set_add(body->predecessors, body); - - return loop; -} - -static void -instr_init(nir_instr *instr, nir_instr_type type) -{ - instr->type = type; - instr->block = NULL; - exec_node_init(&instr->node); -} - -static void -dest_init(nir_dest *dest) -{ - dest->is_ssa = false; - dest->reg.reg = NULL; - dest->reg.indirect = NULL; - dest->reg.base_offset = 0; -} - -static void -alu_dest_init(nir_alu_dest *dest) -{ - dest_init(&dest->dest); - dest->saturate = false; - dest->write_mask = 0xf; -} - -static void -alu_src_init(nir_alu_src *src) -{ - src_init(&src->src); - src->abs = src->negate = false; - src->swizzle[0] = 0; - src->swizzle[1] = 1; - src->swizzle[2] = 2; - src->swizzle[3] = 3; -} - -nir_alu_instr * -nir_alu_instr_create(nir_shader *shader, nir_op op) -{ - unsigned num_srcs = nir_op_infos[op].num_inputs; - nir_alu_instr *instr = - ralloc_size(shader, - sizeof(nir_alu_instr) + num_srcs * sizeof(nir_alu_src)); - - instr_init(&instr->instr, nir_instr_type_alu); - instr->op = op; - alu_dest_init(&instr->dest); - for (unsigned i = 0; i < num_srcs; i++) - alu_src_init(&instr->src[i]); - - return instr; -} - -nir_jump_instr * -nir_jump_instr_create(nir_shader *shader, nir_jump_type type) -{ - nir_jump_instr *instr = ralloc(shader, nir_jump_instr); - instr_init(&instr->instr, nir_instr_type_jump); - instr->type = type; - return instr; -} - -nir_load_const_instr * -nir_load_const_instr_create(nir_shader *shader, unsigned num_components) -{ - nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr); - instr_init(&instr->instr, nir_instr_type_load_const); - - nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); - - return instr; -} - -nir_intrinsic_instr * -nir_intrinsic_instr_create(nir_shader *shader, nir_intrinsic_op op) -{ - unsigned num_srcs = nir_intrinsic_infos[op].num_srcs; - nir_intrinsic_instr *instr = - ralloc_size(shader, - sizeof(nir_intrinsic_instr) + num_srcs * sizeof(nir_src)); - - instr_init(&instr->instr, nir_instr_type_intrinsic); - instr->intrinsic = op; - - if (nir_intrinsic_infos[op].has_dest) - dest_init(&instr->dest); - - for (unsigned i = 0; i < num_srcs; i++) - src_init(&instr->src[i]); - - return instr; -} - -nir_call_instr * -nir_call_instr_create(nir_shader 
*shader, nir_function *callee) -{ - nir_call_instr *instr = ralloc(shader, nir_call_instr); - instr_init(&instr->instr, nir_instr_type_call); - - instr->callee = callee; - instr->num_params = callee->num_params; - instr->params = ralloc_array(instr, nir_deref_var *, instr->num_params); - instr->return_deref = NULL; - - return instr; -} - -nir_tex_instr * -nir_tex_instr_create(nir_shader *shader, unsigned num_srcs) -{ - nir_tex_instr *instr = rzalloc(shader, nir_tex_instr); - instr_init(&instr->instr, nir_instr_type_tex); - - dest_init(&instr->dest); - - instr->num_srcs = num_srcs; - instr->src = ralloc_array(instr, nir_tex_src, num_srcs); - for (unsigned i = 0; i < num_srcs; i++) - src_init(&instr->src[i].src); - - instr->sampler_index = 0; - instr->sampler_array_size = 0; - instr->sampler = NULL; - - return instr; -} - -nir_phi_instr * -nir_phi_instr_create(nir_shader *shader) -{ - nir_phi_instr *instr = ralloc(shader, nir_phi_instr); - instr_init(&instr->instr, nir_instr_type_phi); - - dest_init(&instr->dest); - exec_list_make_empty(&instr->srcs); - return instr; -} - -nir_parallel_copy_instr * -nir_parallel_copy_instr_create(nir_shader *shader) -{ - nir_parallel_copy_instr *instr = ralloc(shader, nir_parallel_copy_instr); - instr_init(&instr->instr, nir_instr_type_parallel_copy); - - exec_list_make_empty(&instr->entries); - - return instr; -} - -nir_ssa_undef_instr * -nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components) -{ - nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr); - instr_init(&instr->instr, nir_instr_type_ssa_undef); - - nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL); - - return instr; -} - -nir_deref_var * -nir_deref_var_create(void *mem_ctx, nir_variable *var) -{ - nir_deref_var *deref = ralloc(mem_ctx, nir_deref_var); - deref->deref.deref_type = nir_deref_type_var; - deref->deref.child = NULL; - deref->deref.type = var->type; - deref->var = var; - return deref; -} - -nir_deref_array * -nir_deref_array_create(void *mem_ctx) -{ - nir_deref_array *deref = ralloc(mem_ctx, nir_deref_array); - deref->deref.deref_type = nir_deref_type_array; - deref->deref.child = NULL; - deref->deref_array_type = nir_deref_array_type_direct; - src_init(&deref->indirect); - deref->base_offset = 0; - return deref; -} - -nir_deref_struct * -nir_deref_struct_create(void *mem_ctx, unsigned field_index) -{ - nir_deref_struct *deref = ralloc(mem_ctx, nir_deref_struct); - deref->deref.deref_type = nir_deref_type_struct; - deref->deref.child = NULL; - deref->index = field_index; - return deref; -} - -static nir_deref_var * -copy_deref_var(void *mem_ctx, nir_deref_var *deref) -{ - nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var); - ret->deref.type = deref->deref.type; - if (deref->deref.child) - ret->deref.child = nir_copy_deref(ret, deref->deref.child); - return ret; -} - -static nir_deref_array * -copy_deref_array(void *mem_ctx, nir_deref_array *deref) -{ - nir_deref_array *ret = nir_deref_array_create(mem_ctx); - ret->base_offset = deref->base_offset; - ret->deref_array_type = deref->deref_array_type; - if (deref->deref_array_type == nir_deref_array_type_indirect) { - nir_src_copy(&ret->indirect, &deref->indirect, mem_ctx); - } - ret->deref.type = deref->deref.type; - if (deref->deref.child) - ret->deref.child = nir_copy_deref(ret, deref->deref.child); - return ret; -} - -static nir_deref_struct * -copy_deref_struct(void *mem_ctx, nir_deref_struct *deref) -{ - nir_deref_struct *ret = nir_deref_struct_create(mem_ctx, deref->index); - 
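/* A struct deref carries only a constant field index; the result type and any child deref still have to be copied over below. */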
ret->deref.type = deref->deref.type; - if (deref->deref.child) - ret->deref.child = nir_copy_deref(ret, deref->deref.child); - return ret; -} - -nir_deref * -nir_copy_deref(void *mem_ctx, nir_deref *deref) -{ - switch (deref->deref_type) { - case nir_deref_type_var: - return &copy_deref_var(mem_ctx, nir_deref_as_var(deref))->deref; - case nir_deref_type_array: - return &copy_deref_array(mem_ctx, nir_deref_as_array(deref))->deref; - case nir_deref_type_struct: - return &copy_deref_struct(mem_ctx, nir_deref_as_struct(deref))->deref; - default: - unreachable("Invalid dereference type"); - } - - return NULL; -} - -/* Returns a load_const instruction that represents the constant - * initializer for the given deref chain. The caller is responsible for - * ensuring that there actually is a constant initializer. - */ -nir_load_const_instr * -nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref) -{ - nir_constant *constant = deref->var->constant_initializer; - assert(constant); - - const nir_deref *tail = &deref->deref; - unsigned matrix_offset = 0; - while (tail->child) { - switch (tail->child->deref_type) { - case nir_deref_type_array: { - nir_deref_array *arr = nir_deref_as_array(tail->child); - assert(arr->deref_array_type == nir_deref_array_type_direct); - if (glsl_type_is_matrix(tail->type)) { - assert(arr->deref.child == NULL); - matrix_offset = arr->base_offset; - } else { - constant = constant->elements[arr->base_offset]; - } - break; - } - - case nir_deref_type_struct: { - constant = constant->elements[nir_deref_as_struct(tail->child)->index]; - break; - } - - default: - unreachable("Invalid deref child type"); - } - - tail = tail->child; - } - - nir_load_const_instr *load = - nir_load_const_instr_create(shader, glsl_get_vector_elements(tail->type)); - - matrix_offset *= load->def.num_components; - for (unsigned i = 0; i < load->def.num_components; i++) { - switch (glsl_get_base_type(tail->type)) { - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - load->value.u[i] = constant->value.u[matrix_offset + i]; - break; - case GLSL_TYPE_BOOL: - load->value.u[i] = constant->value.b[matrix_offset + i] ? - NIR_TRUE : NIR_FALSE; - break; - default: - unreachable("Invalid immediate type"); - } - } - - return load; -} - -nir_function_impl * -nir_cf_node_get_function(nir_cf_node *node) -{ - while (node->type != nir_cf_node_function) { - node = node->parent; - } - - return nir_cf_node_as_function(node); -} - -static bool -add_use_cb(nir_src *src, void *state) -{ - nir_instr *instr = state; - - src->parent_instr = instr; - list_addtail(&src->use_link, - src->is_ssa ? 
&src->ssa->uses : &src->reg.reg->uses); - - return true; -} - -static bool -add_ssa_def_cb(nir_ssa_def *def, void *state) -{ - nir_instr *instr = state; - - if (instr->block && def->index == UINT_MAX) { - nir_function_impl *impl = - nir_cf_node_get_function(&instr->block->cf_node); - - def->index = impl->ssa_alloc++; - } - - return true; -} - -static bool -add_reg_def_cb(nir_dest *dest, void *state) -{ - nir_instr *instr = state; - - if (!dest->is_ssa) { - dest->reg.parent_instr = instr; - list_addtail(&dest->reg.def_link, &dest->reg.reg->defs); - } - - return true; -} - -static void -add_defs_uses(nir_instr *instr) -{ - nir_foreach_src(instr, add_use_cb, instr); - nir_foreach_dest(instr, add_reg_def_cb, instr); - nir_foreach_ssa_def(instr, add_ssa_def_cb, instr); -} - -void -nir_instr_insert(nir_cursor cursor, nir_instr *instr) -{ - switch (cursor.option) { - case nir_cursor_before_block: - /* Only allow inserting jumps into empty blocks. */ - if (instr->type == nir_instr_type_jump) - assert(exec_list_is_empty(&cursor.block->instr_list)); - - instr->block = cursor.block; - add_defs_uses(instr); - exec_list_push_head(&cursor.block->instr_list, &instr->node); - break; - case nir_cursor_after_block: { - /* Inserting instructions after a jump is illegal. */ - nir_instr *last = nir_block_last_instr(cursor.block); - assert(last == NULL || last->type != nir_instr_type_jump); - (void) last; - - instr->block = cursor.block; - add_defs_uses(instr); - exec_list_push_tail(&cursor.block->instr_list, &instr->node); - break; - } - case nir_cursor_before_instr: - assert(instr->type != nir_instr_type_jump); - instr->block = cursor.instr->block; - add_defs_uses(instr); - exec_node_insert_node_before(&cursor.instr->node, &instr->node); - break; - case nir_cursor_after_instr: - /* Inserting instructions after a jump is illegal. */ - assert(cursor.instr->type != nir_instr_type_jump); - - /* Only allow inserting jumps at the end of the block. */ - if (instr->type == nir_instr_type_jump) - assert(cursor.instr == nir_block_last_instr(cursor.instr->block)); - - instr->block = cursor.instr->block; - add_defs_uses(instr); - exec_node_insert_after(&cursor.instr->node, &instr->node); - break; - } - - if (instr->type == nir_instr_type_jump) - nir_handle_add_jump(instr->block); -} - -static bool -src_is_valid(const nir_src *src) -{ - return src->is_ssa ? 
(src->ssa != NULL) : (src->reg.reg != NULL); -} - -static bool -remove_use_cb(nir_src *src, void *state) -{ - if (src_is_valid(src)) - list_del(&src->use_link); - - return true; -} - -static bool -remove_def_cb(nir_dest *dest, void *state) -{ - if (!dest->is_ssa) - list_del(&dest->reg.def_link); - - return true; -} - -static void -remove_defs_uses(nir_instr *instr) -{ - nir_foreach_dest(instr, remove_def_cb, instr); - nir_foreach_src(instr, remove_use_cb, instr); -} - -void nir_instr_remove(nir_instr *instr) -{ - remove_defs_uses(instr); - exec_node_remove(&instr->node); - - if (instr->type == nir_instr_type_jump) { - nir_jump_instr *jump_instr = nir_instr_as_jump(instr); - nir_handle_remove_jump(instr->block, jump_instr->type); - } -} - -/*@}*/ - -void -nir_index_local_regs(nir_function_impl *impl) -{ - unsigned index = 0; - foreach_list_typed(nir_register, reg, node, &impl->registers) { - reg->index = index++; - } - impl->reg_alloc = index; -} - -void -nir_index_global_regs(nir_shader *shader) -{ - unsigned index = 0; - foreach_list_typed(nir_register, reg, node, &shader->registers) { - reg->index = index++; - } - shader->reg_alloc = index; -} - -static bool -visit_alu_dest(nir_alu_instr *instr, nir_foreach_dest_cb cb, void *state) -{ - return cb(&instr->dest.dest, state); -} - -static bool -visit_intrinsic_dest(nir_intrinsic_instr *instr, nir_foreach_dest_cb cb, - void *state) -{ - if (nir_intrinsic_infos[instr->intrinsic].has_dest) - return cb(&instr->dest, state); - - return true; -} - -static bool -visit_texture_dest(nir_tex_instr *instr, nir_foreach_dest_cb cb, - void *state) -{ - return cb(&instr->dest, state); -} - -static bool -visit_phi_dest(nir_phi_instr *instr, nir_foreach_dest_cb cb, void *state) -{ - return cb(&instr->dest, state); -} - -static bool -visit_parallel_copy_dest(nir_parallel_copy_instr *instr, - nir_foreach_dest_cb cb, void *state) -{ - nir_foreach_parallel_copy_entry(instr, entry) { - if (!cb(&entry->dest, state)) - return false; - } - - return true; -} - -bool -nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state) -{ - switch (instr->type) { - case nir_instr_type_alu: - return visit_alu_dest(nir_instr_as_alu(instr), cb, state); - case nir_instr_type_intrinsic: - return visit_intrinsic_dest(nir_instr_as_intrinsic(instr), cb, state); - case nir_instr_type_tex: - return visit_texture_dest(nir_instr_as_tex(instr), cb, state); - case nir_instr_type_phi: - return visit_phi_dest(nir_instr_as_phi(instr), cb, state); - case nir_instr_type_parallel_copy: - return visit_parallel_copy_dest(nir_instr_as_parallel_copy(instr), - cb, state); - - case nir_instr_type_load_const: - case nir_instr_type_ssa_undef: - case nir_instr_type_call: - case nir_instr_type_jump: - break; - - default: - unreachable("Invalid instruction type"); - break; - } - - return true; -} - -struct foreach_ssa_def_state { - nir_foreach_ssa_def_cb cb; - void *client_state; -}; - -static inline bool -nir_ssa_def_visitor(nir_dest *dest, void *void_state) -{ - struct foreach_ssa_def_state *state = void_state; - - if (dest->is_ssa) - return state->cb(&dest->ssa, state->client_state); - else - return true; -} - -bool -nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, void *state) -{ - switch (instr->type) { - case nir_instr_type_alu: - case nir_instr_type_tex: - case nir_instr_type_intrinsic: - case nir_instr_type_phi: - case nir_instr_type_parallel_copy: { - struct foreach_ssa_def_state foreach_state = {cb, state}; - return nir_foreach_dest(instr, nir_ssa_def_visitor, 
&foreach_state); - } - - case nir_instr_type_load_const: - return cb(&nir_instr_as_load_const(instr)->def, state); - case nir_instr_type_ssa_undef: - return cb(&nir_instr_as_ssa_undef(instr)->def, state); - case nir_instr_type_call: - case nir_instr_type_jump: - return true; - default: - unreachable("Invalid instruction type"); - } -} - -static bool -visit_src(nir_src *src, nir_foreach_src_cb cb, void *state) -{ - if (!cb(src, state)) - return false; - if (!src->is_ssa && src->reg.indirect) - return cb(src->reg.indirect, state); - return true; -} - -static bool -visit_deref_array_src(nir_deref_array *deref, nir_foreach_src_cb cb, - void *state) -{ - if (deref->deref_array_type == nir_deref_array_type_indirect) - return visit_src(&deref->indirect, cb, state); - return true; -} - -static bool -visit_deref_src(nir_deref_var *deref, nir_foreach_src_cb cb, void *state) -{ - nir_deref *cur = &deref->deref; - while (cur != NULL) { - if (cur->deref_type == nir_deref_type_array) - if (!visit_deref_array_src(nir_deref_as_array(cur), cb, state)) - return false; - - cur = cur->child; - } - - return true; -} - -static bool -visit_alu_src(nir_alu_instr *instr, nir_foreach_src_cb cb, void *state) -{ - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) - if (!visit_src(&instr->src[i].src, cb, state)) - return false; - - return true; -} - -static bool -visit_tex_src(nir_tex_instr *instr, nir_foreach_src_cb cb, void *state) -{ - for (unsigned i = 0; i < instr->num_srcs; i++) - if (!visit_src(&instr->src[i].src, cb, state)) - return false; - - if (instr->sampler != NULL) - if (!visit_deref_src(instr->sampler, cb, state)) - return false; - - return true; -} - -static bool -visit_intrinsic_src(nir_intrinsic_instr *instr, nir_foreach_src_cb cb, - void *state) -{ - unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; - for (unsigned i = 0; i < num_srcs; i++) - if (!visit_src(&instr->src[i], cb, state)) - return false; - - unsigned num_vars = - nir_intrinsic_infos[instr->intrinsic].num_variables; - for (unsigned i = 0; i < num_vars; i++) - if (!visit_deref_src(instr->variables[i], cb, state)) - return false; - - return true; -} - -static bool -visit_call_src(nir_call_instr *instr, nir_foreach_src_cb cb, void *state) -{ - return true; -} - -static bool -visit_load_const_src(nir_load_const_instr *instr, nir_foreach_src_cb cb, - void *state) -{ - return true; -} - -static bool -visit_phi_src(nir_phi_instr *instr, nir_foreach_src_cb cb, void *state) -{ - nir_foreach_phi_src(instr, src) { - if (!visit_src(&src->src, cb, state)) - return false; - } - - return true; -} - -static bool -visit_parallel_copy_src(nir_parallel_copy_instr *instr, - nir_foreach_src_cb cb, void *state) -{ - nir_foreach_parallel_copy_entry(instr, entry) { - if (!visit_src(&entry->src, cb, state)) - return false; - } - - return true; -} - -typedef struct { - void *state; - nir_foreach_src_cb cb; -} visit_dest_indirect_state; - -static bool -visit_dest_indirect(nir_dest *dest, void *_state) -{ - visit_dest_indirect_state *state = (visit_dest_indirect_state *) _state; - - if (!dest->is_ssa && dest->reg.indirect) - return state->cb(dest->reg.indirect, state->state); - - return true; -} - -bool -nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state) -{ - switch (instr->type) { - case nir_instr_type_alu: - if (!visit_alu_src(nir_instr_as_alu(instr), cb, state)) - return false; - break; - case nir_instr_type_intrinsic: - if (!visit_intrinsic_src(nir_instr_as_intrinsic(instr), cb, state)) - return false; - 
break; - case nir_instr_type_tex: - if (!visit_tex_src(nir_instr_as_tex(instr), cb, state)) - return false; - break; - case nir_instr_type_call: - if (!visit_call_src(nir_instr_as_call(instr), cb, state)) - return false; - break; - case nir_instr_type_load_const: - if (!visit_load_const_src(nir_instr_as_load_const(instr), cb, state)) - return false; - break; - case nir_instr_type_phi: - if (!visit_phi_src(nir_instr_as_phi(instr), cb, state)) - return false; - break; - case nir_instr_type_parallel_copy: - if (!visit_parallel_copy_src(nir_instr_as_parallel_copy(instr), - cb, state)) - return false; - break; - case nir_instr_type_jump: - case nir_instr_type_ssa_undef: - return true; - - default: - unreachable("Invalid instruction type"); - break; - } - - visit_dest_indirect_state dest_state; - dest_state.state = state; - dest_state.cb = cb; - return nir_foreach_dest(instr, visit_dest_indirect, &dest_state); -} - -nir_const_value * -nir_src_as_const_value(nir_src src) -{ - if (!src.is_ssa) - return NULL; - - if (src.ssa->parent_instr->type != nir_instr_type_load_const) - return NULL; - - nir_load_const_instr *load = nir_instr_as_load_const(src.ssa->parent_instr); - - return &load->value; -} - -/** - * Returns true if the source is known to be dynamically uniform. Otherwise it - * returns false which means it may or may not be dynamically uniform but it - * can't be determined. - */ -bool -nir_src_is_dynamically_uniform(nir_src src) -{ - if (!src.is_ssa) - return false; - - /* Constants are trivially dynamically uniform */ - if (src.ssa->parent_instr->type == nir_instr_type_load_const) - return true; - - /* As are uniform variables */ - if (src.ssa->parent_instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(src.ssa->parent_instr); - - if (intr->intrinsic == nir_intrinsic_load_uniform) - return true; - } - - /* XXX: this could have many more tests, such as when a sampler function is - * called with dynamically uniform arguments. - */ - return false; -} - -static void -src_remove_all_uses(nir_src *src) -{ - for (; src; src = src->is_ssa ? NULL : src->reg.indirect) { - if (!src_is_valid(src)) - continue; - - list_del(&src->use_link); - } -} - -static void -src_add_all_uses(nir_src *src, nir_instr *parent_instr, nir_if *parent_if) -{ - for (; src; src = src->is_ssa ? 
NULL : src->reg.indirect) { - if (!src_is_valid(src)) - continue; - - if (parent_instr) { - src->parent_instr = parent_instr; - if (src->is_ssa) - list_addtail(&src->use_link, &src->ssa->uses); - else - list_addtail(&src->use_link, &src->reg.reg->uses); - } else { - assert(parent_if); - src->parent_if = parent_if; - if (src->is_ssa) - list_addtail(&src->use_link, &src->ssa->if_uses); - else - list_addtail(&src->use_link, &src->reg.reg->if_uses); - } - } -} - -void -nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src) -{ - assert(!src_is_valid(src) || src->parent_instr == instr); - - src_remove_all_uses(src); - *src = new_src; - src_add_all_uses(src, instr, NULL); -} - -void -nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src) -{ - assert(!src_is_valid(dest) || dest->parent_instr == dest_instr); - - src_remove_all_uses(dest); - src_remove_all_uses(src); - *dest = *src; - *src = NIR_SRC_INIT; - src_add_all_uses(dest, dest_instr, NULL); -} - -void -nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src) -{ - nir_src *src = &if_stmt->condition; - assert(!src_is_valid(src) || src->parent_if == if_stmt); - - src_remove_all_uses(src); - *src = new_src; - src_add_all_uses(src, NULL, if_stmt); -} - -void -nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest) -{ - if (dest->is_ssa) { - /* We can only overwrite an SSA destination if it has no uses. */ - assert(list_empty(&dest->ssa.uses) && list_empty(&dest->ssa.if_uses)); - } else { - list_del(&dest->reg.def_link); - if (dest->reg.indirect) - src_remove_all_uses(dest->reg.indirect); - } - - /* We can't re-write with an SSA def */ - assert(!new_dest.is_ssa); - - nir_dest_copy(dest, &new_dest, instr); - - dest->reg.parent_instr = instr; - list_addtail(&dest->reg.def_link, &new_dest.reg.reg->defs); - - if (dest->reg.indirect) - src_add_all_uses(dest->reg.indirect, instr, NULL); -} - -void -nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, - unsigned num_components, const char *name) -{ - def->name = name; - def->parent_instr = instr; - list_inithead(&def->uses); - list_inithead(&def->if_uses); - def->num_components = num_components; - - if (instr->block) { - nir_function_impl *impl = - nir_cf_node_get_function(&instr->block->cf_node); - - def->index = impl->ssa_alloc++; - } else { - def->index = UINT_MAX; - } -} - -void -nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, - unsigned num_components, const char *name) -{ - dest->is_ssa = true; - nir_ssa_def_init(instr, &dest->ssa, num_components, name); -} - -void -nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src) -{ - assert(!new_src.is_ssa || def != new_src.ssa); - - nir_foreach_use_safe(def, use_src) - nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); - - nir_foreach_if_use_safe(def, use_src) - nir_if_rewrite_condition(use_src->parent_if, new_src); -} - -static bool -is_instr_between(nir_instr *start, nir_instr *end, nir_instr *between) -{ - assert(start->block == end->block); - - if (between->block != start->block) - return false; - - /* Search backwards looking for "between" */ - while (start != end) { - if (between == end) - return true; - - end = nir_instr_prev(end); - assert(end); - } - - return false; -} - -/* Replaces all uses of the given SSA def with the given source but only if - * the use comes after the after_me instruction. 
This can be useful if you - * are emitting code to fix up the result of some instruction: you can freely - * use the result in that code and then call rewrite_uses_after and pass the - * last fixup instruction as after_me and it will replace all of the uses you - * want without touching the fixup code. - * - * This function assumes that after_me is in the same block as - * def->parent_instr and that after_me comes after def->parent_instr. - */ -void -nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, - nir_instr *after_me) -{ - assert(!new_src.is_ssa || def != new_src.ssa); - - nir_foreach_use_safe(def, use_src) { - assert(use_src->parent_instr != def->parent_instr); - /* Since def already dominates all of its uses, the only way a use can - * not be dominated by after_me is if it is between def and after_me in - * the instruction list. - */ - if (!is_instr_between(def->parent_instr, after_me, use_src->parent_instr)) - nir_instr_rewrite_src(use_src->parent_instr, use_src, new_src); - } - - nir_foreach_if_use_safe(def, use_src) - nir_if_rewrite_condition(use_src->parent_if, new_src); -} - -static bool foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, - bool reverse, void *state); - -static inline bool -foreach_if(nir_if *if_stmt, nir_foreach_block_cb cb, bool reverse, void *state) -{ - if (reverse) { - foreach_list_typed_reverse_safe(nir_cf_node, node, node, - &if_stmt->else_list) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } - - foreach_list_typed_reverse_safe(nir_cf_node, node, node, - &if_stmt->then_list) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } - } else { - foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->then_list) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } - - foreach_list_typed_safe(nir_cf_node, node, node, &if_stmt->else_list) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } - } - - return true; -} - -static inline bool -foreach_loop(nir_loop *loop, nir_foreach_block_cb cb, bool reverse, void *state) -{ - if (reverse) { - foreach_list_typed_reverse_safe(nir_cf_node, node, node, &loop->body) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } - } else { - foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) { - if (!foreach_cf_node(node, cb, reverse, state)) - return false; - } - } - - return true; -} - -static bool -foreach_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, - bool reverse, void *state) -{ - switch (node->type) { - case nir_cf_node_block: - return cb(nir_cf_node_as_block(node), state); - case nir_cf_node_if: - return foreach_if(nir_cf_node_as_if(node), cb, reverse, state); - case nir_cf_node_loop: - return foreach_loop(nir_cf_node_as_loop(node), cb, reverse, state); - break; - - default: - unreachable("Invalid CFG node type"); - break; - } - - return false; -} - -bool -nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, - void *state) -{ - return foreach_cf_node(node, cb, false, state); -} - -bool -nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, void *state) -{ - foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) { - if (!foreach_cf_node(node, cb, false, state)) - return false; - } - - return cb(impl->end_block, state); -} - -bool -nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb, - void *state) -{ - if (!cb(impl->end_block, state)) - return false; - - foreach_list_typed_reverse_safe(nir_cf_node, node, node, &impl->body) 
{ - if (!foreach_cf_node(node, cb, true, state)) - return false; - } - - return true; -} - -nir_if * -nir_block_get_following_if(nir_block *block) -{ - if (exec_node_is_tail_sentinel(&block->cf_node.node)) - return NULL; - - if (nir_cf_node_is_last(&block->cf_node)) - return NULL; - - nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); - - if (next_node->type != nir_cf_node_if) - return NULL; - - return nir_cf_node_as_if(next_node); -} - -nir_loop * -nir_block_get_following_loop(nir_block *block) -{ - if (exec_node_is_tail_sentinel(&block->cf_node.node)) - return NULL; - - if (nir_cf_node_is_last(&block->cf_node)) - return NULL; - - nir_cf_node *next_node = nir_cf_node_next(&block->cf_node); - - if (next_node->type != nir_cf_node_loop) - return NULL; - - return nir_cf_node_as_loop(next_node); -} -static bool -index_block(nir_block *block, void *state) -{ - unsigned *index = state; - block->index = (*index)++; - return true; -} - -void -nir_index_blocks(nir_function_impl *impl) -{ - unsigned index = 0; - - if (impl->valid_metadata & nir_metadata_block_index) - return; - - nir_foreach_block(impl, index_block, &index); - - impl->num_blocks = index; -} - -static bool -index_ssa_def_cb(nir_ssa_def *def, void *state) -{ - unsigned *index = (unsigned *) state; - def->index = (*index)++; - - return true; -} - -static bool -index_ssa_block(nir_block *block, void *state) -{ - nir_foreach_instr(block, instr) - nir_foreach_ssa_def(instr, index_ssa_def_cb, state); - - return true; -} - -/** - * The indices are applied top-to-bottom which has the very nice property - * that, if A dominates B, then A->index <= B->index. - */ -void -nir_index_ssa_defs(nir_function_impl *impl) -{ - unsigned index = 0; - nir_foreach_block(impl, index_ssa_block, &index); - impl->ssa_alloc = index; -} - -static bool -index_instrs_block(nir_block *block, void *state) -{ - unsigned *index = state; - nir_foreach_instr(block, instr) - instr->index = (*index)++; - - return true; -} - -/** - * The indices are applied top-to-bottom which has the very nice property - * that, if A dominates B, then A->index <= B->index. 
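As a usage sketch for the indexing and iteration helpers here (not part of the patch; the pass and its names are illustrative), a pass can request stable block indices up front and then walk the CFG with the callback-based iterator:

#include <stdio.h>

static bool
print_block_cb(nir_block *block, void *state)
{
   (void) state;
   printf("block %u\n", block->index);
   return true; /* returning false aborts the walk */
}

static void
print_block_indices(nir_function_impl *impl)
{
   nir_index_blocks(impl); /* no-op when nir_metadata_block_index is valid */
   nir_foreach_block(impl, print_block_cb, NULL);
}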
- */ -unsigned -nir_index_instrs(nir_function_impl *impl) -{ - unsigned index = 0; - nir_foreach_block(impl, index_instrs_block, &index); - return index; -} - -nir_intrinsic_op -nir_intrinsic_from_system_value(gl_system_value val) -{ - switch (val) { - case SYSTEM_VALUE_VERTEX_ID: - return nir_intrinsic_load_vertex_id; - case SYSTEM_VALUE_INSTANCE_ID: - return nir_intrinsic_load_instance_id; - case SYSTEM_VALUE_DRAW_ID: - return nir_intrinsic_load_draw_id; - case SYSTEM_VALUE_BASE_INSTANCE: - return nir_intrinsic_load_base_instance; - case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE: - return nir_intrinsic_load_vertex_id_zero_base; - case SYSTEM_VALUE_BASE_VERTEX: - return nir_intrinsic_load_base_vertex; - case SYSTEM_VALUE_INVOCATION_ID: - return nir_intrinsic_load_invocation_id; - case SYSTEM_VALUE_FRONT_FACE: - return nir_intrinsic_load_front_face; - case SYSTEM_VALUE_SAMPLE_ID: - return nir_intrinsic_load_sample_id; - case SYSTEM_VALUE_SAMPLE_POS: - return nir_intrinsic_load_sample_pos; - case SYSTEM_VALUE_SAMPLE_MASK_IN: - return nir_intrinsic_load_sample_mask_in; - case SYSTEM_VALUE_LOCAL_INVOCATION_ID: - return nir_intrinsic_load_local_invocation_id; - case SYSTEM_VALUE_WORK_GROUP_ID: - return nir_intrinsic_load_work_group_id; - case SYSTEM_VALUE_NUM_WORK_GROUPS: - return nir_intrinsic_load_num_work_groups; - case SYSTEM_VALUE_PRIMITIVE_ID: - return nir_intrinsic_load_primitive_id; - case SYSTEM_VALUE_TESS_COORD: - return nir_intrinsic_load_tess_coord; - case SYSTEM_VALUE_TESS_LEVEL_OUTER: - return nir_intrinsic_load_tess_level_outer; - case SYSTEM_VALUE_TESS_LEVEL_INNER: - return nir_intrinsic_load_tess_level_inner; - case SYSTEM_VALUE_VERTICES_IN: - return nir_intrinsic_load_patch_vertices_in; - case SYSTEM_VALUE_HELPER_INVOCATION: - return nir_intrinsic_load_helper_invocation; - default: - unreachable("system value does not directly correspond to intrinsic"); - } -} - -gl_system_value -nir_system_value_from_intrinsic(nir_intrinsic_op intrin) -{ - switch (intrin) { - case nir_intrinsic_load_vertex_id: - return SYSTEM_VALUE_VERTEX_ID; - case nir_intrinsic_load_instance_id: - return SYSTEM_VALUE_INSTANCE_ID; - case nir_intrinsic_load_draw_id: - return SYSTEM_VALUE_DRAW_ID; - case nir_intrinsic_load_base_instance: - return SYSTEM_VALUE_BASE_INSTANCE; - case nir_intrinsic_load_vertex_id_zero_base: - return SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; - case nir_intrinsic_load_base_vertex: - return SYSTEM_VALUE_BASE_VERTEX; - case nir_intrinsic_load_invocation_id: - return SYSTEM_VALUE_INVOCATION_ID; - case nir_intrinsic_load_front_face: - return SYSTEM_VALUE_FRONT_FACE; - case nir_intrinsic_load_sample_id: - return SYSTEM_VALUE_SAMPLE_ID; - case nir_intrinsic_load_sample_pos: - return SYSTEM_VALUE_SAMPLE_POS; - case nir_intrinsic_load_sample_mask_in: - return SYSTEM_VALUE_SAMPLE_MASK_IN; - case nir_intrinsic_load_local_invocation_id: - return SYSTEM_VALUE_LOCAL_INVOCATION_ID; - case nir_intrinsic_load_num_work_groups: - return SYSTEM_VALUE_NUM_WORK_GROUPS; - case nir_intrinsic_load_work_group_id: - return SYSTEM_VALUE_WORK_GROUP_ID; - case nir_intrinsic_load_primitive_id: - return SYSTEM_VALUE_PRIMITIVE_ID; - case nir_intrinsic_load_tess_coord: - return SYSTEM_VALUE_TESS_COORD; - case nir_intrinsic_load_tess_level_outer: - return SYSTEM_VALUE_TESS_LEVEL_OUTER; - case nir_intrinsic_load_tess_level_inner: - return SYSTEM_VALUE_TESS_LEVEL_INNER; - case nir_intrinsic_load_patch_vertices_in: - return SYSTEM_VALUE_VERTICES_IN; - case nir_intrinsic_load_helper_invocation: - return SYSTEM_VALUE_HELPER_INVOCATION; 
- default: - unreachable("intrinsic doesn't produce a system value"); - } -} diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h deleted file mode 100644 index d76df66bb67..00000000000 --- a/src/glsl/nir/nir.h +++ /dev/null @@ -1,2111 +0,0 @@ -/* - * Copyright © 2014 Connor Abbott - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#pragma once - -#include "util/hash_table.h" -#include "../list.h" -#include "GL/gl.h" /* GLenum */ -#include "util/list.h" -#include "util/ralloc.h" -#include "util/set.h" -#include "util/bitset.h" -#include "compiler/nir_types.h" -#include "compiler/shader_enums.h" -#include - -#include "nir_opcodes.h" - -#ifdef __cplusplus -extern "C" { -#endif - -struct gl_program; -struct gl_shader_program; - -#define NIR_FALSE 0u -#define NIR_TRUE (~0u) - -/** Defines a cast function - * - * This macro defines a cast function from in_type to out_type where - * out_type is some structure type that contains a field of type out_type. - * - * Note that you have to be a bit careful as the generated cast function - * destroys constness. - */ -#define NIR_DEFINE_CAST(name, in_type, out_type, field) \ -static inline out_type * \ -name(const in_type *parent) \ -{ \ - return exec_node_data(out_type, parent, field); \ -} - -struct nir_function; -struct nir_shader; -struct nir_instr; - - -/** - * Description of built-in state associated with a uniform - * - * \sa nir_variable::state_slots - */ -typedef struct { - int tokens[5]; - int swizzle; -} nir_state_slot; - -typedef enum { - nir_var_all = -1, - nir_var_shader_in, - nir_var_shader_out, - nir_var_global, - nir_var_local, - nir_var_uniform, - nir_var_shader_storage, - nir_var_system_value -} nir_variable_mode; - -/** - * Data stored in an nir_constant - */ -union nir_constant_data { - unsigned u[16]; - int i[16]; - float f[16]; - bool b[16]; -}; - -typedef struct nir_constant { - /** - * Value of the constant. - * - * The field used to back the values supplied by the constant is determined - * by the type associated with the \c nir_variable. Constants may be - * scalars, vectors, or matrices. - */ - union nir_constant_data value; - - /* we could get this from the var->type but makes clone *much* easier to - * not have to care about the type. 
- */ - unsigned num_elements; - - /* Array elements / Structure Fields */ - struct nir_constant **elements; -} nir_constant; - -/** - * \brief Layout qualifiers for gl_FragDepth. - * - * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared - * with a layout qualifier. - */ -typedef enum { - nir_depth_layout_none, /**< No depth layout is specified. */ - nir_depth_layout_any, - nir_depth_layout_greater, - nir_depth_layout_less, - nir_depth_layout_unchanged -} nir_depth_layout; - -/** - * Either a uniform, global variable, shader input, or shader output. Based on - * ir_variable - it should be easy to translate between the two. - */ - -typedef struct nir_variable { - struct exec_node node; - - /** - * Declared type of the variable - */ - const struct glsl_type *type; - - /** - * Declared name of the variable - */ - char *name; - - struct nir_variable_data { - - /** - * Is the variable read-only? - * - * This is set for variables declared as \c const, shader inputs, - * and uniforms. - */ - unsigned read_only:1; - unsigned centroid:1; - unsigned sample:1; - unsigned patch:1; - unsigned invariant:1; - - /** - * Storage class of the variable. - * - * \sa nir_variable_mode - */ - nir_variable_mode mode:4; - - /** - * Interpolation mode for shader inputs / outputs - * - * \sa glsl_interp_qualifier - */ - unsigned interpolation:2; - - /** - * \name ARB_fragment_coord_conventions - * @{ - */ - unsigned origin_upper_left:1; - unsigned pixel_center_integer:1; - /*@}*/ - - /** - * Was the location explicitly set in the shader? - * - * If the location is explicitly set in the shader, it \b cannot be changed - * by the linker or by the API (e.g., calls to \c glBindAttribLocation have - * no effect). - */ - unsigned explicit_location:1; - unsigned explicit_index:1; - - /** - * Was an initial binding explicitly set in the shader? - * - * If so, constant_initializer contains an integer nir_constant - * representing the initial binding point. - */ - unsigned explicit_binding:1; - - /** - * Does this variable have an initializer? - * - * This is used by the linker to cross-validate initializers of global - * variables. - */ - unsigned has_initializer:1; - - /** - * If non-zero, then this variable may be packed along with other variables - * into a single varying slot, so this offset should be applied when - * accessing components. For example, an offset of 1 means that the x - * component of this variable is actually stored in component y of the - * location specified by \c location. - */ - unsigned location_frac:2; - - /** - * Non-zero if this variable was created by lowering a named interface - * block which was not an array. - * - * Note that this variable and \c from_named_ifc_block_array will never - * both be non-zero. - */ - unsigned from_named_ifc_block_nonarray:1; - - /** - * Non-zero if this variable was created by lowering a named interface - * block which was an array. - * - * Note that this variable and \c from_named_ifc_block_nonarray will never - * both be non-zero. - */ - unsigned from_named_ifc_block_array:1; - - /** - * \brief Layout qualifier for gl_FragDepth. - * - * This is not equal to \c ir_depth_layout_none if and only if this - * variable is \c gl_FragDepth and a layout qualifier is specified. - */ - nir_depth_layout depth_layout; - - /** - * Storage location of the base of this variable - * - * The precise meaning of this field depends on the nature of the variable. - * - * - Vertex shader input: one of the values from \c gl_vert_attrib.
- * - Vertex shader output: one of the values from \c gl_varying_slot. - * - Geometry shader input: one of the values from \c gl_varying_slot. - * - Geometry shader output: one of the values from \c gl_varying_slot. - * - Fragment shader input: one of the values from \c gl_varying_slot. - * - Fragment shader output: one of the values from \c gl_frag_result. - * - Uniforms: Per-stage uniform slot number for default uniform block. - * - Uniforms: Index within the uniform block definition for UBO members. - * - Non-UBO Uniforms: uniform slot number. - * - Other: This field is not currently used. - * - * If the variable is a uniform, shader input, or shader output, and the - * slot has not been assigned, the value will be -1. - */ - int location; - - /** - * The actual location of the variable in the IR. Only valid for inputs - * and outputs. - */ - unsigned int driver_location; - - /** - * output index for dual source blending. - */ - int index; - - /** - * Initial binding point for a sampler or UBO. - * - * For array types, this represents the binding point for the first element. - */ - int binding; - - /** - * Location an atomic counter is stored at. - */ - unsigned offset; - - /** - * ARB_shader_image_load_store qualifiers. - */ - struct { - bool read_only; /**< "readonly" qualifier. */ - bool write_only; /**< "writeonly" qualifier. */ - bool coherent; - bool _volatile; - bool restrict_flag; - - /** Image internal format if specified explicitly, otherwise GL_NONE. */ - GLenum format; - } image; - - /** - * Highest element accessed with a constant expression array index - * - * Not used for non-array variables. - */ - unsigned max_array_access; - - } data; - - /** - * Built-in state that backs this uniform - * - * Once set at variable creation, \c state_slots must remain invariant. - * This is because, ideally, this array would be shared by all clones of - * this variable in the IR tree. In other words, we'd really like for it - * to be a fly-weight. - * - * If the variable is not a uniform, \c num_state_slots will be zero and - * \c state_slots will be \c NULL. - */ - /*@{*/ - unsigned num_state_slots; /**< Number of state slots used */ - nir_state_slot *state_slots; /**< State descriptors. */ - /*@}*/ - - /** - * Constant expression assigned in the initializer of the variable - */ - nir_constant *constant_initializer; - - /** - * For variables that are in an interface block or are an instance of an - * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. - * - * \sa ir_variable::location - */ - const struct glsl_type *interface_type; -} nir_variable; - -#define nir_foreach_variable(var, var_list) \ - foreach_list_typed(nir_variable, var, node, var_list) - -typedef struct nir_register { - struct exec_node node; - - unsigned num_components; /** < number of vector components */ - unsigned num_array_elems; /** < size of array (0 for no array) */ - - /** generic register index. */ - unsigned index; - - /** only for debug purposes, can be NULL */ - const char *name; - - /** whether this register is local (per-function) or global (per-shader) */ - bool is_global; - - /** - * If this flag is set to true, then accessing channels >= num_components - * is well-defined, and simply spills over to the next array element. This - * is useful for backends that can do per-component accessing, in - * particular scalar backends. 
By setting this flag and making - * num_components equal to 1, structures can be packed tightly into - * registers and then registers can be accessed per-component to get to - * each structure member, even if it crosses vec4 boundaries. - */ - bool is_packed; - - /** set of nir_src's where this register is used (read from) */ - struct list_head uses; - - /** set of nir_dest's where this register is defined (written to) */ - struct list_head defs; - - /** set of nir_if's where this register is used as a condition */ - struct list_head if_uses; -} nir_register; - -typedef enum { - nir_instr_type_alu, - nir_instr_type_call, - nir_instr_type_tex, - nir_instr_type_intrinsic, - nir_instr_type_load_const, - nir_instr_type_jump, - nir_instr_type_ssa_undef, - nir_instr_type_phi, - nir_instr_type_parallel_copy, -} nir_instr_type; - -typedef struct nir_instr { - struct exec_node node; - nir_instr_type type; - struct nir_block *block; - - /** generic instruction index. */ - unsigned index; - - /* A temporary for optimization and analysis passes to use for storing - * flags. For instance, DCE uses this to store the "dead/live" info. - */ - uint8_t pass_flags; -} nir_instr; - -static inline nir_instr * -nir_instr_next(nir_instr *instr) -{ - struct exec_node *next = exec_node_get_next(&instr->node); - if (exec_node_is_tail_sentinel(next)) - return NULL; - else - return exec_node_data(nir_instr, next, node); -} - -static inline nir_instr * -nir_instr_prev(nir_instr *instr) -{ - struct exec_node *prev = exec_node_get_prev(&instr->node); - if (exec_node_is_head_sentinel(prev)) - return NULL; - else - return exec_node_data(nir_instr, prev, node); -} - -static inline bool -nir_instr_is_first(nir_instr *instr) -{ - return exec_node_is_head_sentinel(exec_node_get_prev(&instr->node)); -} - -static inline bool -nir_instr_is_last(nir_instr *instr) -{ - return exec_node_is_tail_sentinel(exec_node_get_next(&instr->node)); -} - -typedef struct nir_ssa_def { - /** for debugging only, can be NULL */ - const char* name; - - /** generic SSA definition index. 
*/ - unsigned index; - - /** Index into the live_in and live_out bitfields */ - unsigned live_index; - - nir_instr *parent_instr; - - /** set of nir_instr's where this register is used (read from) */ - struct list_head uses; - - /** set of nir_if's where this register is used as a condition */ - struct list_head if_uses; - - uint8_t num_components; -} nir_ssa_def; - -struct nir_src; - -typedef struct { - nir_register *reg; - struct nir_src *indirect; /** < NULL for no indirect offset */ - unsigned base_offset; - - /* TODO use-def chain goes here */ -} nir_reg_src; - -typedef struct { - nir_instr *parent_instr; - struct list_head def_link; - - nir_register *reg; - struct nir_src *indirect; /** < NULL for no indirect offset */ - unsigned base_offset; - - /* TODO def-use chain goes here */ -} nir_reg_dest; - -struct nir_if; - -typedef struct nir_src { - union { - nir_instr *parent_instr; - struct nir_if *parent_if; - }; - - struct list_head use_link; - - union { - nir_reg_src reg; - nir_ssa_def *ssa; - }; - - bool is_ssa; -} nir_src; - -#define NIR_SRC_INIT (nir_src) { { NULL } } - -#define nir_foreach_use(reg_or_ssa_def, src) \ - list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->uses, use_link) - -#define nir_foreach_use_safe(reg_or_ssa_def, src) \ - list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->uses, use_link) - -#define nir_foreach_if_use(reg_or_ssa_def, src) \ - list_for_each_entry(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) - -#define nir_foreach_if_use_safe(reg_or_ssa_def, src) \ - list_for_each_entry_safe(nir_src, src, &(reg_or_ssa_def)->if_uses, use_link) - -typedef struct { - union { - nir_reg_dest reg; - nir_ssa_def ssa; - }; - - bool is_ssa; -} nir_dest; - -#define NIR_DEST_INIT (nir_dest) { { { NULL } } } - -#define nir_foreach_def(reg, dest) \ - list_for_each_entry(nir_dest, dest, &(reg)->defs, reg.def_link) - -#define nir_foreach_def_safe(reg, dest) \ - list_for_each_entry_safe(nir_dest, dest, &(reg)->defs, reg.def_link) - -static inline nir_src -nir_src_for_ssa(nir_ssa_def *def) -{ - nir_src src = NIR_SRC_INIT; - - src.is_ssa = true; - src.ssa = def; - - return src; -} - -static inline nir_src -nir_src_for_reg(nir_register *reg) -{ - nir_src src = NIR_SRC_INIT; - - src.is_ssa = false; - src.reg.reg = reg; - src.reg.indirect = NULL; - src.reg.base_offset = 0; - - return src; -} - -static inline nir_dest -nir_dest_for_reg(nir_register *reg) -{ - nir_dest dest = NIR_DEST_INIT; - - dest.reg.reg = reg; - - return dest; -} - -void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if); -void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr); - -typedef struct { - nir_src src; - - /** - * \name input modifiers - */ - /*@{*/ - /** - * For inputs interpreted as floating point, flips the sign bit. For - * inputs interpreted as integers, performs the two's complement negation. - */ - bool negate; - - /** - * Clears the sign bit for floating point values, and computes the integer - * absolute value for integers. Note that the negate modifier acts after - * the absolute value modifier, therefore if both are set then all inputs - * will become negative. - */ - bool abs; - /*@}*/ - - /** - * For each input component, says which component of the register it is - * chosen from. 
Note that which elements of the swizzle are used and which - * are ignored are based on the write mask for most opcodes - for example, - * a statement like "foo.xzw = bar.zyx" would have a writemask of 1101b and - * a swizzle of {2, x, 1, 0} where x means "don't care." - */ - uint8_t swizzle[4]; -} nir_alu_src; - -typedef struct { - nir_dest dest; - - /** - * \name saturate output modifier - * - * Only valid for opcodes that output floating-point numbers. Clamps the - * output to between 0.0 and 1.0 inclusive. - */ - - bool saturate; - - unsigned write_mask : 4; /* ignored if dest.is_ssa is true */ -} nir_alu_dest; - -typedef enum { - nir_type_invalid = 0, /* Not a valid type */ - nir_type_float, - nir_type_int, - nir_type_uint, - nir_type_bool -} nir_alu_type; - -typedef enum { - NIR_OP_IS_COMMUTATIVE = (1 << 0), - NIR_OP_IS_ASSOCIATIVE = (1 << 1), -} nir_op_algebraic_property; - -typedef struct { - const char *name; - - unsigned num_inputs; - - /** - * The number of components in the output - * - * If non-zero, this is the size of the output and input sizes are - * explicitly given; swizzle and writemask are still in effect, but if - * the output component is masked out, then the input component may - * still be in use. - * - * If zero, the opcode acts in the standard, per-component manner; the - * operation is performed on each component (except the ones that are - * masked out) with the input being taken from the input swizzle for - * that component. - * - * The size of some of the inputs may be given (i.e. non-zero) even - * though output_size is zero; in that case, the inputs with a zero - * size act per-component, while the inputs with non-zero size don't. - */ - unsigned output_size; - - /** - * The type of vector that the instruction outputs. Note that the - * saturate modifier is only allowed on outputs with the float type. - */ - - nir_alu_type output_type; - - /** - * The number of components in each input - */ - unsigned input_sizes[4]; - - /** - * The type of vector that each input takes. Note that negate and - * absolute value are only allowed on inputs with int or float type and - * behave differently on the two. - */ - nir_alu_type input_types[4]; - - nir_op_algebraic_property algebraic_properties; -} nir_op_info; - -extern const nir_op_info nir_op_infos[nir_num_opcodes]; - -typedef struct nir_alu_instr { - nir_instr instr; - nir_op op; - nir_alu_dest dest; - nir_alu_src src[]; -} nir_alu_instr; - -void nir_alu_src_copy(nir_alu_src *dest, const nir_alu_src *src, - nir_alu_instr *instr); -void nir_alu_dest_copy(nir_alu_dest *dest, const nir_alu_dest *src, - nir_alu_instr *instr); - -/* is this source channel used?
*/ -static inline bool -nir_alu_instr_channel_used(nir_alu_instr *instr, unsigned src, unsigned channel) -{ - if (nir_op_infos[instr->op].input_sizes[src] > 0) - return channel < nir_op_infos[instr->op].input_sizes[src]; - - return (instr->dest.write_mask >> channel) & 1; -} - -/* - * For instructions whose destinations are SSA, get the number of channels - * used for a source - */ -static inline unsigned -nir_ssa_alu_instr_src_components(const nir_alu_instr *instr, unsigned src) -{ - assert(instr->dest.dest.is_ssa); - - if (nir_op_infos[instr->op].input_sizes[src] > 0) - return nir_op_infos[instr->op].input_sizes[src]; - - return instr->dest.dest.ssa.num_components; -} - -typedef enum { - nir_deref_type_var, - nir_deref_type_array, - nir_deref_type_struct -} nir_deref_type; - -typedef struct nir_deref { - nir_deref_type deref_type; - struct nir_deref *child; - const struct glsl_type *type; -} nir_deref; - -typedef struct { - nir_deref deref; - - nir_variable *var; -} nir_deref_var; - -/* This enum describes how the array is referenced. If the deref is - * direct then the base_offset is used. If the deref is indirect then the - * offset is given by base_offset + indirect. If the deref is a wildcard - * then the deref refers to all of the elements of the array at the same - * time. Wildcard dereferences are only ever allowed in copy_var - * intrinsics and the source and destination derefs must have matching - * wildcards. - */ -typedef enum { - nir_deref_array_type_direct, - nir_deref_array_type_indirect, - nir_deref_array_type_wildcard, -} nir_deref_array_type; - -typedef struct { - nir_deref deref; - - nir_deref_array_type deref_array_type; - unsigned base_offset; - nir_src indirect; -} nir_deref_array; - -typedef struct { - nir_deref deref; - - unsigned index; -} nir_deref_struct; - -NIR_DEFINE_CAST(nir_deref_as_var, nir_deref, nir_deref_var, deref) -NIR_DEFINE_CAST(nir_deref_as_array, nir_deref, nir_deref_array, deref) -NIR_DEFINE_CAST(nir_deref_as_struct, nir_deref, nir_deref_struct, deref) - -/* Returns the last deref in the chain. */ -static inline nir_deref * -nir_deref_tail(nir_deref *deref) -{ - while (deref->child) - deref = deref->child; - return deref; -} - -typedef struct { - nir_instr instr; - - unsigned num_params; - nir_deref_var **params; - nir_deref_var *return_deref; - - struct nir_function *callee; -} nir_call_instr; - -#define INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, \ - num_variables, num_indices, flags) \ - nir_intrinsic_##name, - -#define LAST_INTRINSIC(name) nir_last_intrinsic = nir_intrinsic_##name, - -typedef enum { -#include "nir_intrinsics.h" - nir_num_intrinsics = nir_last_intrinsic + 1 -} nir_intrinsic_op; - -#undef INTRINSIC -#undef LAST_INTRINSIC - -/** Represents an intrinsic - * - * An intrinsic is an instruction type for handling things that are - * more-or-less regular operations but don't just consume and produce SSA - * values like ALU operations do. Intrinsics are not for things that have - * special semantic meaning such as phi nodes and parallel copies. - * Examples of intrinsics include variable load/store operations, system - * value loads, and the like. Even though texturing more-or-less falls - * under this category, texturing is its own instruction type because - * trying to represent texturing with intrinsics would lead to a - * combinatorial explosion of intrinsic opcodes.
- * - * By having a single instruction type for handling a lot of different - * cases, optimization passes can look for intrinsics and, for the most - * part, completely ignore them. Each intrinsic type also has a few - * possible flags that govern whether or not they can be reordered or - * eliminated. That way passes like dead code elimination can still work - * on intrinsics without understanding the meaning of each. - * - * Each intrinsic has some number of constant indices, some number of - * variables, and some number of sources. What these sources, variables, - * and indices mean depends on the intrinsic and is documented with the - * intrinsic declaration in nir_intrinsics.h. Intrinsics and texture - * instructions are the only types of instruction that can operate on - * variables. - */ -typedef struct { - nir_instr instr; - - nir_intrinsic_op intrinsic; - - nir_dest dest; - - /** number of components if this is a vectorized intrinsic - * - * Similarly to ALU operations, some intrinsics are vectorized. - * An intrinsic is vectorized if nir_intrinsic_infos.dest_components == 0. - * For vectorized intrinsics, the num_components field specifies the - * number of destination components and the number of source components - * for all sources with nir_intrinsic_infos.src_components[i] == 0. - */ - uint8_t num_components; - - int const_index[3]; - - nir_deref_var *variables[2]; - - nir_src src[]; -} nir_intrinsic_instr; - -/** - * \name NIR intrinsics semantic flags - * - * information about what the compiler can do with the intrinsics. - * - * \sa nir_intrinsic_info::flags - */ -typedef enum { - /** - * whether the intrinsic can be safely eliminated if its output - * value is not being used. - */ - NIR_INTRINSIC_CAN_ELIMINATE = (1 << 0), - - /** - * Whether the intrinsic can be reordered with respect to any other - * intrinsic, i.e. whether the only reordering dependencies of the - * intrinsic are due to the register reads/writes. - */ - NIR_INTRINSIC_CAN_REORDER = (1 << 1), -} nir_intrinsic_semantic_flag; - -#define NIR_INTRINSIC_MAX_INPUTS 4 - -typedef struct { - const char *name; - - unsigned num_srcs; /** < number of register/SSA inputs */ - - /** number of components of each input register - * - * If this value is 0, the number of components is given by the - * num_components field of nir_intrinsic_instr. - */ - unsigned src_components[NIR_INTRINSIC_MAX_INPUTS]; - - bool has_dest; - - /** number of components of the output register - * - * If this value is 0, the number of components is given by the - * num_components field of nir_intrinsic_instr. - */ - unsigned dest_components; - - /** the number of inputs/outputs that are variables */ - unsigned num_variables; - - /** the number of constant indices used by the intrinsic */ - unsigned num_indices; - - /** semantic flags for calls to this intrinsic */ - nir_intrinsic_semantic_flag flags; -} nir_intrinsic_info; - -extern const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics]; - -/** - * \group texture information - * - * This gives semantic information about textures which is useful to the - * frontend, the backend, and lowering passes, but not the optimizer.
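A sketch of how the intrinsic machinery above is typically driven (an illustrative helper, not from this header; it builds a front-face load, which takes no sources or variables, so only the destination needs setting up):

static nir_ssa_def *
emit_load_front_face(nir_shader *shader, nir_cursor cursor)
{
   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(shader, nir_intrinsic_load_front_face);
   nir_ssa_dest_init(&load->instr, &load->dest, 1, "front_face");
   nir_instr_insert(cursor, &load->instr);
   return &load->dest.ssa;
}

(nir_cursor and the creation and insertion helpers used here are declared later in this header.)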
- */ - -typedef enum { - nir_tex_src_coord, - nir_tex_src_projector, - nir_tex_src_comparitor, /* shadow comparitor */ - nir_tex_src_offset, - nir_tex_src_bias, - nir_tex_src_lod, - nir_tex_src_ms_index, /* MSAA sample index */ - nir_tex_src_ddx, - nir_tex_src_ddy, - nir_tex_src_sampler_offset, /* < dynamically uniform indirect offset */ - nir_num_tex_src_types -} nir_tex_src_type; - -typedef struct { - nir_src src; - nir_tex_src_type src_type; -} nir_tex_src; - -typedef enum { - nir_texop_tex, /**< Regular texture look-up */ - nir_texop_txb, /**< Texture look-up with LOD bias */ - nir_texop_txl, /**< Texture look-up with explicit LOD */ - nir_texop_txd, /**< Texture look-up with partial derivatives */ - nir_texop_txf, /**< Texel fetch with explicit LOD */ - nir_texop_txf_ms, /**< Multisample texture fetch */ - nir_texop_txs, /**< Texture size */ - nir_texop_lod, /**< Texture lod query */ - nir_texop_tg4, /**< Texture gather */ - nir_texop_query_levels, /**< Texture levels query */ - nir_texop_texture_samples, /**< Texture samples query */ - nir_texop_samples_identical, /**< Query whether all samples are definitely - * identical. - */ -} nir_texop; - -typedef struct { - nir_instr instr; - - enum glsl_sampler_dim sampler_dim; - nir_alu_type dest_type; - - nir_texop op; - nir_dest dest; - nir_tex_src *src; - unsigned num_srcs, coord_components; - bool is_array, is_shadow; - - /** - * If is_shadow is true, whether this is the old-style shadow that outputs 4 - * components or the new-style shadow that outputs 1 component. - */ - bool is_new_style_shadow; - - /* constant offset - must be 0 if the offset source is used */ - int const_offset[4]; - - /* gather component selector */ - unsigned component : 2; - - /** The sampler index - * - * If this texture instruction has a nir_tex_src_sampler_offset source, - * then the sampler index is given by sampler_index + sampler_offset. - */ - unsigned sampler_index; - - /** The size of the sampler array or 0 if it's not an array */ - unsigned sampler_array_size; - - nir_deref_var *sampler; /* if this is NULL, use sampler_index instead */ -} nir_tex_instr; - -static inline unsigned -nir_tex_instr_dest_size(nir_tex_instr *instr) -{ - switch (instr->op) { - case nir_texop_txs: { - unsigned ret; - switch (instr->sampler_dim) { - case GLSL_SAMPLER_DIM_1D: - case GLSL_SAMPLER_DIM_BUF: - ret = 1; - break; - case GLSL_SAMPLER_DIM_2D: - case GLSL_SAMPLER_DIM_CUBE: - case GLSL_SAMPLER_DIM_MS: - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_EXTERNAL: - ret = 2; - break; - case GLSL_SAMPLER_DIM_3D: - ret = 3; - break; - default: - unreachable("not reached"); - } - if (instr->is_array) - ret++; - return ret; - } - - case nir_texop_lod: - return 2; - - case nir_texop_texture_samples: - case nir_texop_query_levels: - case nir_texop_samples_identical: - return 1; - - default: - if (instr->is_shadow && instr->is_new_style_shadow) - return 1; - - return 4; - } -} - -/* Returns true if this texture operation queries something about the texture - * rather than actually sampling it.
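For example (an illustrative check, not part of the patch): a txs query on a 2D array texture yields three components, two for the size plus one for the layer count.

static unsigned
txs_dest_size_2d_array(nir_shader *shader)
{
   nir_tex_instr *tex = nir_tex_instr_create(shader, 0);
   tex->op = nir_texop_txs;
   tex->sampler_dim = GLSL_SAMPLER_DIM_2D;
   tex->is_array = true;
   return nir_tex_instr_dest_size(tex); /* 2 + 1 array layer = 3 */
}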
- */ -static inline bool -nir_tex_instr_is_query(nir_tex_instr *instr) -{ - switch (instr->op) { - case nir_texop_txs: - case nir_texop_lod: - case nir_texop_texture_samples: - case nir_texop_query_levels: - return true; - case nir_texop_tex: - case nir_texop_txb: - case nir_texop_txl: - case nir_texop_txd: - case nir_texop_txf: - case nir_texop_txf_ms: - case nir_texop_tg4: - return false; - default: - unreachable("Invalid texture opcode"); - } -} - -static inline unsigned -nir_tex_instr_src_size(nir_tex_instr *instr, unsigned src) -{ - if (instr->src[src].src_type == nir_tex_src_coord) - return instr->coord_components; - - - if (instr->src[src].src_type == nir_tex_src_offset || - instr->src[src].src_type == nir_tex_src_ddx || - instr->src[src].src_type == nir_tex_src_ddy) { - if (instr->is_array) - return instr->coord_components - 1; - else - return instr->coord_components; - } - - return 1; -} - -static inline int -nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type) -{ - for (unsigned i = 0; i < instr->num_srcs; i++) - if (instr->src[i].src_type == type) - return (int) i; - - return -1; -} - -typedef struct { - union { - float f[4]; - int32_t i[4]; - uint32_t u[4]; - }; -} nir_const_value; - -typedef struct { - nir_instr instr; - - nir_const_value value; - - nir_ssa_def def; -} nir_load_const_instr; - -typedef enum { - nir_jump_return, - nir_jump_break, - nir_jump_continue, -} nir_jump_type; - -typedef struct { - nir_instr instr; - nir_jump_type type; -} nir_jump_instr; - -/* creates a new SSA variable in an undefined state */ - -typedef struct { - nir_instr instr; - nir_ssa_def def; -} nir_ssa_undef_instr; - -typedef struct { - struct exec_node node; - - /* The predecessor block corresponding to this source */ - struct nir_block *pred; - - nir_src src; -} nir_phi_src; - -#define nir_foreach_phi_src(phi, entry) \ - foreach_list_typed(nir_phi_src, entry, node, &(phi)->srcs) -#define nir_foreach_phi_src_safe(phi, entry) \ - foreach_list_typed_safe(nir_phi_src, entry, node, &(phi)->srcs) - -typedef struct { - nir_instr instr; - - struct exec_list srcs; /** < list of nir_phi_src */ - - nir_dest dest; -} nir_phi_instr; - -typedef struct { - struct exec_node node; - nir_src src; - nir_dest dest; -} nir_parallel_copy_entry; - -#define nir_foreach_parallel_copy_entry(pcopy, entry) \ - foreach_list_typed(nir_parallel_copy_entry, entry, node, &(pcopy)->entries) - -typedef struct { - nir_instr instr; - - /* A list of nir_parallel_copy_entry's. The sources of all of the - * entries are copied to the corresponding destinations "in parallel". - * In other words, if we have two entries: a -> b and b -> a, the values - * get swapped. - */ - struct exec_list entries; -} nir_parallel_copy_instr; - -NIR_DEFINE_CAST(nir_instr_as_alu, nir_instr, nir_alu_instr, instr) -NIR_DEFINE_CAST(nir_instr_as_call, nir_instr, nir_call_instr, instr) -NIR_DEFINE_CAST(nir_instr_as_jump, nir_instr, nir_jump_instr, instr) -NIR_DEFINE_CAST(nir_instr_as_tex, nir_instr, nir_tex_instr, instr) -NIR_DEFINE_CAST(nir_instr_as_intrinsic, nir_instr, nir_intrinsic_instr, instr) -NIR_DEFINE_CAST(nir_instr_as_load_const, nir_instr, nir_load_const_instr, instr) -NIR_DEFINE_CAST(nir_instr_as_ssa_undef, nir_instr, nir_ssa_undef_instr, instr) -NIR_DEFINE_CAST(nir_instr_as_phi, nir_instr, nir_phi_instr, instr) -NIR_DEFINE_CAST(nir_instr_as_parallel_copy, nir_instr, - nir_parallel_copy_instr, instr) - -/* - * Control flow - * - * Control flow consists of a tree of control flow nodes, which include - * if-statements and loops. 
The leaves of the tree are basic blocks, lists of - * instructions that always run start-to-finish. Each basic block also keeps - * track of its successors (blocks which may run immediately after the current - * block) and predecessors (blocks which could have run immediately before the - * current block). Each function also has a start block and an end block, which - * is always empty and which all return statements point to. Together, all the - * blocks with their predecessors and successors make up the control flow - * graph (CFG) of the function. There are helpers that modify the tree of - * control flow nodes while modifying the CFG appropriately; these should be - * used instead of modifying the tree directly. - */ - -typedef enum { - nir_cf_node_block, - nir_cf_node_if, - nir_cf_node_loop, - nir_cf_node_function -} nir_cf_node_type; - -typedef struct nir_cf_node { - struct exec_node node; - nir_cf_node_type type; - struct nir_cf_node *parent; -} nir_cf_node; - -typedef struct nir_block { - nir_cf_node cf_node; - - struct exec_list instr_list; /** < list of nir_instr */ - - /** generic block index; generated by nir_index_blocks */ - unsigned index; - - /* - * Each block can only have up to 2 successors, so we put them in a simple - * array - no need for anything more complicated. - */ - struct nir_block *successors[2]; - - /* Set of nir_block predecessors in the CFG */ - struct set *predecessors; - - /* - * this node's immediate dominator in the dominance tree - set to NULL for - * the start block. - */ - struct nir_block *imm_dom; - - /* This node's children in the dominance tree */ - unsigned num_dom_children; - struct nir_block **dom_children; - - /* Set of nir_block's on the dominance frontier of this block */ - struct set *dom_frontier; - - /* - * These two indices have the property that dom_{pre,post}_index for each - * child of this block in the dominance tree will always be between - * dom_pre_index and dom_post_index for this block, which makes testing if - * a given block is dominated by another block an O(1) operation.
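A sketch of the O(1) test that the pre/post indices enable (assuming dominance metadata is current; this helper is illustrative and not part of this header):

static bool
block_dominates(const nir_block *a, const nir_block *b)
{
   return a->dom_pre_index <= b->dom_pre_index &&
          a->dom_post_index >= b->dom_post_index;
}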
- */ - unsigned dom_pre_index, dom_post_index; - - /* live in and out for this block; used for liveness analysis */ - BITSET_WORD *live_in; - BITSET_WORD *live_out; -} nir_block; - -static inline nir_instr * -nir_block_first_instr(nir_block *block) -{ - struct exec_node *head = exec_list_get_head(&block->instr_list); - return exec_node_data(nir_instr, head, node); -} - -static inline nir_instr * -nir_block_last_instr(nir_block *block) -{ - struct exec_node *tail = exec_list_get_tail(&block->instr_list); - return exec_node_data(nir_instr, tail, node); -} - -#define nir_foreach_instr(block, instr) \ - foreach_list_typed(nir_instr, instr, node, &(block)->instr_list) -#define nir_foreach_instr_reverse(block, instr) \ - foreach_list_typed_reverse(nir_instr, instr, node, &(block)->instr_list) -#define nir_foreach_instr_safe(block, instr) \ - foreach_list_typed_safe(nir_instr, instr, node, &(block)->instr_list) -#define nir_foreach_instr_reverse_safe(block, instr) \ - foreach_list_typed_reverse_safe(nir_instr, instr, node, &(block)->instr_list) - -typedef struct nir_if { - nir_cf_node cf_node; - nir_src condition; - - struct exec_list then_list; /** < list of nir_cf_node */ - struct exec_list else_list; /** < list of nir_cf_node */ -} nir_if; - -static inline nir_cf_node * -nir_if_first_then_node(nir_if *if_stmt) -{ - struct exec_node *head = exec_list_get_head(&if_stmt->then_list); - return exec_node_data(nir_cf_node, head, node); -} - -static inline nir_cf_node * -nir_if_last_then_node(nir_if *if_stmt) -{ - struct exec_node *tail = exec_list_get_tail(&if_stmt->then_list); - return exec_node_data(nir_cf_node, tail, node); -} - -static inline nir_cf_node * -nir_if_first_else_node(nir_if *if_stmt) -{ - struct exec_node *head = exec_list_get_head(&if_stmt->else_list); - return exec_node_data(nir_cf_node, head, node); -} - -static inline nir_cf_node * -nir_if_last_else_node(nir_if *if_stmt) -{ - struct exec_node *tail = exec_list_get_tail(&if_stmt->else_list); - return exec_node_data(nir_cf_node, tail, node); -} - -typedef struct { - nir_cf_node cf_node; - - struct exec_list body; /** < list of nir_cf_node */ -} nir_loop; - -static inline nir_cf_node * -nir_loop_first_cf_node(nir_loop *loop) -{ - return exec_node_data(nir_cf_node, exec_list_get_head(&loop->body), node); -} - -static inline nir_cf_node * -nir_loop_last_cf_node(nir_loop *loop) -{ - return exec_node_data(nir_cf_node, exec_list_get_tail(&loop->body), node); -} - -/** - * Various bits of metadata that may be created or required by - * optimization and analysis passes - */ -typedef enum { - nir_metadata_none = 0x0, - nir_metadata_block_index = 0x1, - nir_metadata_dominance = 0x2, - nir_metadata_live_ssa_defs = 0x4, - nir_metadata_not_properly_reset = 0x8, -} nir_metadata; - -typedef struct { - nir_cf_node cf_node; - - /** pointer to the function of which this is an implementation */ - struct nir_function *function; - - struct exec_list body; /** < list of nir_cf_node */ - - nir_block *end_block; - - /** list for all local variables in the function */ - struct exec_list locals; - - /** array of variables used as parameters */ - unsigned num_params; - nir_variable **params; - - /** variable used to hold the result of the function */ - nir_variable *return_var; - - /** list of local registers in the function */ - struct exec_list registers; - - /** next available local register index */ - unsigned reg_alloc; - - /** next available SSA value index */ - unsigned ssa_alloc; - - /* total number of basic blocks, only valid when
block_index_dirty = false */ - unsigned num_blocks; - - nir_metadata valid_metadata; -} nir_function_impl; - -static inline nir_block * -nir_start_block(nir_function_impl *impl) -{ - return (nir_block *) exec_list_get_head(&impl->body); -} - -static inline nir_cf_node * -nir_cf_node_next(nir_cf_node *node) -{ - struct exec_node *next = exec_node_get_next(&node->node); - if (exec_node_is_tail_sentinel(next)) - return NULL; - else - return exec_node_data(nir_cf_node, next, node); -} - -static inline nir_cf_node * -nir_cf_node_prev(nir_cf_node *node) -{ - struct exec_node *prev = exec_node_get_prev(&node->node); - if (exec_node_is_head_sentinel(prev)) - return NULL; - else - return exec_node_data(nir_cf_node, prev, node); -} - -static inline bool -nir_cf_node_is_first(const nir_cf_node *node) -{ - return exec_node_is_head_sentinel(node->node.prev); -} - -static inline bool -nir_cf_node_is_last(const nir_cf_node *node) -{ - return exec_node_is_tail_sentinel(node->node.next); -} - -NIR_DEFINE_CAST(nir_cf_node_as_block, nir_cf_node, nir_block, cf_node) -NIR_DEFINE_CAST(nir_cf_node_as_if, nir_cf_node, nir_if, cf_node) -NIR_DEFINE_CAST(nir_cf_node_as_loop, nir_cf_node, nir_loop, cf_node) -NIR_DEFINE_CAST(nir_cf_node_as_function, nir_cf_node, nir_function_impl, cf_node) - -typedef enum { - nir_parameter_in, - nir_parameter_out, - nir_parameter_inout, -} nir_parameter_type; - -typedef struct { - nir_parameter_type param_type; - const struct glsl_type *type; -} nir_parameter; - -typedef struct nir_function { - struct exec_node node; - - const char *name; - struct nir_shader *shader; - - unsigned num_params; - nir_parameter *params; - const struct glsl_type *return_type; - - /** The implementation of this function. - * - * If the function is only declared and not implemented, this is NULL. - */ - nir_function_impl *impl; -} nir_function; - -typedef struct nir_shader_compiler_options { - bool lower_fdiv; - bool lower_ffma; - bool lower_flrp; - bool lower_fpow; - bool lower_fsat; - bool lower_fsqrt; - bool lower_fmod; - bool lower_bitfield_extract; - bool lower_bitfield_insert; - bool lower_uadd_carry; - bool lower_usub_borrow; - /** lowers fneg and ineg to fsub and isub. */ - bool lower_negate; - /** lowers fsub and isub to fadd+fneg and iadd+ineg. */ - bool lower_sub; - - /* lower {slt,sge,seq,sne} to {flt,fge,feq,fne} + b2f: */ - bool lower_scmp; - - /* Does the native fdot instruction replicate its result for four - * components? If so, then opt_algebraic_late will turn all fdotN - * instructions into fdot_replicatedN instructions. - */ - bool fdot_replicates; - - /** lowers ffract to fsub+ffloor: */ - bool lower_ffract; - - /** - * Does the driver support real 32-bit integers? (Otherwise, integers - * are simulated by floats.) 
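Drivers are expected to keep one static copy of the option block (see the note on nir_shader::options below); a minimal illustrative sketch, with the particular flags chosen arbitrarily:

static const nir_shader_compiler_options demo_options = {
   .lower_ffma = true,
   .lower_fpow = true,
   .native_integers = true,
};

A driver would then pass &demo_options to nir_shader_create(), declared further down.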
- */ - bool native_integers; -} nir_shader_compiler_options; - -typedef struct nir_shader_info { - const char *name; - - /* Descriptive name provided by the client; may be NULL */ - const char *label; - - /* Number of textures used by this shader */ - unsigned num_textures; - /* Number of uniform buffers used by this shader */ - unsigned num_ubos; - /* Number of atomic buffers used by this shader */ - unsigned num_abos; - /* Number of shader storage buffers used by this shader */ - unsigned num_ssbos; - /* Number of images used by this shader */ - unsigned num_images; - - /* Which inputs are actually read */ - uint64_t inputs_read; - /* Which outputs are actually written */ - uint64_t outputs_written; - /* Which system values are actually read */ - uint64_t system_values_read; - - /* Which patch inputs are actually read */ - uint32_t patch_inputs_read; - /* Which patch outputs are actually written */ - uint32_t patch_outputs_written; - - /* Whether or not this shader ever uses textureGather() */ - bool uses_texture_gather; - - /* Whether or not this shader uses the gl_ClipDistance output */ - bool uses_clip_distance_out; - - /* Whether or not separate shader objects were used */ - bool separate_shader; - - /** Was this shader linked with any transform feedback varyings? */ - bool has_transform_feedback_varyings; - - union { - struct { - /** The number of vertices received per input primitive */ - unsigned vertices_in; - - /** The output primitive type (GL enum value) */ - unsigned output_primitive; - - /** The maximum number of vertices the geometry shader might write. */ - unsigned vertices_out; - - /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ - unsigned invocations; - - /** Whether or not this shader uses EndPrimitive */ - bool uses_end_primitive; - - /** Whether or not this shader uses non-zero streams */ - bool uses_streams; - } gs; - - struct { - bool uses_discard; - - /** - * Whether early fragment tests are enabled as defined by - * ARB_shader_image_load_store. - */ - bool early_fragment_tests; - - /** gl_FragDepth layout for ARB_conservative_depth. */ - enum gl_frag_depth_layout depth_layout; - } fs; - - struct { - unsigned local_size[3]; - } cs; - - struct { - /** The number of vertices in the TCS output patch. */ - unsigned vertices_out; - } tcs; - }; -} nir_shader_info; - -typedef struct nir_shader { - /** list of uniforms (nir_variable) */ - struct exec_list uniforms; - - /** list of inputs (nir_variable) */ - struct exec_list inputs; - - /** list of outputs (nir_variable) */ - struct exec_list outputs; - - /** Set of driver-specific options for the shader. - * - * The memory for the options is expected to be kept in a single static - * copy by the driver. - */ - const struct nir_shader_compiler_options *options; - - /** Various bits of compile-time information about a given shader */ - struct nir_shader_info info; - - /** list of global variables in the shader (nir_variable) */ - struct exec_list globals; - - /** list of system value variables in the shader (nir_variable) */ - struct exec_list system_values; - - struct exec_list functions; /** < list of nir_function */ - - /** list of global registers in the shader */ - struct exec_list registers; - - /** next available global register index */ - unsigned reg_alloc; - - /** - * the highest index a load_input_*, load_uniform_*, etc. intrinsic can - * access plus one - */ - unsigned num_inputs, num_uniforms, num_outputs; - - /** The shader stage, such as MESA_SHADER_VERTEX.
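As a usage sketch of the info block above (an illustrative helper; MESA_SHADER_FRAGMENT comes from shader_enums.h):

static bool
shader_uses_discard(const nir_shader *shader)
{
   return shader->stage == MESA_SHADER_FRAGMENT &&
          shader->info.fs.uses_discard;
}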
*/ - gl_shader_stage stage; -} nir_shader; - -#define nir_foreach_function(shader, func) \ - foreach_list_typed(nir_function, func, node, &(shader)->functions) - -nir_shader *nir_shader_create(void *mem_ctx, - gl_shader_stage stage, - const nir_shader_compiler_options *options); - -/** creates a register, including assigning it an index and adding it to the list */ -nir_register *nir_global_reg_create(nir_shader *shader); - -nir_register *nir_local_reg_create(nir_function_impl *impl); - -void nir_reg_remove(nir_register *reg); - -/** Adds a variable to the appropriate list in nir_shader */ -void nir_shader_add_variable(nir_shader *shader, nir_variable *var); - -static inline void -nir_function_impl_add_variable(nir_function_impl *impl, nir_variable *var) -{ - assert(var->data.mode == nir_var_local); - exec_list_push_tail(&impl->locals, &var->node); -} - -/** creates a variable, sets a few defaults, and adds it to the list */ -nir_variable *nir_variable_create(nir_shader *shader, - nir_variable_mode mode, - const struct glsl_type *type, - const char *name); -/** creates a local variable and adds it to the list */ -nir_variable *nir_local_variable_create(nir_function_impl *impl, - const struct glsl_type *type, - const char *name); - -/** creates a function and adds it to the shader's list of functions */ -nir_function *nir_function_create(nir_shader *shader, const char *name); - -nir_function_impl *nir_function_impl_create(nir_function *func); - -nir_block *nir_block_create(nir_shader *shader); -nir_if *nir_if_create(nir_shader *shader); -nir_loop *nir_loop_create(nir_shader *shader); - -nir_function_impl *nir_cf_node_get_function(nir_cf_node *node); - -/** requests that the given pieces of metadata be generated */ -void nir_metadata_require(nir_function_impl *impl, nir_metadata required); -/** dirties all but the preserved metadata */ -void nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved); - -/** creates an instruction with default swizzle/writemask/etc. with NULL registers */ -nir_alu_instr *nir_alu_instr_create(nir_shader *shader, nir_op op); - -nir_jump_instr *nir_jump_instr_create(nir_shader *shader, nir_jump_type type); - -nir_load_const_instr *nir_load_const_instr_create(nir_shader *shader, - unsigned num_components); - -nir_intrinsic_instr *nir_intrinsic_instr_create(nir_shader *shader, - nir_intrinsic_op op); - -nir_call_instr *nir_call_instr_create(nir_shader *shader, - nir_function *callee); - -nir_tex_instr *nir_tex_instr_create(nir_shader *shader, unsigned num_srcs); - -nir_phi_instr *nir_phi_instr_create(nir_shader *shader); - -nir_parallel_copy_instr *nir_parallel_copy_instr_create(nir_shader *shader); - -nir_ssa_undef_instr *nir_ssa_undef_instr_create(nir_shader *shader, - unsigned num_components); - -nir_deref_var *nir_deref_var_create(void *mem_ctx, nir_variable *var); -nir_deref_array *nir_deref_array_create(void *mem_ctx); -nir_deref_struct *nir_deref_struct_create(void *mem_ctx, unsigned field_index); - -nir_deref *nir_copy_deref(void *mem_ctx, nir_deref *deref); - -nir_load_const_instr * -nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref); - -/** - * NIR Cursors and Instruction Insertion API - * @{ - * - * A tiny struct representing a point to insert/extract instructions or - * control flow nodes. Helps reduce the combinatorial explosion of possible - * points to insert/extract.
- * - * \sa nir_control_flow.h - */ -typedef enum { - nir_cursor_before_block, - nir_cursor_after_block, - nir_cursor_before_instr, - nir_cursor_after_instr, -} nir_cursor_option; - -typedef struct { - nir_cursor_option option; - union { - nir_block *block; - nir_instr *instr; - }; -} nir_cursor; - -static inline nir_cursor -nir_before_block(nir_block *block) -{ - nir_cursor cursor; - cursor.option = nir_cursor_before_block; - cursor.block = block; - return cursor; -} - -static inline nir_cursor -nir_after_block(nir_block *block) -{ - nir_cursor cursor; - cursor.option = nir_cursor_after_block; - cursor.block = block; - return cursor; -} - -static inline nir_cursor -nir_before_instr(nir_instr *instr) -{ - nir_cursor cursor; - cursor.option = nir_cursor_before_instr; - cursor.instr = instr; - return cursor; -} - -static inline nir_cursor -nir_after_instr(nir_instr *instr) -{ - nir_cursor cursor; - cursor.option = nir_cursor_after_instr; - cursor.instr = instr; - return cursor; -} - -static inline nir_cursor -nir_after_block_before_jump(nir_block *block) -{ - nir_instr *last_instr = nir_block_last_instr(block); - if (last_instr && last_instr->type == nir_instr_type_jump) { - return nir_before_instr(last_instr); - } else { - return nir_after_block(block); - } -} - -static inline nir_cursor -nir_before_cf_node(nir_cf_node *node) -{ - if (node->type == nir_cf_node_block) - return nir_before_block(nir_cf_node_as_block(node)); - - return nir_after_block(nir_cf_node_as_block(nir_cf_node_prev(node))); -} - -static inline nir_cursor -nir_after_cf_node(nir_cf_node *node) -{ - if (node->type == nir_cf_node_block) - return nir_after_block(nir_cf_node_as_block(node)); - - return nir_before_block(nir_cf_node_as_block(nir_cf_node_next(node))); -} - -static inline nir_cursor -nir_before_cf_list(struct exec_list *cf_list) -{ - nir_cf_node *first_node = exec_node_data(nir_cf_node, - exec_list_get_head(cf_list), node); - return nir_before_cf_node(first_node); -} - -static inline nir_cursor -nir_after_cf_list(struct exec_list *cf_list) -{ - nir_cf_node *last_node = exec_node_data(nir_cf_node, - exec_list_get_tail(cf_list), node); - return nir_after_cf_node(last_node); -} - -/** - * Insert a NIR instruction at the given cursor. - * - * Note: This does not update the cursor. 
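- *
- * A purely illustrative sketch of a caller (hypothetical; "shader",
- * "block" and "some_def" are assumed, not part of this header):
- *
- *    nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
- *    mov->src[0].src = nir_src_for_ssa(some_def);
- *    nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
- *    mov->dest.write_mask = 1;
- *    nir_instr_insert(nir_after_block_before_jump(block), &mov->instr);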
- */ -void nir_instr_insert(nir_cursor cursor, nir_instr *instr); - -static inline void -nir_instr_insert_before(nir_instr *instr, nir_instr *before) -{ - nir_instr_insert(nir_before_instr(instr), before); -} - -static inline void -nir_instr_insert_after(nir_instr *instr, nir_instr *after) -{ - nir_instr_insert(nir_after_instr(instr), after); -} - -static inline void -nir_instr_insert_before_block(nir_block *block, nir_instr *before) -{ - nir_instr_insert(nir_before_block(block), before); -} - -static inline void -nir_instr_insert_after_block(nir_block *block, nir_instr *after) -{ - nir_instr_insert(nir_after_block(block), after); -} - -static inline void -nir_instr_insert_before_cf(nir_cf_node *node, nir_instr *before) -{ - nir_instr_insert(nir_before_cf_node(node), before); -} - -static inline void -nir_instr_insert_after_cf(nir_cf_node *node, nir_instr *after) -{ - nir_instr_insert(nir_after_cf_node(node), after); -} - -static inline void -nir_instr_insert_before_cf_list(struct exec_list *list, nir_instr *before) -{ - nir_instr_insert(nir_before_cf_list(list), before); -} - -static inline void -nir_instr_insert_after_cf_list(struct exec_list *list, nir_instr *after) -{ - nir_instr_insert(nir_after_cf_list(list), after); -} - -void nir_instr_remove(nir_instr *instr); - -/** @} */ - -typedef bool (*nir_foreach_ssa_def_cb)(nir_ssa_def *def, void *state); -typedef bool (*nir_foreach_dest_cb)(nir_dest *dest, void *state); -typedef bool (*nir_foreach_src_cb)(nir_src *src, void *state); -bool nir_foreach_ssa_def(nir_instr *instr, nir_foreach_ssa_def_cb cb, - void *state); -bool nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state); -bool nir_foreach_src(nir_instr *instr, nir_foreach_src_cb cb, void *state); - -nir_const_value *nir_src_as_const_value(nir_src src); -bool nir_src_is_dynamically_uniform(nir_src src); -bool nir_srcs_equal(nir_src src1, nir_src src2); -void nir_instr_rewrite_src(nir_instr *instr, nir_src *src, nir_src new_src); -void nir_instr_move_src(nir_instr *dest_instr, nir_src *dest, nir_src *src); -void nir_if_rewrite_condition(nir_if *if_stmt, nir_src new_src); -void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, - nir_dest new_dest); - -void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest, - unsigned num_components, const char *name); -void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def, - unsigned num_components, const char *name); -void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src); -void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src, - nir_instr *after_me); - -/* visits basic blocks in source-code order */ -typedef bool (*nir_foreach_block_cb)(nir_block *block, void *state); -bool nir_foreach_block(nir_function_impl *impl, nir_foreach_block_cb cb, - void *state); -bool nir_foreach_block_reverse(nir_function_impl *impl, nir_foreach_block_cb cb, - void *state); -bool nir_foreach_block_in_cf_node(nir_cf_node *node, nir_foreach_block_cb cb, - void *state); - -/* If the following CF node is an if, this function returns that if. - * Otherwise, it returns NULL. 
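- *
- * E.g. (illustrative) a pass like nir_opt_peephole_select can call this
- * on each block to spot a "block; if (...) { ... } else { ... }" pattern
- * without walking the CF list by hand.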
- */ -nir_if *nir_block_get_following_if(nir_block *block); - -nir_loop *nir_block_get_following_loop(nir_block *block); - -void nir_index_local_regs(nir_function_impl *impl); -void nir_index_global_regs(nir_shader *shader); -void nir_index_ssa_defs(nir_function_impl *impl); -unsigned nir_index_instrs(nir_function_impl *impl); - -void nir_index_blocks(nir_function_impl *impl); - -void nir_print_shader(nir_shader *shader, FILE *fp); -void nir_print_instr(const nir_instr *instr, FILE *fp); - -nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s); - -#ifdef DEBUG -void nir_validate_shader(nir_shader *shader); -void nir_metadata_set_validation_flag(nir_shader *shader); -void nir_metadata_check_validation_flag(nir_shader *shader); - -#include "util/debug.h" -static inline bool -should_clone_nir(void) -{ - static int should_clone = -1; - if (should_clone < 0) - should_clone = env_var_as_boolean("NIR_TEST_CLONE", false); - - return should_clone; -} -#else -static inline void nir_validate_shader(nir_shader *shader) { (void) shader; } -static inline void nir_metadata_set_validation_flag(nir_shader *shader) { (void) shader; } -static inline void nir_metadata_check_validation_flag(nir_shader *shader) { (void) shader; } -static inline bool should_clone_nir(void) { return false; } -#endif /* DEBUG */ - -#define _PASS(nir, do_pass) do { \ - do_pass \ - nir_validate_shader(nir); \ - if (should_clone_nir()) { \ - nir_shader *clone = nir_shader_clone(ralloc_parent(nir), nir); \ - ralloc_free(nir); \ - nir = clone; \ - } \ -} while (0) - -#define NIR_PASS(progress, nir, pass, ...) _PASS(nir, \ - nir_metadata_set_validation_flag(nir); \ - if (pass(nir, ##__VA_ARGS__)) { \ - progress = true; \ - nir_metadata_check_validation_flag(nir); \ - } \ -) - -#define NIR_PASS_V(nir, pass, ...) 
_PASS(nir, \ - pass(nir, ##__VA_ARGS__); \ -) - -void nir_calc_dominance_impl(nir_function_impl *impl); -void nir_calc_dominance(nir_shader *shader); - -nir_block *nir_dominance_lca(nir_block *b1, nir_block *b2); -bool nir_block_dominates(nir_block *parent, nir_block *child); - -void nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp); -void nir_dump_dom_tree(nir_shader *shader, FILE *fp); - -void nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp); -void nir_dump_dom_frontier(nir_shader *shader, FILE *fp); - -void nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp); -void nir_dump_cfg(nir_shader *shader, FILE *fp); - -int nir_gs_count_vertices(const nir_shader *shader); - -bool nir_split_var_copies(nir_shader *shader); - -void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx); -void nir_lower_var_copies(nir_shader *shader); - -bool nir_lower_global_vars_to_local(nir_shader *shader); - -bool nir_lower_locals_to_regs(nir_shader *shader); - -void nir_lower_outputs_to_temporaries(nir_shader *shader); - -void nir_assign_var_locations(struct exec_list *var_list, - unsigned *size, - int (*type_size)(const struct glsl_type *)); - -void nir_lower_io(nir_shader *shader, - nir_variable_mode mode, - int (*type_size)(const struct glsl_type *)); -nir_src *nir_get_io_offset_src(nir_intrinsic_instr *instr); -nir_src *nir_get_io_vertex_index_src(nir_intrinsic_instr *instr); - -void nir_lower_vars_to_ssa(nir_shader *shader); - -bool nir_remove_dead_variables(nir_shader *shader); - -void nir_move_vec_src_uses_to_dest(nir_shader *shader); -bool nir_lower_vec_to_movs(nir_shader *shader); -void nir_lower_alu_to_scalar(nir_shader *shader); -void nir_lower_load_const_to_scalar(nir_shader *shader); - -void nir_lower_phis_to_scalar(nir_shader *shader); - -void nir_lower_samplers(nir_shader *shader, - const struct gl_shader_program *shader_program); - -bool nir_lower_system_values(nir_shader *shader); - -typedef struct nir_lower_tex_options { - /** - * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which - * sampler types a texture projector is lowered. - */ - unsigned lower_txp; - - /** - * If true, lower rect textures to 2D, using txs to fetch the - * texture dimensions and dividing the texture coords by the - * texture dims to normalize. - */ - bool lower_rect; - - /** - * To emulate certain texture wrap modes, this can be used - * to saturate the specified tex coord to [0.0, 1.0]. The - * bits are according to sampler #, ie. if, for example: - * - * (conf->saturate_s & (1 << n)) - * - * is true, then the s coord for sampler n is saturated. - * - * Note that clamping must happen *after* projector lowering - * so any projected texture sample instruction with a clamped - * coordinate gets automatically lowered, regardless of the - * 'lower_txp' setting. - */ - unsigned saturate_s; - unsigned saturate_t; - unsigned saturate_r; - - /* Bitmask of samplers that need swizzling. - * - * If (swizzle_result & (1 << sampler_index)), then the swizzle in - * swizzles[sampler_index] is applied to the result of the texturing - * operation. - */ - unsigned swizzle_result; - - /* A swizzle for each sampler. Values 0-3 represent x, y, z, or w swizzles - * while 4 and 5 represent 0 and 1 respectively. 
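- *
- * For example (purely illustrative; "n" is assumed), a driver emulating
- * a legacy luminance format could make sampler n return (r, r, r, 1.0):
- *
- *    options.swizzle_result |= (1 << n);
- *    options.swizzles[n][0] = 0;
- *    options.swizzles[n][1] = 0;
- *    options.swizzles[n][2] = 0;
- *    options.swizzles[n][3] = 5;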
- */ - uint8_t swizzles[32][4]; -} nir_lower_tex_options; - -bool nir_lower_tex(nir_shader *shader, - const nir_lower_tex_options *options); - -void nir_lower_idiv(nir_shader *shader); - -void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables); -void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); - -void nir_lower_two_sided_color(nir_shader *shader); - -void nir_lower_atomics(nir_shader *shader, - const struct gl_shader_program *shader_program); -void nir_lower_to_source_mods(nir_shader *shader); - -bool nir_lower_gs_intrinsics(nir_shader *shader); - -bool nir_normalize_cubemap_coords(nir_shader *shader); - -void nir_live_ssa_defs_impl(nir_function_impl *impl); -bool nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b); - -void nir_convert_to_ssa_impl(nir_function_impl *impl); -void nir_convert_to_ssa(nir_shader *shader); - -/* If phi_webs_only is true, only convert SSA values involved in phi nodes to - * registers. If false, convert all values (even those not involved in a phi - * node) to registers. - */ -void nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only); - -bool nir_opt_algebraic(nir_shader *shader); -bool nir_opt_algebraic_late(nir_shader *shader); -bool nir_opt_constant_folding(nir_shader *shader); - -bool nir_opt_global_to_local(nir_shader *shader); - -bool nir_copy_prop(nir_shader *shader); - -bool nir_opt_cse(nir_shader *shader); - -bool nir_opt_dce(nir_shader *shader); - -bool nir_opt_dead_cf(nir_shader *shader); - -void nir_opt_gcm(nir_shader *shader); - -bool nir_opt_peephole_select(nir_shader *shader); - -bool nir_opt_remove_phis(nir_shader *shader); - -bool nir_opt_undef(nir_shader *shader); - -void nir_sweep(nir_shader *shader); - -nir_intrinsic_op nir_intrinsic_from_system_value(gl_system_value val); -gl_system_value nir_system_value_from_intrinsic(nir_intrinsic_op intrin); - -#ifdef __cplusplus -} /* extern "C" */ -#endif diff --git a/src/glsl/nir/nir_algebraic.py b/src/glsl/nir/nir_algebraic.py deleted file mode 100644 index a30652f2afd..00000000000 --- a/src/glsl/nir/nir_algebraic.py +++ /dev/null @@ -1,305 +0,0 @@ -#! /usr/bin/env python -# -# Copyright (C) 2014 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. 
-# -# Authors: -# Jason Ekstrand (jason@jlekstrand.net) - -import itertools -import struct -import sys -import mako.template -import re - -# Represents a set of variables, each with a unique id -class VarSet(object): - def __init__(self): - self.names = {} - self.ids = itertools.count() - self.immutable = False; - - def __getitem__(self, name): - if name not in self.names: - assert not self.immutable, "Unknown replacement variable: " + name - self.names[name] = self.ids.next() - - return self.names[name] - - def lock(self): - self.immutable = True - -class Value(object): - @staticmethod - def create(val, name_base, varset): - if isinstance(val, tuple): - return Expression(val, name_base, varset) - elif isinstance(val, Expression): - return val - elif isinstance(val, (str, unicode)): - return Variable(val, name_base, varset) - elif isinstance(val, (bool, int, long, float)): - return Constant(val, name_base) - - __template = mako.template.Template(""" -static const ${val.c_type} ${val.name} = { - { ${val.type_enum} }, -% if isinstance(val, Constant): - { ${hex(val)} /* ${val.value} */ }, -% elif isinstance(val, Variable): - ${val.index}, /* ${val.var_name} */ - ${'true' if val.is_constant else 'false'}, - nir_type_${ val.required_type or 'invalid' }, -% elif isinstance(val, Expression): - nir_op_${val.opcode}, - { ${', '.join(src.c_ptr for src in val.sources)} }, -% endif -};""") - - def __init__(self, name, type_str): - self.name = name - self.type_str = type_str - - @property - def type_enum(self): - return "nir_search_value_" + self.type_str - - @property - def c_type(self): - return "nir_search_" + self.type_str - - @property - def c_ptr(self): - return "&{0}.value".format(self.name) - - def render(self): - return self.__template.render(val=self, - Constant=Constant, - Variable=Variable, - Expression=Expression) - -class Constant(Value): - def __init__(self, val, name): - Value.__init__(self, name, "constant") - self.value = val - - def __hex__(self): - # Even if it's an integer, we still need to unpack as an unsigned - # int. This is because, without C99, we can only assign to the first - # element of a union in an initializer. 
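- # For instance (illustrative, assuming the Python 2 semantics used
- # throughout this file): a Constant holding -1 renders as 0xffffffff
- # and one holding 1.0 renders as 0x3f800000, the IEEE-754 bit pattern
- # of that float.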
- if isinstance(self.value, (bool)):
- return 'NIR_TRUE' if self.value else 'NIR_FALSE'
- if isinstance(self.value, (int, long)):
- return hex(struct.unpack('I', struct.pack('i', self.value))[0])
- elif isinstance(self.value, float):
- return hex(struct.unpack('I', struct.pack('f', self.value))[0])
- else:
- assert False
-
-_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")
-
-class Variable(Value):
- def __init__(self, val, name, varset):
- Value.__init__(self, name, "variable")
-
- m = _var_name_re.match(val)
- assert m and m.group('name') is not None
-
- self.var_name = m.group('name')
- self.is_constant = m.group('const') is not None
- self.required_type = m.group('type')
-
- if self.required_type is not None:
- assert self.required_type in ('float', 'bool', 'int', 'unsigned')
-
- self.index = varset[self.var_name]
-
-class Expression(Value):
- def __init__(self, expr, name_base, varset):
- Value.__init__(self, name_base, "expression")
- assert isinstance(expr, tuple)
-
- self.opcode = expr[0]
- self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
- for (i, src) in enumerate(expr[1:]) ]
-
- def render(self):
- srcs = "\n".join(src.render() for src in self.sources)
- return srcs + super(Expression, self).render()
-
-_optimization_ids = itertools.count()
-
-condition_list = ['true']
-
-class SearchAndReplace(object):
- def __init__(self, transform):
- self.id = _optimization_ids.next()
-
- search = transform[0]
- replace = transform[1]
- if len(transform) > 2:
- self.condition = transform[2]
- else:
- self.condition = 'true'
-
- if self.condition not in condition_list:
- condition_list.append(self.condition)
- self.condition_index = condition_list.index(self.condition)
-
- varset = VarSet()
- if isinstance(search, Expression):
- self.search = search
- else:
- self.search = Expression(search, "search{0}".format(self.id), varset)
-
- varset.lock()
-
- if isinstance(replace, Value):
- self.replace = replace
- else:
- self.replace = Value.create(replace, "replace{0}".format(self.id), varset)
-
-_algebraic_pass_template = mako.template.Template("""
-#include "nir.h"
-#include "nir_search.h"
-
-#ifndef NIR_OPT_ALGEBRAIC_STRUCT_DEFS
-#define NIR_OPT_ALGEBRAIC_STRUCT_DEFS
-
-struct transform {
- const nir_search_expression *search;
- const nir_search_value *replace;
- unsigned condition_offset;
-};
-
-struct opt_state {
- void *mem_ctx;
- bool progress;
- const bool *condition_flags;
-};
-
-#endif
-
-% for (opcode, xform_list) in xform_dict.iteritems():
-% for xform in xform_list:
- ${xform.search.render()}
- ${xform.replace.render()}
-% endfor
-
-static const struct transform ${pass_name}_${opcode}_xforms[] = {
-% for xform in xform_list:
- { &${xform.search.name}, ${xform.replace.c_ptr}, ${xform.condition_index} },
-% endfor
-};
-% endfor
-
-static bool
-${pass_name}_block(nir_block *block, void *void_state)
-{
- struct opt_state *state = void_state;
-
- nir_foreach_instr_safe(block, instr) {
- if (instr->type != nir_instr_type_alu)
- continue;
-
- nir_alu_instr *alu = nir_instr_as_alu(instr);
- if (!alu->dest.dest.is_ssa)
- continue;
-
- switch (alu->op) {
- % for opcode in xform_dict.keys():
- case nir_op_${opcode}:
- for (unsigned i = 0; i < ARRAY_SIZE(${pass_name}_${opcode}_xforms); i++) {
- const struct transform *xform = &${pass_name}_${opcode}_xforms[i];
- if (state->condition_flags[xform->condition_offset] &&
- nir_replace_instr(alu, xform->search, xform->replace,
- state->mem_ctx)) {
- state->progress = true;
- break;
- }
- }
- break;
- % endfor
- default:
- break;
- }
- }
-
- return true;
-}
-
-static bool
-${pass_name}_impl(nir_function_impl *impl, const bool *condition_flags)
-{
- struct opt_state state;
-
- state.mem_ctx = ralloc_parent(impl);
- state.progress = false;
- state.condition_flags = condition_flags;
-
- nir_foreach_block(impl, ${pass_name}_block, &state);
-
- if (state.progress)
- nir_metadata_preserve(impl, nir_metadata_block_index |
- nir_metadata_dominance);
-
- return state.progress;
-}
-
-
-bool
-${pass_name}(nir_shader *shader)
-{
- bool progress = false;
- bool condition_flags[${len(condition_list)}];
- const nir_shader_compiler_options *options = shader->options;
-
- % for index, condition in enumerate(condition_list):
- condition_flags[${index}] = ${condition};
- % endfor
-
- nir_foreach_function(shader, function) {
- if (function->impl)
- progress |= ${pass_name}_impl(function->impl, condition_flags);
- }
-
- return progress;
-}
-""")
-
-class AlgebraicPass(object):
- def __init__(self, pass_name, transforms):
- self.xform_dict = {}
- self.pass_name = pass_name
-
- for xform in transforms:
- if not isinstance(xform, SearchAndReplace):
- xform = SearchAndReplace(xform)
-
- if xform.search.opcode not in self.xform_dict:
- self.xform_dict[xform.search.opcode] = []
-
- self.xform_dict[xform.search.opcode].append(xform)
-
- def render(self):
- return _algebraic_pass_template.render(pass_name=self.pass_name,
- xform_dict=self.xform_dict,
- condition_list=condition_list)
diff --git a/src/glsl/nir/nir_array.h b/src/glsl/nir/nir_array.h
deleted file mode 100644
index 1db4e8cea36..00000000000
--- a/src/glsl/nir/nir_array.h
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- * Jason Ekstrand (jason@jlekstrand.net)
- *
- */
-
-#pragma once
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef struct {
- void *mem_ctx;
- size_t size;
- size_t alloc;
- void *data;
-} nir_array;
-
-static inline void
-nir_array_init(nir_array *arr, void *mem_ctx)
-{
- arr->mem_ctx = mem_ctx;
- arr->size = 0;
- arr->alloc = 0;
- arr->data = NULL;
-}
-
-static inline void
-nir_array_fini(nir_array *arr)
-{
- if (arr->mem_ctx)
- ralloc_free(arr->data);
- else
- free(arr->data);
-}
-
-#define NIR_ARRAY_INITIAL_SIZE 64
-
-/* Increments the size of the array by the given amount and returns a
- * pointer to the beginning of the newly added space.
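- *
- * A minimal usage sketch (the int payload is hypothetical, not part of
- * this header), using the helpers defined below:
- *
- *    nir_array arr;
- *    nir_array_init(&arr, NULL);
- *    nir_array_add(&arr, int, 42);
- *    nir_array_foreach(&arr, int, elem)
- *       printf("%d\n", *elem);
- *    nir_array_fini(&arr);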
- */ -static inline void * -nir_array_grow(nir_array *arr, size_t additional) -{ - size_t new_size = arr->size + additional; - if (new_size > arr->alloc) { - if (arr->alloc == 0) - arr->alloc = NIR_ARRAY_INITIAL_SIZE; - - while (new_size > arr->alloc) - arr->alloc *= 2; - - if (arr->mem_ctx) - arr->data = reralloc_size(arr->mem_ctx, arr->data, arr->alloc); - else - arr->data = realloc(arr->data, arr->alloc); - } - - void *ptr = (void *)((char *)arr->data + arr->size); - arr->size = new_size; - - return ptr; -} - -#define nir_array_add(arr, type, elem) \ - *(type *)nir_array_grow(arr, sizeof(type)) = (elem) - -#define nir_array_foreach(arr, type, elem) \ - for (type *elem = (type *)(arr)->data; \ - elem < (type *)((char *)(arr)->data + (arr)->size); elem++) - -#ifdef __cplusplus -} /* extern "C" */ -#endif diff --git a/src/glsl/nir/nir_builder.h b/src/glsl/nir/nir_builder.h deleted file mode 100644 index 88ba3a1c269..00000000000 --- a/src/glsl/nir/nir_builder.h +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Copyright © 2014-2015 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef NIR_BUILDER_H -#define NIR_BUILDER_H - -#include "nir_control_flow.h" - -struct exec_list; - -typedef struct nir_builder { - nir_cursor cursor; - - nir_shader *shader; - nir_function_impl *impl; -} nir_builder; - -static inline void -nir_builder_init(nir_builder *build, nir_function_impl *impl) -{ - memset(build, 0, sizeof(*build)); - build->impl = impl; - build->shader = impl->function->shader; -} - -static inline void -nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx, - gl_shader_stage stage, - const nir_shader_compiler_options *options) -{ - build->shader = nir_shader_create(mem_ctx, stage, options); - nir_function *func = nir_function_create(build->shader, "main"); - build->impl = nir_function_impl_create(func); - build->cursor = nir_after_cf_list(&build->impl->body); -} - -static inline void -nir_builder_instr_insert(nir_builder *build, nir_instr *instr) -{ - nir_instr_insert(build->cursor, instr); - - /* Move the cursor forward. 
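- * This is what makes consecutive builder calls emit code in program
- * order. A minimal sketch (hypothetical pass code; "impl", "instr" and
- * "val" are assumed), using helpers defined below:
- *
- *    nir_builder b;
- *    nir_builder_init(&b, impl);
- *    b.cursor = nir_before_instr(instr);
- *    nir_ssa_def *half = nir_imm_float(&b, 0.5f);
- *    nir_ssa_def *sum = nir_fadd(&b, val, half);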
*/ - build->cursor = nir_after_instr(instr); -} - -static inline void -nir_builder_cf_insert(nir_builder *build, nir_cf_node *cf) -{ - nir_cf_node_insert(build->cursor, cf); -} - -static inline nir_ssa_def * -nir_build_imm(nir_builder *build, unsigned num_components, nir_const_value value) -{ - nir_load_const_instr *load_const = - nir_load_const_instr_create(build->shader, num_components); - if (!load_const) - return NULL; - - load_const->value = value; - - nir_builder_instr_insert(build, &load_const->instr); - - return &load_const->def; -} - -static inline nir_ssa_def * -nir_imm_float(nir_builder *build, float x) -{ - nir_const_value v; - - memset(&v, 0, sizeof(v)); - v.f[0] = x; - - return nir_build_imm(build, 1, v); -} - -static inline nir_ssa_def * -nir_imm_vec4(nir_builder *build, float x, float y, float z, float w) -{ - nir_const_value v; - - memset(&v, 0, sizeof(v)); - v.f[0] = x; - v.f[1] = y; - v.f[2] = z; - v.f[3] = w; - - return nir_build_imm(build, 4, v); -} - -static inline nir_ssa_def * -nir_imm_int(nir_builder *build, int x) -{ - nir_const_value v; - - memset(&v, 0, sizeof(v)); - v.i[0] = x; - - return nir_build_imm(build, 1, v); -} - -static inline nir_ssa_def * -nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w) -{ - nir_const_value v; - - memset(&v, 0, sizeof(v)); - v.i[0] = x; - v.i[1] = y; - v.i[2] = z; - v.i[3] = w; - - return nir_build_imm(build, 4, v); -} - -static inline nir_ssa_def * -nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0, - nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) -{ - const nir_op_info *op_info = &nir_op_infos[op]; - nir_alu_instr *instr = nir_alu_instr_create(build->shader, op); - if (!instr) - return NULL; - - instr->src[0].src = nir_src_for_ssa(src0); - if (src1) - instr->src[1].src = nir_src_for_ssa(src1); - if (src2) - instr->src[2].src = nir_src_for_ssa(src2); - if (src3) - instr->src[3].src = nir_src_for_ssa(src3); - - /* Guess the number of components the destination temporary should have - * based on our input sizes, if it's not fixed for the op. - */ - unsigned num_components = op_info->output_size; - if (num_components == 0) { - for (unsigned i = 0; i < op_info->num_inputs; i++) { - if (op_info->input_sizes[i] == 0) - num_components = MAX2(num_components, - instr->src[i].src.ssa->num_components); - } - } - assert(num_components != 0); - - /* Make sure we don't swizzle from outside of our source vector (like if a - * scalar value was passed into a multiply with a vector). 
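- *
- * E.g. for fmul(vec4, scalar) the scalar source has one component, so
- * its y/z/w swizzles are clamped to component 0 here.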
- */ - for (unsigned i = 0; i < op_info->num_inputs; i++) { - for (unsigned j = instr->src[i].src.ssa->num_components; j < 4; j++) { - instr->src[i].swizzle[j] = instr->src[i].src.ssa->num_components - 1; - } - } - - nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); - instr->dest.write_mask = (1 << num_components) - 1; - - nir_builder_instr_insert(build, &instr->instr); - - return &instr->dest.dest.ssa; -} - -#define ALU1(op) \ -static inline nir_ssa_def * \ -nir_##op(nir_builder *build, nir_ssa_def *src0) \ -{ \ - return nir_build_alu(build, nir_op_##op, src0, NULL, NULL, NULL); \ -} - -#define ALU2(op) \ -static inline nir_ssa_def * \ -nir_##op(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) \ -{ \ - return nir_build_alu(build, nir_op_##op, src0, src1, NULL, NULL); \ -} - -#define ALU3(op) \ -static inline nir_ssa_def * \ -nir_##op(nir_builder *build, nir_ssa_def *src0, \ - nir_ssa_def *src1, nir_ssa_def *src2) \ -{ \ - return nir_build_alu(build, nir_op_##op, src0, src1, src2, NULL); \ -} - -#define ALU4(op) \ -static inline nir_ssa_def * \ -nir_##op(nir_builder *build, nir_ssa_def *src0, \ - nir_ssa_def *src1, nir_ssa_def *src2, nir_ssa_def *src3) \ -{ \ - return nir_build_alu(build, nir_op_##op, src0, src1, src2, src3); \ -} - -#include "nir_builder_opcodes.h" - -static inline nir_ssa_def * -nir_vec(nir_builder *build, nir_ssa_def **comp, unsigned num_components) -{ - switch (num_components) { - case 4: - return nir_vec4(build, comp[0], comp[1], comp[2], comp[3]); - case 3: - return nir_vec3(build, comp[0], comp[1], comp[2]); - case 2: - return nir_vec2(build, comp[0], comp[1]); - case 1: - return comp[0]; - default: - unreachable("bad component count"); - return NULL; - } -} - -/** - * Similar to nir_fmov, but takes a nir_alu_src instead of a nir_ssa_def. - */ -static inline nir_ssa_def * -nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) -{ - nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov); - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); - mov->dest.write_mask = (1 << num_components) - 1; - mov->src[0] = src; - nir_builder_instr_insert(build, &mov->instr); - - return &mov->dest.dest.ssa; -} - -static inline nir_ssa_def * -nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components) -{ - nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov); - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL); - mov->dest.write_mask = (1 << num_components) - 1; - mov->src[0] = src; - nir_builder_instr_insert(build, &mov->instr); - - return &mov->dest.dest.ssa; -} - -/** - * Construct an fmov or imov that reswizzles the source's components. - */ -static inline nir_ssa_def * -nir_swizzle(nir_builder *build, nir_ssa_def *src, unsigned swiz[4], - unsigned num_components, bool use_fmov) -{ - nir_alu_src alu_src = { NIR_SRC_INIT }; - alu_src.src = nir_src_for_ssa(src); - for (unsigned i = 0; i < num_components; i++) - alu_src.swizzle[i] = swiz[i]; - - return use_fmov ? nir_fmov_alu(build, alu_src, num_components) : - nir_imov_alu(build, alu_src, num_components); -} - -static inline nir_ssa_def * -nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c) -{ - unsigned swizzle[4] = {c, c, c, c}; - return nir_swizzle(b, def, swizzle, 1, false); -} - -/** - * Turns a nir_src into a nir_ssa_def * so it can be passed to - * nir_build_alu()-based builder calls. - * - * See nir_ssa_for_alu_src() for alu instructions. 
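- *
- * For illustration (hypothetical caller; "b" and "intrin" are assumed):
- *
- *    nir_ssa_def *def = nir_ssa_for_src(&b, intrin->src[0], 1);
- *
- * returns the SSA def directly when the source already is scalar SSA and
- * otherwise emits an imov to produce one.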
- */ -static inline nir_ssa_def * -nir_ssa_for_src(nir_builder *build, nir_src src, int num_components) -{ - if (src.is_ssa && src.ssa->num_components == num_components) - return src.ssa; - - nir_alu_src alu = { NIR_SRC_INIT }; - alu.src = src; - for (int j = 0; j < 4; j++) - alu.swizzle[j] = j; - - return nir_imov_alu(build, alu, num_components); -} - -/** - * Similar to nir_ssa_for_src(), but for alu src's, respecting the - * nir_alu_src's swizzle. - */ -static inline nir_ssa_def * -nir_ssa_for_alu_src(nir_builder *build, nir_alu_instr *instr, unsigned srcn) -{ - static uint8_t trivial_swizzle[4] = { 0, 1, 2, 3 }; - nir_alu_src *src = &instr->src[srcn]; - unsigned num_components = nir_ssa_alu_instr_src_components(instr, srcn); - - if (src->src.is_ssa && (src->src.ssa->num_components == num_components) && - !src->abs && !src->negate && - (memcmp(src->swizzle, trivial_swizzle, num_components) == 0)) - return src->src.ssa; - - return nir_imov_alu(build, *src, num_components); -} - -static inline nir_ssa_def * -nir_load_var(nir_builder *build, nir_variable *var) -{ - const unsigned num_components = glsl_get_vector_elements(var->type); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var); - load->num_components = num_components; - load->variables[0] = nir_deref_var_create(load, var); - nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL); - nir_builder_instr_insert(build, &load->instr); - return &load->dest.ssa; -} - -static inline void -nir_store_var(nir_builder *build, nir_variable *var, nir_ssa_def *value, - unsigned writemask) -{ - const unsigned num_components = glsl_get_vector_elements(var->type); - - nir_intrinsic_instr *store = - nir_intrinsic_instr_create(build->shader, nir_intrinsic_store_var); - store->num_components = num_components; - store->const_index[0] = writemask; - store->variables[0] = nir_deref_var_create(store, var); - store->src[0] = nir_src_for_ssa(value); - nir_builder_instr_insert(build, &store->instr); -} - -static inline nir_ssa_def * -nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index) -{ - nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op); - load->num_components = nir_intrinsic_infos[op].dest_components; - load->const_index[0] = index; - nir_ssa_dest_init(&load->instr, &load->dest, - nir_intrinsic_infos[op].dest_components, NULL); - nir_builder_instr_insert(build, &load->instr); - return &load->dest.ssa; -} - -#endif /* NIR_BUILDER_H */ diff --git a/src/glsl/nir/nir_builder_opcodes_h.py b/src/glsl/nir/nir_builder_opcodes_h.py deleted file mode 100644 index e27206ea8fc..00000000000 --- a/src/glsl/nir/nir_builder_opcodes_h.py +++ /dev/null @@ -1,38 +0,0 @@ -#! /usr/bin/env python - -template = """\ -/* Copyright (C) 2015 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#ifndef _NIR_BUILDER_OPCODES_ -#define _NIR_BUILDER_OPCODES_ - -% for name, opcode in sorted(opcodes.iteritems()): -ALU${opcode.num_inputs}(${name}); -% endfor - -#endif /* _NIR_BUILDER_OPCODES_ */""" - -from nir_opcodes import opcodes -from mako.template import Template - -print Template(template).render(opcodes=opcodes) diff --git a/src/glsl/nir/nir_clone.c b/src/glsl/nir/nir_clone.c deleted file mode 100644 index 5eff743d835..00000000000 --- a/src/glsl/nir/nir_clone.c +++ /dev/null @@ -1,659 +0,0 @@ -/* - * Copyright © 2015 Red Hat - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "nir.h" -#include "nir_control_flow_private.h" - -/* Secret Decoder Ring: - * clone_foo(): - * Allocate and clone a foo. - * __clone_foo(): - * Clone body of foo (ie. parent class, embedded struct, etc) - */ - -typedef struct { - /* maps orig ptr -> cloned ptr: */ - struct hash_table *ptr_table; - - /* List of phi sources. 
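- * A phi may reference a block or SSA def that has not been cloned yet,
- * so clone_phi() stashes the copied sources here and
- * clone_function_impl() fixes them up once everything else is in place.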
*/ - struct list_head phi_srcs; - - /* new shader object, used as memctx for just about everything else: */ - nir_shader *ns; -} clone_state; - -static void -init_clone_state(clone_state *state) -{ - state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - list_inithead(&state->phi_srcs); -} - -static void -free_clone_state(clone_state *state) -{ - _mesa_hash_table_destroy(state->ptr_table, NULL); -} - -static void * -lookup_ptr(clone_state *state, const void *ptr) -{ - struct hash_entry *entry; - - if (!ptr) - return NULL; - - entry = _mesa_hash_table_search(state->ptr_table, ptr); - assert(entry && "Failed to find pointer!"); - if (!entry) - return NULL; - - return entry->data; -} - -static void -store_ptr(clone_state *state, void *nptr, const void *ptr) -{ - _mesa_hash_table_insert(state->ptr_table, ptr, nptr); -} - -static nir_constant * -clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) -{ - nir_constant *nc = ralloc(nvar, nir_constant); - - nc->value = c->value; - nc->num_elements = c->num_elements; - nc->elements = ralloc_array(nvar, nir_constant *, c->num_elements); - for (unsigned i = 0; i < c->num_elements; i++) { - nc->elements[i] = clone_constant(state, c->elements[i], nvar); - } - - return nc; -} - -/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid - * having to deal with locals and globals separately: - */ -static nir_variable * -clone_variable(clone_state *state, const nir_variable *var) -{ - nir_variable *nvar = rzalloc(state->ns, nir_variable); - store_ptr(state, nvar, var); - - nvar->type = var->type; - nvar->name = ralloc_strdup(nvar, var->name); - nvar->data = var->data; - nvar->num_state_slots = var->num_state_slots; - nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); - memcpy(nvar->state_slots, var->state_slots, - var->num_state_slots * sizeof(nir_state_slot)); - if (var->constant_initializer) { - nvar->constant_initializer = - clone_constant(state, var->constant_initializer, nvar); - } - nvar->interface_type = var->interface_type; - - return nvar; -} - -/* clone list of nir_variable: */ -static void -clone_var_list(clone_state *state, struct exec_list *dst, - const struct exec_list *list) -{ - exec_list_make_empty(dst); - foreach_list_typed(nir_variable, var, node, list) { - nir_variable *nvar = clone_variable(state, var); - exec_list_push_tail(dst, &nvar->node); - } -} - -/* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create() - * to avoid having to deal with locals and globals separately: - */ -static nir_register * -clone_register(clone_state *state, const nir_register *reg) -{ - nir_register *nreg = rzalloc(state->ns, nir_register); - store_ptr(state, nreg, reg); - - nreg->num_components = reg->num_components; - nreg->num_array_elems = reg->num_array_elems; - nreg->index = reg->index; - nreg->name = ralloc_strdup(nreg, reg->name); - nreg->is_global = reg->is_global; - nreg->is_packed = reg->is_packed; - - /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */ - list_inithead(&nreg->uses); - list_inithead(&nreg->defs); - list_inithead(&nreg->if_uses); - - return nreg; -} - -/* clone list of nir_register: */ -static void -clone_reg_list(clone_state *state, struct exec_list *dst, - const struct exec_list *list) -{ - exec_list_make_empty(dst); - foreach_list_typed(nir_register, reg, node, list) { - nir_register *nreg = clone_register(state, reg); - exec_list_push_tail(dst, &nreg->node); - } -} - -static void 
-__clone_src(clone_state *state, void *ninstr_or_if, - nir_src *nsrc, const nir_src *src) -{ - nsrc->is_ssa = src->is_ssa; - if (src->is_ssa) { - nsrc->ssa = lookup_ptr(state, src->ssa); - } else { - nsrc->reg.reg = lookup_ptr(state, src->reg.reg); - if (src->reg.indirect) { - nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); - __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); - } - nsrc->reg.base_offset = src->reg.base_offset; - } -} - -static void -__clone_dst(clone_state *state, nir_instr *ninstr, - nir_dest *ndst, const nir_dest *dst) -{ - ndst->is_ssa = dst->is_ssa; - if (dst->is_ssa) { - nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name); - store_ptr(state, &ndst->ssa, &dst->ssa); - } else { - ndst->reg.reg = lookup_ptr(state, dst->reg.reg); - if (dst->reg.indirect) { - ndst->reg.indirect = ralloc(ninstr, nir_src); - __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); - } - ndst->reg.base_offset = dst->reg.base_offset; - } -} - -static nir_deref *clone_deref(clone_state *state, const nir_deref *deref, - nir_instr *ninstr, nir_deref *parent); - -static nir_deref_var * -clone_deref_var(clone_state *state, const nir_deref_var *dvar, - nir_instr *ninstr) -{ - nir_variable *nvar = lookup_ptr(state, dvar->var); - nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar); - - if (dvar->deref.child) - ndvar->deref.child = clone_deref(state, dvar->deref.child, - ninstr, &ndvar->deref); - - return ndvar; -} - -static nir_deref_array * -clone_deref_array(clone_state *state, const nir_deref_array *darr, - nir_instr *ninstr, nir_deref *parent) -{ - nir_deref_array *ndarr = nir_deref_array_create(parent); - - ndarr->deref.type = darr->deref.type; - if (darr->deref.child) - ndarr->deref.child = clone_deref(state, darr->deref.child, - ninstr, &ndarr->deref); - - ndarr->deref_array_type = darr->deref_array_type; - ndarr->base_offset = darr->base_offset; - if (ndarr->deref_array_type == nir_deref_array_type_indirect) - __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect); - - return ndarr; -} - -static nir_deref_struct * -clone_deref_struct(clone_state *state, const nir_deref_struct *dstr, - nir_instr *ninstr, nir_deref *parent) -{ - nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index); - - ndstr->deref.type = dstr->deref.type; - if (dstr->deref.child) - ndstr->deref.child = clone_deref(state, dstr->deref.child, - ninstr, &ndstr->deref); - - return ndstr; -} - -static nir_deref * -clone_deref(clone_state *state, const nir_deref *dref, - nir_instr *ninstr, nir_deref *parent) -{ - switch (dref->deref_type) { - case nir_deref_type_array: - return &clone_deref_array(state, nir_deref_as_array(dref), - ninstr, parent)->deref; - case nir_deref_type_struct: - return &clone_deref_struct(state, nir_deref_as_struct(dref), - ninstr, parent)->deref; - default: - unreachable("bad deref type"); - return NULL; - } -} - -static nir_alu_instr * -clone_alu(clone_state *state, const nir_alu_instr *alu) -{ - nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); - - __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest); - nalu->dest.saturate = alu->dest.saturate; - nalu->dest.write_mask = alu->dest.write_mask; - - for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { - __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src); - nalu->src[i].negate = alu->src[i].negate; - nalu->src[i].abs = alu->src[i].abs; - memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, - 
sizeof(nalu->src[i].swizzle));
- }
-
- return nalu;
-}
-
-static nir_intrinsic_instr *
-clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr)
-{
- nir_intrinsic_instr *nitr =
- nir_intrinsic_instr_create(state->ns, itr->intrinsic);
-
- unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables;
- unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs;
-
- if (nir_intrinsic_infos[itr->intrinsic].has_dest)
- __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest);
-
- nitr->num_components = itr->num_components;
- memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index));
-
- for (unsigned i = 0; i < num_variables; i++) {
- nitr->variables[i] = clone_deref_var(state, itr->variables[i],
- &nitr->instr);
- }
-
- for (unsigned i = 0; i < num_srcs; i++)
- __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]);
-
- return nitr;
-}
-
-static nir_load_const_instr *
-clone_load_const(clone_state *state, const nir_load_const_instr *lc)
-{
- nir_load_const_instr *nlc =
- nir_load_const_instr_create(state->ns, lc->def.num_components);
-
- memcpy(&nlc->value, &lc->value, sizeof(nlc->value));
-
- store_ptr(state, &nlc->def, &lc->def);
-
- return nlc;
-}
-
-static nir_ssa_undef_instr *
-clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa)
-{
- nir_ssa_undef_instr *nsa =
- nir_ssa_undef_instr_create(state->ns, sa->def.num_components);
-
- store_ptr(state, &nsa->def, &sa->def);
-
- return nsa;
-}
-
-static nir_tex_instr *
-clone_tex(clone_state *state, const nir_tex_instr *tex)
-{
- nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs);
-
- ntex->sampler_dim = tex->sampler_dim;
- ntex->dest_type = tex->dest_type;
- ntex->op = tex->op;
- __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest);
- for (unsigned i = 0; i < ntex->num_srcs; i++) {
- ntex->src[i].src_type = tex->src[i].src_type;
- __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src);
- }
- ntex->coord_components = tex->coord_components;
- ntex->is_array = tex->is_array;
- ntex->is_shadow = tex->is_shadow;
- ntex->is_new_style_shadow = tex->is_new_style_shadow;
- memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset));
- ntex->component = tex->component;
- ntex->sampler_index = tex->sampler_index;
- ntex->sampler_array_size = tex->sampler_array_size;
- if (tex->sampler)
- ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr);
-
- return ntex;
-}
-
-static nir_phi_instr *
-clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk)
-{
- nir_phi_instr *nphi = nir_phi_instr_create(state->ns);
-
- __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest);
-
- /* Cloning a phi node is a bit different from other instructions. The
- * sources of phi instructions are the only place where we can use an SSA
- * def before it is defined. In order to handle this, we just copy over
- * the sources from the old phi instruction directly and then fix them up
- * in a second pass once all the instructions in the function have been
- * properly cloned.
- *
- * In order to ensure that the copied sources (which are the same as the
- * old phi instruction's sources for now) don't get inserted into the old
- * shader's use-def lists, we have to add the phi instruction *before* we
- * set up its sources.
- */
- nir_instr_insert_after_block(nblk, &nphi->instr);
-
- foreach_list_typed(nir_phi_src, src, node, &phi->srcs) {
- nir_phi_src *nsrc = ralloc(nphi, nir_phi_src);
-
- /* Just copy the old source for now.
*/
- memcpy(nsrc, src, sizeof(*src));
-
- /* Since we're not letting nir_instr_insert handle use/def stuff for us,
- * we have to set the parent_instr manually. It doesn't really matter
- * when we do it, so we might as well do it here.
- */
- nsrc->src.parent_instr = &nphi->instr;
-
- /* Stash it in the list of phi sources. We'll walk this list and fix up
- * sources at the very end of clone_function_impl.
- */
- list_add(&nsrc->src.use_link, &state->phi_srcs);
-
- exec_list_push_tail(&nphi->srcs, &nsrc->node);
- }
-
- return nphi;
-}
-
-static nir_jump_instr *
-clone_jump(clone_state *state, const nir_jump_instr *jmp)
-{
- nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type);
-
- return njmp;
-}
-
-static nir_call_instr *
-clone_call(clone_state *state, const nir_call_instr *call)
-{
- nir_function *ncallee = lookup_ptr(state, call->callee);
- nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee);
-
- for (unsigned i = 0; i < ncall->num_params; i++)
- ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr);
-
- ncall->return_deref = clone_deref_var(state, call->return_deref,
- &ncall->instr);
-
- return ncall;
-}
-
-static nir_instr *
-clone_instr(clone_state *state, const nir_instr *instr)
-{
- switch (instr->type) {
- case nir_instr_type_alu:
- return &clone_alu(state, nir_instr_as_alu(instr))->instr;
- case nir_instr_type_intrinsic:
- return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr;
- case nir_instr_type_load_const:
- return &clone_load_const(state, nir_instr_as_load_const(instr))->instr;
- case nir_instr_type_ssa_undef:
- return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr;
- case nir_instr_type_tex:
- return &clone_tex(state, nir_instr_as_tex(instr))->instr;
- case nir_instr_type_phi:
- unreachable("Cannot clone phis with clone_instr");
- case nir_instr_type_jump:
- return &clone_jump(state, nir_instr_as_jump(instr))->instr;
- case nir_instr_type_call:
- return &clone_call(state, nir_instr_as_call(instr))->instr;
- case nir_instr_type_parallel_copy:
- unreachable("Cannot clone parallel copies");
- default:
- unreachable("bad instr type");
- return NULL;
- }
-}
-
-static nir_block *
-clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk)
-{
- /* Don't actually create a new block. Just use the one from the tail of
- * the list. NIR guarantees that the tail of the list is a block and that
- * no two blocks are side-by-side in the IR; it should be empty.
- */
- nir_block *nblk =
- exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node);
- assert(nblk->cf_node.type == nir_cf_node_block);
- assert(exec_list_is_empty(&nblk->instr_list));
-
- /* We need this for phi sources */
- store_ptr(state, nblk, blk);
-
- nir_foreach_instr(blk, instr) {
- if (instr->type == nir_instr_type_phi) {
- /* Phi instructions are a bit of a special case when cloning because
- * we don't want inserting the instruction to automatically handle
- * use/defs for us. Instead, we need to wait until all the
- * blocks/instructions are in so that we can set their sources up.
- */ - clone_phi(state, nir_instr_as_phi(instr), nblk); - } else { - nir_instr *ninstr = clone_instr(state, instr); - nir_instr_insert_after_block(nblk, ninstr); - } - } - - return nblk; -} - -static void -clone_cf_list(clone_state *state, struct exec_list *dst, - const struct exec_list *list); - -static nir_if * -clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i) -{ - nir_if *ni = nir_if_create(state->ns); - - __clone_src(state, ni, &ni->condition, &i->condition); - - nir_cf_node_insert_end(cf_list, &ni->cf_node); - - clone_cf_list(state, &ni->then_list, &i->then_list); - clone_cf_list(state, &ni->else_list, &i->else_list); - - return ni; -} - -static nir_loop * -clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop) -{ - nir_loop *nloop = nir_loop_create(state->ns); - - nir_cf_node_insert_end(cf_list, &nloop->cf_node); - - clone_cf_list(state, &nloop->body, &loop->body); - - return nloop; -} - -/* clone list of nir_cf_node: */ -static void -clone_cf_list(clone_state *state, struct exec_list *dst, - const struct exec_list *list) -{ - foreach_list_typed(nir_cf_node, cf, node, list) { - switch (cf->type) { - case nir_cf_node_block: - clone_block(state, dst, nir_cf_node_as_block(cf)); - break; - case nir_cf_node_if: - clone_if(state, dst, nir_cf_node_as_if(cf)); - break; - case nir_cf_node_loop: - clone_loop(state, dst, nir_cf_node_as_loop(cf)); - break; - default: - unreachable("bad cf type"); - } - } -} - -static nir_function_impl * -clone_function_impl(clone_state *state, const nir_function_impl *fi, - nir_function *nfxn) -{ - nir_function_impl *nfi = nir_function_impl_create(nfxn); - - clone_var_list(state, &nfi->locals, &fi->locals); - clone_reg_list(state, &nfi->registers, &fi->registers); - nfi->reg_alloc = fi->reg_alloc; - - nfi->num_params = fi->num_params; - nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); - for (unsigned i = 0; i < fi->num_params; i++) { - nfi->params[i] = lookup_ptr(state, fi->params[i]); - } - nfi->return_var = lookup_ptr(state, fi->return_var); - - assert(list_empty(&state->phi_srcs)); - - clone_cf_list(state, &nfi->body, &fi->body); - - /* After we've cloned almost everything, we have to walk the list of phi - * sources and fix them up. Thanks to loops, the block and SSA value for a - * phi source may not be defined when we first encounter it. Instead, we - * add it to the phi_srcs list and we fix it up here. - */ - list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { - src->pred = lookup_ptr(state, src->pred); - assert(src->src.is_ssa); - src->src.ssa = lookup_ptr(state, src->src.ssa); - - /* Remove from this list and place in the uses of the SSA def */ - list_del(&src->src.use_link); - list_addtail(&src->src.use_link, &src->src.ssa->uses); - } - assert(list_empty(&state->phi_srcs)); - - /* All metadata is invalidated in the cloning process */ - nfi->valid_metadata = 0; - - return nfi; -} - -static nir_function * -clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) -{ - assert(ns == state->ns); - nir_function *nfxn = nir_function_create(ns, fxn->name); - - /* Needed for call instructions */ - store_ptr(state, nfxn, fxn); - - nfxn->num_params = fxn->num_params; - nfxn->params = ralloc_array(state->ns, nir_parameter, fxn->num_params); - memcpy(nfxn->params, fxn->params, sizeof(nir_parameter) * fxn->num_params); - - nfxn->return_type = fxn->return_type; - - /* At first glance, it looks like we should clone the function_impl here. 
- * However, call instructions need to be able to reference at least the - * function and those will get processed as we clone the function_impl's. - * We stop here and do function_impls as a second pass. - */ - - return nfxn; -} - -nir_shader * -nir_shader_clone(void *mem_ctx, const nir_shader *s) -{ - clone_state state; - init_clone_state(&state); - - nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options); - state.ns = ns; - - clone_var_list(&state, &ns->uniforms, &s->uniforms); - clone_var_list(&state, &ns->inputs, &s->inputs); - clone_var_list(&state, &ns->outputs, &s->outputs); - clone_var_list(&state, &ns->globals, &s->globals); - clone_var_list(&state, &ns->system_values, &s->system_values); - - /* Go through and clone functions */ - foreach_list_typed(nir_function, fxn, node, &s->functions) - clone_function(&state, fxn, ns); - - /* Only after all functions are cloned can we clone the actual function - * implementations. This is because nir_call_instr's need to reference the - * functions of other functions and we don't know what order the functions - * will have in the list. - */ - nir_foreach_function(s, fxn) { - nir_function *nfxn = lookup_ptr(&state, fxn); - clone_function_impl(&state, fxn->impl, nfxn); - } - - clone_reg_list(&state, &ns->registers, &s->registers); - ns->reg_alloc = s->reg_alloc; - - ns->info = s->info; - ns->info.name = ralloc_strdup(ns, ns->info.name); - if (ns->info.label) - ns->info.label = ralloc_strdup(ns, ns->info.label); - - ns->num_inputs = s->num_inputs; - ns->num_uniforms = s->num_uniforms; - ns->num_outputs = s->num_outputs; - - free_clone_state(&state); - - return ns; -} diff --git a/src/glsl/nir/nir_constant_expressions.h b/src/glsl/nir/nir_constant_expressions.h deleted file mode 100644 index 97997f2e514..00000000000 --- a/src/glsl/nir/nir_constant_expressions.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright © 2014 Connor Abbott - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir.h" - -nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components, - nir_const_value *src); diff --git a/src/glsl/nir/nir_constant_expressions.py b/src/glsl/nir/nir_constant_expressions.py deleted file mode 100644 index 32784f6398d..00000000000 --- a/src/glsl/nir/nir_constant_expressions.py +++ /dev/null @@ -1,336 +0,0 @@ -#! 
/usr/bin/python2 -template = """\ -/* - * Copyright (C) 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - */ - -#include -#include "main/core.h" -#include "util/rounding.h" /* for _mesa_roundeven */ -#include "util/half_float.h" -#include "nir_constant_expressions.h" - -/** - * Evaluate one component of packSnorm4x8. - */ -static uint8_t -pack_snorm_1x8(float x) -{ - /* From section 8.4 of the GLSL 4.30 spec: - * - * packSnorm4x8 - * ------------ - * The conversion for component c of v to fixed point is done as - * follows: - * - * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) - * - * We must first cast the float to an int, because casting a negative - * float to a uint is undefined. - */ - return (uint8_t) (int) - _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f); -} - -/** - * Evaluate one component of packSnorm2x16. - */ -static uint16_t -pack_snorm_1x16(float x) -{ - /* From section 8.4 of the GLSL ES 3.00 spec: - * - * packSnorm2x16 - * ------------- - * The conversion for component c of v to fixed point is done as - * follows: - * - * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) - * - * We must first cast the float to an int, because casting a negative - * float to a uint is undefined. - */ - return (uint16_t) (int) - _mesa_roundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f); -} - -/** - * Evaluate one component of unpackSnorm4x8. - */ -static float -unpack_snorm_1x8(uint8_t u) -{ - /* From section 8.4 of the GLSL 4.30 spec: - * - * unpackSnorm4x8 - * -------------- - * The conversion for unpacked fixed-point value f to floating point is - * done as follows: - * - * unpackSnorm4x8: clamp(f / 127.0, -1, +1) - */ - return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f); -} - -/** - * Evaluate one component of unpackSnorm2x16. - */ -static float -unpack_snorm_1x16(uint16_t u) -{ - /* From section 8.4 of the GLSL ES 3.00 spec: - * - * unpackSnorm2x16 - * --------------- - * The conversion for unpacked fixed-point value f to floating point is - * done as follows: - * - * unpackSnorm2x16: clamp(f / 32767.0, -1, +1) - */ - return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f); -} - -/** - * Evaluate one component packUnorm4x8. 
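- *
- * As a quick sanity check of the formula below: pack_unorm_1x8(0.5f)
- * evaluates roundeven(clamp(0.5, 0, 1) * 255.0) = roundeven(127.5) = 128,
- * since _mesa_roundevenf() breaks the tie toward the even integer.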
- */ -static uint8_t -pack_unorm_1x8(float x) -{ - /* From section 8.4 of the GLSL 4.30 spec: - * - * packUnorm4x8 - * ------------ - * The conversion for component c of v to fixed point is done as - * follows: - * - * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) - */ - return (uint8_t) (int) - _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f); -} - -/** - * Evaluate one component packUnorm2x16. - */ -static uint16_t -pack_unorm_1x16(float x) -{ - /* From section 8.4 of the GLSL ES 3.00 spec: - * - * packUnorm2x16 - * ------------- - * The conversion for component c of v to fixed point is done as - * follows: - * - * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) - */ - return (uint16_t) (int) - _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f); -} - -/** - * Evaluate one component of unpackUnorm4x8. - */ -static float -unpack_unorm_1x8(uint8_t u) -{ - /* From section 8.4 of the GLSL 4.30 spec: - * - * unpackUnorm4x8 - * -------------- - * The conversion for unpacked fixed-point value f to floating point is - * done as follows: - * - * unpackUnorm4x8: f / 255.0 - */ - return (float) u / 255.0f; -} - -/** - * Evaluate one component of unpackUnorm2x16. - */ -static float -unpack_unorm_1x16(uint16_t u) -{ - /* From section 8.4 of the GLSL ES 3.00 spec: - * - * unpackUnorm2x16 - * --------------- - * The conversion for unpacked fixed-point value f to floating point is - * done as follows: - * - * unpackUnorm2x16: f / 65535.0 - */ - return (float) u / 65535.0f; -} - -/** - * Evaluate one component of packHalf2x16. - */ -static uint16_t -pack_half_1x16(float x) -{ - return _mesa_float_to_half(x); -} - -/** - * Evaluate one component of unpackHalf2x16. - */ -static float -unpack_half_1x16(uint16_t u) -{ - return _mesa_half_to_float(u); -} - -/* Some typed vector structures to make things like src0.y work */ -% for type in ["float", "int", "uint", "bool"]: -struct ${type}_vec { - ${type} x; - ${type} y; - ${type} z; - ${type} w; -}; -% endfor - -% for name, op in sorted(opcodes.iteritems()): -static nir_const_value -evaluate_${name}(unsigned num_components, nir_const_value *_src) -{ - nir_const_value _dst_val = { { {0, 0, 0, 0} } }; - - ## For each non-per-component input, create a variable srcN that - ## contains x, y, z, and w elements which are filled in with the - ## appropriately-typed values. - % for j in range(op.num_inputs): - % if op.input_sizes[j] == 0: - <% continue %> - % elif "src" + str(j) not in op.const_expr: - ## Avoid unused variable warnings - <% continue %> - %endif - - struct ${op.input_types[j]}_vec src${j} = { - % for k in range(op.input_sizes[j]): - % if op.input_types[j] == "bool": - _src[${j}].u[${k}] != 0, - % else: - _src[${j}].${op.input_types[j][:1]}[${k}], - % endif - % endfor - }; - % endfor - - % if op.output_size == 0: - ## For per-component instructions, we need to iterate over the - ## components and apply the constant expression one component - ## at a time. - for (unsigned _i = 0; _i < num_components; _i++) { - ## For each per-component input, create a variable srcN that - ## contains the value of the current (_i'th) component. 
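-      ## For a scalar float input, for example, this expands to something
-      ## like: float src0 = _src[0].f[_i];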
- % for j in range(op.num_inputs): - % if op.input_sizes[j] != 0: - <% continue %> - % elif "src" + str(j) not in op.const_expr: - ## Avoid unused variable warnings - <% continue %> - % elif op.input_types[j] == "bool": - bool src${j} = _src[${j}].u[_i] != 0; - % else: - ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i]; - % endif - % endfor - - ## Create an appropriately-typed variable dst and assign the - ## result of the const_expr to it. If const_expr already contains - ## writes to dst, just include const_expr directly. - % if "dst" in op.const_expr: - ${op.output_type} dst; - ${op.const_expr} - % else: - ${op.output_type} dst = ${op.const_expr}; - % endif - - ## Store the current component of the actual destination to the - ## value of dst. - % if op.output_type == "bool": - ## Sanitize the C value to a proper NIR bool - _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE; - % else: - _dst_val.${op.output_type[:1]}[_i] = dst; - % endif - } - % else: - ## In the non-per-component case, create a struct dst with - ## appropriately-typed elements x, y, z, and w and assign the result - ## of the const_expr to all components of dst, or include the - ## const_expr directly if it writes to dst already. - struct ${op.output_type}_vec dst; - - % if "dst" in op.const_expr: - ${op.const_expr} - % else: - ## Splat the value to all components. This way expressions which - ## write the same value to all components don't need to explicitly - ## write to dest. One such example is fnoise which has a - ## const_expr of 0.0f. - dst.x = dst.y = dst.z = dst.w = ${op.const_expr}; - % endif - - ## For each component in the destination, copy the value of dst to - ## the actual destination. - % for k in range(op.output_size): - % if op.output_type == "bool": - ## Sanitize the C value to a proper NIR bool - _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE; - % else: - _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]}; - % endif - % endfor - % endif - - return _dst_val; -} -% endfor - -nir_const_value -nir_eval_const_opcode(nir_op op, unsigned num_components, - nir_const_value *src) -{ - switch (op) { -% for name in sorted(opcodes.iterkeys()): - case nir_op_${name}: { - return evaluate_${name}(num_components, src); - break; - } -% endfor - default: - unreachable("shouldn't get here"); - } -}""" - -from nir_opcodes import opcodes -from mako.template import Template - -print Template(template).render(opcodes=opcodes) diff --git a/src/glsl/nir/nir_control_flow.c b/src/glsl/nir/nir_control_flow.c deleted file mode 100644 index 96395a41615..00000000000 --- a/src/glsl/nir/nir_control_flow.c +++ /dev/null @@ -1,808 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir_control_flow_private.h" - -/** - * \name Control flow modification - * - * These functions modify the control flow tree while keeping the control flow - * graph up-to-date. The invariants respected are: - * 1. Each then statement, else statement, or loop body must have at least one - * control flow node. - * 2. Each if-statement and loop must have one basic block before it and one - * after. - * 3. Two basic blocks cannot be directly next to each other. - * 4. If a basic block has a jump instruction, there must be only one and it - * must be at the end of the block. - * 5. The CFG must always be connected - this means that we must insert a fake - * CFG edge for loops with no break statement. - * - * The purpose of the second one is so that we have places to insert code during - * GCM, as well as eliminating the possibility of critical edges. - */ -/*@{*/ - -static bool -block_ends_in_jump(nir_block *block) -{ - return !exec_list_is_empty(&block->instr_list) && - nir_block_last_instr(block)->type == nir_instr_type_jump; -} - -static inline void -block_add_pred(nir_block *block, nir_block *pred) -{ - _mesa_set_add(block->predecessors, pred); -} - -static inline void -block_remove_pred(nir_block *block, nir_block *pred) -{ - struct set_entry *entry = _mesa_set_search(block->predecessors, pred); - - assert(entry); - - _mesa_set_remove(block->predecessors, entry); -} - -static void -link_blocks(nir_block *pred, nir_block *succ1, nir_block *succ2) -{ - pred->successors[0] = succ1; - if (succ1 != NULL) - block_add_pred(succ1, pred); - - pred->successors[1] = succ2; - if (succ2 != NULL) - block_add_pred(succ2, pred); -} - -static void -unlink_blocks(nir_block *pred, nir_block *succ) -{ - if (pred->successors[0] == succ) { - pred->successors[0] = pred->successors[1]; - pred->successors[1] = NULL; - } else { - assert(pred->successors[1] == succ); - pred->successors[1] = NULL; - } - - block_remove_pred(succ, pred); -} - -static void -unlink_block_successors(nir_block *block) -{ - if (block->successors[1] != NULL) - unlink_blocks(block, block->successors[1]); - if (block->successors[0] != NULL) - unlink_blocks(block, block->successors[0]); -} - -static void -link_non_block_to_block(nir_cf_node *node, nir_block *block) -{ - if (node->type == nir_cf_node_if) { - /* - * We're trying to link an if to a block after it; this just means linking - * the last block of the then and else branches. 
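- * For example, linking "if (c) { ... A } else { ... B }" to a following
- * block C makes C the fall-through successor of both A and B, except for a
- * branch that already ends in a jump (it keeps its jump target instead).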
- */ - - nir_if *if_stmt = nir_cf_node_as_if(node); - - nir_cf_node *last_then = nir_if_last_then_node(if_stmt); - assert(last_then->type == nir_cf_node_block); - nir_block *last_then_block = nir_cf_node_as_block(last_then); - - nir_cf_node *last_else = nir_if_last_else_node(if_stmt); - assert(last_else->type == nir_cf_node_block); - nir_block *last_else_block = nir_cf_node_as_block(last_else); - - if (!block_ends_in_jump(last_then_block)) { - unlink_block_successors(last_then_block); - link_blocks(last_then_block, block, NULL); - } - - if (!block_ends_in_jump(last_else_block)) { - unlink_block_successors(last_else_block); - link_blocks(last_else_block, block, NULL); - } - } else { - assert(node->type == nir_cf_node_loop); - - /* - * We can only get to this codepath if we're inserting a new loop, or - * at least a loop with no break statements; we can't insert break - * statements into a loop when we haven't inserted it into the CFG - * because we wouldn't know which block comes after the loop - * and therefore, which block should be the successor of the block with - * the break). Therefore, we need to insert a fake edge (see invariant - * #5). - */ - - nir_loop *loop = nir_cf_node_as_loop(node); - - nir_cf_node *last = nir_loop_last_cf_node(loop); - assert(last->type == nir_cf_node_block); - nir_block *last_block = nir_cf_node_as_block(last); - - last_block->successors[1] = block; - block_add_pred(block, last_block); - } -} - -static void -link_block_to_non_block(nir_block *block, nir_cf_node *node) -{ - if (node->type == nir_cf_node_if) { - /* - * We're trying to link a block to an if after it; this just means linking - * the block to the first block of the then and else branches. - */ - - nir_if *if_stmt = nir_cf_node_as_if(node); - - nir_cf_node *first_then = nir_if_first_then_node(if_stmt); - assert(first_then->type == nir_cf_node_block); - nir_block *first_then_block = nir_cf_node_as_block(first_then); - - nir_cf_node *first_else = nir_if_first_else_node(if_stmt); - assert(first_else->type == nir_cf_node_block); - nir_block *first_else_block = nir_cf_node_as_block(first_else); - - unlink_block_successors(block); - link_blocks(block, first_then_block, first_else_block); - } else { - /* - * For similar reasons as the corresponding case in - * link_non_block_to_block(), don't worry about if the loop header has - * any predecessors that need to be unlinked. - */ - - assert(node->type == nir_cf_node_loop); - - nir_loop *loop = nir_cf_node_as_loop(node); - - nir_cf_node *loop_header = nir_loop_first_cf_node(loop); - assert(loop_header->type == nir_cf_node_block); - nir_block *loop_header_block = nir_cf_node_as_block(loop_header); - - unlink_block_successors(block); - link_blocks(block, loop_header_block, NULL); - } - -} - -/** - * Replace a block's successor with a different one. - */ -static void -replace_successor(nir_block *block, nir_block *old_succ, nir_block *new_succ) -{ - if (block->successors[0] == old_succ) { - block->successors[0] = new_succ; - } else { - assert(block->successors[1] == old_succ); - block->successors[1] = new_succ; - } - - block_remove_pred(old_succ, block); - block_add_pred(new_succ, block); -} - -/** - * Takes a basic block and inserts a new empty basic block before it, making its - * predecessors point to the new block. This essentially splits the block into - * an empty header and a body so that another non-block CF node can be inserted - * between the two. 
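- * For instance, inserting an if at a cursor in the middle of a block first
- * splits that block into an empty header and a body; the if can then be
- * spliced in between the two halves.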
- * Note that this does *not* link the two basic blocks, so
- * some kind of cleanup *must* be performed after this call.
- */
-
-static nir_block *
-split_block_beginning(nir_block *block)
-{
-   nir_block *new_block = nir_block_create(ralloc_parent(block));
-   new_block->cf_node.parent = block->cf_node.parent;
-   exec_node_insert_node_before(&block->cf_node.node, &new_block->cf_node.node);
-
-   struct set_entry *entry;
-   set_foreach(block->predecessors, entry) {
-      nir_block *pred = (nir_block *) entry->key;
-      replace_successor(pred, block, new_block);
-   }
-
-   /* Any phi nodes must stay part of the new block, or else their
-    * sources will be messed up. This will reverse the order of the phi's, but
-    * order shouldn't matter.
-    */
-   nir_foreach_instr_safe(block, instr) {
-      if (instr->type != nir_instr_type_phi)
-         break;
-
-      exec_node_remove(&instr->node);
-      instr->block = new_block;
-      exec_list_push_head(&new_block->instr_list, &instr->node);
-   }
-
-   return new_block;
-}
-
-static void
-rewrite_phi_preds(nir_block *block, nir_block *old_pred, nir_block *new_pred)
-{
-   nir_foreach_instr_safe(block, instr) {
-      if (instr->type != nir_instr_type_phi)
-         break;
-
-      nir_phi_instr *phi = nir_instr_as_phi(instr);
-      nir_foreach_phi_src(phi, src) {
-         if (src->pred == old_pred) {
-            src->pred = new_pred;
-            break;
-         }
-      }
-   }
-}
-
-static void
-insert_phi_undef(nir_block *block, nir_block *pred)
-{
-   nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node);
-   nir_foreach_instr(block, instr) {
-      if (instr->type != nir_instr_type_phi)
-         break;
-
-      nir_phi_instr *phi = nir_instr_as_phi(instr);
-      nir_ssa_undef_instr *undef =
-         nir_ssa_undef_instr_create(ralloc_parent(phi),
-                                    phi->dest.ssa.num_components);
-      nir_instr_insert_before_cf_list(&impl->body, &undef->instr);
-      nir_phi_src *src = ralloc(phi, nir_phi_src);
-      src->pred = pred;
-      src->src.parent_instr = &phi->instr;
-      src->src.is_ssa = true;
-      src->src.ssa = &undef->def;
-
-      list_addtail(&src->src.use_link, &undef->def.uses);
-
-      exec_list_push_tail(&phi->srcs, &src->node);
-   }
-}
-
-/**
- * Moves the successors of source to the successors of dest, leaving both
- * successors of source NULL.
- */
-
-static void
-move_successors(nir_block *source, nir_block *dest)
-{
-   nir_block *succ1 = source->successors[0];
-   nir_block *succ2 = source->successors[1];
-
-   if (succ1) {
-      unlink_blocks(source, succ1);
-      rewrite_phi_preds(succ1, source, dest);
-   }
-
-   if (succ2) {
-      unlink_blocks(source, succ2);
-      rewrite_phi_preds(succ2, source, dest);
-   }
-
-   unlink_block_successors(dest);
-   link_blocks(dest, succ1, succ2);
-}
-
-/* Given a basic block with no successors that has been inserted into the
- * control flow tree, gives it the successors it would normally have assuming
- * it doesn't end in a jump instruction. Also inserts phi sources with undefs
- * if necessary.
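- * For example, a block at the end of a then-branch falls through to the
- * block after the if, while a block at the end of a loop body falls through
- * to the loop header; in the loop case, each of the header's phis also gets
- * an undef source for the new edge.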
- */ -static void -block_add_normal_succs(nir_block *block) -{ - if (exec_node_is_tail_sentinel(block->cf_node.node.next)) { - nir_cf_node *parent = block->cf_node.parent; - if (parent->type == nir_cf_node_if) { - nir_cf_node *next = nir_cf_node_next(parent); - assert(next->type == nir_cf_node_block); - nir_block *next_block = nir_cf_node_as_block(next); - - link_blocks(block, next_block, NULL); - } else { - assert(parent->type == nir_cf_node_loop); - nir_loop *loop = nir_cf_node_as_loop(parent); - - nir_cf_node *head = nir_loop_first_cf_node(loop); - assert(head->type == nir_cf_node_block); - nir_block *head_block = nir_cf_node_as_block(head); - - link_blocks(block, head_block, NULL); - insert_phi_undef(head_block, block); - } - } else { - nir_cf_node *next = nir_cf_node_next(&block->cf_node); - if (next->type == nir_cf_node_if) { - nir_if *next_if = nir_cf_node_as_if(next); - - nir_cf_node *first_then = nir_if_first_then_node(next_if); - assert(first_then->type == nir_cf_node_block); - nir_block *first_then_block = nir_cf_node_as_block(first_then); - - nir_cf_node *first_else = nir_if_first_else_node(next_if); - assert(first_else->type == nir_cf_node_block); - nir_block *first_else_block = nir_cf_node_as_block(first_else); - - link_blocks(block, first_then_block, first_else_block); - } else { - assert(next->type == nir_cf_node_loop); - nir_loop *next_loop = nir_cf_node_as_loop(next); - - nir_cf_node *first = nir_loop_first_cf_node(next_loop); - assert(first->type == nir_cf_node_block); - nir_block *first_block = nir_cf_node_as_block(first); - - link_blocks(block, first_block, NULL); - insert_phi_undef(first_block, block); - } - } -} - -static nir_block * -split_block_end(nir_block *block) -{ - nir_block *new_block = nir_block_create(ralloc_parent(block)); - new_block->cf_node.parent = block->cf_node.parent; - exec_node_insert_after(&block->cf_node.node, &new_block->cf_node.node); - - if (block_ends_in_jump(block)) { - /* Figure out what successor block would've had if it didn't have a jump - * instruction, and make new_block have that successor. - */ - block_add_normal_succs(new_block); - } else { - move_successors(block, new_block); - } - - return new_block; -} - -static nir_block * -split_block_before_instr(nir_instr *instr) -{ - assert(instr->type != nir_instr_type_phi); - nir_block *new_block = split_block_beginning(instr->block); - - nir_foreach_instr_safe(instr->block, cur_instr) { - if (cur_instr == instr) - break; - - exec_node_remove(&cur_instr->node); - cur_instr->block = new_block; - exec_list_push_tail(&new_block->instr_list, &cur_instr->node); - } - - return new_block; -} - -/* Splits a basic block at the point specified by the cursor. The "before" and - * "after" arguments are filled out with the blocks resulting from the split - * if non-NULL. Note that the "beginning" of the block is actually interpreted - * as before the first non-phi instruction, and it's illegal to split a block - * before a phi instruction. 
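- * For example, splitting after the last instruction of a block is lowered
- * to split_block_end(), while splitting after an instruction in the middle
- * of a block is handled as splitting before the following instruction.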
- */ - -static void -split_block_cursor(nir_cursor cursor, - nir_block **_before, nir_block **_after) -{ - nir_block *before, *after; - switch (cursor.option) { - case nir_cursor_before_block: - after = cursor.block; - before = split_block_beginning(cursor.block); - break; - - case nir_cursor_after_block: - before = cursor.block; - after = split_block_end(cursor.block); - break; - - case nir_cursor_before_instr: - after = cursor.instr->block; - before = split_block_before_instr(cursor.instr); - break; - - case nir_cursor_after_instr: - /* We lower this to split_block_before_instr() so that we can keep the - * after-a-jump-instr case contained to split_block_end(). - */ - if (nir_instr_is_last(cursor.instr)) { - before = cursor.instr->block; - after = split_block_end(cursor.instr->block); - } else { - after = cursor.instr->block; - before = split_block_before_instr(nir_instr_next(cursor.instr)); - } - break; - - default: - unreachable("not reached"); - } - - if (_before) - *_before = before; - if (_after) - *_after = after; -} - -/** - * Inserts a non-basic block between two basic blocks and links them together. - */ - -static void -insert_non_block(nir_block *before, nir_cf_node *node, nir_block *after) -{ - node->parent = before->cf_node.parent; - exec_node_insert_after(&before->cf_node.node, &node->node); - link_block_to_non_block(before, node); - link_non_block_to_block(node, after); -} - -/* walk up the control flow tree to find the innermost enclosed loop */ -static nir_loop * -nearest_loop(nir_cf_node *node) -{ - while (node->type != nir_cf_node_loop) { - node = node->parent; - } - - return nir_cf_node_as_loop(node); -} - -/* - * update the CFG after a jump instruction has been added to the end of a block - */ - -void -nir_handle_add_jump(nir_block *block) -{ - nir_instr *instr = nir_block_last_instr(block); - nir_jump_instr *jump_instr = nir_instr_as_jump(instr); - - unlink_block_successors(block); - - nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); - nir_metadata_preserve(impl, nir_metadata_none); - - if (jump_instr->type == nir_jump_break || - jump_instr->type == nir_jump_continue) { - nir_loop *loop = nearest_loop(&block->cf_node); - - if (jump_instr->type == nir_jump_continue) { - nir_cf_node *first_node = nir_loop_first_cf_node(loop); - assert(first_node->type == nir_cf_node_block); - nir_block *first_block = nir_cf_node_as_block(first_node); - link_blocks(block, first_block, NULL); - } else { - nir_cf_node *after = nir_cf_node_next(&loop->cf_node); - assert(after->type == nir_cf_node_block); - nir_block *after_block = nir_cf_node_as_block(after); - link_blocks(block, after_block, NULL); - - /* If we inserted a fake link, remove it */ - nir_cf_node *last = nir_loop_last_cf_node(loop); - assert(last->type == nir_cf_node_block); - nir_block *last_block = nir_cf_node_as_block(last); - if (last_block->successors[1] != NULL) - unlink_blocks(last_block, after_block); - } - } else { - assert(jump_instr->type == nir_jump_return); - link_blocks(block, impl->end_block, NULL); - } -} - -static void -remove_phi_src(nir_block *block, nir_block *pred) -{ - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - nir_foreach_phi_src_safe(phi, src) { - if (src->pred == pred) { - list_del(&src->src.use_link); - exec_node_remove(&src->node); - } - } - } -} - -/* Removes the successor of a block with a jump, and inserts a fake edge for - * infinite loops. 
Note that the jump to be eliminated may be free-floating. - */ - -static void -unlink_jump(nir_block *block, nir_jump_type type, bool add_normal_successors) -{ - nir_block *next = block->successors[0]; - - if (block->successors[0]) - remove_phi_src(block->successors[0], block); - if (block->successors[1]) - remove_phi_src(block->successors[1], block); - - unlink_block_successors(block); - if (add_normal_successors) - block_add_normal_succs(block); - - /* If we've just removed a break, and the block we were jumping to (after - * the loop) now has zero predecessors, we've created a new infinite loop. - * - * NIR doesn't allow blocks (other than the start block) to have zero - * predecessors. In particular, dominance assumes all blocks are reachable. - * So, we insert a "fake link" by making successors[1] point after the loop. - * - * Note that we have to do this after unlinking/recreating the block's - * successors. If we removed a "break" at the end of the loop, then - * block == last_block, so block->successors[0] would already be "next", - * and adding a fake link would create two identical successors. Doing - * this afterward works, as we'll have changed block->successors[0] to - * be the top of the loop. - */ - if (type == nir_jump_break && next->predecessors->entries == 0) { - nir_loop *loop = - nir_cf_node_as_loop(nir_cf_node_prev(&next->cf_node)); - - /* insert fake link */ - nir_cf_node *last = nir_loop_last_cf_node(loop); - assert(last->type == nir_cf_node_block); - nir_block *last_block = nir_cf_node_as_block(last); - - last_block->successors[1] = next; - block_add_pred(next, last_block); - } -} - -void -nir_handle_remove_jump(nir_block *block, nir_jump_type type) -{ - unlink_jump(block, type, true); - - nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); - nir_metadata_preserve(impl, nir_metadata_none); -} - -static void -update_if_uses(nir_cf_node *node) -{ - if (node->type != nir_cf_node_if) - return; - - nir_if *if_stmt = nir_cf_node_as_if(node); - - if_stmt->condition.parent_if = if_stmt; - if (if_stmt->condition.is_ssa) { - list_addtail(&if_stmt->condition.use_link, - &if_stmt->condition.ssa->if_uses); - } else { - list_addtail(&if_stmt->condition.use_link, - &if_stmt->condition.reg.reg->if_uses); - } -} - -/** - * Stitch two basic blocks together into one. The aggregate must have the same - * predecessors as the first and the same successors as the second. - */ - -static void -stitch_blocks(nir_block *before, nir_block *after) -{ - /* - * We move after into before, so we have to deal with up to 2 successors vs. - * possibly a large number of predecessors. - * - * TODO: special case when before is empty and after isn't? 
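- *
- * As a rough sketch of the non-jump case: the successors of "after" are
- * moved onto "before", every instruction in "after" is reparented to
- * "before", and the two instruction lists are appended.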
- */ - - if (block_ends_in_jump(before)) { - assert(exec_list_is_empty(&after->instr_list)); - if (after->successors[0]) - remove_phi_src(after->successors[0], after); - if (after->successors[1]) - remove_phi_src(after->successors[1], after); - unlink_block_successors(after); - exec_node_remove(&after->cf_node.node); - } else { - move_successors(after, before); - - foreach_list_typed(nir_instr, instr, node, &after->instr_list) { - instr->block = before; - } - - exec_list_append(&before->instr_list, &after->instr_list); - exec_node_remove(&after->cf_node.node); - } -} - -void -nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node) -{ - nir_block *before, *after; - - split_block_cursor(cursor, &before, &after); - - if (node->type == nir_cf_node_block) { - nir_block *block = nir_cf_node_as_block(node); - exec_node_insert_after(&before->cf_node.node, &block->cf_node.node); - block->cf_node.parent = before->cf_node.parent; - /* stitch_blocks() assumes that any block that ends with a jump has - * already been setup with the correct successors, so we need to set - * up jumps here as the block is being inserted. - */ - if (block_ends_in_jump(block)) - nir_handle_add_jump(block); - - stitch_blocks(block, after); - stitch_blocks(before, block); - } else { - update_if_uses(node); - insert_non_block(before, node, after); - } -} - -static bool -replace_ssa_def_uses(nir_ssa_def *def, void *void_impl) -{ - nir_function_impl *impl = void_impl; - void *mem_ctx = ralloc_parent(impl); - - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(mem_ctx, def->num_components); - nir_instr_insert_before_cf_list(&impl->body, &undef->instr); - nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(&undef->def)); - return true; -} - -static void -cleanup_cf_node(nir_cf_node *node, nir_function_impl *impl) -{ - switch (node->type) { - case nir_cf_node_block: { - nir_block *block = nir_cf_node_as_block(node); - /* We need to walk the instructions and clean up defs/uses */ - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_jump) { - nir_jump_type jump_type = nir_instr_as_jump(instr)->type; - unlink_jump(block, jump_type, false); - } else { - nir_foreach_ssa_def(instr, replace_ssa_def_uses, impl); - nir_instr_remove(instr); - } - } - break; - } - - case nir_cf_node_if: { - nir_if *if_stmt = nir_cf_node_as_if(node); - foreach_list_typed(nir_cf_node, child, node, &if_stmt->then_list) - cleanup_cf_node(child, impl); - foreach_list_typed(nir_cf_node, child, node, &if_stmt->else_list) - cleanup_cf_node(child, impl); - - list_del(&if_stmt->condition.use_link); - break; - } - - case nir_cf_node_loop: { - nir_loop *loop = nir_cf_node_as_loop(node); - foreach_list_typed(nir_cf_node, child, node, &loop->body) - cleanup_cf_node(child, impl); - break; - } - case nir_cf_node_function: { - nir_function_impl *impl = nir_cf_node_as_function(node); - foreach_list_typed(nir_cf_node, child, node, &impl->body) - cleanup_cf_node(child, impl); - break; - } - default: - unreachable("Invalid CF node type"); - } -} - -void -nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end) -{ - nir_block *block_begin, *block_end, *block_before, *block_after; - - /* In the case where begin points to an instruction in some basic block and - * end points to the end of the same basic block, we rely on the fact that - * splitting on an instruction moves earlier instructions into a new basic - * block. 
If the later instructions were moved instead, then the end cursor - * would be pointing to the same place that begin used to point to, which - * is obviously not what we want. - */ - split_block_cursor(begin, &block_before, &block_begin); - split_block_cursor(end, &block_end, &block_after); - - extracted->impl = nir_cf_node_get_function(&block_begin->cf_node); - exec_list_make_empty(&extracted->list); - - /* Dominance and other block-related information is toast. */ - nir_metadata_preserve(extracted->impl, nir_metadata_none); - - nir_cf_node *cf_node = &block_begin->cf_node; - nir_cf_node *cf_node_end = &block_end->cf_node; - while (true) { - nir_cf_node *next = nir_cf_node_next(cf_node); - - exec_node_remove(&cf_node->node); - cf_node->parent = NULL; - exec_list_push_tail(&extracted->list, &cf_node->node); - - if (cf_node == cf_node_end) - break; - - cf_node = next; - } - - stitch_blocks(block_before, block_after); -} - -void -nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor) -{ - nir_block *before, *after; - - split_block_cursor(cursor, &before, &after); - - foreach_list_typed_safe(nir_cf_node, node, node, &cf_list->list) { - exec_node_remove(&node->node); - node->parent = before->cf_node.parent; - exec_node_insert_node_before(&after->cf_node.node, &node->node); - } - - stitch_blocks(before, - nir_cf_node_as_block(nir_cf_node_next(&before->cf_node))); - stitch_blocks(nir_cf_node_as_block(nir_cf_node_prev(&after->cf_node)), - after); -} - -void -nir_cf_delete(nir_cf_list *cf_list) -{ - foreach_list_typed(nir_cf_node, node, node, &cf_list->list) { - cleanup_cf_node(node, cf_list->impl); - } -} diff --git a/src/glsl/nir/nir_control_flow.h b/src/glsl/nir/nir_control_flow.h deleted file mode 100644 index b71382fc597..00000000000 --- a/src/glsl/nir/nir_control_flow.h +++ /dev/null @@ -1,162 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir.h" - -#pragma once - -#ifdef __cplusplus -extern "C" { -#endif - -/** NIR Control Flow Modification - * - * This file contains various API's that make modifying control flow in NIR, - * while maintaining the invariants checked by the validator, much easier. - * There are two parts to this: - * - * 1. Inserting control flow (if's and loops) in various places, for creating - * IR either from scratch or as part of some lowering pass. - * 2. 
Taking existing pieces of the IR and either moving them around or - * deleting them. - */ - -/** Control flow insertion. */ - -/** puts a control flow node where the cursor is */ -void nir_cf_node_insert(nir_cursor cursor, nir_cf_node *node); - -/** puts a control flow node immediately after another control flow node */ -static inline void -nir_cf_node_insert_after(nir_cf_node *node, nir_cf_node *after) -{ - nir_cf_node_insert(nir_after_cf_node(node), after); -} - -/** puts a control flow node immediately before another control flow node */ -static inline void -nir_cf_node_insert_before(nir_cf_node *node, nir_cf_node *before) -{ - nir_cf_node_insert(nir_before_cf_node(node), before); -} - -/** puts a control flow node at the beginning of a list from an if, loop, or function */ -static inline void -nir_cf_node_insert_begin(struct exec_list *list, nir_cf_node *node) -{ - nir_cf_node_insert(nir_before_cf_list(list), node); -} - -/** puts a control flow node at the end of a list from an if, loop, or function */ -static inline void -nir_cf_node_insert_end(struct exec_list *list, nir_cf_node *node) -{ - nir_cf_node_insert(nir_after_cf_list(list), node); -} - - -/** Control flow motion. - * - * These functions let you take a part of a control flow list (basically - * equivalent to a series of statement in GLSL) and "extract" it from the IR, - * so that it's a free-floating piece of IR that can be either re-inserted - * somewhere else or deleted entirely. A few notes on using it: - * - * 1. Phi nodes are considered attached to the piece of control flow that - * their sources come from. There are three places where phi nodes can - * occur, which are the three places where a block can have multiple - * predecessors: - * - * 1) After an if statement, if neither branch ends in a jump. - * 2) After a loop, if there are multiple break's. - * 3) At the beginning of a loop. - * - * For #1, the phi node is considered to be part of the if, and for #2 and - * #3 the phi node is considered to be part of the loop. This allows us to - * keep phi's intact, but it means that phi nodes cannot be separated from - * the control flow they come from. For example, extracting an if without - * extracting all the phi nodes after it is not allowed, and neither is - * extracting only some of the phi nodes at the beginning of a block. It - * also means that extracting from the beginning of a basic block actually - * means extracting from the first non-phi instruction, since there's no - * situation where extracting phi nodes without extracting what comes - * before them makes any sense. - * - * 2. Phi node sources are guaranteed to remain valid, meaning that they still - * correspond one-to-one with the predecessors of the basic block they're - * part of. In addition, the original sources will be preserved unless they - * correspond to a break or continue that was deleted. However, no attempt - * is made to ensure that SSA form is maintained. In particular, it is - * *not* guaranteed that definitions of SSA values will dominate all their - * uses after all is said and done. Either the caller must ensure that this - * is the case, or it must insert extra phi nodes to restore SSA. - * - * 3. It is invalid to move a piece of IR with a break/continue outside of the - * loop it references. Doing this will result in invalid - * successors/predecessors and phi node sources. - * - * 4. It is invalid to move a piece of IR from one function implementation to - * another. - * - * 5. 
Extracting a control flow list will leave lots of dangling references to - * and from other pieces of the IR. It also leaves things in a not 100% - * consistent state. This means that some things (e.g. inserting - * instructions) might not work reliably on the extracted control flow. It - * also means that extracting control flow without re-inserting it or - * deleting it is a Bad Thing (tm). - */ - -typedef struct { - struct exec_list list; - nir_function_impl *impl; /* for cleaning up if the list is deleted */ -} nir_cf_list; - -void nir_cf_extract(nir_cf_list *extracted, nir_cursor begin, nir_cursor end); - -void nir_cf_reinsert(nir_cf_list *cf_list, nir_cursor cursor); - -void nir_cf_delete(nir_cf_list *cf_list); - -static inline void -nir_cf_list_extract(nir_cf_list *extracted, struct exec_list *cf_list) -{ - nir_cf_extract(extracted, nir_before_cf_list(cf_list), - nir_after_cf_list(cf_list)); -} - -/** removes a control flow node, doing any cleanup necessary */ -static inline void -nir_cf_node_remove(nir_cf_node *node) -{ - nir_cf_list list; - nir_cf_extract(&list, nir_before_cf_node(node), nir_after_cf_node(node)); - nir_cf_delete(&list); -} - -#ifdef __cplusplus -} -#endif diff --git a/src/glsl/nir/nir_control_flow_private.h b/src/glsl/nir/nir_control_flow_private.h deleted file mode 100644 index f32b57a8cef..00000000000 --- a/src/glsl/nir/nir_control_flow_private.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir_control_flow.h" - -#pragma once - -/* Internal control-flow modification functions used when inserting/removing - * instructions. 
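- * For example, code that appends a nir_jump_instr to a block is expected to
- * follow up with nir_handle_add_jump() so that the block's successors are
- * rewritten to match the new jump.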
- */ - -void nir_handle_add_jump(nir_block *block); -void nir_handle_remove_jump(nir_block *block, nir_jump_type type); diff --git a/src/glsl/nir/nir_dominance.c b/src/glsl/nir/nir_dominance.c deleted file mode 100644 index b345b85e8a0..00000000000 --- a/src/glsl/nir/nir_dominance.c +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir.h" - -/* - * Implements the algorithms for computing the dominance tree and the - * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper, - * Harvey, and Kennedy. - */ - -typedef struct { - nir_function_impl *impl; - bool progress; -} dom_state; - -static bool -init_block_cb(nir_block *block, void *_state) -{ - dom_state *state = (dom_state *) _state; - if (block == nir_start_block(state->impl)) - block->imm_dom = block; - else - block->imm_dom = NULL; - block->num_dom_children = 0; - - struct set_entry *entry; - set_foreach(block->dom_frontier, entry) { - _mesa_set_remove(block->dom_frontier, entry); - } - - return true; -} - -static nir_block * -intersect(nir_block *b1, nir_block *b2) -{ - while (b1 != b2) { - /* - * Note, the comparisons here are the opposite of what the paper says - * because we index blocks from beginning -> end (i.e. reverse - * post-order) instead of post-order like they assume. 
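- *
- * For example, the start block has the smallest index and dominates every
- * block, and each step up the idom chain strictly decreases an index, so
- * the walk below always terminates at a common dominator.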
- */ - while (b1->index > b2->index) - b1 = b1->imm_dom; - while (b2->index > b1->index) - b2 = b2->imm_dom; - } - - return b1; -} - -static bool -calc_dominance_cb(nir_block *block, void *_state) -{ - dom_state *state = (dom_state *) _state; - if (block == nir_start_block(state->impl)) - return true; - - nir_block *new_idom = NULL; - struct set_entry *entry; - set_foreach(block->predecessors, entry) { - nir_block *pred = (nir_block *) entry->key; - - if (pred->imm_dom) { - if (new_idom) - new_idom = intersect(pred, new_idom); - else - new_idom = pred; - } - } - - assert(new_idom); - if (block->imm_dom != new_idom) { - block->imm_dom = new_idom; - state->progress = true; - } - - return true; -} - -static bool -calc_dom_frontier_cb(nir_block *block, void *state) -{ - (void) state; - - if (block->predecessors->entries > 1) { - struct set_entry *entry; - set_foreach(block->predecessors, entry) { - nir_block *runner = (nir_block *) entry->key; - while (runner != block->imm_dom) { - _mesa_set_add(runner->dom_frontier, block); - runner = runner->imm_dom; - } - } - } - - return true; -} - -/* - * Compute each node's children in the dominance tree from the immediate - * dominator information. We do this in three stages: - * - * 1. Calculate the number of children each node has - * 2. Allocate arrays, setting the number of children to 0 again - * 3. For each node, add itself to its parent's list of children, using - * num_dom_children as an index - at the end of this step, num_dom_children - * for each node will be the same as it was at the end of step #1. - */ - -static bool -block_count_children(nir_block *block, void *state) -{ - (void) state; - - if (block->imm_dom) - block->imm_dom->num_dom_children++; - - return true; -} - -static bool -block_alloc_children(nir_block *block, void *state) -{ - void *mem_ctx = state; - - block->dom_children = ralloc_array(mem_ctx, nir_block *, - block->num_dom_children); - block->num_dom_children = 0; - - return true; -} - -static bool -block_add_child(nir_block *block, void *state) -{ - (void) state; - - if (block->imm_dom) - block->imm_dom->dom_children[block->imm_dom->num_dom_children++] = block; - - return true; -} - -static void -calc_dom_children(nir_function_impl* impl) -{ - void *mem_ctx = ralloc_parent(impl); - - nir_foreach_block(impl, block_count_children, NULL); - nir_foreach_block(impl, block_alloc_children, mem_ctx); - nir_foreach_block(impl, block_add_child, NULL); -} - -static void -calc_dfs_indicies(nir_block *block, unsigned *index) -{ - block->dom_pre_index = (*index)++; - - for (unsigned i = 0; i < block->num_dom_children; i++) - calc_dfs_indicies(block->dom_children[i], index); - - block->dom_post_index = (*index)++; -} - -void -nir_calc_dominance_impl(nir_function_impl *impl) -{ - if (impl->valid_metadata & nir_metadata_dominance) - return; - - nir_metadata_require(impl, nir_metadata_block_index); - - dom_state state; - state.impl = impl; - state.progress = true; - - nir_foreach_block(impl, init_block_cb, &state); - - while (state.progress) { - state.progress = false; - nir_foreach_block(impl, calc_dominance_cb, &state); - } - - nir_foreach_block(impl, calc_dom_frontier_cb, &state); - - nir_block *start_block = nir_start_block(impl); - start_block->imm_dom = NULL; - - calc_dom_children(impl); - - unsigned dfs_index = 0; - calc_dfs_indicies(start_block, &dfs_index); -} - -void -nir_calc_dominance(nir_shader *shader) -{ - nir_foreach_function(shader, function) { - if (function->impl) - nir_calc_dominance_impl(function->impl); - } -} - -/** - 
* Computes the least common ancestor of two blocks. If one of the blocks
- * is null, the other block is returned.
- */
-nir_block *
-nir_dominance_lca(nir_block *b1, nir_block *b2)
-{
-   if (b1 == NULL)
-      return b2;
-
-   if (b2 == NULL)
-      return b1;
-
-   assert(nir_cf_node_get_function(&b1->cf_node) ==
-          nir_cf_node_get_function(&b2->cf_node));
-
-   assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata &
-          nir_metadata_dominance);
-
-   return intersect(b1, b2);
-}
-
-/**
- * Returns true if parent dominates child
- */
-bool
-nir_block_dominates(nir_block *parent, nir_block *child)
-{
-   assert(nir_cf_node_get_function(&parent->cf_node) ==
-          nir_cf_node_get_function(&child->cf_node));
-
-   assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata &
-          nir_metadata_dominance);
-
-   return child->dom_pre_index >= parent->dom_pre_index &&
-          child->dom_post_index <= parent->dom_post_index;
-}
-
-static bool
-dump_block_dom(nir_block *block, void *state)
-{
-   FILE *fp = state;
-   if (block->imm_dom)
-      fprintf(fp, "\t%u -> %u\n", block->imm_dom->index, block->index);
-   return true;
-}
-
-void
-nir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp)
-{
-   fprintf(fp, "digraph doms_%s {\n", impl->function->name);
-   nir_foreach_block(impl, dump_block_dom, fp);
-   fprintf(fp, "}\n\n");
-}
-
-void
-nir_dump_dom_tree(nir_shader *shader, FILE *fp)
-{
-   nir_foreach_function(shader, function) {
-      if (function->impl)
-         nir_dump_dom_tree_impl(function->impl, fp);
-   }
-}
-
-static bool
-dump_block_dom_frontier(nir_block *block, void *state)
-{
-   FILE *fp = state;
-
-   fprintf(fp, "DF(%u) = {", block->index);
-   struct set_entry *entry;
-   set_foreach(block->dom_frontier, entry) {
-      nir_block *df = (nir_block *) entry->key;
-      fprintf(fp, "%u, ", df->index);
-   }
-   fprintf(fp, "}\n");
-   return true;
-}
-
-void
-nir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp)
-{
-   nir_foreach_block(impl, dump_block_dom_frontier, fp);
-}
-
-void
-nir_dump_dom_frontier(nir_shader *shader, FILE *fp)
-{
-   nir_foreach_function(shader, function) {
-      if (function->impl)
-         nir_dump_dom_frontier_impl(function->impl, fp);
-   }
-}
-
-static bool
-dump_block_succs(nir_block *block, void *state)
-{
-   FILE *fp = state;
-   if (block->successors[0])
-      fprintf(fp, "\t%u -> %u\n", block->index, block->successors[0]->index);
-   if (block->successors[1])
-      fprintf(fp, "\t%u -> %u\n", block->index, block->successors[1]->index);
-   return true;
-}
-
-void
-nir_dump_cfg_impl(nir_function_impl *impl, FILE *fp)
-{
-   fprintf(fp, "digraph cfg_%s {\n", impl->function->name);
-   nir_foreach_block(impl, dump_block_succs, fp);
-   fprintf(fp, "}\n\n");
-}
-
-void
-nir_dump_cfg(nir_shader *shader, FILE *fp)
-{
-   nir_foreach_function(shader, function) {
-      if (function->impl)
-         nir_dump_cfg_impl(function->impl, fp);
-   }
-}
diff --git a/src/glsl/nir/nir_from_ssa.c b/src/glsl/nir/nir_from_ssa.c
deleted file mode 100644
index 8bc9f24e406..00000000000
--- a/src/glsl/nir/nir_from_ssa.c
+++ /dev/null
@@ -1,805 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Jason Ekstrand (jason@jlekstrand.net)
- *
- */
-
-#include "nir.h"
-#include "nir_vla.h"
-
-/*
- * This file implements an out-of-SSA pass as described in "Revisiting
- * Out-of-SSA Translation for Correctness, Code Quality, and Efficiency" by
- * Boissinot et al.
- */
-
-struct from_ssa_state {
-   void *mem_ctx;
-   void *dead_ctx;
-   bool phi_webs_only;
-   struct hash_table *merge_node_table;
-   nir_instr *instr;
-   nir_function_impl *impl;
-};
-
-/* Returns true if a dominates b */
-static bool
-ssa_def_dominates(nir_ssa_def *a, nir_ssa_def *b)
-{
-   if (a->live_index == 0) {
-      /* SSA undefs always dominate */
-      return true;
-   } else if (b->live_index < a->live_index) {
-      return false;
-   } else if (a->parent_instr->block == b->parent_instr->block) {
-      return a->live_index <= b->live_index;
-   } else {
-      return nir_block_dominates(a->parent_instr->block,
-                                 b->parent_instr->block);
-   }
-}
-
-
-/* The following data structure, which I have named merge_set, is a way of
- * representing a set of non-interfering registers. This is
- * based on the concept of a "dominance forest" presented in "Fast Copy
- * Coalescing and Live-Range Identification" by Budimlic et al. but the
- * implementation concept is taken from "Revisiting Out-of-SSA Translation
- * for Correctness, Code Quality, and Efficiency" by Boissinot et al.
- *
- * Each SSA definition is associated with a merge_node and the association
- * is represented by a combination of a hash table and the "def" parameter
- * in the merge_node structure. The merge_set stores a linked list of
- * merge_node's in dominance order of the ssa definitions. (Since the
- * liveness analysis pass indexes the SSA values in dominance order for us,
- * this is an easy thing to keep up.) It is assumed that no pair of the
- * nodes in a given set interfere. Merging two sets or checking for
- * interference can be done in a single linear-time merge-sort walk of the
- * two lists of nodes.
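- *
- * As an illustration, a phi and all of its sources typically end up in a
- * single merge set; because each set stays sorted by live_index, a parallel
- * walk of two sets visits their defs in dominance order exactly once.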
- */ -struct merge_set; - -typedef struct { - struct exec_node node; - struct merge_set *set; - nir_ssa_def *def; -} merge_node; - -typedef struct merge_set { - struct exec_list nodes; - unsigned size; - nir_register *reg; -} merge_set; - -#if 0 -static void -merge_set_dump(merge_set *set, FILE *fp) -{ - nir_ssa_def *dom[set->size]; - int dom_idx = -1; - - foreach_list_typed(merge_node, node, node, &set->nodes) { - while (dom_idx >= 0 && !ssa_def_dominates(dom[dom_idx], node->def)) - dom_idx--; - - for (int i = 0; i <= dom_idx; i++) - fprintf(fp, " "); - - if (node->def->name) - fprintf(fp, "ssa_%d /* %s */\n", node->def->index, node->def->name); - else - fprintf(fp, "ssa_%d\n", node->def->index); - - dom[++dom_idx] = node->def; - } -} -#endif - -static merge_node * -get_merge_node(nir_ssa_def *def, struct from_ssa_state *state) -{ - struct hash_entry *entry = - _mesa_hash_table_search(state->merge_node_table, def); - if (entry) - return entry->data; - - merge_set *set = ralloc(state->dead_ctx, merge_set); - exec_list_make_empty(&set->nodes); - set->size = 1; - set->reg = NULL; - - merge_node *node = ralloc(state->dead_ctx, merge_node); - node->set = set; - node->def = def; - exec_list_push_head(&set->nodes, &node->node); - - _mesa_hash_table_insert(state->merge_node_table, def, node); - - return node; -} - -static bool -merge_nodes_interfere(merge_node *a, merge_node *b) -{ - return nir_ssa_defs_interfere(a->def, b->def); -} - -/* Merges b into a */ -static merge_set * -merge_merge_sets(merge_set *a, merge_set *b) -{ - struct exec_node *an = exec_list_get_head(&a->nodes); - struct exec_node *bn = exec_list_get_head(&b->nodes); - while (!exec_node_is_tail_sentinel(bn)) { - merge_node *a_node = exec_node_data(merge_node, an, node); - merge_node *b_node = exec_node_data(merge_node, bn, node); - - if (exec_node_is_tail_sentinel(an) || - a_node->def->live_index > b_node->def->live_index) { - struct exec_node *next = bn->next; - exec_node_remove(bn); - exec_node_insert_node_before(an, bn); - exec_node_data(merge_node, bn, node)->set = a; - bn = next; - } else { - an = an->next; - } - } - - a->size += b->size; - b->size = 0; - - return a; -} - -/* Checks for any interference between two merge sets - * - * This is an implementation of Algorithm 2 in "Revisiting Out-of-SSA - * Translation for Correctness, Code Quality, and Efficiency" by - * Boissinot et. al. 
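- *
- * The dom stack below holds the chain of already-visited defs that dominate
- * the current one, so each def is only tested against its nearest
- * dominating ancestor on that stack rather than against every other def,
- * which is what keeps the walk linear.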
- */
-static bool
-merge_sets_interfere(merge_set *a, merge_set *b)
-{
-   NIR_VLA(merge_node *, dom, a->size + b->size);
-   int dom_idx = -1;
-
-   struct exec_node *an = exec_list_get_head(&a->nodes);
-   struct exec_node *bn = exec_list_get_head(&b->nodes);
-   while (!exec_node_is_tail_sentinel(an) ||
-          !exec_node_is_tail_sentinel(bn)) {
-
-      merge_node *current;
-      if (exec_node_is_tail_sentinel(an)) {
-         current = exec_node_data(merge_node, bn, node);
-         bn = bn->next;
-      } else if (exec_node_is_tail_sentinel(bn)) {
-         current = exec_node_data(merge_node, an, node);
-         an = an->next;
-      } else {
-         merge_node *a_node = exec_node_data(merge_node, an, node);
-         merge_node *b_node = exec_node_data(merge_node, bn, node);
-
-         if (a_node->def->live_index <= b_node->def->live_index) {
-            current = a_node;
-            an = an->next;
-         } else {
-            current = b_node;
-            bn = bn->next;
-         }
-      }
-
-      while (dom_idx >= 0 &&
-             !ssa_def_dominates(dom[dom_idx]->def, current->def))
-         dom_idx--;
-
-      if (dom_idx >= 0 && merge_nodes_interfere(current, dom[dom_idx]))
-         return true;
-
-      dom[++dom_idx] = current;
-   }
-
-   return false;
-}
-
-static bool
-add_parallel_copy_to_end_of_block(nir_block *block, void *void_state)
-{
-   struct from_ssa_state *state = void_state;
-
-   bool need_end_copy = false;
-   if (block->successors[0]) {
-      nir_instr *instr = nir_block_first_instr(block->successors[0]);
-      if (instr && instr->type == nir_instr_type_phi)
-         need_end_copy = true;
-   }
-
-   if (block->successors[1]) {
-      nir_instr *instr = nir_block_first_instr(block->successors[1]);
-      if (instr && instr->type == nir_instr_type_phi)
-         need_end_copy = true;
-   }
-
-   if (need_end_copy) {
-      /* If one of our successors has at least one phi node, we need to
-       * create a parallel copy at the end of the block but before the jump
-       * (if there is one).
-       */
-      nir_parallel_copy_instr *pcopy =
-         nir_parallel_copy_instr_create(state->dead_ctx);
-
-      nir_instr_insert(nir_after_block_before_jump(block), &pcopy->instr);
-   }
-
-   return true;
-}
-
-static nir_parallel_copy_instr *
-get_parallel_copy_at_end_of_block(nir_block *block)
-{
-   nir_instr *last_instr = nir_block_last_instr(block);
-   if (last_instr == NULL)
-      return NULL;
-
-   /* The last instruction may be a jump in which case the parallel copy is
-    * right before it.
-    */
-   if (last_instr->type == nir_instr_type_jump)
-      last_instr = nir_instr_prev(last_instr);
-
-   if (last_instr && last_instr->type == nir_instr_type_parallel_copy)
-      return nir_instr_as_parallel_copy(last_instr);
-   else
-      return NULL;
-}
-
-/** Isolate phi nodes with parallel copies
- *
- * In order to solve the dependency problems with the sources and
- * destinations of phi nodes, we first isolate them by adding parallel
- * copies to the beginnings and ends of basic blocks. For every block with
- * phi nodes, we add a parallel copy immediately following the last phi
- * node that copies the destinations of all of the phi nodes to new SSA
- * values. We also add a parallel copy to the end of every block that has
- * a successor with phi nodes that, for each phi node in each successor,
- * copies the corresponding source of the phi node and adjusts the phi to
- * use the destination of the parallel copy.
- *
- * In SSA form, each value has exactly one definition. What this does is
- * ensure that each value used in a phi also has exactly one use. The
- * destinations of phis are only used by the parallel copy immediately
- * following the phi nodes and, thanks to the parallel copy at the end of
- * the predecessor block, the sources of phi nodes are the only use of
- * that value. This allows us to immediately assign all the sources and
- * destinations of any given phi node to the same register without worrying
- * about interference at all. We do coalescing to get rid of the parallel
- * copies where possible.
- *
- * Before this pass can be run, we have to iterate over the blocks with
- * add_parallel_copy_to_end_of_block to ensure that the parallel copies at
- * the ends of blocks exist. We can create the ones at the beginnings as
- * we go, but the ones at the ends of blocks need to be created ahead of
- * time because of potential back-edges in the CFG.
- */
-static bool
-isolate_phi_nodes_block(nir_block *block, void *void_state)
-{
-   struct from_ssa_state *state = void_state;
-
-   nir_instr *last_phi_instr = NULL;
-   nir_foreach_instr(block, instr) {
-      /* Phi nodes only ever come at the start of a block */
-      if (instr->type != nir_instr_type_phi)
-         break;
-
-      last_phi_instr = instr;
-   }
-
-   /* If we don't have any phi's, then there's nothing for us to do. */
-   if (last_phi_instr == NULL)
-      return true;
-
-   /* If we have phi nodes, we need to create a parallel copy at the
-    * start of this block but after the phi nodes.
-    */
-   nir_parallel_copy_instr *block_pcopy =
-      nir_parallel_copy_instr_create(state->dead_ctx);
-   nir_instr_insert_after(last_phi_instr, &block_pcopy->instr);
-
-   nir_foreach_instr(block, instr) {
-      /* Phi nodes only ever come at the start of a block */
-      if (instr->type != nir_instr_type_phi)
-         break;
-
-      nir_phi_instr *phi = nir_instr_as_phi(instr);
-      assert(phi->dest.is_ssa);
-      nir_foreach_phi_src(phi, src) {
-         nir_parallel_copy_instr *pcopy =
-            get_parallel_copy_at_end_of_block(src->pred);
-         assert(pcopy);
-
-         nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
-                                                  nir_parallel_copy_entry);
-         nir_ssa_dest_init(&pcopy->instr, &entry->dest,
-                           phi->dest.ssa.num_components, src->src.ssa->name);
-         exec_list_push_tail(&pcopy->entries, &entry->node);
-
-         assert(src->src.is_ssa);
-         nir_instr_rewrite_src(&pcopy->instr, &entry->src, src->src);
-
-         nir_instr_rewrite_src(&phi->instr, &src->src,
-                               nir_src_for_ssa(&entry->dest.ssa));
-      }
-
-      nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
-                                               nir_parallel_copy_entry);
-      nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
-                        phi->dest.ssa.num_components, phi->dest.ssa.name);
-      exec_list_push_tail(&block_pcopy->entries, &entry->node);
-
-      nir_ssa_def_rewrite_uses(&phi->dest.ssa,
-                               nir_src_for_ssa(&entry->dest.ssa));
-
-      nir_instr_rewrite_src(&block_pcopy->instr, &entry->src,
-                            nir_src_for_ssa(&phi->dest.ssa));
-   }
-
-   return true;
-}
-
-static bool
-coalesce_phi_nodes_block(nir_block *block, void *void_state)
-{
-   struct from_ssa_state *state = void_state;
-
-   nir_foreach_instr(block, instr) {
-      /* Phi nodes only ever come at the start of a block */
-      if (instr->type != nir_instr_type_phi)
-         break;
-
-      nir_phi_instr *phi = nir_instr_as_phi(instr);
-
-      assert(phi->dest.is_ssa);
-      merge_node *dest_node = get_merge_node(&phi->dest.ssa, state);
-
-      nir_foreach_phi_src(phi, src) {
-         assert(src->src.is_ssa);
-         merge_node *src_node = get_merge_node(src->src.ssa, state);
-         if (src_node->set != dest_node->set)
-            merge_merge_sets(dest_node->set, src_node->set);
-      }
-   }
-
-   return true;
-}
-
-static void
-aggressive_coalesce_parallel_copy(nir_parallel_copy_instr *pcopy,
-                                  struct from_ssa_state *state)
-{
-   nir_foreach_parallel_copy_entry(pcopy, entry) {
-      if
(!entry->src.is_ssa) - continue; - - /* Since load_const instructions are SSA only, we can't replace their - * destinations with registers and, therefore, can't coalesce them. - */ - if (entry->src.ssa->parent_instr->type == nir_instr_type_load_const) - continue; - - /* Don't try and coalesce these */ - if (entry->dest.ssa.num_components != entry->src.ssa->num_components) - continue; - - merge_node *src_node = get_merge_node(entry->src.ssa, state); - merge_node *dest_node = get_merge_node(&entry->dest.ssa, state); - - if (src_node->set == dest_node->set) - continue; - - if (!merge_sets_interfere(src_node->set, dest_node->set)) - merge_merge_sets(src_node->set, dest_node->set); - } -} - -static bool -aggressive_coalesce_block(nir_block *block, void *void_state) -{ - struct from_ssa_state *state = void_state; - - nir_parallel_copy_instr *start_pcopy = NULL; - nir_foreach_instr(block, instr) { - /* Phi nodes only ever come at the start of a block */ - if (instr->type != nir_instr_type_phi) { - if (instr->type != nir_instr_type_parallel_copy) - break; /* The parallel copy must be right after the phis */ - - start_pcopy = nir_instr_as_parallel_copy(instr); - - aggressive_coalesce_parallel_copy(start_pcopy, state); - - break; - } - } - - nir_parallel_copy_instr *end_pcopy = - get_parallel_copy_at_end_of_block(block); - - if (end_pcopy && end_pcopy != start_pcopy) - aggressive_coalesce_parallel_copy(end_pcopy, state); - - return true; -} - -static bool -rewrite_ssa_def(nir_ssa_def *def, void *void_state) -{ - struct from_ssa_state *state = void_state; - nir_register *reg; - - struct hash_entry *entry = - _mesa_hash_table_search(state->merge_node_table, def); - if (entry) { - /* In this case, we're part of a phi web. Use the web's register. */ - merge_node *node = (merge_node *)entry->data; - - /* If it doesn't have a register yet, create one. Note that all of - * the things in the merge set should be the same so it doesn't - * matter which node's definition we use. - */ - if (node->set->reg == NULL) { - node->set->reg = nir_local_reg_create(state->impl); - node->set->reg->name = def->name; - node->set->reg->num_components = def->num_components; - node->set->reg->num_array_elems = 0; - } - - reg = node->set->reg; - } else { - if (state->phi_webs_only) - return true; - - /* We leave load_const SSA values alone. They act as immediates to - * the backend. If it got coalesced into a phi, that's ok. - */ - if (def->parent_instr->type == nir_instr_type_load_const) - return true; - - reg = nir_local_reg_create(state->impl); - reg->name = def->name; - reg->num_components = def->num_components; - reg->num_array_elems = 0; - } - - nir_ssa_def_rewrite_uses(def, nir_src_for_reg(reg)); - assert(list_empty(&def->uses) && list_empty(&def->if_uses)); - - if (def->parent_instr->type == nir_instr_type_ssa_undef) { - /* If it's an ssa_undef instruction, remove it since we know we just got - * rid of all its uses. - */ - nir_instr *parent_instr = def->parent_instr; - nir_instr_remove(parent_instr); - ralloc_steal(state->dead_ctx, parent_instr); - return true; - } - - assert(def->parent_instr->type != nir_instr_type_load_const); - - /* At this point we know a priori that this SSA def is part of a - * nir_dest. We can use exec_node_data to get the dest pointer. - */ - nir_dest *dest = exec_node_data(nir_dest, def, ssa); - - nir_instr_rewrite_dest(state->instr, dest, nir_dest_for_reg(reg)); - - return true; -} - -/* Resolves ssa definitions to registers. While we're at it, we also - * remove phi nodes. 
- */ -static bool -resolve_registers_block(nir_block *block, void *void_state) -{ - struct from_ssa_state *state = void_state; - - nir_foreach_instr_safe(block, instr) { - state->instr = instr; - nir_foreach_ssa_def(instr, rewrite_ssa_def, state); - - if (instr->type == nir_instr_type_phi) { - nir_instr_remove(instr); - ralloc_steal(state->dead_ctx, instr); - } - } - state->instr = NULL; - - return true; -} - -static void -emit_copy(nir_parallel_copy_instr *pcopy, nir_src src, nir_src dest_src, - void *mem_ctx) -{ - assert(!dest_src.is_ssa && - dest_src.reg.indirect == NULL && - dest_src.reg.base_offset == 0); - - if (src.is_ssa) - assert(src.ssa->num_components >= dest_src.reg.reg->num_components); - else - assert(src.reg.reg->num_components >= dest_src.reg.reg->num_components); - - nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov); - nir_src_copy(&mov->src[0].src, &src, mov); - mov->dest.dest = nir_dest_for_reg(dest_src.reg.reg); - mov->dest.write_mask = (1 << dest_src.reg.reg->num_components) - 1; - - nir_instr_insert_before(&pcopy->instr, &mov->instr); -} - -/* Resolves a single parallel copy operation into a sequence of mov's - * - * This is based on Algorithm 1 from "Revisiting Out-of-SSA Translation for - * Correctness, Code Quality, and Efficiency" by Boissinot et. al.. - * However, I never got the algorithm to work as written, so this version - * is slightly modified. - * - * The algorithm works by playing this little shell game with the values. - * We start by recording where every source value is and which source value - * each destination value should receive. We then grab any copy whose - * destination is "empty", i.e. not used as a source, and do the following: - * - Find where its source value currently lives - * - Emit the move instruction - * - Set the location of the source value to the destination - * - Mark the location containing the source value - * - Mark the destination as no longer needing to be copied - * - * When we run out of "empty" destinations, we have a cycle and so we - * create a temporary register, copy to that register, and mark the value - * we copied as living in that temporary. Now, the cycle is broken, so we - * can continue with the above steps. - */ -static void -resolve_parallel_copy(nir_parallel_copy_instr *pcopy, - struct from_ssa_state *state) -{ - unsigned num_copies = 0; - nir_foreach_parallel_copy_entry(pcopy, entry) { - /* Sources may be SSA */ - if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg) - continue; - - num_copies++; - } - - if (num_copies == 0) { - /* Hooray, we don't need any copies! */ - nir_instr_remove(&pcopy->instr); - return; - } - - /* The register/source corresponding to the given index */ - NIR_VLA_ZERO(nir_src, values, num_copies * 2); - - /* The current location of a given piece of data. We will use -1 for "null" */ - NIR_VLA_FILL(int, loc, num_copies * 2, -1); - - /* The piece of data that the given piece of data is to be copied from. 
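The shell game described above is easier to follow in isolation. Below is a freestanding sketch of the same loc[]/pred[]/ready/to_do bookkeeping, reduced to plain integer register numbers; the function name, MAXV, and the printf standing in for emitting a mov are all invented for the example, which assumes register numbers below MAXV:

   #include <stdio.h>

   #define MAXV 64

   /* Sequentialize the parallel copy { dst[i] <- src[i] : 0 <= i < n }
    * into individual moves, printing each one.  loc[v] tracks where value
    * v currently lives, pred[s] records which value slot s is waiting to
    * receive, and fresh temporaries are handed out from next_temp upward.
    */
   static void
   sequentialize(int n, const int *dst, const int *src, int next_temp)
   {
      int loc[MAXV], pred[MAXV], to_do[MAXV], ready[MAXV];
      int to_do_idx = -1, ready_idx = -1;

      for (int i = 0; i < MAXV; i++)
         loc[i] = pred[i] = -1;

      for (int i = 0; i < n; i++) {
         if (dst[i] == src[i])
            continue;                  /* self-copies need no move */
         loc[src[i]] = src[i];         /* every source starts out in itself */
         pred[dst[i]] = src[i];
         to_do[++to_do_idx] = dst[i];
      }

      /* A destination whose current contents nobody reads is "empty". */
      for (int i = 0; i <= to_do_idx; i++)
         if (loc[to_do[i]] == -1)
            ready[++ready_idx] = to_do[i];

      while (to_do_idx >= 0) {
         while (ready_idx >= 0) {
            int b = ready[ready_idx--];
            if (pred[b] == -1)
               continue;               /* already filled; stale entry */
            int a = pred[b];
            printf("mov r%d, r%d\n", b, loc[a]);
            loc[a] = b;                /* a's value can now be found in b */
            pred[b] = -1;              /* b no longer needs a copy */
            if (pred[a] != -1)
               ready[++ready_idx] = a; /* a's old slot is now overwritable */
         }

         int b = to_do[to_do_idx--];
         if (pred[b] == -1)
            continue;

         /* Cycle: park b's value in a fresh temporary to break it. */
         printf("mov r%d, r%d\n", next_temp, loc[b]);
         loc[b] = next_temp++;
         ready[++ready_idx] = b;
      }
   }

For a two-register swap, sequentialize(2, (int[]){1, 2}, (int[]){2, 1}, 3) prints "mov r3, r2", "mov r2, r1", "mov r1, r3": the cycle is broken through the temporary exactly as described.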
We will use -1 for "null" */ - NIR_VLA_FILL(int, pred, num_copies * 2, -1); - - /* The destinations we have yet to properly fill */ - NIR_VLA(int, to_do, num_copies * 2); - int to_do_idx = -1; - - /* Now we set everything up: - * - All values get assigned a temporary index - * - Current locations are set from sources - * - Predicessors are recorded from sources and destinations - */ - int num_vals = 0; - nir_foreach_parallel_copy_entry(pcopy, entry) { - /* Sources may be SSA */ - if (!entry->src.is_ssa && entry->src.reg.reg == entry->dest.reg.reg) - continue; - - int src_idx = -1; - for (int i = 0; i < num_vals; ++i) { - if (nir_srcs_equal(values[i], entry->src)) - src_idx = i; - } - if (src_idx < 0) { - src_idx = num_vals++; - values[src_idx] = entry->src; - } - - nir_src dest_src = nir_src_for_reg(entry->dest.reg.reg); - - int dest_idx = -1; - for (int i = 0; i < num_vals; ++i) { - if (nir_srcs_equal(values[i], dest_src)) { - /* Each destination of a parallel copy instruction should be - * unique. A destination may get used as a source, so we still - * have to walk the list. However, the predecessor should not, - * at this point, be set yet, so we should have -1 here. - */ - assert(pred[i] == -1); - dest_idx = i; - } - } - if (dest_idx < 0) { - dest_idx = num_vals++; - values[dest_idx] = dest_src; - } - - loc[src_idx] = src_idx; - pred[dest_idx] = src_idx; - - to_do[++to_do_idx] = dest_idx; - } - - /* Currently empty destinations we can go ahead and fill */ - NIR_VLA(int, ready, num_copies * 2); - int ready_idx = -1; - - /* Mark the ones that are ready for copying. We know an index is a - * destination if it has a predecessor and it's ready for copying if - * it's not marked as containing data. - */ - for (int i = 0; i < num_vals; i++) { - if (pred[i] != -1 && loc[i] == -1) - ready[++ready_idx] = i; - } - - while (to_do_idx >= 0) { - while (ready_idx >= 0) { - int b = ready[ready_idx--]; - int a = pred[b]; - emit_copy(pcopy, values[loc[a]], values[b], state->mem_ctx); - - /* If any other copies want a they can find it at b */ - loc[a] = b; - - /* b has been filled, mark it as not needing to be copied */ - pred[b] = -1; - - /* If a needs to be filled, it's ready for copying now */ - if (pred[a] != -1) - ready[++ready_idx] = a; - } - int b = to_do[to_do_idx--]; - if (pred[b] == -1) - continue; - - /* If we got here, then we don't have any more trivial copies that we - * can do. We have to break a cycle, so we create a new temporary - * register for that purpose. Normally, if going out of SSA after - * register allocation, you would want to avoid creating temporary - * registers. However, we are going out of SSA before register - * allocation, so we would rather not create extra register - * dependencies for the backend to deal with. If it wants, the - * backend can coalesce the (possibly multiple) temporaries. - */ - assert(num_vals < num_copies * 2); - nir_register *reg = nir_local_reg_create(state->impl); - reg->name = "copy_temp"; - reg->num_array_elems = 0; - if (values[b].is_ssa) - reg->num_components = values[b].ssa->num_components; - else - reg->num_components = values[b].reg.reg->num_components; - values[num_vals].is_ssa = false; - values[num_vals].reg.reg = reg; - - emit_copy(pcopy, values[b], values[num_vals], state->mem_ctx); - loc[b] = num_vals; - ready[++ready_idx] = b; - num_vals++; - } - - nir_instr_remove(&pcopy->instr); -} - -/* Resolves the parallel copies in a block. 
Each block can have at most
- * two: One at the beginning, right after all the phi nodes, and one at
- * the end (or right before the final jump if it exists).
- */
-static bool
-resolve_parallel_copies_block(nir_block *block, void *void_state)
-{
-   struct from_ssa_state *state = void_state;
-
-   /* At this point, we have removed all of the phi nodes. If a parallel
-    * copy existed right after the phi nodes in this block, it is now the
-    * first instruction.
-    */
-   nir_instr *first_instr = nir_block_first_instr(block);
-   if (first_instr == NULL)
-      return true; /* Empty, nothing to do. */
-
-   if (first_instr->type == nir_instr_type_parallel_copy) {
-      nir_parallel_copy_instr *pcopy = nir_instr_as_parallel_copy(first_instr);
-
-      resolve_parallel_copy(pcopy, state);
-   }
-
-   /* It's possible that the above code already cleaned up the end parallel
-    * copy. However, doing so removed it from the instructions list so we
-    * won't find it here. Therefore, it's safe to go ahead and just look
-    * for one and clean it up if it exists.
-    */
-   nir_parallel_copy_instr *end_pcopy =
-      get_parallel_copy_at_end_of_block(block);
-   if (end_pcopy)
-      resolve_parallel_copy(end_pcopy, state);
-
-   return true;
-}
-
-static void
-nir_convert_from_ssa_impl(nir_function_impl *impl, bool phi_webs_only)
-{
-   struct from_ssa_state state;
-
-   state.mem_ctx = ralloc_parent(impl);
-   state.dead_ctx = ralloc_context(NULL);
-   state.impl = impl;
-   state.phi_webs_only = phi_webs_only;
-   state.merge_node_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
-                                                    _mesa_key_pointer_equal);
-
-   nir_foreach_block(impl, add_parallel_copy_to_end_of_block, &state);
-   nir_foreach_block(impl, isolate_phi_nodes_block, &state);
-
-   /* Mark metadata as dirty before we ask for liveness analysis */
-   nir_metadata_preserve(impl, nir_metadata_block_index |
-                               nir_metadata_dominance);
-
-   nir_metadata_require(impl, nir_metadata_live_ssa_defs |
-                              nir_metadata_dominance);
-
-   nir_foreach_block(impl, coalesce_phi_nodes_block, &state);
-   nir_foreach_block(impl, aggressive_coalesce_block, &state);
-
-   nir_foreach_block(impl, resolve_registers_block, &state);
-
-   nir_foreach_block(impl, resolve_parallel_copies_block, &state);
-
-   nir_metadata_preserve(impl, nir_metadata_block_index |
-                               nir_metadata_dominance);
-
-   /* Clean up dead instructions and the hash tables */
-   _mesa_hash_table_destroy(state.merge_node_table, NULL);
-   ralloc_free(state.dead_ctx);
-}
-
-void
-nir_convert_from_ssa(nir_shader *shader, bool phi_webs_only)
-{
-   nir_foreach_function(shader, function) {
-      if (function->impl)
-         nir_convert_from_ssa_impl(function->impl, phi_webs_only);
-   }
-}
diff --git a/src/glsl/nir/nir_gs_count_vertices.c b/src/glsl/nir/nir_gs_count_vertices.c
deleted file mode 100644
index db15d160ee7..00000000000
--- a/src/glsl/nir/nir_gs_count_vertices.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright © 2015 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "nir.h" -#include "nir_builder.h" - -static nir_intrinsic_instr * -as_intrinsic(nir_instr *instr, nir_intrinsic_op op) -{ - if (instr->type != nir_instr_type_intrinsic) - return NULL; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - if (intrin->intrinsic != op) - return NULL; - - return intrin; -} - -static nir_intrinsic_instr * -as_set_vertex_count(nir_instr *instr) -{ - return as_intrinsic(instr, nir_intrinsic_set_vertex_count); -} - -/** - * If a geometry shader emits a constant number of vertices, return the - * number of vertices. Otherwise, return -1 (unknown). - * - * This only works if you've used nir_lower_gs_intrinsics() to do vertex - * counting at the NIR level. - */ -int -nir_gs_count_vertices(const nir_shader *shader) -{ - int count = -1; - - nir_foreach_function(shader, function) { - if (!function->impl) - continue; - - /* set_vertex_count intrinsics only appear in predecessors of the - * end block. So we don't need to walk all of them. - */ - struct set_entry *entry; - set_foreach(function->impl->end_block->predecessors, entry) { - nir_block *block = (nir_block *) entry->key; - - nir_foreach_instr_reverse(block, instr) { - nir_intrinsic_instr *intrin = as_set_vertex_count(instr); - if (!intrin) - continue; - - nir_const_value *val = nir_src_as_const_value(intrin->src[0]); - /* We've found a non-constant value. Bail. */ - if (!val) - return -1; - - if (count == -1) - count = val->i[0]; - - /* We've found contradictory set_vertex_count intrinsics. - * This can happen if there are early-returns in main() and - * different paths emit different numbers of vertices. - */ - if (count != val->i[0]) - return -1; - } - } - } - - return count; -} diff --git a/src/glsl/nir/nir_instr_set.c b/src/glsl/nir/nir_instr_set.c deleted file mode 100644 index d3f939fe805..00000000000 --- a/src/glsl/nir/nir_instr_set.c +++ /dev/null @@ -1,519 +0,0 @@ -/* - * Copyright © 2014 Connor Abbott - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "nir_instr_set.h" -#include "nir_vla.h" - -#define HASH(hash, data) _mesa_fnv32_1a_accumulate((hash), (data)) - -static uint32_t -hash_src(uint32_t hash, const nir_src *src) -{ - assert(src->is_ssa); - hash = HASH(hash, src->ssa); - return hash; -} - -static uint32_t -hash_alu_src(uint32_t hash, const nir_alu_src *src, unsigned num_components) -{ - hash = HASH(hash, src->abs); - hash = HASH(hash, src->negate); - - for (unsigned i = 0; i < num_components; i++) - hash = HASH(hash, src->swizzle[i]); - - hash = hash_src(hash, &src->src); - return hash; -} - -static uint32_t -hash_alu(uint32_t hash, const nir_alu_instr *instr) -{ - hash = HASH(hash, instr->op); - hash = HASH(hash, instr->dest.dest.ssa.num_components); - - if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { - assert(nir_op_infos[instr->op].num_inputs == 2); - uint32_t hash0 = hash_alu_src(hash, &instr->src[0], - nir_ssa_alu_instr_src_components(instr, 0)); - uint32_t hash1 = hash_alu_src(hash, &instr->src[1], - nir_ssa_alu_instr_src_components(instr, 1)); - /* For commutative operations, we need some commutative way of - * combining the hashes. One option would be to XOR them but that - * means that anything with two identical sources will hash to 0 and - * that's common enough we probably don't want the guaranteed - * collision. Either addition or multiplication will also work. - */ - hash = hash0 * hash1; - } else { - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { - hash = hash_alu_src(hash, &instr->src[i], - nir_ssa_alu_instr_src_components(instr, i)); - } - } - - return hash; -} - -static uint32_t -hash_load_const(uint32_t hash, const nir_load_const_instr *instr) -{ - hash = HASH(hash, instr->def.num_components); - - hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f, - instr->def.num_components - * sizeof(instr->value.f[0])); - - return hash; -} - -static int -cmp_phi_src(const void *data1, const void *data2) -{ - nir_phi_src *src1 = *(nir_phi_src **)data1; - nir_phi_src *src2 = *(nir_phi_src **)data2; - return src1->pred - src2->pred; -} - -static uint32_t -hash_phi(uint32_t hash, const nir_phi_instr *instr) -{ - hash = HASH(hash, instr->instr.block); - - /* sort sources by predecessor, since the order shouldn't matter */ - unsigned num_preds = instr->instr.block->predecessors->entries; - NIR_VLA(nir_phi_src *, srcs, num_preds); - unsigned i = 0; - nir_foreach_phi_src(instr, src) { - srcs[i++] = src; - } - - qsort(srcs, num_preds, sizeof(nir_phi_src *), cmp_phi_src); - - for (i = 0; i < num_preds; i++) { - hash = hash_src(hash, &srcs[i]->src); - hash = HASH(hash, srcs[i]->pred); - } - - return hash; -} - -static uint32_t -hash_intrinsic(uint32_t hash, const nir_intrinsic_instr *instr) -{ - const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic]; - hash = HASH(hash, instr->intrinsic); - - if (info->has_dest) - hash = HASH(hash, instr->dest.ssa.num_components); - - assert(info->num_variables == 0); - - hash = _mesa_fnv32_1a_accumulate_block(hash, instr->const_index, - info->num_indices - * sizeof(instr->const_index[0])); - return hash; -} - -static uint32_t -hash_tex(uint32_t hash, const nir_tex_instr *instr) -{ - hash = HASH(hash, instr->op); - hash = HASH(hash, 
instr->num_srcs); - - for (unsigned i = 0; i < instr->num_srcs; i++) { - hash = HASH(hash, instr->src[i].src_type); - hash = hash_src(hash, &instr->src[i].src); - } - - hash = HASH(hash, instr->coord_components); - hash = HASH(hash, instr->sampler_dim); - hash = HASH(hash, instr->is_array); - hash = HASH(hash, instr->is_shadow); - hash = HASH(hash, instr->is_new_style_shadow); - hash = HASH(hash, instr->const_offset); - unsigned component = instr->component; - hash = HASH(hash, component); - hash = HASH(hash, instr->sampler_index); - hash = HASH(hash, instr->sampler_array_size); - - assert(!instr->sampler); - - return hash; -} - -/* Computes a hash of an instruction for use in a hash table. Note that this - * will only work for instructions where instr_can_rewrite() returns true, and - * it should return identical hashes for two instructions that are the same - * according nir_instrs_equal(). - */ - -static uint32_t -hash_instr(const void *data) -{ - const nir_instr *instr = data; - uint32_t hash = _mesa_fnv32_1a_offset_bias; - - switch (instr->type) { - case nir_instr_type_alu: - hash = hash_alu(hash, nir_instr_as_alu(instr)); - break; - case nir_instr_type_load_const: - hash = hash_load_const(hash, nir_instr_as_load_const(instr)); - break; - case nir_instr_type_phi: - hash = hash_phi(hash, nir_instr_as_phi(instr)); - break; - case nir_instr_type_intrinsic: - hash = hash_intrinsic(hash, nir_instr_as_intrinsic(instr)); - break; - case nir_instr_type_tex: - hash = hash_tex(hash, nir_instr_as_tex(instr)); - break; - default: - unreachable("Invalid instruction type"); - } - - return hash; -} - -bool -nir_srcs_equal(nir_src src1, nir_src src2) -{ - if (src1.is_ssa) { - if (src2.is_ssa) { - return src1.ssa == src2.ssa; - } else { - return false; - } - } else { - if (src2.is_ssa) { - return false; - } else { - if ((src1.reg.indirect == NULL) != (src2.reg.indirect == NULL)) - return false; - - if (src1.reg.indirect) { - if (!nir_srcs_equal(*src1.reg.indirect, *src2.reg.indirect)) - return false; - } - - return src1.reg.reg == src2.reg.reg && - src1.reg.base_offset == src2.reg.base_offset; - } - } -} - -static bool -nir_alu_srcs_equal(const nir_alu_instr *alu1, const nir_alu_instr *alu2, - unsigned src1, unsigned src2) -{ - if (alu1->src[src1].abs != alu2->src[src2].abs || - alu1->src[src1].negate != alu2->src[src2].negate) - return false; - - for (unsigned i = 0; i < nir_ssa_alu_instr_src_components(alu1, src1); i++) { - if (alu1->src[src1].swizzle[i] != alu2->src[src2].swizzle[i]) - return false; - } - - return nir_srcs_equal(alu1->src[src1].src, alu2->src[src2].src); -} - -/* Returns "true" if two instructions are equal. Note that this will only - * work for the subset of instructions defined by instr_can_rewrite(). Also, - * it should only return "true" for instructions that hash_instr() will return - * the same hash for (ignoring collisions, of course). 
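Concretely, the FNV-1a accumulation behind the HASH() macro and the commutative combine used in hash_alu() above look like this when written out standalone (a sketch using the standard 32-bit FNV constants, not the actual Mesa helpers):

   #include <stddef.h>
   #include <stdint.h>

   /* FNV-1a: xor in each byte, then multiply by the 32-bit FNV prime. */
   static uint32_t
   fnv1a_accumulate(uint32_t hash, const void *data, size_t size)
   {
      const uint8_t *bytes = data;
      for (size_t i = 0; i < size; i++)
         hash = (hash ^ bytes[i]) * 0x01000193u;
      return hash;
   }

   static uint32_t
   hash_commutative(uint32_t hash0, uint32_t hash1)
   {
      /* XOR would send every op with two identical sources to the same
       * bucket (x ^ x == 0); multiplication is still order-independent
       * but avoids that guaranteed collision. */
      return hash0 * hash1;
   }

Hash chains are seeded from the 32-bit FNV offset basis, 0x811c9dc5, which is what the offset-bias value used above refers to.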
- */ - -static bool -nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2) -{ - if (instr1->type != instr2->type) - return false; - - switch (instr1->type) { - case nir_instr_type_alu: { - nir_alu_instr *alu1 = nir_instr_as_alu(instr1); - nir_alu_instr *alu2 = nir_instr_as_alu(instr2); - - if (alu1->op != alu2->op) - return false; - - /* TODO: We can probably acutally do something more inteligent such - * as allowing different numbers and taking a maximum or something - * here */ - if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components) - return false; - - if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { - assert(nir_op_infos[alu1->op].num_inputs == 2); - return (nir_alu_srcs_equal(alu1, alu2, 0, 0) && - nir_alu_srcs_equal(alu1, alu2, 1, 1)) || - (nir_alu_srcs_equal(alu1, alu2, 0, 1) && - nir_alu_srcs_equal(alu1, alu2, 1, 0)); - } else { - for (unsigned i = 0; i < nir_op_infos[alu1->op].num_inputs; i++) { - if (!nir_alu_srcs_equal(alu1, alu2, i, i)) - return false; - } - } - return true; - } - case nir_instr_type_tex: { - nir_tex_instr *tex1 = nir_instr_as_tex(instr1); - nir_tex_instr *tex2 = nir_instr_as_tex(instr2); - - if (tex1->op != tex2->op) - return false; - - if (tex1->num_srcs != tex2->num_srcs) - return false; - for (unsigned i = 0; i < tex1->num_srcs; i++) { - if (tex1->src[i].src_type != tex2->src[i].src_type || - !nir_srcs_equal(tex1->src[i].src, tex2->src[i].src)) { - return false; - } - } - - if (tex1->coord_components != tex2->coord_components || - tex1->sampler_dim != tex2->sampler_dim || - tex1->is_array != tex2->is_array || - tex1->is_shadow != tex2->is_shadow || - tex1->is_new_style_shadow != tex2->is_new_style_shadow || - memcmp(tex1->const_offset, tex2->const_offset, - sizeof(tex1->const_offset)) != 0 || - tex1->component != tex2->component || - tex1->sampler_index != tex2->sampler_index || - tex1->sampler_array_size != tex2->sampler_array_size) { - return false; - } - - /* Don't support un-lowered sampler derefs currently. 
*/ - assert(!tex1->sampler && !tex2->sampler); - - return true; - } - case nir_instr_type_load_const: { - nir_load_const_instr *load1 = nir_instr_as_load_const(instr1); - nir_load_const_instr *load2 = nir_instr_as_load_const(instr2); - - if (load1->def.num_components != load2->def.num_components) - return false; - - return memcmp(load1->value.f, load2->value.f, - load1->def.num_components * sizeof(*load2->value.f)) == 0; - } - case nir_instr_type_phi: { - nir_phi_instr *phi1 = nir_instr_as_phi(instr1); - nir_phi_instr *phi2 = nir_instr_as_phi(instr2); - - if (phi1->instr.block != phi2->instr.block) - return false; - - nir_foreach_phi_src(phi1, src1) { - nir_foreach_phi_src(phi2, src2) { - if (src1->pred == src2->pred) { - if (!nir_srcs_equal(src1->src, src2->src)) - return false; - - break; - } - } - } - - return true; - } - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *intrinsic1 = nir_instr_as_intrinsic(instr1); - nir_intrinsic_instr *intrinsic2 = nir_instr_as_intrinsic(instr2); - const nir_intrinsic_info *info = - &nir_intrinsic_infos[intrinsic1->intrinsic]; - - if (intrinsic1->intrinsic != intrinsic2->intrinsic || - intrinsic1->num_components != intrinsic2->num_components) - return false; - - if (info->has_dest && intrinsic1->dest.ssa.num_components != - intrinsic2->dest.ssa.num_components) - return false; - - for (unsigned i = 0; i < info->num_srcs; i++) { - if (!nir_srcs_equal(intrinsic1->src[i], intrinsic2->src[i])) - return false; - } - - assert(info->num_variables == 0); - - for (unsigned i = 0; i < info->num_indices; i++) { - if (intrinsic1->const_index[i] != intrinsic2->const_index[i]) - return false; - } - - return true; - } - case nir_instr_type_call: - case nir_instr_type_jump: - case nir_instr_type_ssa_undef: - case nir_instr_type_parallel_copy: - default: - unreachable("Invalid instruction type"); - } - - return false; -} - -static bool -src_is_ssa(nir_src *src, void *data) -{ - (void) data; - return src->is_ssa; -} - -static bool -dest_is_ssa(nir_dest *dest, void *data) -{ - (void) data; - return dest->is_ssa; -} - -/* This function determines if uses of an instruction can safely be rewritten - * to use another identical instruction instead. Note that this function must - * be kept in sync with hash_instr() and nir_instrs_equal() -- only - * instructions that pass this test will be handed on to those functions, and - * conversely they must handle everything that this function returns true for. - */ - -static bool -instr_can_rewrite(nir_instr *instr) -{ - /* We only handle SSA. */ - if (!nir_foreach_dest(instr, dest_is_ssa, NULL) || - !nir_foreach_src(instr, src_is_ssa, NULL)) - return false; - - switch (instr->type) { - case nir_instr_type_alu: - case nir_instr_type_load_const: - case nir_instr_type_phi: - return true; - case nir_instr_type_tex: { - nir_tex_instr *tex = nir_instr_as_tex(instr); - - /* Don't support un-lowered sampler derefs currently. 
*/ - if (tex->sampler) - return false; - - return true; - } - case nir_instr_type_intrinsic: { - const nir_intrinsic_info *info = - &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic]; - return (info->flags & NIR_INTRINSIC_CAN_ELIMINATE) && - (info->flags & NIR_INTRINSIC_CAN_REORDER) && - info->num_variables == 0; /* not implemented yet */ - } - case nir_instr_type_call: - case nir_instr_type_jump: - case nir_instr_type_ssa_undef: - return false; - case nir_instr_type_parallel_copy: - default: - unreachable("Invalid instruction type"); - } - - return false; -} - -static nir_ssa_def * -nir_instr_get_dest_ssa_def(nir_instr *instr) -{ - switch (instr->type) { - case nir_instr_type_alu: - assert(nir_instr_as_alu(instr)->dest.dest.is_ssa); - return &nir_instr_as_alu(instr)->dest.dest.ssa; - case nir_instr_type_load_const: - return &nir_instr_as_load_const(instr)->def; - case nir_instr_type_phi: - assert(nir_instr_as_phi(instr)->dest.is_ssa); - return &nir_instr_as_phi(instr)->dest.ssa; - case nir_instr_type_intrinsic: - assert(nir_instr_as_intrinsic(instr)->dest.is_ssa); - return &nir_instr_as_intrinsic(instr)->dest.ssa; - case nir_instr_type_tex: - assert(nir_instr_as_tex(instr)->dest.is_ssa); - return &nir_instr_as_tex(instr)->dest.ssa; - default: - unreachable("We never ask for any of these"); - } -} - -static bool -cmp_func(const void *data1, const void *data2) -{ - return nir_instrs_equal(data1, data2); -} - -struct set * -nir_instr_set_create(void *mem_ctx) -{ - return _mesa_set_create(mem_ctx, hash_instr, cmp_func); -} - -void -nir_instr_set_destroy(struct set *instr_set) -{ - _mesa_set_destroy(instr_set, NULL); -} - -bool -nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr) -{ - if (!instr_can_rewrite(instr)) - return false; - - struct set_entry *entry = _mesa_set_search(instr_set, instr); - if (entry) { - nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr); - nir_ssa_def *new_def = - nir_instr_get_dest_ssa_def((nir_instr *) entry->key); - nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def)); - return true; - } - - _mesa_set_add(instr_set, instr); - return false; -} - -void -nir_instr_set_remove(struct set *instr_set, nir_instr *instr) -{ - if (!instr_can_rewrite(instr)) - return; - - struct set_entry *entry = _mesa_set_search(instr_set, instr); - if (entry) - _mesa_set_remove(instr_set, entry); -} - diff --git a/src/glsl/nir/nir_instr_set.h b/src/glsl/nir/nir_instr_set.h deleted file mode 100644 index 939e8ddbf58..00000000000 --- a/src/glsl/nir/nir_instr_set.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright © 2014 Connor Abbott - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once - -#include "nir.h" - -/** - * This file defines functions for creating, destroying, and manipulating an - * "instruction set," which is an abstraction for finding duplicate - * instructions using a hash set. Note that the question of whether an - * instruction is actually a duplicate (e.g. whether it has any side effects) - * is handled transparently. The user can pass any instruction to - * nir_instr_set_add_or_rewrite() and nir_instr_set_remove(), and if the - * instruction isn't safe to rewrite or isn't supported, it's silently - * removed. - */ - -/*@{*/ - -/** Creates an instruction set, using a given ralloc mem_ctx */ -struct set *nir_instr_set_create(void *mem_ctx); - -/** Destroys an instruction set. */ -void nir_instr_set_destroy(struct set *instr_set); - -/** - * Adds an instruction to an instruction set if it doesn't exist, or if it - * does already exist, rewrites all uses of it to point to the other - * already-inserted instruction. Returns 'true' if the uses of the instruction - * were rewritten. - */ -bool nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr); - -/** - * Removes an instruction from an instruction set, so that other instructions - * won't be merged with it. - */ -void nir_instr_set_remove(struct set *instr_set, nir_instr *instr); - -/*@}*/ - diff --git a/src/glsl/nir/nir_intrinsics.c b/src/glsl/nir/nir_intrinsics.c deleted file mode 100644 index a7c868c39af..00000000000 --- a/src/glsl/nir/nir_intrinsics.c +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
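The instruction-set API documented in nir_instr_set.h above is typically driven from a walk of the dominance tree, so that any match found in the set is guaranteed to dominate the instruction being rewritten. A hedged sketch of such a driver, assuming the nir_block dom_children/num_dom_children arrays and the two-argument nir_foreach_instr macros seen elsewhere in this series:

   static bool
   cse_block(nir_block *block, struct set *instr_set)
   {
      bool progress = false;

      nir_foreach_instr_safe(block, instr) {
         if (nir_instr_set_add_or_rewrite(instr_set, instr)) {
            /* All uses now point at the equivalent earlier instruction. */
            progress = true;
            nir_instr_remove(instr);
         }
      }

      /* Recurse in pre-order over the dominance tree. */
      for (unsigned i = 0; i < block->num_dom_children; i++)
         progress |= cse_block(block->dom_children[i], instr_set);

      /* Leaving this block's dominance subtree: take its entries back out
       * so they cannot match instructions they do not dominate. */
      nir_foreach_instr(block, instr)
         nir_instr_set_remove(instr_set, instr);

      return progress;
   }

The trailing removal loop is what the header means by taking an instruction out "so that other instructions won't be merged with it."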
- * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir.h" - -#define OPCODE(name) nir_intrinsic_##name - -#define INTRINSIC(_name, _num_srcs, _src_components, _has_dest, \ - _dest_components, _num_variables, _num_indices, _flags) \ -{ \ - .name = #_name, \ - .num_srcs = _num_srcs, \ - .src_components = _src_components, \ - .has_dest = _has_dest, \ - .dest_components = _dest_components, \ - .num_variables = _num_variables, \ - .num_indices = _num_indices, \ - .flags = _flags \ -}, - -#define LAST_INTRINSIC(name) - -const nir_intrinsic_info nir_intrinsic_infos[nir_num_intrinsics] = { -#include "nir_intrinsics.h" -}; \ No newline at end of file diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h deleted file mode 100644 index 62eead4878a..00000000000 --- a/src/glsl/nir/nir_intrinsics.h +++ /dev/null @@ -1,316 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -/** - * This header file defines all the available intrinsics in one place. It - * expands to a list of macros of the form: - * - * INTRINSIC(name, num_srcs, src_components, has_dest, dest_components, - * num_variables, num_indices, flags) - * - * Which should correspond one-to-one with the nir_intrinsic_info structure. It - * is included in both ir.h to create the nir_intrinsic enum (with members of - * the form nir_intrinsic_(name)) and and in opcodes.c to create - * nir_intrinsic_infos, which is a const array of nir_intrinsic_info structures - * for each intrinsic. - */ - -#define ARR(...) { __VA_ARGS__ } - - -INTRINSIC(load_var, 0, ARR(), true, 0, 1, 0, NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(store_var, 1, ARR(0), false, 0, 1, 1, 0) -INTRINSIC(copy_var, 0, ARR(), false, 0, 2, 0, 0) - -/* - * Interpolation of input. The interp_var_at* intrinsics are similar to the - * load_var intrinsic acting an a shader input except that they interpolate - * the input differently. The at_sample and at_offset intrinsics take an - * aditional source that is a integer sample id or a vec2 position offset - * respectively. 
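The INTRINSIC() expansion described here is a classic X-macro: a single list of entries is expanded under different macro definitions to build both the enum and the matching info table. A minimal freestanding miniature of the pattern, with all names invented for illustration:

   /* The single source of truth: one OP() entry per opcode. */
   #define OP_LIST \
      OP(add, 2)   \
      OP(neg, 1)   \
      OP(mov, 1)

   /* First expansion: the enum. */
   #define OP(name, nsrc) op_##name,
   enum opcode { OP_LIST op_count };
   #undef OP

   /* Second expansion: an info table indexed by the enum. */
   #define OP(name, nsrc) [op_##name] = { #name, nsrc },
   static const struct op_info {
      const char *name;
      int num_srcs;
   } op_infos[op_count] = { OP_LIST };
   #undef OP

Adding an opcode means touching only OP_LIST; the enum and the table can never fall out of sync, which is exactly the property nir_intrinsics.h relies on.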
- */ - -INTRINSIC(interp_var_at_centroid, 0, ARR(0), true, 0, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -INTRINSIC(interp_var_at_sample, 1, ARR(1), true, 0, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -INTRINSIC(interp_var_at_offset, 1, ARR(2), true, 0, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) - -/* - * Ask the driver for the size of a given buffer. It takes the buffer index - * as source. - */ -INTRINSIC(get_buffer_size, 1, ARR(1), true, 1, 0, 0, - NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) - -/* - * a barrier is an intrinsic with no inputs/outputs but which can't be moved - * around/optimized in general - */ -#define BARRIER(name) INTRINSIC(name, 0, ARR(), false, 0, 0, 0, 0) - -BARRIER(barrier) -BARRIER(discard) - -/* - * Memory barrier with semantics analogous to the memoryBarrier() GLSL - * intrinsic. - */ -BARRIER(memory_barrier) - -/* - * Shader clock intrinsic with semantics analogous to the clock2x32ARB() - * GLSL intrinsic. - * The latter can be used as code motion barrier, which is currently not - * feasible with NIR. - */ -INTRINSIC(shader_clock, 0, ARR(), true, 1, 0, 0, NIR_INTRINSIC_CAN_ELIMINATE) - -/* - * Memory barrier with semantics analogous to the compute shader - * groupMemoryBarrier(), memoryBarrierAtomicCounter(), memoryBarrierBuffer(), - * memoryBarrierImage() and memoryBarrierShared() GLSL intrinsics. - */ -BARRIER(group_memory_barrier) -BARRIER(memory_barrier_atomic_counter) -BARRIER(memory_barrier_buffer) -BARRIER(memory_barrier_image) -BARRIER(memory_barrier_shared) - -/** A conditional discard, with a single boolean source. */ -INTRINSIC(discard_if, 1, ARR(1), false, 0, 0, 0, 0) - -/** - * Basic Geometry Shader intrinsics. - * - * emit_vertex implements GLSL's EmitStreamVertex() built-in. It takes a single - * index, which is the stream ID to write to. - * - * end_primitive implements GLSL's EndPrimitive() built-in. - */ -INTRINSIC(emit_vertex, 0, ARR(), false, 0, 0, 1, 0) -INTRINSIC(end_primitive, 0, ARR(), false, 0, 0, 1, 0) - -/** - * Geometry Shader intrinsics with a vertex count. - * - * Alternatively, drivers may implement these intrinsics, and use - * nir_lower_gs_intrinsics() to convert from the basic intrinsics. - * - * These maintain a count of the number of vertices emitted, as an additional - * unsigned integer source. - */ -INTRINSIC(emit_vertex_with_counter, 1, ARR(1), false, 0, 0, 1, 0) -INTRINSIC(end_primitive_with_counter, 1, ARR(1), false, 0, 0, 1, 0) -INTRINSIC(set_vertex_count, 1, ARR(1), false, 0, 0, 0, 0) - -/* - * Atomic counters - * - * The *_var variants take an atomic_uint nir_variable, while the other, - * lowered, variants take a constant buffer index and register offset. - */ - -#define ATOMIC(name, flags) \ - INTRINSIC(atomic_counter_##name##_var, 0, ARR(), true, 1, 1, 0, flags) \ - INTRINSIC(atomic_counter_##name, 1, ARR(1), true, 1, 0, 1, flags) - -ATOMIC(inc, 0) -ATOMIC(dec, 0) -ATOMIC(read, NIR_INTRINSIC_CAN_ELIMINATE) - -/* - * Image load, store and atomic intrinsics. - * - * All image intrinsics take an image target passed as a nir_variable. Image - * variables contain a number of memory and layout qualifiers that influence - * the semantics of the intrinsic. - * - * All image intrinsics take a four-coordinate vector and a sample index as - * first two sources, determining the location within the image that will be - * accessed by the intrinsic. Components not applicable to the image target - * in use are undefined. 
Image store takes an additional four-component - * argument with the value to be written, and image atomic operations take - * either one or two additional scalar arguments with the same meaning as in - * the ARB_shader_image_load_store specification. - */ -INTRINSIC(image_load, 2, ARR(4, 1), true, 4, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE) -INTRINSIC(image_store, 3, ARR(4, 1, 4), false, 0, 1, 0, 0) -INTRINSIC(image_atomic_add, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_min, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_max, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_and, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_or, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_xor, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_exchange, 3, ARR(4, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_atomic_comp_swap, 4, ARR(4, 1, 1, 1), true, 1, 1, 0, 0) -INTRINSIC(image_size, 0, ARR(), true, 4, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -INTRINSIC(image_samples, 0, ARR(), true, 1, 1, 0, - NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) - -/* - * SSBO atomic intrinsics - * - * All of the SSBO atomic memory operations read a value from memory, - * compute a new value using one of the operations below, write the new - * value to memory, and return the original value read. - * - * All operations take 3 sources except CompSwap that takes 4. These - * sources represent: - * - * 0: The SSBO buffer index. - * 1: The offset into the SSBO buffer of the variable that the atomic - * operation will operate on. - * 2: The data parameter to the atomic function (i.e. the value to add - * in ssbo_atomic_add, etc). - * 3: For CompSwap only: the second data parameter. - */ -INTRINSIC(ssbo_atomic_add, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_imin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_umin, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_imax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_umax, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_and, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_or, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_xor, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_exchange, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) -INTRINSIC(ssbo_atomic_comp_swap, 4, ARR(1, 1, 1, 1), true, 1, 0, 0, 0) - -/* - * CS shared variable atomic intrinsics - * - * All of the shared variable atomic memory operations read a value from - * memory, compute a new value using one of the operations below, write the - * new value to memory, and return the original value read. - * - * All operations take 2 sources except CompSwap that takes 3. These - * sources represent: - * - * 0: The offset into the shared variable storage region that the atomic - * operation will operate on. - * 1: The data parameter to the atomic function (i.e. the value to add - * in shared_atomic_add, etc). - * 2: For CompSwap only: the second data parameter. 
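All of these read-modify-write intrinsics share one contract: the destination receives the value that was in memory before the operation. As a plain-C model of that contract (ignoring the atomicity itself; the names are invented):

   #include <stdint.h>

   static uint32_t
   model_atomic_add(uint32_t *mem, uint32_t data)
   {
      uint32_t old = *mem;
      *mem = old + data;
      return old;          /* the intrinsic's result is the pre-op value */
   }

   static uint32_t
   model_atomic_comp_swap(uint32_t *mem, uint32_t cmp, uint32_t data)
   {
      uint32_t old = *mem;
      if (old == cmp)
         *mem = data;
      return old;          /* returned whether or not the swap happened */
   }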
- */ -INTRINSIC(shared_atomic_add, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_imin, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_umin, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_imax, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_umax, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_and, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_or, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_xor, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_exchange, 2, ARR(1, 1), true, 1, 0, 0, 0) -INTRINSIC(shared_atomic_comp_swap, 3, ARR(1, 1, 1), true, 1, 0, 0, 0) - -#define SYSTEM_VALUE(name, components, num_indices) \ - INTRINSIC(load_##name, 0, ARR(), true, components, 0, num_indices, \ - NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) - -SYSTEM_VALUE(front_face, 1, 0) -SYSTEM_VALUE(vertex_id, 1, 0) -SYSTEM_VALUE(vertex_id_zero_base, 1, 0) -SYSTEM_VALUE(base_vertex, 1, 0) -SYSTEM_VALUE(instance_id, 1, 0) -SYSTEM_VALUE(base_instance, 1, 0) -SYSTEM_VALUE(draw_id, 1, 0) -SYSTEM_VALUE(sample_id, 1, 0) -SYSTEM_VALUE(sample_pos, 2, 0) -SYSTEM_VALUE(sample_mask_in, 1, 0) -SYSTEM_VALUE(primitive_id, 1, 0) -SYSTEM_VALUE(invocation_id, 1, 0) -SYSTEM_VALUE(tess_coord, 3, 0) -SYSTEM_VALUE(tess_level_outer, 4, 0) -SYSTEM_VALUE(tess_level_inner, 2, 0) -SYSTEM_VALUE(patch_vertices_in, 1, 0) -SYSTEM_VALUE(local_invocation_id, 3, 0) -SYSTEM_VALUE(work_group_id, 3, 0) -SYSTEM_VALUE(user_clip_plane, 4, 1) /* const_index[0] is user_clip_plane[idx] */ -SYSTEM_VALUE(num_work_groups, 3, 0) -SYSTEM_VALUE(helper_invocation, 1, 0) - -/* - * Load operations pull data from some piece of GPU memory. All load - * operations operate in terms of offsets into some piece of theoretical - * memory. Loads from externally visible memory (UBO and SSBO) simply take a - * byte offset as a source. Loads from opaque memory (uniforms, inputs, etc.) - * take a base+offset pair where the base (const_index[0]) gives the location - * of the start of the variable being loaded and and the offset source is a - * offset into that variable. - * - * Some load operations such as UBO/SSBO load and per_vertex loads take an - * additional source to specify which UBO/SSBO/vertex to load from. - * - * The exact address type depends on the lowering pass that generates the - * load/store intrinsics. Typically, this is vec4 units for things such as - * varying slots and float units for fragment shader inputs. UBO and SSBO - * offsets are always in bytes. - */ - -#define LOAD(name, srcs, indices, flags) \ - INTRINSIC(load_##name, srcs, ARR(1, 1, 1, 1), true, 0, 0, indices, flags) - -/* src[] = { offset }. const_index[] = { base } */ -LOAD(uniform, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -/* src[] = { buffer_index, offset }. No const_index */ -LOAD(ubo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -/* src[] = { offset }. const_index[] = { base } */ -LOAD(input, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -/* src[] = { vertex, offset }. const_index[] = { base } */ -LOAD(per_vertex_input, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -/* src[] = { buffer_index, offset }. No const_index */ -LOAD(ssbo, 2, 0, NIR_INTRINSIC_CAN_ELIMINATE) -/* src[] = { offset }. const_index[] = { base } */ -LOAD(output, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) -/* src[] = { vertex, offset }. const_index[] = { base } */ -LOAD(per_vertex_output, 2, 1, NIR_INTRINSIC_CAN_ELIMINATE) -/* src[] = { offset }. 
const_index[] = { base } */ -LOAD(shared, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) - -/* - * Stores work the same way as loads, except now the first source is the value - * to store and the second (and possibly third) source specify where to store - * the value. SSBO and shared memory stores also have a write mask as - * const_index[0]. - */ - -#define STORE(name, srcs, indices, flags) \ - INTRINSIC(store_##name, srcs, ARR(0, 1, 1, 1), false, 0, 0, indices, flags) - -/* src[] = { value, offset }. const_index[] = { base, write_mask } */ -STORE(output, 2, 2, 0) -/* src[] = { value, vertex, offset }. const_index[] = { base, write_mask } */ -STORE(per_vertex_output, 3, 2, 0) -/* src[] = { value, block_index, offset }. const_index[] = { write_mask } */ -STORE(ssbo, 3, 1, 0) -/* src[] = { value, offset }. const_index[] = { base, write_mask } */ -STORE(shared, 2, 2, 0) - -LAST_INTRINSIC(store_shared) diff --git a/src/glsl/nir/nir_liveness.c b/src/glsl/nir/nir_liveness.c deleted file mode 100644 index 05f79d7bc61..00000000000 --- a/src/glsl/nir/nir_liveness.c +++ /dev/null @@ -1,297 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - */ - -#include "nir.h" -#include "nir_worklist.h" -#include "nir_vla.h" - -/* - * Basic liveness analysis. This works only in SSA form. - * - * This liveness pass treats phi nodes as being melded to the space between - * blocks so that the destinations of a phi are in the livein of the block - * in which it resides and the sources are in the liveout of the - * corresponding block. By formulating the liveness information in this - * way, we ensure that the definition of any variable dominates its entire - * live range. This is true because the only way that the definition of an - * SSA value may not dominate a use is if the use is in a phi node and the - * uses in phi no are in the live-out of the corresponding predecessor - * block but not in the live-in of the block containing the phi node. 
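The phi handling described above boils down to a single rule per CFG edge: kill the phi destinations, then add the sources contributed along that edge, and propagate the result into the predecessor's live-out. Distilled to one bitset word (a toy model; the real pass uses multi-word bitsets and a block worklist):

   #include <stdbool.h>
   #include <stdint.h>

   static bool
   propagate_edge(uint64_t *pred_live_out, uint64_t succ_live_in,
                  uint64_t phi_defs, uint64_t phi_srcs_from_pred)
   {
      /* Phi destinations are defined "on the edge", so they are not live
       * into the predecessor; the sources this edge contributes are. */
      uint64_t live = (succ_live_in & ~phi_defs) | phi_srcs_from_pred;

      bool progress = (live & ~*pred_live_out) != 0;
      *pred_live_out |= live;
      return progress;   /* caller re-queues pred on the worklist if true */
   }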
- */ - -struct live_ssa_defs_state { - unsigned num_ssa_defs; - unsigned bitset_words; - - nir_block_worklist worklist; -}; - -static bool -index_ssa_def(nir_ssa_def *def, void *void_state) -{ - struct live_ssa_defs_state *state = void_state; - - if (def->parent_instr->type == nir_instr_type_ssa_undef) - def->live_index = 0; - else - def->live_index = state->num_ssa_defs++; - - return true; -} - -static bool -index_ssa_definitions_block(nir_block *block, void *state) -{ - nir_foreach_instr(block, instr) - nir_foreach_ssa_def(instr, index_ssa_def, state); - - return true; -} - -/* Initialize the liveness data to zero and add the given block to the - * worklist. - */ -static bool -init_liveness_block(nir_block *block, void *void_state) -{ - struct live_ssa_defs_state *state = void_state; - - block->live_in = reralloc(block, block->live_in, BITSET_WORD, - state->bitset_words); - memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD)); - - block->live_out = reralloc(block, block->live_out, BITSET_WORD, - state->bitset_words); - memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD)); - - nir_block_worklist_push_head(&state->worklist, block); - - return true; -} - -static bool -set_src_live(nir_src *src, void *void_live) -{ - BITSET_WORD *live = void_live; - - if (!src->is_ssa) - return true; - - if (src->ssa->live_index == 0) - return true; /* undefined variables are never live */ - - BITSET_SET(live, src->ssa->live_index); - - return true; -} - -static bool -set_ssa_def_dead(nir_ssa_def *def, void *void_live) -{ - BITSET_WORD *live = void_live; - - BITSET_CLEAR(live, def->live_index); - - return true; -} - -/** Propagates the live in of succ across the edge to the live out of pred - * - * Phi nodes exist "between" blocks and all the phi nodes at the start of a - * block act "in parallel". When we propagate from the live_in of one - * block to the live out of the other, we have to kill any writes from phis - * and make live any sources. - * - * Returns true if updating live out of pred added anything - */ -static bool -propagate_across_edge(nir_block *pred, nir_block *succ, - struct live_ssa_defs_state *state) -{ - NIR_VLA(BITSET_WORD, live, state->bitset_words); - memcpy(live, succ->live_in, state->bitset_words * sizeof *live); - - nir_foreach_instr(succ, instr) { - if (instr->type != nir_instr_type_phi) - break; - nir_phi_instr *phi = nir_instr_as_phi(instr); - - assert(phi->dest.is_ssa); - set_ssa_def_dead(&phi->dest.ssa, live); - } - - nir_foreach_instr(succ, instr) { - if (instr->type != nir_instr_type_phi) - break; - nir_phi_instr *phi = nir_instr_as_phi(instr); - - nir_foreach_phi_src(phi, src) { - if (src->pred == pred) { - set_src_live(&src->src, live); - break; - } - } - } - - BITSET_WORD progress = 0; - for (unsigned i = 0; i < state->bitset_words; ++i) { - progress |= live[i] & ~pred->live_out[i]; - pred->live_out[i] |= live[i]; - } - return progress != 0; -} - -void -nir_live_ssa_defs_impl(nir_function_impl *impl) -{ - struct live_ssa_defs_state state; - - /* We start at 1 because we reserve the index value of 0 for ssa_undef - * instructions. Those are never live, so their liveness information - * can be compacted into a single bit. 
- */ - state.num_ssa_defs = 1; - nir_foreach_block(impl, index_ssa_definitions_block, &state); - - nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL); - - /* We now know how many unique ssa definitions we have and we can go - * ahead and allocate live_in and live_out sets and add all of the - * blocks to the worklist. - */ - state.bitset_words = BITSET_WORDS(state.num_ssa_defs); - nir_foreach_block(impl, init_liveness_block, &state); - - /* We're now ready to work through the worklist and update the liveness - * sets of each of the blocks. By the time we get to this point, every - * block in the function implementation has been pushed onto the - * worklist in reverse order. As long as we keep the worklist - * up-to-date as we go, everything will get covered. - */ - while (!nir_block_worklist_is_empty(&state.worklist)) { - /* We pop them off in the reverse order we pushed them on. This way - * the first walk of the instructions is backwards so we only walk - * once in the case of no control flow. - */ - nir_block *block = nir_block_worklist_pop_head(&state.worklist); - - memcpy(block->live_in, block->live_out, - state.bitset_words * sizeof(BITSET_WORD)); - - nir_if *following_if = nir_block_get_following_if(block); - if (following_if) - set_src_live(&following_if->condition, block->live_in); - - nir_foreach_instr_reverse(block, instr) { - /* Phi nodes are handled separately so we want to skip them. Since - * we are going backwards and they are at the beginning, we can just - * break as soon as we see one. - */ - if (instr->type == nir_instr_type_phi) - break; - - nir_foreach_ssa_def(instr, set_ssa_def_dead, block->live_in); - nir_foreach_src(instr, set_src_live, block->live_in); - } - - /* Walk over all of the predecessors of the current block updating - * their live in with the live out of this one. If anything has - * changed, add the predecessor to the work list so that we ensure - * that the new information is used. - */ - struct set_entry *entry; - set_foreach(block->predecessors, entry) { - nir_block *pred = (nir_block *)entry->key; - if (propagate_across_edge(pred, block, &state)) - nir_block_worklist_push_tail(&state.worklist, pred); - } - } - - nir_block_worklist_fini(&state.worklist); -} - -static bool -src_does_not_use_def(nir_src *src, void *def) -{ - return !src->is_ssa || src->ssa != (nir_ssa_def *)def; -} - -static bool -search_for_use_after_instr(nir_instr *start, nir_ssa_def *def) -{ - /* Only look for a use strictly after the given instruction */ - struct exec_node *node = start->node.next; - while (!exec_node_is_tail_sentinel(node)) { - nir_instr *instr = exec_node_data(nir_instr, node, node); - if (!nir_foreach_src(instr, src_does_not_use_def, def)) - return true; - node = node->next; - } - return false; -} - -/* Returns true if def is live at instr assuming that def comes before - * instr in a pre-order DFS search of the dominance tree. - */ -static bool -nir_ssa_def_is_live_at(nir_ssa_def *def, nir_instr *instr) -{ - if (BITSET_TEST(instr->block->live_out, def->live_index)) { - /* Since def dominates instr, if def is in the liveout of the block, - * it's live at instr - */ - return true; - } else { - if (BITSET_TEST(instr->block->live_in, def->live_index) || - def->parent_instr->block == instr->block) { - /* In this case it is either live coming into instr's block or it - * is defined in the same block. In this case, we simply need to - * see if it is used after instr.
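For reference, the update the worklist loop above applies to each block is the classic backward dataflow step: live_in = uses | (live_out & ~defs), plus a change check that decides whether predecessors need to be revisited. A standalone sketch of that bitset arithmetic in plain C (hypothetical names, independent of NIR's BITSET helpers):

#include <stdint.h>

/* Per-block liveness update, standalone: live_in = uses | (live_out & ~defs).
 * Returns nonzero if live_in changed, mirroring how the pass decides whether
 * the block's predecessors must be requeued.
 */
static int
update_block_liveness(uint32_t *live_in, const uint32_t *live_out,
                      const uint32_t *defs, const uint32_t *uses,
                      unsigned words)
{
   int progress = 0;
   for (unsigned i = 0; i < words; i++) {
      uint32_t new_in = uses[i] | (live_out[i] & ~defs[i]);
      if (new_in != live_in[i])
         progress = 1;
      live_in[i] = new_in;
   }
   return progress;
}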
- */ - return search_for_use_after_instr(instr, def); - } else { - return false; - } - } -} - -bool -nir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b) -{ - if (a->parent_instr == b->parent_instr) { - /* Two variables defined at the same time interfere assuming at - * least one isn't dead. - */ - return true; - } else if (a->live_index == 0 || b->live_index == 0) { - /* If either variable is an ssa_undef, then there's no interference */ - return false; - } else if (a->live_index < b->live_index) { - return nir_ssa_def_is_live_at(a, b->parent_instr); - } else { - return nir_ssa_def_is_live_at(b, a->parent_instr); - } -} diff --git a/src/glsl/nir/nir_lower_alu_to_scalar.c b/src/glsl/nir/nir_lower_alu_to_scalar.c deleted file mode 100644 index 0a27e66cf0f..00000000000 --- a/src/glsl/nir/nir_lower_alu_to_scalar.c +++ /dev/null @@ -1,210 +0,0 @@ -/* - * Copyright © 2014-2015 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "nir.h" -#include "nir_builder.h" - -/** @file nir_lower_alu_to_scalar.c - * - * Replaces nir_alu_instr operations with more than one channel used in the - * arguments with individual per-channel operations. 
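Before the implementation, it may help to see the transform by hand. Below is a hypothetical nir_builder sketch of what scalarizing a two-component fadd produces: one single-channel op per component, regrouped with a vecN. It assumes "b" is a nir_builder with a valid cursor and "x", "y" are 2-component nir_ssa_defs; none of these names come from the pass itself.

/* Hypothetical hand-scalarized 2-component fadd, the shape the pass
 * below produces automatically for arbitrary ops.
 */
static nir_ssa_def *
scalarized_fadd2(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
{
   nir_ssa_def *comps[2] = {
      nir_fadd(b, nir_channel(b, x, 0), nir_channel(b, y, 0)),
      nir_fadd(b, nir_channel(b, x, 1), nir_channel(b, y, 1)),
   };
   return nir_vec(b, comps, 2);
}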
- */ - -static void -nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components) -{ - nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL); - instr->dest.write_mask = (1 << num_components) - 1; -} - -static void -lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op, - nir_builder *builder) -{ - unsigned num_components = nir_op_infos[instr->op].input_sizes[0]; - - nir_ssa_def *last = NULL; - for (unsigned i = 0; i < num_components; i++) { - nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op); - nir_alu_ssa_dest_init(chan, 1); - nir_alu_src_copy(&chan->src[0], &instr->src[0], chan); - chan->src[0].swizzle[0] = chan->src[0].swizzle[i]; - if (nir_op_infos[chan_op].num_inputs > 1) { - assert(nir_op_infos[chan_op].num_inputs == 2); - nir_alu_src_copy(&chan->src[1], &instr->src[1], chan); - chan->src[1].swizzle[0] = chan->src[1].swizzle[i]; - } - - nir_builder_instr_insert(builder, &chan->instr); - - if (i == 0) { - last = &chan->dest.dest.ssa; - } else { - last = nir_build_alu(builder, merge_op, - last, &chan->dest.dest.ssa, NULL, NULL); - } - } - - assert(instr->dest.write_mask == 1); - nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(last)); - nir_instr_remove(&instr->instr); -} - -static void -lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b) -{ - unsigned num_src = nir_op_infos[instr->op].num_inputs; - unsigned i, chan; - - assert(instr->dest.dest.is_ssa); - assert(instr->dest.write_mask != 0); - - b->cursor = nir_before_instr(&instr->instr); - -#define LOWER_REDUCTION(name, chan, merge) \ - case name##2: \ - case name##3: \ - case name##4: \ - lower_reduction(instr, chan, merge, b); \ - return; - - switch (instr->op) { - case nir_op_vec4: - case nir_op_vec3: - case nir_op_vec2: - /* We don't need to scalarize these ops, they're the ones generated to - * group up outputs into a value that can be SSAed. - */ - return; - - case nir_op_unpack_unorm_4x8: - case nir_op_unpack_snorm_4x8: - case nir_op_unpack_unorm_2x16: - case nir_op_unpack_snorm_2x16: - /* There is no scalar version of these ops, unless we were to break it - * down to bitshifts and math (which is definitely not intended). - */ - return; - - case nir_op_unpack_half_2x16: - /* We could split this into unpack_half_2x16_split_[xy], but should - * we? 
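To make lower_reduction() above concrete: a dot product reduces to one fmul per channel, folded together pairwise with the merge op (fadd). A hypothetical hand-written equivalent for a 3-component dot, under the same nir_builder assumptions as the earlier sketch:

/* Hypothetical hand-lowered fdot3, matching what lower_reduction() emits:
 * dot(u, v) -> fadd(fadd(u.x*v.x, u.y*v.y), u.z*v.z).
 */
static nir_ssa_def *
lowered_fdot3(nir_builder *b, nir_ssa_def *u, nir_ssa_def *v)
{
   nir_ssa_def *acc = nir_fmul(b, nir_channel(b, u, 0), nir_channel(b, v, 0));
   acc = nir_fadd(b, acc, nir_fmul(b, nir_channel(b, u, 1),
                                      nir_channel(b, v, 1)));
   acc = nir_fadd(b, acc, nir_fmul(b, nir_channel(b, u, 2),
                                      nir_channel(b, v, 2)));
   return acc;
}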
- */ - return; - - case nir_op_fdph: { - nir_ssa_def *sum[4]; - for (unsigned i = 0; i < 3; i++) { - sum[i] = nir_fmul(b, nir_channel(b, instr->src[0].src.ssa, - instr->src[0].swizzle[i]), - nir_channel(b, instr->src[1].src.ssa, - instr->src[1].swizzle[i])); - } - sum[3] = nir_channel(b, instr->src[1].src.ssa, instr->src[1].swizzle[3]); - - nir_ssa_def *val = nir_fadd(b, nir_fadd(b, sum[0], sum[1]), - nir_fadd(b, sum[2], sum[3])); - - nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(val)); - nir_instr_remove(&instr->instr); - return; - } - - LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd); - LOWER_REDUCTION(nir_op_ball_fequal, nir_op_feq, nir_op_iand); - LOWER_REDUCTION(nir_op_ball_iequal, nir_op_ieq, nir_op_iand); - LOWER_REDUCTION(nir_op_bany_fnequal, nir_op_fne, nir_op_ior); - LOWER_REDUCTION(nir_op_bany_inequal, nir_op_ine, nir_op_ior); - LOWER_REDUCTION(nir_op_fall_equal, nir_op_seq, nir_op_fand); - LOWER_REDUCTION(nir_op_fany_nequal, nir_op_sne, nir_op_for); - - default: - break; - } - - if (instr->dest.dest.ssa.num_components == 1) - return; - - unsigned num_components = instr->dest.dest.ssa.num_components; - nir_ssa_def *comps[] = { NULL, NULL, NULL, NULL }; - - for (chan = 0; chan < 4; chan++) { - if (!(instr->dest.write_mask & (1 << chan))) - continue; - - nir_alu_instr *lower = nir_alu_instr_create(b->shader, instr->op); - for (i = 0; i < num_src; i++) { - /* We only handle same-size-as-dest (input_sizes[] == 0) or scalar - * args (input_sizes[] == 1). - */ - assert(nir_op_infos[instr->op].input_sizes[i] < 2); - unsigned src_chan = (nir_op_infos[instr->op].input_sizes[i] == 1 ? - 0 : chan); - - nir_alu_src_copy(&lower->src[i], &instr->src[i], lower); - for (int j = 0; j < 4; j++) - lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan]; - } - - nir_alu_ssa_dest_init(lower, 1); - lower->dest.saturate = instr->dest.saturate; - comps[chan] = &lower->dest.dest.ssa; - - nir_builder_instr_insert(b, &lower->instr); - } - - nir_ssa_def *vec = nir_vec(b, comps, num_components); - - nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, nir_src_for_ssa(vec)); - - nir_instr_remove(&instr->instr); -} - -static bool -lower_alu_to_scalar_block(nir_block *block, void *builder) -{ - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_alu) - lower_alu_instr_scalar(nir_instr_as_alu(instr), builder); - } - - return true; -} - -static void -nir_lower_alu_to_scalar_impl(nir_function_impl *impl) -{ - nir_builder builder; - nir_builder_init(&builder, impl); - - nir_foreach_block(impl, lower_alu_to_scalar_block, &builder); -} - -void -nir_lower_alu_to_scalar(nir_shader *shader) -{ - nir_foreach_function(shader, function) { - if (function->impl) - nir_lower_alu_to_scalar_impl(function->impl); - } -} diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c deleted file mode 100644 index 1aa78e18a85..00000000000 --- a/src/glsl/nir/nir_lower_atomics.c +++ /dev/null @@ -1,166 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice 
(including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "ir_uniform.h" -#include "nir.h" -#include "main/config.h" -#include <assert.h> - -typedef struct { - const struct gl_shader_program *shader_program; - nir_shader *shader; -} lower_atomic_state; - -/* - * replace atomic counter intrinsics that use a variable with intrinsics - * that directly store the buffer index and byte offset - */ - -static void -lower_instr(nir_intrinsic_instr *instr, - lower_atomic_state *state) -{ - nir_intrinsic_op op; - switch (instr->intrinsic) { - case nir_intrinsic_atomic_counter_read_var: - op = nir_intrinsic_atomic_counter_read; - break; - - case nir_intrinsic_atomic_counter_inc_var: - op = nir_intrinsic_atomic_counter_inc; - break; - - case nir_intrinsic_atomic_counter_dec_var: - op = nir_intrinsic_atomic_counter_dec; - break; - - default: - return; - } - - if (instr->variables[0]->var->data.mode != nir_var_uniform && - instr->variables[0]->var->data.mode != nir_var_shader_storage) - return; /* atomics passed as function arguments can't be lowered */ - - void *mem_ctx = ralloc_parent(instr); - unsigned uniform_loc = instr->variables[0]->var->data.location; - - nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op); - new_instr->const_index[0] = - state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index; - - nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1); - offset_const->value.u[0] = instr->variables[0]->var->data.offset; - - nir_instr_insert_before(&instr->instr, &offset_const->instr); - - nir_ssa_def *offset_def = &offset_const->def; - - nir_deref *tail = &instr->variables[0]->deref; - while (tail->child != NULL) { - assert(tail->child->deref_type == nir_deref_type_array); - nir_deref_array *deref_array = nir_deref_as_array(tail->child); - tail = tail->child; - - unsigned child_array_elements = tail->child != NULL ?
- glsl_get_aoa_size(tail->type) : 1; - - offset_const->value.u[0] += deref_array->base_offset * - child_array_elements * ATOMIC_COUNTER_SIZE; - - if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - nir_load_const_instr *atomic_counter_size = - nir_load_const_instr_create(mem_ctx, 1); - atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE; - nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr); - - nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul); - nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); - mul->dest.write_mask = 0x1; - nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul); - mul->src[1].src.is_ssa = true; - mul->src[1].src.ssa = &atomic_counter_size->def; - nir_instr_insert_before(&instr->instr, &mul->instr); - - nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd); - nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); - add->dest.write_mask = 0x1; - add->src[0].src.is_ssa = true; - add->src[0].src.ssa = &mul->dest.dest.ssa; - add->src[1].src.is_ssa = true; - add->src[1].src.ssa = offset_def; - nir_instr_insert_before(&instr->instr, &add->instr); - - offset_def = &add->dest.dest.ssa; - } - } - - new_instr->src[0].is_ssa = true; - new_instr->src[0].ssa = offset_def; - - if (instr->dest.is_ssa) { - nir_ssa_dest_init(&new_instr->instr, &new_instr->dest, - instr->dest.ssa.num_components, NULL); - nir_ssa_def_rewrite_uses(&instr->dest.ssa, - nir_src_for_ssa(&new_instr->dest.ssa)); - } else { - nir_dest_copy(&new_instr->dest, &instr->dest, mem_ctx); - } - - nir_instr_insert_before(&instr->instr, &new_instr->instr); - nir_instr_remove(&instr->instr); -} - -static bool -lower_block(nir_block *block, void *state) -{ - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_intrinsic) - lower_instr(nir_instr_as_intrinsic(instr), - (lower_atomic_state *) state); - } - - return true; -} - -void -nir_lower_atomics(nir_shader *shader, - const struct gl_shader_program *shader_program) -{ - lower_atomic_state state = { - .shader = shader, - .shader_program = shader_program, - }; - - nir_foreach_function(shader, function) { - if (function->impl) { - nir_foreach_block(function->impl, lower_block, (void *) &state); - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance); - } - } -} diff --git a/src/glsl/nir/nir_lower_clip.c b/src/glsl/nir/nir_lower_clip.c deleted file mode 100644 index 0ca6a289396..00000000000 --- a/src/glsl/nir/nir_lower_clip.c +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Copyright © 2015 Red Hat - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
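The constant part of the offset that lower_instr() above folds into offset_const follows a simple rule: each direct array level contributes base_offset * child_array_elements * ATOMIC_COUNTER_SIZE bytes on top of the variable's own offset. A standalone model in plain C, with hypothetical flat per-level input arrays; ATOMIC_COUNTER_SIZE here is a stand-in for the value from main/config.h:

#include <stdint.h>

#define ATOMIC_COUNTER_SIZE 4 /* stand-in for the main/config.h value */

/* Constant byte offset of an atomic counter reached through a chain of
 * direct array derefs (the indirect parts are handled with imul/iadd in
 * the pass itself).
 */
static uint32_t
atomic_counter_const_offset(uint32_t var_offset,
                            const uint32_t *base_offsets,
                            const uint32_t *child_array_elements,
                            unsigned levels)
{
   uint32_t offset = var_offset;
   for (unsigned i = 0; i < levels; i++)
      offset += base_offsets[i] * child_array_elements[i] *
                ATOMIC_COUNTER_SIZE;
   return offset;
}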
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "nir.h" -#include "nir_builder.h" - -#define MAX_CLIP_PLANES 8 - -/* Generates the lowering code for user-clip-planes, generating CLIPDIST - * from UCP[n] + CLIPVERTEX or POSITION. Additionally, an optional pass - * for fragment shaders to insert conditional kills based on the inter- - * polated CLIPDIST - * - * NOTE: should be run after nir_lower_outputs_to_temporaries() (or at - * least in scenarios where you can count on each output written once - * and only once). - */ - - -static nir_variable * -create_clipdist_var(nir_shader *shader, unsigned drvloc, - bool output, gl_varying_slot slot) -{ - nir_variable *var = rzalloc(shader, nir_variable); - - var->data.driver_location = drvloc; - var->type = glsl_vec4_type(); - var->data.mode = output ? nir_var_shader_out : nir_var_shader_in; - var->name = ralloc_asprintf(var, "clipdist_%d", drvloc); - var->data.index = 0; - var->data.location = slot; - - if (output) { - exec_list_push_tail(&shader->outputs, &var->node); - shader->num_outputs++; /* TODO use type_size() */ - } - else { - exec_list_push_tail(&shader->inputs, &var->node); - shader->num_inputs++; /* TODO use type_size() */ - } - return var; -} - -static void -store_clipdist_output(nir_builder *b, nir_variable *out, nir_ssa_def **val) -{ - nir_intrinsic_instr *store; - - store = nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_output); - store->num_components = 4; - store->const_index[0] = out->data.driver_location; - store->const_index[1] = 0xf; /* wrmask */ - store->src[0].ssa = nir_vec4(b, val[0], val[1], val[2], val[3]); - store->src[0].is_ssa = true; - store->src[1] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_builder_instr_insert(b, &store->instr); -} - -static void -load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val) -{ - nir_intrinsic_instr *load; - - load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input); - load->num_components = 4; - load->const_index[0] = in->data.driver_location; - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0)); - nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL); - nir_builder_instr_insert(b, &load->instr); - - val[0] = nir_channel(b, &load->dest.ssa, 0); - val[1] = nir_channel(b, &load->dest.ssa, 1); - val[2] = nir_channel(b, &load->dest.ssa, 2); - val[3] = nir_channel(b, &load->dest.ssa, 3); -} - -struct find_output_state -{ - unsigned drvloc; - nir_ssa_def *def; -}; - -static bool -find_output_in_block(nir_block *block, void *void_state) -{ - struct find_output_state *state = void_state; - nir_foreach_instr(block, instr) { - - if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); - if ((intr->intrinsic == nir_intrinsic_store_output) && - intr->const_index[0] == state->drvloc) { - assert(state->def == NULL); - assert(intr->src[0].is_ssa); - assert(nir_src_as_const_value(intr->src[1])); - state->def = intr->src[0].ssa; - -#if !defined(DEBUG) - /* for debug builds, scan entire shader to assert - * if output is written multiple times. For release - * builds just assume all is well and bail when we - * find first: - */ - return false; -#endif - } - } - } - - return true; -} - -/* TODO: maybe this would be a useful helper?
- * NOTE: assumes each output is written exactly once (and unconditionally), - * so, if needed, run nir_lower_outputs_to_temporaries() first - */ -static nir_ssa_def * -find_output(nir_shader *shader, unsigned drvloc) -{ - struct find_output_state state = { - .drvloc = drvloc, - }; - - nir_foreach_function(shader, function) { - if (function->impl) { - nir_foreach_block_reverse(function->impl, - find_output_in_block, &state); - } - } - - return state.def; -} - -/* - * VS lowering - */ - -static void -lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables, - nir_ssa_def *cv, nir_variable **out) -{ - nir_ssa_def *clipdist[MAX_CLIP_PLANES]; - nir_builder b; - - nir_builder_init(&b, impl); - - /* NIR should ensure that, even in case of loops/if-else, there - * should be only a single predecessor block to end_block, which - * makes the perfect place to insert the clipdist calculations. - * - * NOTE: in case of early returns, these would have to be lowered - * to jumps to end_block predecessor in a previous pass. Not sure - * if there is a good way to sanity check this, but for now the - * users of this pass don't support sub-routines. - */ - assert(impl->end_block->predecessors->entries == 1); - b.cursor = nir_after_cf_list(&impl->body); - - for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { - if (ucp_enables & (1 << plane)) { - nir_ssa_def *ucp = - nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane, plane); - - /* calculate clipdist[plane] = dot(ucp, cv): */ - clipdist[plane] = nir_fdot4(&b, ucp, cv); - } - else { - /* 0.0 == don't-clip == disabled: */ - clipdist[plane] = nir_imm_float(&b, 0.0); - } - } - - if (ucp_enables & 0x0f) - store_clipdist_output(&b, out[0], &clipdist[0]); - if (ucp_enables & 0xf0) - store_clipdist_output(&b, out[1], &clipdist[4]); - - nir_metadata_preserve(impl, nir_metadata_dominance); -} - -/* ucp_enables is a bitmask of enabled UCPs. Actual ucp values are - * passed in to shader via user_clip_plane system-values - */ -void -nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables) -{ - int clipvertex = -1; - int position = -1; - int maxloc = -1; - nir_ssa_def *cv; - nir_variable *out[2] = { NULL }; - - if (!ucp_enables) - return; - - /* find clipvertex/position outputs: */ - nir_foreach_variable(var, &shader->outputs) { - int loc = var->data.driver_location; - - /* keep track of last used driver-location.. we'll be - * appending CLIP_DIST0/CLIP_DIST1 after last existing - * output: - */ - maxloc = MAX2(maxloc, loc); - - switch (var->data.location) { - case VARYING_SLOT_POS: - position = loc; - break; - case VARYING_SLOT_CLIP_VERTEX: - clipvertex = loc; - break; - case VARYING_SLOT_CLIP_DIST0: - case VARYING_SLOT_CLIP_DIST1: - /* if shader is already writing CLIPDIST, then - * there should be no user-clip-planes to deal - * with.
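The per-plane value computed in the loop above is just a dot product against the user clip plane, with disabled planes pinned to 0.0 so they never clip. Pulled out as a hypothetical helper (assuming "b" and "cv" as set up in lower_clip_vs(); not a function the pass actually defines):

/* Hypothetical helper computing a single clip distance:
 * CLIPDIST[plane] = dot(UCP[plane], cv), or 0.0 when disabled.
 */
static nir_ssa_def *
clipdist_for_plane(nir_builder *b, nir_ssa_def *cv,
                   unsigned plane, unsigned ucp_enables)
{
   if (!(ucp_enables & (1 << plane)))
      return nir_imm_float(b, 0.0);

   nir_ssa_def *ucp =
      nir_load_system_value(b, nir_intrinsic_load_user_clip_plane, plane);
   return nir_fdot4(b, ucp, cv);
}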
- */ - return; - } - } - - if (clipvertex != -1) - cv = find_output(shader, clipvertex); - else if (position != -1) - cv = find_output(shader, position); - else - return; - - /* insert CLIPDIST outputs: */ - if (ucp_enables & 0x0f) - out[0] = - create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST0); - if (ucp_enables & 0xf0) - out[1] = - create_clipdist_var(shader, ++maxloc, true, VARYING_SLOT_CLIP_DIST1); - - nir_foreach_function(shader, function) { - if (!strcmp(function->name, "main")) - lower_clip_vs(function->impl, ucp_enables, cv, out); - } -} - -/* - * FS lowering - */ - -static void -lower_clip_fs(nir_function_impl *impl, unsigned ucp_enables, - nir_variable **in) -{ - nir_ssa_def *clipdist[MAX_CLIP_PLANES]; - nir_builder b; - - nir_builder_init(&b, impl); - b.cursor = nir_before_cf_list(&impl->body); - - if (ucp_enables & 0x0f) - load_clipdist_input(&b, in[0], &clipdist[0]); - if (ucp_enables & 0xf0) - load_clipdist_input(&b, in[1], &clipdist[4]); - - for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) { - if (ucp_enables & (1 << plane)) { - nir_intrinsic_instr *discard; - nir_ssa_def *cond; - - cond = nir_flt(&b, clipdist[plane], nir_imm_float(&b, 0.0)); - - discard = nir_intrinsic_instr_create(b.shader, - nir_intrinsic_discard_if); - discard->src[0] = nir_src_for_ssa(cond); - nir_builder_instr_insert(&b, &discard->instr); - } - } -} - -/* insert conditional kill based on interpolated CLIPDIST - */ -void -nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables) -{ - nir_variable *in[2]; - int maxloc = -1; - - if (!ucp_enables) - return; - - nir_foreach_variable(var, &shader->inputs) { - int loc = var->data.driver_location; - - /* keep track of last used driver-location.. we'll be - * appending CLIP_DIST0/CLIP_DIST1 after last existing - * input: - */ - maxloc = MAX2(maxloc, loc); - } - - /* The shader won't normally have CLIPDIST inputs, so we - * must add our own: - */ - /* insert CLIPDIST outputs: */ - if (ucp_enables & 0x0f) - in[0] = - create_clipdist_var(shader, ++maxloc, false, - VARYING_SLOT_CLIP_DIST0); - if (ucp_enables & 0xf0) - in[1] = - create_clipdist_var(shader, ++maxloc, false, - VARYING_SLOT_CLIP_DIST1); - - nir_foreach_function(shader, function) { - if (!strcmp(function->name, "main")) - lower_clip_fs(function->impl, ucp_enables, in); - } -} diff --git a/src/glsl/nir/nir_lower_global_vars_to_local.c b/src/glsl/nir/nir_lower_global_vars_to_local.c deleted file mode 100644 index 7b4cd4ee8dc..00000000000 --- a/src/glsl/nir/nir_lower_global_vars_to_local.c +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -/* - * This lowering pass detects when a global variable is only being used by - * one function and makes it local to that function - */ - -#include "nir.h" - -struct global_to_local_state { - nir_function_impl *impl; - /* A hash table keyed on variable pointers that stores the unique - * nir_function_impl that uses the given variable. If a variable is - * used in multiple functions, the data for the given key will be NULL. - */ - struct hash_table *var_func_table; -}; - -static bool -mark_global_var_uses_block(nir_block *block, void *void_state) -{ - struct global_to_local_state *state = void_state; - - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - unsigned num_vars = nir_intrinsic_infos[intrin->intrinsic].num_variables; - - for (unsigned i = 0; i < num_vars; i++) { - nir_variable *var = intrin->variables[i]->var; - if (var->data.mode != nir_var_global) - continue; - - struct hash_entry *entry = - _mesa_hash_table_search(state->var_func_table, var); - - if (entry) { - if (entry->data != state->impl) - entry->data = NULL; - } else { - _mesa_hash_table_insert(state->var_func_table, var, state->impl); - } - } - } - - return true; -} - -bool -nir_lower_global_vars_to_local(nir_shader *shader) -{ - struct global_to_local_state state; - bool progress = false; - - state.var_func_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - nir_foreach_function(shader, function) { - if (function->impl) { - state.impl = function->impl; - nir_foreach_block(function->impl, mark_global_var_uses_block, &state); - } - } - - struct hash_entry *entry; - hash_table_foreach(state.var_func_table, entry) { - nir_variable *var = (void *)entry->key; - nir_function_impl *impl = entry->data; - - assert(var->data.mode == nir_var_global); - - if (impl != NULL) { - exec_node_remove(&var->node); - var->data.mode = nir_var_local; - exec_list_push_tail(&impl->locals, &var->node); - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance | - nir_metadata_live_ssa_defs); - progress = true; - } - } - - _mesa_hash_table_destroy(state.var_func_table, NULL); - - return progress; -} diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c b/src/glsl/nir/nir_lower_gs_intrinsics.c deleted file mode 100644 index fdff1656b4d..00000000000 --- a/src/glsl/nir/nir_lower_gs_intrinsics.c +++ /dev/null @@ -1,219 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
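The var_func_table bookkeeping in nir_lower_global_vars_to_local() above reduces to: remember the first function seen using a variable, and poison the entry once a second user shows up. A standalone sketch of that pattern with a hypothetical fixed-size pointer map in place of the hash table:

#include <stddef.h>

/* Hypothetical pointer map recording, per variable, the single function
 * that uses it; unique_user becomes NULL once a second user is seen,
 * which is exactly the "don't localize" signal.
 */
struct var_use {
   const void *var;
   const void *unique_user;
};

static void
record_use(struct var_use *map, size_t *count, size_t cap,
           const void *var, const void *user)
{
   for (size_t i = 0; i < *count; i++) {
      if (map[i].var == var) {
         if (map[i].unique_user != user)
            map[i].unique_user = NULL; /* used by more than one function */
         return;
      }
   }
   if (*count < cap) {
      map[*count].var = var;
      map[*count].unique_user = user;
      (*count)++;
   }
}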
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "nir.h" -#include "nir_builder.h" - -/** - * \file nir_lower_gs_intrinsics.c - * - * Geometry Shaders can call EmitVertex()/EmitStreamVertex() to output an - * arbitrary number of vertices. However, the shader must declare the maximum - * number of vertices that it will ever output - further attempts to emit - * vertices result in undefined behavior according to the GLSL specification. - * - * Drivers might use this maximum number of vertices to allocate enough space - * to hold the geometry shader's output. Some drivers (such as i965) need to - * implement "safety checks" which ensure that the shader hasn't emitted too - * many vertices, to avoid overflowing that space and trashing other memory. - * - * The count of emitted vertices can also be useful in buffer offset - * calculations, so drivers know where to write the GS output. - * - * However, for simple geometry shaders that emit a statically determinable - * number of vertices, this extra bookkeeping is unnecessary and inefficient. - * By tracking the vertex count in NIR, we allow constant folding/propagation - * and dead control flow optimizations to eliminate most of it where possible. - * - * This pass introduces a new global variable which stores the current vertex - * count (initialized to 0), and converts emit_vertex/end_primitive intrinsics - * to their *_with_counter variants. emit_vertex is also wrapped in a safety - * check to avoid buffer overflows. Finally, it adds a set_vertex_count - * intrinsic at the end of the program, informing the driver of the final - * vertex count. - */ - -struct state { - nir_builder *builder; - nir_variable *vertex_count_var; - bool progress; -}; - -/** - * Replace emit_vertex intrinsics with: - * - * if (vertex_count < max_vertices) { - * emit_vertex_with_counter vertex_count ... - * vertex_count += 1 - * } - */ -static void -rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state) -{ - nir_builder *b = state->builder; - - /* Load the vertex count */ - b->cursor = nir_before_instr(&intrin->instr); - nir_ssa_def *count = nir_load_var(b, state->vertex_count_var); - - nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->info.gs.vertices_out); - - /* Create: if (vertex_count < max_vertices) and insert it. - * - * The new if statement needs to be hooked up to the control flow graph - * before we start inserting instructions into it. 
- */ - nir_if *if_stmt = nir_if_create(b->shader); - if_stmt->condition = nir_src_for_ssa(nir_ilt(b, count, max_vertices)); - nir_builder_cf_insert(b, &if_stmt->cf_node); - - /* Fill out the new then-block */ - b->cursor = nir_after_cf_list(&if_stmt->then_list); - - nir_intrinsic_instr *lowered = - nir_intrinsic_instr_create(b->shader, - nir_intrinsic_emit_vertex_with_counter); - lowered->const_index[0] = intrin->const_index[0]; - lowered->src[0] = nir_src_for_ssa(count); - nir_builder_instr_insert(b, &lowered->instr); - - /* Increment the vertex count by 1 */ - nir_store_var(b, state->vertex_count_var, - nir_iadd(b, count, nir_imm_int(b, 1)), - 0x1); /* .x */ - - nir_instr_remove(&intrin->instr); - - state->progress = true; -} - -/** - * Replace end_primitive with end_primitive_with_counter. - */ -static void -rewrite_end_primitive(nir_intrinsic_instr *intrin, struct state *state) -{ - nir_builder *b = state->builder; - - b->cursor = nir_before_instr(&intrin->instr); - nir_ssa_def *count = nir_load_var(b, state->vertex_count_var); - - nir_intrinsic_instr *lowered = - nir_intrinsic_instr_create(b->shader, - nir_intrinsic_end_primitive_with_counter); - lowered->const_index[0] = intrin->const_index[0]; - lowered->src[0] = nir_src_for_ssa(count); - nir_builder_instr_insert(b, &lowered->instr); - - nir_instr_remove(&intrin->instr); - - state->progress = true; -} - -static bool -rewrite_intrinsics(nir_block *block, void *closure) -{ - struct state *state = closure; - - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - switch (intrin->intrinsic) { - case nir_intrinsic_emit_vertex: - rewrite_emit_vertex(intrin, state); - break; - case nir_intrinsic_end_primitive: - rewrite_end_primitive(intrin, state); - break; - default: - /* not interesting; skip this */ - break; - } - } - - return true; -} - -/** - * Add a set_vertex_count intrinsic at the end of the program - * (representing the final vertex count). - */ -static void -append_set_vertex_count(nir_block *end_block, struct state *state) -{ - nir_builder *b = state->builder; - nir_shader *shader = state->builder->shader; - - /* Insert the new intrinsic in all of the predecessors of the end block, - * but before any jump instructions (return). 
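Taken together, the rewrites above give each emission site the following shape (shader-style pseudocode, not literal NIR; vertex_count is the global counter the pass introduces):

/*
 *    emit_vertex(stream);
 *
 * becomes
 *
 *    if (vertex_count < max_vertices) {
 *       emit_vertex_with_counter(vertex_count, stream);
 *       vertex_count += 1;
 *    }
 *
 * and every end_primitive(stream) becomes
 * end_primitive_with_counter(vertex_count, stream).
 */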
- */ - struct set_entry *entry; - set_foreach(end_block->predecessors, entry) { - nir_block *pred = (nir_block *) entry->key; - b->cursor = nir_after_block_before_jump(pred); - - nir_ssa_def *count = nir_load_var(b, state->vertex_count_var); - - nir_intrinsic_instr *set_vertex_count = - nir_intrinsic_instr_create(shader, nir_intrinsic_set_vertex_count); - set_vertex_count->src[0] = nir_src_for_ssa(count); - - nir_builder_instr_insert(b, &set_vertex_count->instr); - } -} - -bool -nir_lower_gs_intrinsics(nir_shader *shader) -{ - struct state state; - state.progress = false; - - /* Create the counter variable */ - nir_variable *var = rzalloc(shader, nir_variable); - var->data.mode = nir_var_global; - var->type = glsl_uint_type(); - var->name = "vertex_count"; - var->constant_initializer = rzalloc(shader, nir_constant); /* initialize to 0 */ - - exec_list_push_tail(&shader->globals, &var->node); - state.vertex_count_var = var; - - nir_foreach_function(shader, function) { - if (function->impl) { - nir_builder b; - nir_builder_init(&b, function->impl); - state.builder = &b; - - nir_foreach_block(function->impl, rewrite_intrinsics, &state); - - /* This only works because we have a single main() function. */ - append_set_vertex_count(function->impl->end_block, &state); - - nir_metadata_preserve(function->impl, 0); - } - } - - return state.progress; -} diff --git a/src/glsl/nir/nir_lower_idiv.c b/src/glsl/nir/nir_lower_idiv.c deleted file mode 100644 index a084ad9c0e5..00000000000 --- a/src/glsl/nir/nir_lower_idiv.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Copyright © 2015 Red Hat - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Rob Clark - */ - -#include "nir.h" -#include "nir_builder.h" - -/* Lowers idiv/udiv/umod - * Based on NV50LegalizeSSA::handleDIV() - * - * Note that this is probably not enough precision for compute shaders. - * Perhaps we want a second higher precision (looping) version of this? - * Or perhaps we assume if you can do compute shaders you can also - * branch out to a pre-optimized shader library routine.. 
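The lowering below can be modeled in plain C to see why it works: take a deliberately under-biased reciprocal, form a first quotient guess, add back the quotient of the remainder, and finish with a single off-by-one correction. A hypothetical model of the unsigned path; like the pass itself it is approximate, and exact float rounding varies by hardware:

#include <stdint.h>

/* Standalone model of the unsigned division lowering below. */
static uint32_t
approx_udiv(uint32_t a, uint32_t b)
{
   union { float f; uint32_t bits; } rcp = { .f = 1.0f / (float)b };
   rcp.bits -= 2; /* bias the reciprocal down, as the pass does ("yes, really") */

   uint32_t q = (uint32_t)((float)a * rcp.f); /* first result */
   uint32_t r = a - q * b;                    /* error of first result */
   q += (uint32_t)((float)r * rcp.f);         /* add quotients */
   r = a - q * b;
   if (r >= b)                                /* correction: off by one */
      q++;
   return q;
}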
- */ - -static void -convert_instr(nir_builder *bld, nir_alu_instr *alu) -{ - nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r; - nir_op op = alu->op; - bool is_signed; - - if ((op != nir_op_idiv) && - (op != nir_op_udiv) && - (op != nir_op_umod)) - return; - - is_signed = (op == nir_op_idiv); - - bld->cursor = nir_before_instr(&alu->instr); - - numer = nir_ssa_for_alu_src(bld, alu, 0); - denom = nir_ssa_for_alu_src(bld, alu, 1); - - if (is_signed) { - af = nir_i2f(bld, numer); - bf = nir_i2f(bld, denom); - af = nir_fabs(bld, af); - bf = nir_fabs(bld, bf); - a = nir_iabs(bld, numer); - b = nir_iabs(bld, denom); - } else { - af = nir_u2f(bld, numer); - bf = nir_u2f(bld, denom); - a = numer; - b = denom; - } - - /* get first result: */ - bf = nir_frcp(bld, bf); - bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */ - q = nir_fmul(bld, af, bf); - - if (is_signed) { - q = nir_f2i(bld, q); - } else { - q = nir_f2u(bld, q); - } - - /* get error of first result: */ - r = nir_imul(bld, q, b); - r = nir_isub(bld, a, r); - r = nir_u2f(bld, r); - r = nir_fmul(bld, r, bf); - r = nir_f2u(bld, r); - - /* add quotients: */ - q = nir_iadd(bld, q, r); - - /* correction: if modulus >= divisor, add 1 */ - r = nir_imul(bld, q, b); - r = nir_isub(bld, a, r); - - r = nir_uge(bld, r, b); - r = nir_b2i(bld, r); - - q = nir_iadd(bld, q, r); - if (is_signed) { - /* fix the sign: */ - r = nir_ixor(bld, numer, denom); - r = nir_ushr(bld, r, nir_imm_int(bld, 31)); - r = nir_i2b(bld, r); - b = nir_ineg(bld, q); - q = nir_bcsel(bld, r, b, q); - } - - if (op == nir_op_umod) { - /* division result in q */ - r = nir_imul(bld, q, b); - q = nir_isub(bld, a, r); - } - - assert(alu->dest.dest.is_ssa); - nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q)); -} - -static bool -convert_block(nir_block *block, void *state) -{ - nir_builder *b = state; - - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_alu) - convert_instr(b, nir_instr_as_alu(instr)); - } - - return true; -} - -static void -convert_impl(nir_function_impl *impl) -{ - nir_builder b; - nir_builder_init(&b, impl); - - nir_foreach_block(impl, convert_block, &b); - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); -} - -void -nir_lower_idiv(nir_shader *shader) -{ - nir_foreach_function(shader, function) { - if (function->impl) - convert_impl(function->impl); - } -} diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c deleted file mode 100644 index 80c5151f0ea..00000000000 --- a/src/glsl/nir/nir_lower_io.c +++ /dev/null @@ -1,350 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -/* - * This lowering pass converts references to input/output variables with - * loads/stores to actual input/output intrinsics. - */ - -#include "nir.h" -#include "nir_builder.h" - -struct lower_io_state { - nir_builder builder; - void *mem_ctx; - int (*type_size)(const struct glsl_type *type); - nir_variable_mode mode; -}; - -void -nir_assign_var_locations(struct exec_list *var_list, unsigned *size, - int (*type_size)(const struct glsl_type *)) -{ - unsigned location = 0; - - nir_foreach_variable(var, var_list) { - /* - * UBO's have their own address spaces, so don't count them towards the - * number of global uniforms - */ - if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) && - var->interface_type != NULL) - continue; - - var->data.driver_location = location; - location += type_size(var->type); - } - - *size = location; -} - -/** - * Returns true if we're processing a stage whose inputs are arrays indexed - * by a vertex number (such as geometry shader inputs). - */ -static bool -is_per_vertex_input(struct lower_io_state *state, nir_variable *var) -{ - gl_shader_stage stage = state->builder.shader->stage; - - return var->data.mode == nir_var_shader_in && !var->data.patch && - (stage == MESA_SHADER_TESS_CTRL || - stage == MESA_SHADER_TESS_EVAL || - stage == MESA_SHADER_GEOMETRY); -} - -static bool -is_per_vertex_output(struct lower_io_state *state, nir_variable *var) -{ - gl_shader_stage stage = state->builder.shader->stage; - return var->data.mode == nir_var_shader_out && !var->data.patch && - stage == MESA_SHADER_TESS_CTRL; -} - -static nir_ssa_def * -get_io_offset(nir_builder *b, nir_deref_var *deref, - nir_ssa_def **vertex_index, - int (*type_size)(const struct glsl_type *)) -{ - nir_deref *tail = &deref->deref; - - /* For per-vertex input arrays (i.e. geometry shader inputs), keep the - * outermost array index separate. Process the rest normally. 
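nir_assign_var_locations() above is a running-sum assignment: each variable gets the total size of everything placed before it, and the sum of all sizes comes back as *size. A standalone model, with a hypothetical toy_var type standing in for nir_variable and its type_size callback:

/* Standalone model of running-sum location assignment. */
struct toy_var {
   unsigned size;            /* as returned by type_size(var->type) */
   unsigned driver_location; /* filled in here */
};

static unsigned
assign_locations(struct toy_var *vars, unsigned num_vars)
{
   unsigned location = 0;
   for (unsigned i = 0; i < num_vars; i++) {
      vars[i].driver_location = location;
      location += vars[i].size;
   }
   return location; /* total size of everything assigned */
}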
- */ - if (vertex_index != NULL) { - tail = tail->child; - assert(tail->deref_type == nir_deref_type_array); - nir_deref_array *deref_array = nir_deref_as_array(tail); - - nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset); - if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1)); - } - *vertex_index = vtx; - } - - /* Just emit code and let constant-folding go to town */ - nir_ssa_def *offset = nir_imm_int(b, 0); - - while (tail->child != NULL) { - const struct glsl_type *parent_type = tail->type; - tail = tail->child; - - if (tail->deref_type == nir_deref_type_array) { - nir_deref_array *deref_array = nir_deref_as_array(tail); - unsigned size = type_size(tail->type); - - offset = nir_iadd(b, offset, - nir_imm_int(b, size * deref_array->base_offset)); - - if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - nir_ssa_def *mul = - nir_imul(b, nir_imm_int(b, size), - nir_ssa_for_src(b, deref_array->indirect, 1)); - - offset = nir_iadd(b, offset, mul); - } - } else if (tail->deref_type == nir_deref_type_struct) { - nir_deref_struct *deref_struct = nir_deref_as_struct(tail); - - unsigned field_offset = 0; - for (unsigned i = 0; i < deref_struct->index; i++) { - field_offset += type_size(glsl_get_struct_field(parent_type, i)); - } - offset = nir_iadd(b, offset, nir_imm_int(b, field_offset)); - } - } - - return offset; -} - -static nir_intrinsic_op -load_op(struct lower_io_state *state, - nir_variable_mode mode, bool per_vertex) -{ - nir_intrinsic_op op; - switch (mode) { - case nir_var_shader_in: - op = per_vertex ? nir_intrinsic_load_per_vertex_input : - nir_intrinsic_load_input; - break; - case nir_var_shader_out: - op = per_vertex ? nir_intrinsic_load_per_vertex_output : - nir_intrinsic_load_output; - break; - case nir_var_uniform: - op = nir_intrinsic_load_uniform; - break; - default: - unreachable("Unknown variable mode"); - } - return op; -} - -static bool -nir_lower_io_block(nir_block *block, void *void_state) -{ - struct lower_io_state *state = void_state; - - nir_builder *b = &state->builder; - - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - if (intrin->intrinsic != nir_intrinsic_load_var && - intrin->intrinsic != nir_intrinsic_store_var) - continue; - - nir_variable_mode mode = intrin->variables[0]->var->data.mode; - - if (state->mode != nir_var_all && state->mode != mode) - continue; - - if (mode != nir_var_shader_in && - mode != nir_var_shader_out && - mode != nir_var_uniform) - continue; - - b->cursor = nir_before_instr(instr); - - switch (intrin->intrinsic) { - case nir_intrinsic_load_var: { - bool per_vertex = - is_per_vertex_input(state, intrin->variables[0]->var) || - is_per_vertex_output(state, intrin->variables[0]->var); - - nir_ssa_def *offset; - nir_ssa_def *vertex_index; - - offset = get_io_offset(b, intrin->variables[0], - per_vertex ? &vertex_index : NULL, - state->type_size); - - nir_intrinsic_instr *load = - nir_intrinsic_instr_create(state->mem_ctx, - load_op(state, mode, per_vertex)); - load->num_components = intrin->num_components; - - load->const_index[0] = - intrin->variables[0]->var->data.driver_location; - - if (per_vertex) - load->src[0] = nir_src_for_ssa(vertex_index); - - load->src[per_vertex ? 
1 : 0] = nir_src_for_ssa(offset); - - if (intrin->dest.is_ssa) { - nir_ssa_dest_init(&load->instr, &load->dest, - intrin->num_components, NULL); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&load->dest.ssa)); - } else { - nir_dest_copy(&load->dest, &intrin->dest, state->mem_ctx); - } - - nir_instr_insert_before(&intrin->instr, &load->instr); - nir_instr_remove(&intrin->instr); - break; - } - - case nir_intrinsic_store_var: { - assert(mode == nir_var_shader_out); - - nir_ssa_def *offset; - nir_ssa_def *vertex_index; - - bool per_vertex = - is_per_vertex_output(state, intrin->variables[0]->var); - - offset = get_io_offset(b, intrin->variables[0], - per_vertex ? &vertex_index : NULL, - state->type_size); - - nir_intrinsic_op store_op = - per_vertex ? nir_intrinsic_store_per_vertex_output : - nir_intrinsic_store_output; - - nir_intrinsic_instr *store = nir_intrinsic_instr_create(state->mem_ctx, - store_op); - store->num_components = intrin->num_components; - - nir_src_copy(&store->src[0], &intrin->src[0], store); - - store->const_index[0] = - intrin->variables[0]->var->data.driver_location; - - /* Copy the writemask */ - store->const_index[1] = intrin->const_index[0]; - - if (per_vertex) - store->src[1] = nir_src_for_ssa(vertex_index); - - store->src[per_vertex ? 2 : 1] = nir_src_for_ssa(offset); - - nir_instr_insert_before(&intrin->instr, &store->instr); - nir_instr_remove(&intrin->instr); - break; - } - - default: - break; - } - } - - return true; -} - -static void -nir_lower_io_impl(nir_function_impl *impl, - nir_variable_mode mode, - int (*type_size)(const struct glsl_type *)) -{ - struct lower_io_state state; - - nir_builder_init(&state.builder, impl); - state.mem_ctx = ralloc_parent(impl); - state.mode = mode; - state.type_size = type_size; - - nir_foreach_block(impl, nir_lower_io_block, &state); - - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); -} - -void -nir_lower_io(nir_shader *shader, nir_variable_mode mode, - int (*type_size)(const struct glsl_type *)) -{ - nir_foreach_function(shader, function) { - if (function->impl) - nir_lower_io_impl(function->impl, mode, type_size); - } -} - -/** - * Return the offset source for a load/store intrinsic. - */ -nir_src * -nir_get_io_offset_src(nir_intrinsic_instr *instr) -{ - switch (instr->intrinsic) { - case nir_intrinsic_load_input: - case nir_intrinsic_load_output: - case nir_intrinsic_load_uniform: - return &instr->src[0]; - case nir_intrinsic_load_per_vertex_input: - case nir_intrinsic_load_per_vertex_output: - case nir_intrinsic_store_output: - return &instr->src[1]; - case nir_intrinsic_store_per_vertex_output: - return &instr->src[2]; - default: - return NULL; - } -} - -/** - * Return the vertex index source for a load/store per_vertex intrinsic.
- */ -nir_src * -nir_get_io_vertex_index_src(nir_intrinsic_instr *instr) -{ - switch (instr->intrinsic) { - case nir_intrinsic_load_per_vertex_input: - case nir_intrinsic_load_per_vertex_output: - return &instr->src[0]; - case nir_intrinsic_store_per_vertex_output: - return &instr->src[1]; - default: - return NULL; - } -} diff --git a/src/glsl/nir/nir_lower_load_const_to_scalar.c b/src/glsl/nir/nir_lower_load_const_to_scalar.c deleted file mode 100644 index 1eeed13cbac..00000000000 --- a/src/glsl/nir/nir_lower_load_const_to_scalar.c +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright © 2015 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "util/macros.h" -#include "nir.h" -#include "nir_builder.h" - -/** @file nir_lower_load_const_to_scalar.c - * - * Replaces vector nir_load_const instructions with a series of loads and a - * vec[234] to reconstruct the original vector (on the assumption that - * nir_lower_alu_to_scalar() will then be used to split it up). - * - * This gives NIR a chance to CSE more operations on a scalar shader, when the - * same value was used in different vector constant loads. - */ - -static void -lower_load_const_instr_scalar(nir_load_const_instr *lower) -{ - if (lower->def.num_components == 1) - return; - - nir_builder b; - nir_builder_init(&b, nir_cf_node_get_function(&lower->instr.block->cf_node)); - b.cursor = nir_before_instr(&lower->instr); - - /* Emit the individual loads. */ - nir_ssa_def *loads[4]; - for (unsigned i = 0; i < lower->def.num_components; i++) { - nir_load_const_instr *load_comp = nir_load_const_instr_create(b.shader, 1); - load_comp->value.u[0] = lower->value.u[i]; - nir_builder_instr_insert(&b, &load_comp->instr); - loads[i] = &load_comp->def; - } - - /* Batch things back together into a vector. */ - nir_ssa_def *vec = nir_vec(&b, loads, lower->def.num_components); - - /* Replace the old load with a reference to our reconstructed vector.
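As a concrete picture of the pass's output, a 4-component constant becomes four scalar load_const instructions plus a vec4 regrouping them. A hypothetical hand-written equivalent using nir_builder (nir_imm_float emits a scalar load_const; "b" is assumed to be a positioned builder):

/* Hypothetical hand-scalarized vec4 constant. */
static nir_ssa_def *
scalarized_const_vec4(nir_builder *b, float x, float y, float z, float w)
{
   nir_ssa_def *comps[4] = {
      nir_imm_float(b, x), nir_imm_float(b, y),
      nir_imm_float(b, z), nir_imm_float(b, w),
   };
   return nir_vec(b, comps, 4);
}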
*/ - nir_ssa_def_rewrite_uses(&lower->def, nir_src_for_ssa(vec)); - nir_instr_remove(&lower->instr); -} - -static bool -lower_load_const_to_scalar_block(nir_block *block, void *data) -{ - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_load_const) - lower_load_const_instr_scalar(nir_instr_as_load_const(instr)); - } - - return true; -} - -static void -nir_lower_load_const_to_scalar_impl(nir_function_impl *impl) -{ - nir_foreach_block(impl, lower_load_const_to_scalar_block, NULL); -} - -void -nir_lower_load_const_to_scalar(nir_shader *shader) -{ - nir_foreach_function(shader, function) { - if (function->impl) - nir_lower_load_const_to_scalar_impl(function->impl); - } -} diff --git a/src/glsl/nir/nir_lower_locals_to_regs.c b/src/glsl/nir/nir_lower_locals_to_regs.c deleted file mode 100644 index 51b0fa733f2..00000000000 --- a/src/glsl/nir/nir_lower_locals_to_regs.c +++ /dev/null @@ -1,396 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "nir.h" -#include "nir_array.h" - -struct locals_to_regs_state { - nir_shader *shader; - nir_function_impl *impl; - - /* A hash table mapping derefs to registers */ - struct hash_table *regs_table; - - /* A growing array of derefs that we have encountered. There is exactly - * one element of this array per element in the hash table. This is - * used to make adding register initialization code deterministic. - */ - nir_array derefs_array; - - bool progress; -}; - -/* The following two functions implement a hash and equality check for - * variable dereferences. When the hash or equality function encounters an - * array, it ignores the offset and whether it is direct or indirect - * entirely.
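The hashing scheme referred to here is FNV-1a, as provided by Mesa's _mesa_fnv32_1a_* helpers. A standalone restatement in plain C with the standard 32-bit constants; note that hash_deref() below deliberately skips array indices, so that, for example, a[0].x and a[i].x hash identically, matching the equality function:

#include <stdint.h>

#define FNV32_OFFSET_BIAS 0x811c9dc5u /* 32-bit FNV-1a offset basis */
#define FNV32_PRIME       0x01000193u /* 32-bit FNV prime */

/* Fold "size" bytes of key material into an FNV-1a hash. */
static uint32_t
fnv1a_accumulate(uint32_t hash, const void *data, unsigned size)
{
   const uint8_t *bytes = data;
   for (unsigned i = 0; i < size; i++) {
      hash ^= bytes[i];
      hash *= FNV32_PRIME;
   }
   return hash;
}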
- */ -static uint32_t -hash_deref(const void *void_deref) -{ - uint32_t hash = _mesa_fnv32_1a_offset_bias; - - const nir_deref_var *deref_var = void_deref; - hash = _mesa_fnv32_1a_accumulate(hash, deref_var->var); - - for (const nir_deref *deref = deref_var->deref.child; - deref; deref = deref->child) { - if (deref->deref_type == nir_deref_type_struct) { - const nir_deref_struct *deref_struct = nir_deref_as_struct(deref); - hash = _mesa_fnv32_1a_accumulate(hash, deref_struct->index); - } - } - - return hash; -} - -static bool -derefs_equal(const void *void_a, const void *void_b) -{ - const nir_deref_var *a_var = void_a; - const nir_deref_var *b_var = void_b; - - if (a_var->var != b_var->var) - return false; - - for (const nir_deref *a = a_var->deref.child, *b = b_var->deref.child; - a != NULL; a = a->child, b = b->child) { - if (a->deref_type != b->deref_type) - return false; - - if (a->deref_type == nir_deref_type_struct) { - if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index) - return false; - } - /* Do nothing for arrays. They're all the same. */ - - assert((a->child == NULL) == (b->child == NULL)); - if((a->child == NULL) != (b->child == NULL)) - return false; - } - - return true; -} - -static nir_register * -get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state) -{ - uint32_t hash = hash_deref(deref); - - struct hash_entry *entry = - _mesa_hash_table_search_pre_hashed(state->regs_table, hash, deref); - if (entry) - return entry->data; - - unsigned array_size = 1; - nir_deref *tail = &deref->deref; - while (tail->child) { - if (tail->child->deref_type == nir_deref_type_array) - array_size *= glsl_get_length(tail->type); - tail = tail->child; - } - - assert(glsl_type_is_vector(tail->type) || glsl_type_is_scalar(tail->type)); - - nir_register *reg = nir_local_reg_create(state->impl); - reg->num_components = glsl_get_vector_elements(tail->type); - reg->num_array_elems = array_size > 1 ? array_size : 0; - - _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg); - nir_array_add(&state->derefs_array, nir_deref_var *, deref); - - return reg; -} - -static nir_src -get_deref_reg_src(nir_deref_var *deref, nir_instr *instr, - struct locals_to_regs_state *state) -{ - nir_src src; - - src.is_ssa = false; - src.reg.reg = get_reg_for_deref(deref, state); - src.reg.base_offset = 0; - src.reg.indirect = NULL; - - /* It is possible for a user to create a shader that has an array with a - * single element and then proceed to access it indirectly. Indirectly - * accessing a non-array register is not allowed in NIR. In order to - * handle this case we just convert it to a direct reference. 
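-    * (For instance, a local "float arr[1]" read as "arr[i]": its
-    * register was created with num_array_elems == 0, so the access
-    * falls back to a direct reference below.)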
- */ - if (src.reg.reg->num_array_elems == 0) - return src; - - nir_deref *tail = &deref->deref; - while (tail->child != NULL) { - const struct glsl_type *parent_type = tail->type; - tail = tail->child; - - if (tail->deref_type != nir_deref_type_array) - continue; - - nir_deref_array *deref_array = nir_deref_as_array(tail); - - src.reg.base_offset *= glsl_get_length(parent_type); - src.reg.base_offset += deref_array->base_offset; - - if (src.reg.indirect) { - nir_load_const_instr *load_const = - nir_load_const_instr_create(state->shader, 1); - load_const->value.u[0] = glsl_get_length(parent_type); - nir_instr_insert_before(instr, &load_const->instr); - - nir_alu_instr *mul = nir_alu_instr_create(state->shader, nir_op_imul); - mul->src[0].src = *src.reg.indirect; - mul->src[1].src.is_ssa = true; - mul->src[1].src.ssa = &load_const->def; - mul->dest.write_mask = 1; - nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL); - nir_instr_insert_before(instr, &mul->instr); - - src.reg.indirect->is_ssa = true; - src.reg.indirect->ssa = &mul->dest.dest.ssa; - } - - if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - if (src.reg.indirect == NULL) { - src.reg.indirect = ralloc(state->shader, nir_src); - nir_src_copy(src.reg.indirect, &deref_array->indirect, - state->shader); - } else { - nir_alu_instr *add = nir_alu_instr_create(state->shader, - nir_op_iadd); - add->src[0].src = *src.reg.indirect; - nir_src_copy(&add->src[1].src, &deref_array->indirect, add); - add->dest.write_mask = 1; - nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL); - nir_instr_insert_before(instr, &add->instr); - - src.reg.indirect->is_ssa = true; - src.reg.indirect->ssa = &add->dest.dest.ssa; - } - } - } - - return src; -} - -static bool -lower_locals_to_regs_block(nir_block *block, void *void_state) -{ - struct locals_to_regs_state *state = void_state; - - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - switch (intrin->intrinsic) { - case nir_intrinsic_load_var: { - if (intrin->variables[0]->var->data.mode != nir_var_local) - continue; - - nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); - mov->src[0].src = get_deref_reg_src(intrin->variables[0], - &intrin->instr, state); - mov->dest.write_mask = (1 << intrin->num_components) - 1; - if (intrin->dest.is_ssa) { - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - intrin->num_components, NULL); - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&mov->dest.dest.ssa)); - } else { - nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr); - } - nir_instr_insert_before(&intrin->instr, &mov->instr); - - nir_instr_remove(&intrin->instr); - state->progress = true; - break; - } - - case nir_intrinsic_store_var: { - if (intrin->variables[0]->var->data.mode != nir_var_local) - continue; - - nir_src reg_src = get_deref_reg_src(intrin->variables[0], - &intrin->instr, state); - - nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); - nir_src_copy(&mov->src[0].src, &intrin->src[0], mov); - mov->dest.write_mask = intrin->const_index[0]; - mov->dest.dest.is_ssa = false; - mov->dest.dest.reg.reg = reg_src.reg.reg; - mov->dest.dest.reg.base_offset = reg_src.reg.base_offset; - mov->dest.dest.reg.indirect = reg_src.reg.indirect; - - nir_instr_insert_before(&intrin->instr, &mov->instr); - - nir_instr_remove(&intrin->instr); - state->progress = true; - break; - } - - case nir_intrinsic_copy_var: - 
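-         /* copy_var intrinsics are assumed to have already been lowered
-          * to load/store pairs (see nir_lower_var_copies.c), so finding
-          * one here would be a bug.
-          */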
unreachable("There should be no copies whatsoever at this point"); - break; - - default: - continue; - } - } - - return true; -} - -static nir_block * -compute_reg_usedef_lca(nir_register *reg) -{ - nir_block *lca = NULL; - - list_for_each_entry(nir_dest, def_dest, ®->defs, reg.def_link) - lca = nir_dominance_lca(lca, def_dest->reg.parent_instr->block); - - list_for_each_entry(nir_src, use_src, ®->uses, use_link) - lca = nir_dominance_lca(lca, use_src->parent_instr->block); - - list_for_each_entry(nir_src, use_src, ®->if_uses, use_link) { - nir_cf_node *prev_node = nir_cf_node_prev(&use_src->parent_if->cf_node); - assert(prev_node->type == nir_cf_node_block); - lca = nir_dominance_lca(lca, nir_cf_node_as_block(prev_node)); - } - - return lca; -} - -static void -insert_constant_initializer(nir_deref_var *deref_head, nir_deref *deref_tail, - nir_block *block, - struct locals_to_regs_state *state) -{ - if (deref_tail->child) { - switch (deref_tail->child->deref_type) { - case nir_deref_type_array: { - unsigned array_elems = glsl_get_length(deref_tail->type); - - nir_deref_array arr_deref; - arr_deref.deref = *deref_tail->child; - arr_deref.deref_array_type = nir_deref_array_type_direct; - - nir_deref *old_child = deref_tail->child; - deref_tail->child = &arr_deref.deref; - for (unsigned i = 0; i < array_elems; i++) { - arr_deref.base_offset = i; - insert_constant_initializer(deref_head, &arr_deref.deref, - block, state); - } - deref_tail->child = old_child; - return; - } - - case nir_deref_type_struct: - insert_constant_initializer(deref_head, deref_tail->child, - block, state); - return; - - default: - unreachable("Invalid deref child type"); - } - } - - assert(deref_tail->child == NULL); - - nir_load_const_instr *load = - nir_deref_get_const_initializer_load(state->shader, deref_head); - nir_instr_insert_before_block(block, &load->instr); - - nir_src reg_src = get_deref_reg_src(deref_head, &load->instr, state); - - nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov); - mov->src[0].src = nir_src_for_ssa(&load->def); - mov->dest.write_mask = (1 << load->def.num_components) - 1; - mov->dest.dest.is_ssa = false; - mov->dest.dest.reg.reg = reg_src.reg.reg; - mov->dest.dest.reg.base_offset = reg_src.reg.base_offset; - mov->dest.dest.reg.indirect = reg_src.reg.indirect; - - nir_instr_insert_after(&load->instr, &mov->instr); - state->progress = true; -} - -static bool -nir_lower_locals_to_regs_impl(nir_function_impl *impl) -{ - struct locals_to_regs_state state; - - state.shader = impl->function->shader; - state.impl = impl; - state.progress = false; - state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal); - nir_array_init(&state.derefs_array, NULL); - - nir_metadata_require(impl, nir_metadata_dominance); - - nir_foreach_block(impl, lower_locals_to_regs_block, &state); - - nir_array_foreach(&state.derefs_array, nir_deref_var *, deref_ptr) { - nir_deref_var *deref = *deref_ptr; - struct hash_entry *deref_entry = - _mesa_hash_table_search(state.regs_table, deref); - assert(deref_entry && deref_entry->key == deref); - nir_register *reg = (nir_register *)deref_entry->data; - - if (deref->var->constant_initializer == NULL) - continue; - - nir_block *usedef_lca = compute_reg_usedef_lca(reg); - - insert_constant_initializer(deref, &deref->deref, usedef_lca, &state); - } - - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - - nir_array_fini(&state.derefs_array); - _mesa_hash_table_destroy(state.regs_table, NULL); - - return 
state.progress; -} - -bool -nir_lower_locals_to_regs(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(shader, function) { - if (function->impl) - progress = nir_lower_locals_to_regs_impl(function->impl) || progress; - } - - return progress; -} diff --git a/src/glsl/nir/nir_lower_outputs_to_temporaries.c b/src/glsl/nir/nir_lower_outputs_to_temporaries.c deleted file mode 100644 index 71b06b81fcc..00000000000 --- a/src/glsl/nir/nir_lower_outputs_to_temporaries.c +++ /dev/null @@ -1,133 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * Implements a pass that lowers output variables to a temporary plus an - * output variable with a single copy at each exit point of the shader. - * This way the output variable is only ever written. - * - * Because valid NIR requires that output variables are never read, this - * pass is more of a helper for NIR producers and must be run before the - * shader is ever validated. 
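- *
- * As a rough sketch (hypothetical names, not from the original sources),
- * a producer that wrote
- *
- *    color = v1;  ...  color = v2;  ...
- *
- * ends up with a global temporary "color@out-temp" receiving all of the
- * writes, plus a single "color = color@out-temp" copy at each exit point
- * (or before each EmitVertex in a geometry shader).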
- */
-
-#include "nir.h"
-
-struct lower_outputs_state {
-   nir_shader *shader;
-   struct exec_list old_outputs;
-};
-
-static void
-emit_output_copies(nir_cursor cursor, struct lower_outputs_state *state)
-{
-   assert(exec_list_length(&state->shader->outputs) ==
-          exec_list_length(&state->old_outputs));
-
-   foreach_two_lists(out_node, &state->shader->outputs,
-                     temp_node, &state->old_outputs) {
-      nir_variable *output = exec_node_data(nir_variable, out_node, node);
-      nir_variable *temp = exec_node_data(nir_variable, temp_node, node);
-
-      nir_intrinsic_instr *copy =
-         nir_intrinsic_instr_create(state->shader, nir_intrinsic_copy_var);
-      copy->variables[0] = nir_deref_var_create(copy, output);
-      copy->variables[1] = nir_deref_var_create(copy, temp);
-
-      nir_instr_insert(cursor, &copy->instr);
-   }
-}
-
-static bool
-emit_output_copies_block(nir_block *block, void *state)
-{
-   nir_foreach_instr(block, instr) {
-      if (instr->type != nir_instr_type_intrinsic)
-         continue;
-
-      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-      if (intrin->intrinsic == nir_intrinsic_emit_vertex)
-         emit_output_copies(nir_before_instr(&intrin->instr), state);
-   }
-
-   return true;
-}
-
-void
-nir_lower_outputs_to_temporaries(nir_shader *shader)
-{
-   struct lower_outputs_state state;
-
-   if (shader->stage == MESA_SHADER_TESS_CTRL)
-      return;
-
-   state.shader = shader;
-   exec_list_move_nodes_to(&shader->outputs, &state.old_outputs);
-
-   /* Walk over all of the outputs, turn each output into a temporary, and
-    * make a new variable for the actual output.
-    */
-   nir_foreach_variable(var, &state.old_outputs) {
-      nir_variable *output = ralloc(shader, nir_variable);
-      memcpy(output, var, sizeof *output);
-
-      /* The original is now the temporary */
-      nir_variable *temp = var;
-
-      /* Reparent the name to the new variable */
-      ralloc_steal(output, output->name);
-
-      /* Give the output a new name with @out-temp appended */
-      temp->name = ralloc_asprintf(var, "%s@out-temp", output->name);
-      temp->data.mode = nir_var_global;
-      temp->constant_initializer = NULL;
-
-      exec_list_push_tail(&shader->outputs, &output->node);
-   }
-
-   nir_foreach_function(shader, function) {
-      if (function->impl == NULL)
-         continue;
-
-      if (shader->stage == MESA_SHADER_GEOMETRY) {
-         /* For geometry shaders, we have to emit the output copies right
-          * before each EmitVertex call.
-          */
-         nir_foreach_block(function->impl, emit_output_copies_block, &state);
-      } else if (strcmp(function->name, "main") == 0) {
-         /* For all other shader types, we need to do the copies right before
-          * the jumps to the end block.
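-          * (i.e. once per predecessor of the function's end block, which
-          * covers every return path)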
- */ - struct set_entry *block_entry; - set_foreach(function->impl->end_block->predecessors, block_entry) { - struct nir_block *block = (void *)block_entry->key; - emit_output_copies(nir_after_block_before_jump(block), &state); - } - } - - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance); - } - - exec_list_append(&shader->globals, &state.old_outputs); -} diff --git a/src/glsl/nir/nir_lower_phis_to_scalar.c b/src/glsl/nir/nir_lower_phis_to_scalar.c deleted file mode 100644 index dd2abcf72f8..00000000000 --- a/src/glsl/nir/nir_lower_phis_to_scalar.c +++ /dev/null @@ -1,293 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "nir.h" - -/* - * Implements a pass that lowers vector phi nodes to scalar phi nodes when - * we don't think it will hurt anything. - */ - -struct lower_phis_to_scalar_state { - void *mem_ctx; - void *dead_ctx; - - /* Hash table marking which phi nodes are scalarizable. The key is - * pointers to phi instructions and the entry is either NULL for not - * scalarizable or non-null for scalarizable. - */ - struct hash_table *phi_table; -}; - -static bool -should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state); - -static bool -is_phi_src_scalarizable(nir_phi_src *src, - struct lower_phis_to_scalar_state *state) -{ - /* Don't know what to do with non-ssa sources */ - if (!src->src.is_ssa) - return false; - - nir_instr *src_instr = src->src.ssa->parent_instr; - switch (src_instr->type) { - case nir_instr_type_alu: { - nir_alu_instr *src_alu = nir_instr_as_alu(src_instr); - - /* ALU operations with output_size == 0 should be scalarized. We - * will also see a bunch of vecN operations from scalarizing ALU - * operations and, since they can easily be copy-propagated, they - * are ok too. 
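-       * For example, an fadd (output_size == 0) is fine to scalarize,
-       * while something like unpack_unorm_4x8, which produces a vector
-       * from a single scalar source, is not.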
- */ - return nir_op_infos[src_alu->op].output_size == 0 || - src_alu->op == nir_op_vec2 || - src_alu->op == nir_op_vec3 || - src_alu->op == nir_op_vec4; - } - - case nir_instr_type_phi: - /* A phi is scalarizable if we're going to lower it */ - return should_lower_phi(nir_instr_as_phi(src_instr), state); - - case nir_instr_type_load_const: - case nir_instr_type_ssa_undef: - /* These are trivially scalarizable */ - return true; - - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *src_intrin = nir_instr_as_intrinsic(src_instr); - - switch (src_intrin->intrinsic) { - case nir_intrinsic_load_var: - return src_intrin->variables[0]->var->data.mode == nir_var_shader_in || - src_intrin->variables[0]->var->data.mode == nir_var_uniform; - - case nir_intrinsic_interp_var_at_centroid: - case nir_intrinsic_interp_var_at_sample: - case nir_intrinsic_interp_var_at_offset: - case nir_intrinsic_load_uniform: - case nir_intrinsic_load_ubo: - case nir_intrinsic_load_ssbo: - case nir_intrinsic_load_input: - return true; - default: - break; - } - } - - default: - /* We can't scalarize this type of instruction */ - return false; - } -} - -/** - * Determines if the given phi node should be lowered. The only phi nodes - * we will scalarize at the moment are those where all of the sources are - * scalarizable. - * - * The reason for this comes down to coalescing. Since phi sources can't - * swizzle, swizzles on phis have to be resolved by inserting a mov right - * before the phi. The choice then becomes between movs to pick off - * components for a scalar phi or potentially movs to recombine components - * for a vector phi. The problem is that the movs generated to pick off - * the components are almost uncoalescable. We can't coalesce them in NIR - * because we need them to pick off components and we can't coalesce them - * in the backend because the source register is a vector and the - * destination is a scalar that may be used at other places in the program. - * On the other hand, if we have a bunch of scalars going into a vector - * phi, the situation is much better. In this case, if the SSA def is - * generated in the predecessor block to the corresponding phi source, the - * backend code will be an ALU op into a temporary and then a mov into the - * given vector component; this move can almost certainly be coalesced - * away. - */ -static bool -should_lower_phi(nir_phi_instr *phi, struct lower_phis_to_scalar_state *state) -{ - /* Already scalar */ - if (phi->dest.ssa.num_components == 1) - return false; - - struct hash_entry *entry = _mesa_hash_table_search(state->phi_table, phi); - if (entry) - return entry->data != NULL; - - /* Insert an entry and mark it as scalarizable for now. That way - * we don't recurse forever and a cycle in the dependence graph - * won't automatically make us fail to scalarize. 
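-    * This matters for loops, where a phi can (indirectly) appear among
-    * its own sources via the back-edge; marking it up front breaks the
-    * cycle instead of recursing forever.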
- */ - entry = _mesa_hash_table_insert(state->phi_table, phi, (void *)(intptr_t)1); - - bool scalarizable = true; - - nir_foreach_phi_src(phi, src) { - scalarizable = is_phi_src_scalarizable(src, state); - if (!scalarizable) - break; - } - - /* The hash table entry for 'phi' may have changed while recursing the - * dependence graph, so we need to reset it */ - entry = _mesa_hash_table_search(state->phi_table, phi); - assert(entry); - - entry->data = (void *)(intptr_t)scalarizable; - - return scalarizable; -} - -static bool -lower_phis_to_scalar_block(nir_block *block, void *void_state) -{ - struct lower_phis_to_scalar_state *state = void_state; - - /* Find the last phi node in the block */ - nir_phi_instr *last_phi = NULL; - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - last_phi = nir_instr_as_phi(instr); - } - - /* We have to handle the phi nodes in their own pass due to the way - * we're modifying the linked list of instructions. - */ - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - - if (!should_lower_phi(phi, state)) - continue; - - /* Create a vecN operation to combine the results. Most of these - * will be redundant, but copy propagation should clean them up for - * us. No need to add the complexity here. - */ - nir_op vec_op; - switch (phi->dest.ssa.num_components) { - case 2: vec_op = nir_op_vec2; break; - case 3: vec_op = nir_op_vec3; break; - case 4: vec_op = nir_op_vec4; break; - default: unreachable("Invalid number of components"); - } - - nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op); - nir_ssa_dest_init(&vec->instr, &vec->dest.dest, - phi->dest.ssa.num_components, NULL); - vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; - - for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) { - nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx); - nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL); - - vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa); - - nir_foreach_phi_src(phi, src) { - /* We need to insert a mov to grab the i'th component of src */ - nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx, - nir_op_imov); - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL); - mov->dest.write_mask = 1; - nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx); - mov->src[0].swizzle[0] = i; - - /* Insert at the end of the predecessor but before the jump */ - nir_instr *pred_last_instr = nir_block_last_instr(src->pred); - if (pred_last_instr && pred_last_instr->type == nir_instr_type_jump) - nir_instr_insert_before(pred_last_instr, &mov->instr); - else - nir_instr_insert_after_block(src->pred, &mov->instr); - - nir_phi_src *new_src = ralloc(new_phi, nir_phi_src); - new_src->pred = src->pred; - new_src->src = nir_src_for_ssa(&mov->dest.dest.ssa); - - exec_list_push_tail(&new_phi->srcs, &new_src->node); - } - - nir_instr_insert_before(&phi->instr, &new_phi->instr); - } - - nir_instr_insert_after(&last_phi->instr, &vec->instr); - - nir_ssa_def_rewrite_uses(&phi->dest.ssa, - nir_src_for_ssa(&vec->dest.dest.ssa)); - - ralloc_steal(state->dead_ctx, phi); - nir_instr_remove(&phi->instr); - - /* We're using the safe iterator and inserting all the newly - * scalarized phi nodes before their non-scalarized version so that's - * ok. However, we are also inserting vec operations after all of - * the last phi node so once we get here, we can't trust even the - * safe iterator to stop properly. 
We have to break manually.
-       */
-      if (instr == &last_phi->instr)
-         break;
-   }
-
-   return true;
-}
-
-static void
-lower_phis_to_scalar_impl(nir_function_impl *impl)
-{
-   struct lower_phis_to_scalar_state state;
-
-   state.mem_ctx = ralloc_parent(impl);
-   state.dead_ctx = ralloc_context(NULL);
-   state.phi_table = _mesa_hash_table_create(state.dead_ctx, _mesa_hash_pointer,
-                                             _mesa_key_pointer_equal);
-
-   nir_foreach_block(impl, lower_phis_to_scalar_block, &state);
-
-   nir_metadata_preserve(impl, nir_metadata_block_index |
-                               nir_metadata_dominance);
-
-   ralloc_free(state.dead_ctx);
-}
-
-/** A pass that lowers vector phi nodes to scalar
- *
- * This pass loops through the blocks and looks for vector phi nodes it
- * can lower to scalar phi nodes.  Not all phi nodes are lowered.  For
- * instance, if one of the sources is a non-scalarizable vector, then we
- * don't bother lowering because that would generate hard-to-coalesce movs.
- */
-void
-nir_lower_phis_to_scalar(nir_shader *shader)
-{
-   nir_foreach_function(shader, function) {
-      if (function->impl)
-         lower_phis_to_scalar_impl(function->impl);
-   }
-}
diff --git a/src/glsl/nir/nir_lower_samplers.c b/src/glsl/nir/nir_lower_samplers.c
deleted file mode 100644
index 95ea072bdfd..00000000000
--- a/src/glsl/nir/nir_lower_samplers.c
+++ /dev/null
@@ -1,187 +0,0 @@
-/*
- * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
- * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "nir.h"
-#include "nir_builder.h"
-#include "program/hash_table.h"
-#include "ir_uniform.h"
-
-#include "main/compiler.h"
-#include "main/mtypes.h"
-#include "program/prog_parameter.h"
-#include "program/program.h"
-
-/* Calculate the sampler index based on array indices and also
- * calculate the base uniform location for struct members.
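- *
- * For example (hypothetical GLSL): given "uniform struct { sampler2D s; }
- * u[4];", a reference to u[i].s contributes an indirect term of i to the
- * sampler index and the record offset of "s" to the uniform location.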
- */ -static void -calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr, - unsigned *array_elements, nir_ssa_def **indirect, - nir_builder *b, unsigned *location) -{ - if (tail->child == NULL) - return; - - switch (tail->child->deref_type) { - case nir_deref_type_array: { - nir_deref_array *deref_array = nir_deref_as_array(tail->child); - - assert(deref_array->deref_array_type != nir_deref_array_type_wildcard); - - calc_sampler_offsets(tail->child, instr, array_elements, - indirect, b, location); - instr->sampler_index += deref_array->base_offset * *array_elements; - - if (deref_array->deref_array_type == nir_deref_array_type_indirect) { - nir_ssa_def *mul = - nir_imul(b, nir_imm_int(b, *array_elements), - nir_ssa_for_src(b, deref_array->indirect, 1)); - - nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, - NIR_SRC_INIT); - - if (*indirect) { - *indirect = nir_iadd(b, *indirect, mul); - } else { - *indirect = mul; - } - } - - *array_elements *= glsl_get_length(tail->type); - break; - } - - case nir_deref_type_struct: { - nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child); - *location += glsl_get_record_location_offset(tail->type, deref_struct->index); - calc_sampler_offsets(tail->child, instr, array_elements, - indirect, b, location); - break; - } - - default: - unreachable("Invalid deref type"); - break; - } -} - -static void -lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program, - gl_shader_stage stage, nir_builder *builder) -{ - if (instr->sampler == NULL) - return; - - instr->sampler_index = 0; - unsigned location = instr->sampler->var->data.location; - unsigned array_elements = 1; - nir_ssa_def *indirect = NULL; - - builder->cursor = nir_before_instr(&instr->instr); - calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements, - &indirect, builder, &location); - - if (indirect) { - /* First, we have to resize the array of texture sources */ - nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, - instr->num_srcs + 1); - - for (unsigned i = 0; i < instr->num_srcs; i++) { - new_srcs[i].src_type = instr->src[i].src_type; - nir_instr_move_src(&instr->instr, &new_srcs[i].src, - &instr->src[i].src); - } - - ralloc_free(instr->src); - instr->src = new_srcs; - - /* Now we can go ahead and move the source over to being a - * first-class texture source. 
- */ - instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; - instr->num_srcs++; - nir_instr_rewrite_src(&instr->instr, - &instr->src[instr->num_srcs - 1].src, - nir_src_for_ssa(indirect)); - - instr->sampler_array_size = array_elements; - } - - if (location > shader_program->NumUniformStorage - 1 || - !shader_program->UniformStorage[location].opaque[stage].active) { - assert(!"cannot return a sampler"); - return; - } - - instr->sampler_index += - shader_program->UniformStorage[location].opaque[stage].index; - - instr->sampler = NULL; -} - -typedef struct { - nir_builder builder; - const struct gl_shader_program *shader_program; - gl_shader_stage stage; -} lower_state; - -static bool -lower_block_cb(nir_block *block, void *_state) -{ - lower_state *state = (lower_state *) _state; - - nir_foreach_instr(block, instr) { - if (instr->type == nir_instr_type_tex) { - nir_tex_instr *tex_instr = nir_instr_as_tex(instr); - lower_sampler(tex_instr, state->shader_program, state->stage, - &state->builder); - } - } - - return true; -} - -static void -lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program, - gl_shader_stage stage) -{ - lower_state state; - - nir_builder_init(&state.builder, impl); - state.shader_program = shader_program; - state.stage = stage; - - nir_foreach_block(impl, lower_block_cb, &state); -} - -void -nir_lower_samplers(nir_shader *shader, - const struct gl_shader_program *shader_program) -{ - nir_foreach_function(shader, function) { - if (function->impl) - lower_impl(function->impl, shader_program, shader->stage); - } -} diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c deleted file mode 100644 index 2bd787d3574..00000000000 --- a/src/glsl/nir/nir_lower_system_values.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir.h" -#include "nir_builder.h" - -struct lower_system_values_state { - nir_builder builder; - bool progress; -}; - -static bool -convert_block(nir_block *block, void *void_state) -{ - struct lower_system_values_state *state = void_state; - - nir_builder *b = &state->builder; - - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *load_var = nir_instr_as_intrinsic(instr); - - if (load_var->intrinsic != nir_intrinsic_load_var) - continue; - - nir_variable *var = load_var->variables[0]->var; - if (var->data.mode != nir_var_system_value) - continue; - - b->cursor = nir_after_instr(&load_var->instr); - - nir_intrinsic_op sysval_op = - nir_intrinsic_from_system_value(var->data.location); - nir_ssa_def *sysval = nir_load_system_value(b, sysval_op, 0); - - nir_ssa_def_rewrite_uses(&load_var->dest.ssa, nir_src_for_ssa(sysval)); - nir_instr_remove(&load_var->instr); - - state->progress = true; - } - - return true; -} - -static bool -convert_impl(nir_function_impl *impl) -{ - struct lower_system_values_state state; - - state.progress = false; - nir_builder_init(&state.builder, impl); - - nir_foreach_block(impl, convert_block, &state); - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - return state.progress; -} - -bool -nir_lower_system_values(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(shader, function) { - if (function->impl) - progress = convert_impl(function->impl) || progress; - } - - exec_list_make_empty(&shader->system_values); - - return progress; -} diff --git a/src/glsl/nir/nir_lower_tex.c b/src/glsl/nir/nir_lower_tex.c deleted file mode 100644 index ae24fb2e16a..00000000000 --- a/src/glsl/nir/nir_lower_tex.c +++ /dev/null @@ -1,355 +0,0 @@ -/* - * Copyright © 2015 Broadcom - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* - * This lowering pass supports (as configured via nir_lower_tex_options) - * various texture related conversions: - * + texture projector lowering: converts the coordinate division for - * texture projection to be done in ALU instructions instead of - * asking the texture operation to do so. 
- * + lowering RECT: converts the un-normalized RECT texture coordinates
- *   to normalized coordinates with txs plus ALU instructions
- * + saturate s/t/r coords: to emulate certain texture clamp/wrap modes,
- *   inserts instructions to clamp specified coordinates to [0.0, 1.0].
- *   Note that this automatically triggers texture projector lowering if
- *   needed, since clamping must happen after projector lowering.
- */
-
-#include "nir.h"
-#include "nir_builder.h"
-
-typedef struct {
-   nir_builder b;
-   const nir_lower_tex_options *options;
-   bool progress;
-} lower_tex_state;
-
-static void
-project_src(nir_builder *b, nir_tex_instr *tex)
-{
-   /* Find the projector in the srcs list, if present. */
-   unsigned proj_index;
-   for (proj_index = 0; proj_index < tex->num_srcs; proj_index++) {
-      if (tex->src[proj_index].src_type == nir_tex_src_projector)
-         break;
-   }
-   if (proj_index == tex->num_srcs)
-      return;
-
-   b->cursor = nir_before_instr(&tex->instr);
-
-   nir_ssa_def *inv_proj =
-      nir_frcp(b, nir_ssa_for_src(b, tex->src[proj_index].src, 1));
-
-   /* Walk through the sources projecting the arguments. */
-   for (unsigned i = 0; i < tex->num_srcs; i++) {
-      switch (tex->src[i].src_type) {
-      case nir_tex_src_coord:
-      case nir_tex_src_comparitor:
-         break;
-      default:
-         continue;
-      }
-      nir_ssa_def *unprojected =
-         nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i));
-      nir_ssa_def *projected = nir_fmul(b, unprojected, inv_proj);
-
-      /* Array indices don't get projected, so make a new vector with the
-       * coordinate's array index untouched.
-       */
-      if (tex->is_array && tex->src[i].src_type == nir_tex_src_coord) {
-         switch (tex->coord_components) {
-         case 4:
-            projected = nir_vec4(b,
-                                 nir_channel(b, projected, 0),
-                                 nir_channel(b, projected, 1),
-                                 nir_channel(b, projected, 2),
-                                 nir_channel(b, unprojected, 3));
-            break;
-         case 3:
-            projected = nir_vec3(b,
-                                 nir_channel(b, projected, 0),
-                                 nir_channel(b, projected, 1),
-                                 nir_channel(b, unprojected, 2));
-            break;
-         case 2:
-            projected = nir_vec2(b,
-                                 nir_channel(b, projected, 0),
-                                 nir_channel(b, unprojected, 1));
-            break;
-         default:
-            unreachable("bad texture coord count for array");
-            break;
-         }
-      }
-
-      nir_instr_rewrite_src(&tex->instr,
-                            &tex->src[i].src,
-                            nir_src_for_ssa(projected));
-   }
-
-   /* Now move the later tex sources down the array so that the projector
-    * disappears.
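-    * In GLSL terms the whole transformation is roughly (illustrative
-    * only) textureProj(s, P) -> texture(s, P.xy / P.w), with the
-    * division expressed through the frcp/fmul sequence emitted above.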
- */ - nir_instr_rewrite_src(&tex->instr, &tex->src[proj_index].src, - NIR_SRC_INIT); - for (unsigned i = proj_index + 1; i < tex->num_srcs; i++) { - tex->src[i-1].src_type = tex->src[i].src_type; - nir_instr_move_src(&tex->instr, &tex->src[i-1].src, &tex->src[i].src); - } - tex->num_srcs--; -} - -static nir_ssa_def * -get_texture_size(nir_builder *b, nir_tex_instr *tex) -{ - b->cursor = nir_before_instr(&tex->instr); - - /* RECT textures should not be array: */ - assert(!tex->is_array); - - nir_tex_instr *txs; - - txs = nir_tex_instr_create(b->shader, 1); - txs->op = nir_texop_txs; - txs->sampler_dim = GLSL_SAMPLER_DIM_RECT; - txs->sampler_index = tex->sampler_index; - txs->dest_type = nir_type_int; - - /* only single src, the lod: */ - txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0)); - txs->src[0].src_type = nir_tex_src_lod; - - nir_ssa_dest_init(&txs->instr, &txs->dest, 2, NULL); - nir_builder_instr_insert(b, &txs->instr); - - return nir_i2f(b, &txs->dest.ssa); -} - -static void -lower_rect(nir_builder *b, nir_tex_instr *tex) -{ - nir_ssa_def *txs = get_texture_size(b, tex); - nir_ssa_def *scale = nir_frcp(b, txs); - - /* Walk through the sources normalizing the requested arguments. */ - for (unsigned i = 0; i < tex->num_srcs; i++) { - if (tex->src[i].src_type != nir_tex_src_coord) - continue; - - nir_ssa_def *coords = - nir_ssa_for_src(b, tex->src[i].src, tex->coord_components); - nir_instr_rewrite_src(&tex->instr, - &tex->src[i].src, - nir_src_for_ssa(nir_fmul(b, coords, scale))); - } - - tex->sampler_dim = GLSL_SAMPLER_DIM_2D; -} - -static void -saturate_src(nir_builder *b, nir_tex_instr *tex, unsigned sat_mask) -{ - b->cursor = nir_before_instr(&tex->instr); - - /* Walk through the sources saturating the requested arguments. */ - for (unsigned i = 0; i < tex->num_srcs; i++) { - if (tex->src[i].src_type != nir_tex_src_coord) - continue; - - nir_ssa_def *src = - nir_ssa_for_src(b, tex->src[i].src, tex->coord_components); - - /* split src into components: */ - nir_ssa_def *comp[4]; - - for (unsigned j = 0; j < tex->coord_components; j++) - comp[j] = nir_channel(b, src, j); - - /* clamp requested components, array index does not get clamped: */ - unsigned ncomp = tex->coord_components; - if (tex->is_array) - ncomp--; - - for (unsigned j = 0; j < ncomp; j++) { - if ((1 << j) & sat_mask) { - if (tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) { - /* non-normalized texture coords, so clamp to texture - * size rather than [0.0, 1.0] - */ - nir_ssa_def *txs = get_texture_size(b, tex); - comp[j] = nir_fmax(b, comp[j], nir_imm_float(b, 0.0)); - comp[j] = nir_fmin(b, comp[j], nir_channel(b, txs, j)); - } else { - comp[j] = nir_fsat(b, comp[j]); - } - } - } - - /* and move the result back into a single vecN: */ - src = nir_vec(b, comp, tex->coord_components); - - nir_instr_rewrite_src(&tex->instr, - &tex->src[i].src, - nir_src_for_ssa(src)); - } -} - -static nir_ssa_def * -get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val) -{ - nir_const_value v; - - memset(&v, 0, sizeof(v)); - - if (swizzle_val == 4) { - v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0; - } else { - assert(swizzle_val == 5); - if (type == nir_type_float) - v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0; - else - v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1; - } - - return nir_build_imm(b, 4, v); -} - -static void -swizzle_result(nir_builder *b, nir_tex_instr *tex, const uint8_t swizzle[4]) -{ - assert(tex->dest.is_ssa); - - b->cursor = nir_after_instr(&tex->instr); - - nir_ssa_def *swizzled; - if (tex->op == nir_texop_tg4) { - if 
(swizzle[tex->component] < 4) { - /* This one's easy */ - tex->component = swizzle[tex->component]; - return; - } else { - swizzled = get_zero_or_one(b, tex->dest_type, swizzle[tex->component]); - } - } else { - assert(nir_tex_instr_dest_size(tex) == 4); - if (swizzle[0] < 4 && swizzle[1] < 4 && - swizzle[2] < 4 && swizzle[3] < 4) { - unsigned swiz[4] = { swizzle[0], swizzle[1], swizzle[2], swizzle[3] }; - /* We have no 0's or 1's, just emit a swizzling MOV */ - swizzled = nir_swizzle(b, &tex->dest.ssa, swiz, 4, false); - } else { - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < 4; i++) { - if (swizzle[i] < 4) { - srcs[i] = nir_channel(b, &tex->dest.ssa, swizzle[i]); - } else { - srcs[i] = get_zero_or_one(b, tex->dest_type, swizzle[i]); - } - } - swizzled = nir_vec(b, srcs, 4); - } - } - - nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, nir_src_for_ssa(swizzled), - swizzled->parent_instr); -} - -static bool -nir_lower_tex_block(nir_block *block, void *void_state) -{ - lower_tex_state *state = void_state; - nir_builder *b = &state->b; - - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_tex) - continue; - - nir_tex_instr *tex = nir_instr_as_tex(instr); - bool lower_txp = !!(state->options->lower_txp & (1 << tex->sampler_dim)); - - /* mask of src coords to saturate (clamp): */ - unsigned sat_mask = 0; - - if ((1 << tex->sampler_index) & state->options->saturate_r) - sat_mask |= (1 << 2); /* .z */ - if ((1 << tex->sampler_index) & state->options->saturate_t) - sat_mask |= (1 << 1); /* .y */ - if ((1 << tex->sampler_index) & state->options->saturate_s) - sat_mask |= (1 << 0); /* .x */ - - /* If we are clamping any coords, we must lower projector first - * as clamping happens *after* projection: - */ - if (lower_txp || sat_mask) { - project_src(b, tex); - state->progress = true; - } - - if ((tex->sampler_dim == GLSL_SAMPLER_DIM_RECT) && - state->options->lower_rect) { - lower_rect(b, tex); - state->progress = true; - } - - if (sat_mask) { - saturate_src(b, tex, sat_mask); - state->progress = true; - } - - if (((1 << tex->sampler_index) & state->options->swizzle_result) && - !nir_tex_instr_is_query(tex) && - !(tex->is_shadow && tex->is_new_style_shadow)) { - swizzle_result(b, tex, state->options->swizzles[tex->sampler_index]); - state->progress = true; - } - } - - return true; -} - -static void -nir_lower_tex_impl(nir_function_impl *impl, lower_tex_state *state) -{ - nir_builder_init(&state->b, impl); - - nir_foreach_block(impl, nir_lower_tex_block, state); - - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); -} - -bool -nir_lower_tex(nir_shader *shader, const nir_lower_tex_options *options) -{ - lower_tex_state state; - state.options = options; - state.progress = false; - - nir_foreach_function(shader, function) { - if (function->impl) - nir_lower_tex_impl(function->impl, &state); - } - - return state.progress; -} diff --git a/src/glsl/nir/nir_lower_to_source_mods.c b/src/glsl/nir/nir_lower_to_source_mods.c deleted file mode 100644 index 6c4e1f0d3f3..00000000000 --- a/src/glsl/nir/nir_lower_to_source_mods.c +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to 
permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "nir.h" - -/* - * This pass lowers the neg, abs, and sat operations to source modifiers on - * ALU operations to make things nicer for the backend. It's just much - * easier to not have them when we're doing optimizations. - */ - -static bool -nir_lower_to_source_mods_block(nir_block *block, void *state) -{ - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_alu) - continue; - - nir_alu_instr *alu = nir_instr_as_alu(instr); - - for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { - if (!alu->src[i].src.is_ssa) - continue; - - if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_alu) - continue; - - nir_alu_instr *parent = nir_instr_as_alu(alu->src[i].src.ssa->parent_instr); - - if (parent->dest.saturate) - continue; - - switch (nir_op_infos[alu->op].input_types[i]) { - case nir_type_float: - if (parent->op != nir_op_fmov) - continue; - break; - case nir_type_int: - if (parent->op != nir_op_imov) - continue; - break; - default: - continue; - } - - /* We can only do a rewrite if the source we are copying is SSA. - * Otherwise, moving the read might invalidly reorder reads/writes - * on a register. - */ - if (!parent->src[0].src.is_ssa) - continue; - - nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src); - if (alu->src[i].abs) { - /* abs trumps both neg and abs, do nothing */ - } else { - alu->src[i].negate = (alu->src[i].negate != parent->src[0].negate); - alu->src[i].abs |= parent->src[0].abs; - } - - for (int j = 0; j < 4; ++j) { - if (!nir_alu_instr_channel_used(alu, i, j)) - continue; - alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]]; - } - - if (list_empty(&parent->dest.dest.ssa.uses) && - list_empty(&parent->dest.dest.ssa.if_uses)) - nir_instr_remove(&parent->instr); - } - - switch (alu->op) { - case nir_op_fsat: - alu->op = nir_op_fmov; - alu->dest.saturate = true; - break; - case nir_op_ineg: - alu->op = nir_op_imov; - alu->src[0].negate = !alu->src[0].negate; - break; - case nir_op_fneg: - alu->op = nir_op_fmov; - alu->src[0].negate = !alu->src[0].negate; - break; - case nir_op_iabs: - alu->op = nir_op_imov; - alu->src[0].abs = true; - alu->src[0].negate = false; - break; - case nir_op_fabs: - alu->op = nir_op_fmov; - alu->src[0].abs = true; - alu->src[0].negate = false; - break; - default: - break; - } - - /* We've covered sources. Now we're going to try and saturate the - * destination if we can. 
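-       * For instance (illustrative), if every use of an fadd result is
-       * an fsat, the fadd gains saturate on its destination and the fsat
-       * instructions below collapse into plain fmovs.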
- */ - - if (!alu->dest.dest.is_ssa) - continue; - - /* We can only saturate float destinations */ - if (nir_op_infos[alu->op].output_type != nir_type_float) - continue; - - if (!list_empty(&alu->dest.dest.ssa.if_uses)) - continue; - - bool all_children_are_sat = true; - nir_foreach_use(&alu->dest.dest.ssa, child_src) { - assert(child_src->is_ssa); - nir_instr *child = child_src->parent_instr; - if (child->type != nir_instr_type_alu) { - all_children_are_sat = false; - continue; - } - - nir_alu_instr *child_alu = nir_instr_as_alu(child); - if (child_alu->src[0].negate || child_alu->src[0].abs) { - all_children_are_sat = false; - continue; - } - - if (child_alu->op != nir_op_fsat && - !(child_alu->op == nir_op_fmov && child_alu->dest.saturate)) { - all_children_are_sat = false; - continue; - } - } - - if (!all_children_are_sat) - continue; - - alu->dest.saturate = true; - - nir_foreach_use(&alu->dest.dest.ssa, child_src) { - assert(child_src->is_ssa); - nir_instr *child = child_src->parent_instr; - assert(child->type == nir_instr_type_alu); - nir_alu_instr *child_alu = nir_instr_as_alu(child); - - child_alu->op = nir_op_fmov; - child_alu->dest.saturate = false; - /* We could propagate the dest of our instruction to the - * destinations of the uses here. However, one quick round of - * copy propagation will clean that all up and then we don't have - * the complexity. - */ - } - } - - return true; -} - -static void -nir_lower_to_source_mods_impl(nir_function_impl *impl) -{ - nir_foreach_block(impl, nir_lower_to_source_mods_block, NULL); -} - -void -nir_lower_to_source_mods(nir_shader *shader) -{ - nir_foreach_function(shader, function) { - if (function->impl) - nir_lower_to_source_mods_impl(function->impl); - } -} diff --git a/src/glsl/nir/nir_lower_two_sided_color.c b/src/glsl/nir/nir_lower_two_sided_color.c deleted file mode 100644 index 1294cb89004..00000000000 --- a/src/glsl/nir/nir_lower_two_sided_color.c +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright © 2015 Red Hat - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- *
- * Authors:
- *    Rob Clark
- */
-
-#include "nir.h"
-#include "nir_builder.h"
-
-#define MAX_COLORS 2  /* VARYING_SLOT_COL0/COL1 */
-
-typedef struct {
-   nir_builder b;
-   nir_shader *shader;
-   nir_variable *face;
-   struct {
-      nir_variable *front;        /* COLn */
-      nir_variable *back;         /* BFCn */
-   } colors[MAX_COLORS];
-   int colors_count;
-} lower_2side_state;
-
-
-/* Lowering pass for fragment shaders to emulate two-sided color.  For
- * each COLOR input, a corresponding BCOLOR input is created, and a bcsel
- * instruction is used to select front or back color based on FACE.
- */
-
-static nir_variable *
-create_input(nir_shader *shader, unsigned drvloc, gl_varying_slot slot)
-{
-   nir_variable *var = rzalloc(shader, nir_variable);
-
-   var->data.driver_location = drvloc;
-   var->type = glsl_vec4_type();
-   var->data.mode = nir_var_shader_in;
-   var->name = ralloc_asprintf(var, "in_%d", drvloc);
-   var->data.index = 0;
-   var->data.location = slot;
-
-   exec_list_push_tail(&shader->inputs, &var->node);
-
-   shader->num_inputs++;     /* TODO use type_size() */
-
-   return var;
-}
-
-static nir_ssa_def *
-load_input(nir_builder *b, nir_variable *in)
-{
-   nir_intrinsic_instr *load;
-
-   load = nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_input);
-   load->num_components = 4;
-   load->const_index[0] = in->data.driver_location;
-   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
-   nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
-   nir_builder_instr_insert(b, &load->instr);
-
-   return &load->dest.ssa;
-}
-
-static int
-setup_inputs(lower_2side_state *state)
-{
-   int maxloc = -1;
-
-   /* find color/face inputs: */
-   nir_foreach_variable(var, &state->shader->inputs) {
-      int loc = var->data.driver_location;
-
-      /* keep track of the last used driver-location; we'll be
-       * appending BFCn/FACE after the last existing input:
-       */
-      maxloc = MAX2(maxloc, loc);
-
-      switch (var->data.location) {
-      case VARYING_SLOT_COL0:
-      case VARYING_SLOT_COL1:
-         assert(state->colors_count < ARRAY_SIZE(state->colors));
-         state->colors[state->colors_count].front = var;
-         state->colors_count++;
-         break;
-      case VARYING_SLOT_FACE:
-         state->face = var;
-         break;
-      }
-   }
-
-   /* if we don't have any color inputs, nothing to do: */
-   if (state->colors_count == 0)
-      return -1;
-
-   /* if we don't already have one, insert a FACE input: */
-   if (!state->face) {
-      state->face = create_input(state->shader, ++maxloc, VARYING_SLOT_FACE);
-      state->face->data.interpolation = INTERP_QUALIFIER_FLAT;
-   }
-
-   /* add required back-face color inputs: */
-   for (int i = 0; i < state->colors_count; i++) {
-      gl_varying_slot slot;
-
-      if (state->colors[i].front->data.location == VARYING_SLOT_COL0)
-         slot = VARYING_SLOT_BFC0;
-      else
-         slot = VARYING_SLOT_BFC1;
-
-      state->colors[i].back = create_input(state->shader, ++maxloc, slot);
-   }
-
-   return 0;
-}
-
-static bool
-nir_lower_two_sided_color_block(nir_block *block, void *void_state)
-{
-   lower_2side_state *state = void_state;
-   nir_builder *b = &state->b;
-
-   nir_foreach_instr_safe(block, instr) {
-      if (instr->type != nir_instr_type_intrinsic)
-         continue;
-
-      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
-
-      if (intr->intrinsic != nir_intrinsic_load_input)
-         continue;
-
-      int idx;
-      for (idx = 0; idx < state->colors_count; idx++) {
-         unsigned drvloc =
-            state->colors[idx].front->data.driver_location;
-         if (intr->const_index[0] == drvloc) {
-            assert(nir_src_as_const_value(intr->src[0]));
-            break;
-         }
-      }
-
-      if (idx == state->colors_count)
-         continue;
-
-      /* replace load_input(COLn) with
-       *
bcsel(load_input(FACE), load_input(COLn), load_input(BFCn)) - */ - b->cursor = nir_before_instr(&intr->instr); - nir_ssa_def *face = nir_channel(b, load_input(b, state->face), 0); - nir_ssa_def *front = load_input(b, state->colors[idx].front); - nir_ssa_def *back = load_input(b, state->colors[idx].back); - nir_ssa_def *cond = nir_flt(b, face, nir_imm_float(b, 0.0)); - nir_ssa_def *color = nir_bcsel(b, cond, back, front); - - assert(intr->dest.is_ssa); - nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(color)); - } - - return true; -} - -static void -nir_lower_two_sided_color_impl(nir_function_impl *impl, - lower_2side_state *state) -{ - nir_builder *b = &state->b; - - nir_builder_init(b, impl); - - nir_foreach_block(impl, nir_lower_two_sided_color_block, state); - - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); -} - -void -nir_lower_two_sided_color(nir_shader *shader) -{ - lower_2side_state state = { - .shader = shader, - }; - - if (shader->stage != MESA_SHADER_FRAGMENT) - return; - - if (setup_inputs(&state) != 0) - return; - - nir_foreach_function(shader, function) { - if (function->impl) - nir_lower_two_sided_color_impl(function->impl, &state); - } - -} diff --git a/src/glsl/nir/nir_lower_var_copies.c b/src/glsl/nir/nir_lower_var_copies.c deleted file mode 100644 index 8cb3edd0a84..00000000000 --- a/src/glsl/nir/nir_lower_var_copies.c +++ /dev/null @@ -1,190 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "nir.h" -#include "compiler/nir_types.h" - -/* - * Lowers all copy intrinsics to sequences of load/store intrinsics. - */ - -/* Walks down the deref chain and returns the next deref in the chain whose - * child is a wildcard. In other words, given the chain a[1].foo[*].bar, - * this function will return the deref to foo. Calling it a second time - * with the [*].bar, it will return NULL. 
- */
-static nir_deref *
-deref_next_wildcard_parent(nir_deref *deref)
-{
-   for (nir_deref *tail = deref; tail->child; tail = tail->child) {
-      if (tail->child->deref_type != nir_deref_type_array)
-         continue;
-
-      nir_deref_array *arr = nir_deref_as_array(tail->child);
-
-      if (arr->deref_array_type == nir_deref_array_type_wildcard)
-         return tail;
-   }
-
-   return NULL;
-}
-
-/* This function recursively walks the given deref chain and replaces the
- * given copy instruction with an equivalent sequence of load/store
- * operations.
- *
- * @copy_instr    The copy instruction to replace; new instructions will be
- *                inserted before this one
- *
- * @dest_head     The head of the destination variable deref chain
- *
- * @src_head      The head of the source variable deref chain
- *
- * @dest_tail     The current tail of the destination variable deref chain;
- *                this is used for recursion and external callers of this
- *                function should call it with tail == head
- *
- * @src_tail      The current tail of the source variable deref chain;
- *                this is used for recursion and external callers of this
- *                function should call it with tail == head
- *
- * @state         The current variable lowering state
- */
-static void
-emit_copy_load_store(nir_intrinsic_instr *copy_instr,
-                     nir_deref_var *dest_head, nir_deref_var *src_head,
-                     nir_deref *dest_tail, nir_deref *src_tail, void *mem_ctx)
-{
-   /* Find the next pair of wildcards */
-   nir_deref *src_arr_parent = deref_next_wildcard_parent(src_tail);
-   nir_deref *dest_arr_parent = deref_next_wildcard_parent(dest_tail);
-
-   if (src_arr_parent || dest_arr_parent) {
-      /* Wildcards had better come in matched pairs */
-      assert(src_arr_parent && dest_arr_parent);
-
-      nir_deref_array *src_arr = nir_deref_as_array(src_arr_parent->child);
-      nir_deref_array *dest_arr = nir_deref_as_array(dest_arr_parent->child);
-
-      unsigned length = glsl_get_length(src_arr_parent->type);
-      /* The wildcards should represent the same number of elements */
-      assert(length == glsl_get_length(dest_arr_parent->type));
-      assert(length > 0);
-
-      /* Walk over all of the elements that this wildcard refers to and
-       * call emit_copy_load_store on each one of them */
-      src_arr->deref_array_type = nir_deref_array_type_direct;
-      dest_arr->deref_array_type = nir_deref_array_type_direct;
-      for (unsigned i = 0; i < length; i++) {
-         src_arr->base_offset = i;
-         dest_arr->base_offset = i;
-         emit_copy_load_store(copy_instr, dest_head, src_head,
-                              &dest_arr->deref, &src_arr->deref, mem_ctx);
-      }
-      src_arr->deref_array_type = nir_deref_array_type_wildcard;
-      dest_arr->deref_array_type = nir_deref_array_type_wildcard;
-   } else {
-      /* In this case, we have no wildcards anymore, so all we have to do
-       * is just emit the load and store operations.
-       */
-      src_tail = nir_deref_tail(src_tail);
-      dest_tail = nir_deref_tail(dest_tail);
-
-      assert(src_tail->type == dest_tail->type);
-
-      unsigned num_components = glsl_get_vector_elements(src_tail->type);
-
-      nir_intrinsic_instr *load =
-         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
-      load->num_components = num_components;
-      load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
-      nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
-
-      nir_instr_insert_before(&copy_instr->instr, &load->instr);
-
-      nir_intrinsic_instr *store =
-         nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_store_var);
-      store->num_components = num_components;
-      store->const_index[0] = (1 << num_components) - 1;
-      store->variables[0] = nir_deref_as_var(nir_copy_deref(store, &dest_head->deref));
-
-      store->src[0].is_ssa = true;
-      store->src[0].ssa = &load->dest.ssa;
-
-      nir_instr_insert_before(&copy_instr->instr, &store->instr);
-   }
-}
-
-/* Lowers a copy instruction to a sequence of load/store instructions
- *
- * The new instructions are placed before the copy instruction in the IR.
- */
-void
-nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx)
-{
-   assert(copy->intrinsic == nir_intrinsic_copy_var);
-   emit_copy_load_store(copy, copy->variables[0], copy->variables[1],
-                        &copy->variables[0]->deref,
-                        &copy->variables[1]->deref, mem_ctx);
-}
-
-static bool
-lower_var_copies_block(nir_block *block, void *mem_ctx)
-{
-   nir_foreach_instr_safe(block, instr) {
-      if (instr->type != nir_instr_type_intrinsic)
-         continue;
-
-      nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
-      if (copy->intrinsic != nir_intrinsic_copy_var)
-         continue;
-
-      nir_lower_var_copy_instr(copy, mem_ctx);
-
-      nir_instr_remove(&copy->instr);
-      ralloc_free(copy);
-   }
-
-   return true;
-}
-
-static void
-lower_var_copies_impl(nir_function_impl *impl)
-{
-   nir_foreach_block(impl, lower_var_copies_block, ralloc_parent(impl));
-}
-
-/* Lowers every copy_var instruction in the program to a sequence of
- * load/store instructions.
- */
-void
-nir_lower_var_copies(nir_shader *shader)
-{
-   nir_foreach_function(shader, function) {
-      if (function->impl)
-         lower_var_copies_impl(function->impl);
-   }
-}
diff --git a/src/glsl/nir/nir_lower_vars_to_ssa.c b/src/glsl/nir/nir_lower_vars_to_ssa.c
deleted file mode 100644
index 75d31ff60af..00000000000
--- a/src/glsl/nir/nir_lower_vars_to_ssa.c
+++ /dev/null
@@ -1,973 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Jason Ekstrand (jason@jlekstrand.net)
- *
- */
-
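As background for the file that follows: drivers typically run the copy lowering above before vars-to-SSA, so that only loads, stores, and the wildcard copies the SSA pass knows how to handle itself remain. A minimal sketch of such a call site (hypothetical driver code; both entry points are the ones defined in this series):

    void
    optimize_locals(nir_shader *shader)
    {
       /* Lower copy_var intrinsics to load/store pairs first ... */
       nir_lower_var_copies(shader);

       /* ... then promote unaliased local variables to SSA values. */
       nir_lower_vars_to_ssa(shader);
    }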
-#include "nir.h"
-#include "nir_builder.h"
-#include "nir_vla.h"
-
-
-struct deref_node {
-   struct deref_node *parent;
-   const struct glsl_type *type;
-
-   bool lower_to_ssa;
-
-   /* Only valid for things that end up in the direct list.
-    * Note that multiple nir_deref_vars may correspond to this node, but they
-    * will all be equivalent, so any is as good as the other.
-    */
-   nir_deref_var *deref;
-   struct exec_node direct_derefs_link;
-
-   struct set *loads;
-   struct set *stores;
-   struct set *copies;
-
-   nir_ssa_def **def_stack;
-   nir_ssa_def **def_stack_tail;
-
-   struct deref_node *wildcard;
-   struct deref_node *indirect;
-   struct deref_node *children[0];
-};
-
-struct lower_variables_state {
-   nir_shader *shader;
-   void *dead_ctx;
-   nir_function_impl *impl;
-
-   /* A hash table mapping variables to deref_node data */
-   struct hash_table *deref_var_nodes;
-
-   /* A hash table mapping fully-qualified direct dereferences, i.e.
-    * dereferences with no indirect or wildcard array dereferences, to
-    * deref_node data.
-    *
-    * At the moment, we only lower loads, stores, and copies that can be
-    * trivially lowered to loads and stores, i.e. copies with no indirects
-    * and no wildcards.  If a part of a variable that is being loaded from
-    * and/or stored into is also involved in a copy operation with
-    * wildcards, then we lower that copy operation to loads and stores, but
-    * otherwise we leave copies with wildcards alone.  Since the only derefs
-    * used in these loads, stores, and trivial copies are ones with no
-    * wildcards and no indirects, these are precisely the derefs that we
-    * can actually consider lowering.
-    */
-   struct exec_list direct_deref_nodes;
-
-   /* Controls whether get_deref_node will add variables to the
-    * direct_deref_nodes table.  This is turned on when we are initially
-    * scanning for load/store instructions.  It is then turned off so we
-    * don't accidentally change the direct_deref_nodes table while we're
-    * iterating through it.
-    */
-   bool add_to_direct_deref_nodes;
-
-   /* A hash table mapping phi nodes to deref_node data */
-   struct hash_table *phi_table;
-};
-
-static struct deref_node *
-deref_node_create(struct deref_node *parent,
-                  const struct glsl_type *type, nir_shader *shader)
-{
-   size_t size = sizeof(struct deref_node) +
-                 glsl_get_length(type) * sizeof(struct deref_node *);
-
-   struct deref_node *node = rzalloc_size(shader, size);
-   node->type = type;
-   node->parent = parent;
-   node->deref = NULL;
-   exec_node_init(&node->direct_derefs_link);
-
-   return node;
-}
-
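deref_node_create sizes each node for an inline children array using the C flexible-array idiom. For illustration, a standalone sketch of the same allocation pattern, using plain calloc instead of ralloc (hypothetical node type, not from this patch):

    #include <stdlib.h>

    struct tree_node {
       unsigned num_children;
       struct tree_node *children[0];   /* allocated inline, after the node */
    };

    static struct tree_node *
    tree_node_create(unsigned n)
    {
       /* One allocation holds the node plus its child-pointer array,
        * mirroring how deref_node_create sizes by glsl_get_length().
        */
       struct tree_node *node =
          calloc(1, sizeof(*node) + n * sizeof(struct tree_node *));
       node->num_children = n;
       return node;
    }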
-/* Returns the deref node associated with the given variable.  This will be
- * the root of the tree representing all of the derefs of the given variable.
- */
-static struct deref_node *
-get_deref_node_for_var(nir_variable *var, struct lower_variables_state *state)
-{
-   struct deref_node *node;
-
-   struct hash_entry *var_entry =
-      _mesa_hash_table_search(state->deref_var_nodes, var);
-
-   if (var_entry) {
-      return var_entry->data;
-   } else {
-      node = deref_node_create(NULL, var->type, state->dead_ctx);
-      _mesa_hash_table_insert(state->deref_var_nodes, var, node);
-      return node;
-   }
-}
-
-/* Gets the deref_node for the given deref chain and creates it if it
- * doesn't yet exist.  If the deref is fully-qualified and direct and
- * state->add_to_direct_deref_nodes is true, it will be added to the hash
- * table of fully-qualified direct derefs.
- */
-static struct deref_node *
-get_deref_node(nir_deref_var *deref, struct lower_variables_state *state)
-{
-   bool is_direct = true;
-
-   /* Start at the base of the chain. */
-   struct deref_node *node = get_deref_node_for_var(deref->var, state);
-   assert(deref->deref.type == node->type);
-
-   for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) {
-      switch (tail->deref_type) {
-      case nir_deref_type_struct: {
-         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
-
-         assert(deref_struct->index < glsl_get_length(node->type));
-
-         if (node->children[deref_struct->index] == NULL)
-            node->children[deref_struct->index] =
-               deref_node_create(node, tail->type, state->dead_ctx);
-
-         node = node->children[deref_struct->index];
-         break;
-      }
-
-      case nir_deref_type_array: {
-         nir_deref_array *arr = nir_deref_as_array(tail);
-
-         switch (arr->deref_array_type) {
-         case nir_deref_array_type_direct:
-            /* This is possible if a loop unrolls and generates an
-             * out-of-bounds offset.  We need to handle this at least
-             * somewhat gracefully.
-             */
-            if (arr->base_offset >= glsl_get_length(node->type))
-               return NULL;
-
-            if (node->children[arr->base_offset] == NULL)
-               node->children[arr->base_offset] =
-                  deref_node_create(node, tail->type, state->dead_ctx);
-
-            node = node->children[arr->base_offset];
-            break;
-
-         case nir_deref_array_type_indirect:
-            if (node->indirect == NULL)
-               node->indirect = deref_node_create(node, tail->type,
-                                                  state->dead_ctx);
-
-            node = node->indirect;
-            is_direct = false;
-            break;
-
-         case nir_deref_array_type_wildcard:
-            if (node->wildcard == NULL)
-               node->wildcard = deref_node_create(node, tail->type,
-                                                  state->dead_ctx);
-
-            node = node->wildcard;
-            is_direct = false;
-            break;
-
-         default:
-            unreachable("Invalid array deref type");
-         }
-         break;
-      }
-      default:
-         unreachable("Invalid deref type");
-      }
-   }
-
-   assert(node);
-
-   /* Only insert if it isn't already in the list. */
-   if (is_direct && state->add_to_direct_deref_nodes &&
-       node->direct_derefs_link.next == NULL) {
-      node->deref = deref;
-      assert(deref->var != NULL);
-      exec_list_push_tail(&state->direct_deref_nodes,
-                          &node->direct_derefs_link);
-   }
-
-   return node;
-}
-
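Because get_deref_node returns NULL for out-of-bounds direct derefs, every caller in this file follows the same defensive pattern; a condensed sketch of that calling convention (hypothetical caller, mirroring the register_*_instr helpers below):

    struct deref_node *node = get_deref_node(deref, state);
    if (node == NULL)
       return;   /* reference past the end of an array: treat as undef/no-op */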
-/* \sa foreach_deref_node_match */
-static bool
-foreach_deref_node_worker(struct deref_node *node, nir_deref *deref,
-                          bool (* cb)(struct deref_node *node,
-                                      struct lower_variables_state *state),
-                          struct lower_variables_state *state)
-{
-   if (deref->child == NULL) {
-      return cb(node, state);
-   } else {
-      switch (deref->child->deref_type) {
-      case nir_deref_type_array: {
-         nir_deref_array *arr = nir_deref_as_array(deref->child);
-         assert(arr->deref_array_type == nir_deref_array_type_direct);
-         if (node->children[arr->base_offset] &&
-             !foreach_deref_node_worker(node->children[arr->base_offset],
-                                        deref->child, cb, state))
-            return false;
-
-         if (node->wildcard &&
-             !foreach_deref_node_worker(node->wildcard,
-                                        deref->child, cb, state))
-            return false;
-
-         return true;
-      }
-
-      case nir_deref_type_struct: {
-         nir_deref_struct *str = nir_deref_as_struct(deref->child);
-         return foreach_deref_node_worker(node->children[str->index],
-                                          deref->child, cb, state);
-      }
-
-      default:
-         unreachable("Invalid deref child type");
-      }
-   }
-}
-
-/* Walks over every "matching" deref_node and calls the callback.  A node
- * is considered to "match" if it either refers to that deref or matches up
- * to a wildcard.  In other words, the following would match a[6].foo[3].bar:
- *
- *    a[6].foo[3].bar
- *    a[*].foo[3].bar
- *    a[6].foo[*].bar
- *    a[*].foo[*].bar
- *
- * The given deref must be a full-length and fully qualified (no wildcards
- * or indirects) deref chain.
- */
-static bool
-foreach_deref_node_match(nir_deref_var *deref,
-                         bool (* cb)(struct deref_node *node,
-                                     struct lower_variables_state *state),
-                         struct lower_variables_state *state)
-{
-   nir_deref_var var_deref = *deref;
-   var_deref.deref.child = NULL;
-   struct deref_node *node = get_deref_node(&var_deref, state);
-
-   if (node == NULL)
-      return false;
-
-   return foreach_deref_node_worker(node, &deref->deref, cb, state);
-}
-
-/* \sa deref_may_be_aliased */
-static bool
-deref_may_be_aliased_node(struct deref_node *node, nir_deref *deref,
                          struct lower_variables_state *state)
-{
-   if (deref->child == NULL) {
-      return false;
-   } else {
-      switch (deref->child->deref_type) {
-      case nir_deref_type_array: {
-         nir_deref_array *arr = nir_deref_as_array(deref->child);
-         if (arr->deref_array_type == nir_deref_array_type_indirect)
-            return true;
-
-         /* If there is an indirect at this level, we're aliased. */
-         if (node->indirect)
-            return true;
-
-         assert(arr->deref_array_type == nir_deref_array_type_direct);
-
-         if (node->children[arr->base_offset] &&
-             deref_may_be_aliased_node(node->children[arr->base_offset],
-                                       deref->child, state))
-            return true;
-
-         if (node->wildcard &&
-             deref_may_be_aliased_node(node->wildcard, deref->child, state))
-            return true;
-
-         return false;
-      }
-
-      case nir_deref_type_struct: {
-         nir_deref_struct *str = nir_deref_as_struct(deref->child);
-         if (node->children[str->index]) {
-            return deref_may_be_aliased_node(node->children[str->index],
-                                             deref->child, state);
-         } else {
-            return false;
-         }
-      }
-
-      default:
-         unreachable("Invalid nir_deref child type");
-      }
-   }
-}
-
-/* Returns true if there are no indirects that can ever touch this deref.
- *
- * For example, if the given deref is a[6].foo, then any uses of a[i].foo
- * would cause this to return false, but a[i].bar would not affect it
- * because it's a different structure member.  A var_copy involving
- * a[*].bar also doesn't affect it because that can be lowered to entirely
- * direct load/stores.
- *
- * We only support asking this question about fully-qualified derefs.
- * Obviously, it's pointless to ask this about indirects, but we also
- * rule out wildcards.  Handling wildcard dereferences would involve
- * checking each array index to make sure that there aren't any indirect
- * references.
- */
-static bool
-deref_may_be_aliased(nir_deref_var *deref,
-                     struct lower_variables_state *state)
-{
-   return deref_may_be_aliased_node(get_deref_node_for_var(deref->var, state),
-                                    &deref->deref, state);
-}
-
-static void
-register_load_instr(nir_intrinsic_instr *load_instr,
-                    struct lower_variables_state *state)
-{
-   struct deref_node *node = get_deref_node(load_instr->variables[0], state);
-   if (node == NULL)
-      return;
-
-   if (node->loads == NULL)
-      node->loads = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
-                                     _mesa_key_pointer_equal);
-
-   _mesa_set_add(node->loads, load_instr);
-}
-
-static void
-register_store_instr(nir_intrinsic_instr *store_instr,
-                     struct lower_variables_state *state)
-{
-   struct deref_node *node = get_deref_node(store_instr->variables[0], state);
-   if (node == NULL)
-      return;
-
-   if (node->stores == NULL)
-      node->stores = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
-                                      _mesa_key_pointer_equal);
-
-   _mesa_set_add(node->stores, store_instr);
-}
-
-static void
-register_copy_instr(nir_intrinsic_instr *copy_instr,
-                    struct lower_variables_state *state)
-{
-   for (unsigned idx = 0; idx < 2; idx++) {
-      struct deref_node *node =
-         get_deref_node(copy_instr->variables[idx], state);
-
-      if (node == NULL)
-         continue;
-
-      if (node->copies == NULL)
-         node->copies = _mesa_set_create(state->dead_ctx, _mesa_hash_pointer,
-                                         _mesa_key_pointer_equal);
-
-      _mesa_set_add(node->copies, copy_instr);
-   }
-}
-
-/* Registers all variable uses in the given block. */
-static bool
-register_variable_uses_block(nir_block *block, void *void_state)
-{
-   struct lower_variables_state *state = void_state;
-
-   nir_foreach_instr_safe(block, instr) {
-      if (instr->type != nir_instr_type_intrinsic)
-         continue;
-
-      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-
-      switch (intrin->intrinsic) {
-      case nir_intrinsic_load_var:
-         register_load_instr(intrin, state);
-         break;
-
-      case nir_intrinsic_store_var:
-         register_store_instr(intrin, state);
-         break;
-
-      case nir_intrinsic_copy_var:
-         register_copy_instr(intrin, state);
-         break;
-
-      default:
-         continue;
-      }
-   }
-
-   return true;
-}
-
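register_variable_uses_block is written as a nir_foreach_block callback; the pass entry point further down drives it once per block. A minimal sketch of that invocation (essentially the same call that appears in nir_lower_vars_to_ssa_impl below, with state being the pass's lower_variables_state):

    state.add_to_direct_deref_nodes = true;
    nir_foreach_block(impl, register_variable_uses_block, &state);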
-/* Walks over all of the copy instructions to or from the given deref_node
- * and lowers them to load/store intrinsics.
- */
-static bool
-lower_copies_to_load_store(struct deref_node *node,
-                           struct lower_variables_state *state)
-{
-   if (!node->copies)
-      return true;
-
-   struct set_entry *copy_entry;
-   set_foreach(node->copies, copy_entry) {
-      nir_intrinsic_instr *copy = (void *)copy_entry->key;
-
-      nir_lower_var_copy_instr(copy, state->shader);
-
-      for (unsigned i = 0; i < 2; ++i) {
-         struct deref_node *arg_node =
-            get_deref_node(copy->variables[i], state);
-
-         /* Only bother removing copy entries for other nodes */
-         if (arg_node == NULL || arg_node == node)
-            continue;
-
-         struct set_entry *arg_entry = _mesa_set_search(arg_node->copies, copy);
-         assert(arg_entry);
-         _mesa_set_remove(arg_node->copies, arg_entry);
-      }
-
-      nir_instr_remove(&copy->instr);
-   }
-
-   node->copies = NULL;
-
-   return true;
-}
-
-/** Pushes an SSA def onto the def stack for the given node
- *
- * Each node is potentially associated with a stack of SSA definitions.
- * This stack is used for determining what SSA definition reaches a given
- * point in the program for variable renaming.  The stack is always kept in
- * dominance-order with at most one SSA def per block.  If the SSA
- * definition on the top of the stack is in the same block as the one being
- * pushed, the top element is replaced.
- */
-static void
-def_stack_push(struct deref_node *node, nir_ssa_def *def,
-               struct lower_variables_state *state)
-{
-   if (node->def_stack == NULL) {
-      node->def_stack = ralloc_array(state->dead_ctx, nir_ssa_def *,
-                                     state->impl->num_blocks);
-      node->def_stack_tail = node->def_stack - 1;
-   }
-
-   if (node->def_stack_tail >= node->def_stack) {
-      nir_ssa_def *top_def = *node->def_stack_tail;
-
-      if (def->parent_instr->block == top_def->parent_instr->block) {
-         /* They're in the same block, just replace the top */
-         *node->def_stack_tail = def;
-         return;
-      }
-   }
-
-   *(++node->def_stack_tail) = def;
-}
-
-/* Pop the top of the def stack if it's in the given block */
-static void
-def_stack_pop_if_in_block(struct deref_node *node, nir_block *block)
-{
-   /* If we're popping, then we have presumably pushed at some time in the
-    * past so this should exist.
-    */
-   assert(node->def_stack != NULL);
-
-   /* The stack is already empty.  Do nothing. */
-   if (node->def_stack_tail < node->def_stack)
-      return;
-
-   nir_ssa_def *def = *node->def_stack_tail;
-   if (def->parent_instr->block == block)
-      node->def_stack_tail--;
-}
-
-/** Retrieves the SSA definition on the top of the stack for the given
- * node, if one exists.  If the stack is empty, then we return the constant
- * initializer (if it exists) or an SSA undef.
- */
-static nir_ssa_def *
-get_ssa_def_for_block(struct deref_node *node, nir_block *block,
-                      struct lower_variables_state *state)
-{
-   /* If we have something on the stack, go ahead and return it.  We're
-    * assuming that the top of the stack dominates the given block.
-    */
-   if (node->def_stack && node->def_stack_tail >= node->def_stack)
-      return *node->def_stack_tail;
-
-   /* If we got here then we don't have a definition that dominates the
-    * given block.  This means that we need to add an undef and use that.
-    */
-   nir_ssa_undef_instr *undef =
-      nir_ssa_undef_instr_create(state->shader,
-                                 glsl_get_vector_elements(node->type));
-   nir_instr_insert_before_cf_list(&state->impl->body, &undef->instr);
-   def_stack_push(node, &undef->def, state);
-   return &undef->def;
-}
-
-/* Given a block and one of its predecessors, this function fills in the
- * sources of the phi nodes to take SSA defs from the given predecessor.
- * This function must be called exactly once per block/predecessor pair. - */ -static void -add_phi_sources(nir_block *block, nir_block *pred, - struct lower_variables_state *state) -{ - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - if (!entry) - continue; - - struct deref_node *node = entry->data; - - nir_phi_src *src = ralloc(phi, nir_phi_src); - src->pred = pred; - src->src.parent_instr = &phi->instr; - src->src.is_ssa = true; - src->src.ssa = get_ssa_def_for_block(node, pred, state); - - list_addtail(&src->src.use_link, &src->src.ssa->uses); - - exec_list_push_tail(&phi->srcs, &src->node); - } -} - -/* Performs variable renaming by doing a DFS of the dominance tree - * - * This algorithm is very similar to the one outlined in "Efficiently - * Computing Static Single Assignment Form and the Control Dependence - * Graph" by Cytron et. al. The primary difference is that we only put one - * SSA def on the stack per block. - */ -static bool -rename_variables_block(nir_block *block, struct lower_variables_state *state) -{ - nir_builder b; - nir_builder_init(&b, state->impl); - - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - - /* This can happen if we already have phi nodes in the program - * that were not created in this pass. - */ - if (!entry) - continue; - - struct deref_node *node = entry->data; - - def_stack_push(node, &phi->dest.ssa, state); - } else if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - switch (intrin->intrinsic) { - case nir_intrinsic_load_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); - - if (node == NULL) { - /* If we hit this path then we are referencing an invalid - * value. Most likely, we unrolled something and are - * reading past the end of some array. In any case, this - * should result in an undefined value. - */ - nir_ssa_undef_instr *undef = - nir_ssa_undef_instr_create(state->shader, - intrin->num_components); - - nir_instr_insert_before(&intrin->instr, &undef->instr); - nir_instr_remove(&intrin->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&undef->def)); - continue; - } - - if (!node->lower_to_ssa) - continue; - - nir_alu_instr *mov = nir_alu_instr_create(state->shader, - nir_op_imov); - mov->src[0].src.is_ssa = true; - mov->src[0].src.ssa = get_ssa_def_for_block(node, block, state); - for (unsigned i = intrin->num_components; i < 4; i++) - mov->src[0].swizzle[i] = 0; - - assert(intrin->dest.is_ssa); - - mov->dest.write_mask = (1 << intrin->num_components) - 1; - nir_ssa_dest_init(&mov->instr, &mov->dest.dest, - intrin->num_components, NULL); - - nir_instr_insert_before(&intrin->instr, &mov->instr); - nir_instr_remove(&intrin->instr); - - nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&mov->dest.dest.ssa)); - break; - } - - case nir_intrinsic_store_var: { - struct deref_node *node = - get_deref_node(intrin->variables[0], state); - - if (node == NULL) { - /* Probably an out-of-bounds array store. That should be a - * no-op. 
*/ - nir_instr_remove(&intrin->instr); - continue; - } - - if (!node->lower_to_ssa) - continue; - - assert(intrin->num_components == - glsl_get_vector_elements(node->type)); - - assert(intrin->src[0].is_ssa); - - nir_ssa_def *new_def; - b.cursor = nir_before_instr(&intrin->instr); - - if (intrin->const_index[0] == (1 << intrin->num_components) - 1) { - /* Whole variable store - just copy the source. Note that - * intrin->num_components and intrin->src[0].ssa->num_components - * may differ. - */ - unsigned swiz[4]; - for (unsigned i = 0; i < 4; i++) - swiz[i] = i < intrin->num_components ? i : 0; - - new_def = nir_swizzle(&b, intrin->src[0].ssa, swiz, - intrin->num_components, false); - } else { - nir_ssa_def *old_def = get_ssa_def_for_block(node, block, state); - /* For writemasked store_var intrinsics, we combine the newly - * written values with the existing contents of unwritten - * channels, creating a new SSA value for the whole vector. - */ - nir_ssa_def *srcs[4]; - for (unsigned i = 0; i < intrin->num_components; i++) { - if (intrin->const_index[0] & (1 << i)) { - srcs[i] = nir_channel(&b, intrin->src[0].ssa, i); - } else { - srcs[i] = nir_channel(&b, old_def, i); - } - } - new_def = nir_vec(&b, srcs, intrin->num_components); - } - - assert(new_def->num_components == intrin->num_components); - - def_stack_push(node, new_def, state); - - /* We'll wait to remove the instruction until the next pass - * where we pop the node we just pushed back off the stack. - */ - break; - } - - default: - break; - } - } - } - - if (block->successors[0]) - add_phi_sources(block->successors[0], block, state); - if (block->successors[1]) - add_phi_sources(block->successors[1], block, state); - - for (unsigned i = 0; i < block->num_dom_children; ++i) - rename_variables_block(block->dom_children[i], state); - - /* Now we iterate over the instructions and pop off any SSA defs that we - * pushed in the first loop. - */ - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(instr); - - struct hash_entry *entry = - _mesa_hash_table_search(state->phi_table, phi); - - /* This can happen if we already have phi nodes in the program - * that were not created in this pass. - */ - if (!entry) - continue; - - struct deref_node *node = entry->data; - - def_stack_pop_if_in_block(node, block); - } else if (instr->type == nir_instr_type_intrinsic) { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - if (intrin->intrinsic != nir_intrinsic_store_var) - continue; - - struct deref_node *node = get_deref_node(intrin->variables[0], state); - if (!node) - continue; - - if (!node->lower_to_ssa) - continue; - - def_stack_pop_if_in_block(node, block); - nir_instr_remove(&intrin->instr); - } - } - - return true; -} - -/* Inserts phi nodes for all variables marked lower_to_ssa - * - * This is the same algorithm as presented in "Efficiently Computing Static - * Single Assignment Form and the Control Dependence Graph" by Cytron et. - * al. - */ -static void -insert_phi_nodes(struct lower_variables_state *state) -{ - NIR_VLA_ZERO(unsigned, work, state->impl->num_blocks); - NIR_VLA_ZERO(unsigned, has_already, state->impl->num_blocks); - - /* - * Since the work flags already prevent us from inserting a node that has - * ever been inserted into W, we don't need to use a set to represent W. - * Also, since no block can ever be inserted into W more than once, we know - * that the maximum size of W is the number of basic blocks in the - * function. 
So all we need to handle W is an array and a pointer to the
- * next element to be inserted and the next element to be removed.
- */
-   NIR_VLA(nir_block *, W, state->impl->num_blocks);
-
-   unsigned w_start, w_end;
-   unsigned iter_count = 0;
-
-   foreach_list_typed(struct deref_node, node, direct_derefs_link,
-                      &state->direct_deref_nodes) {
-      if (node->stores == NULL)
-         continue;
-
-      if (!node->lower_to_ssa)
-         continue;
-
-      w_start = w_end = 0;
-      iter_count++;
-
-      struct set_entry *store_entry;
-      set_foreach(node->stores, store_entry) {
-         nir_intrinsic_instr *store = (nir_intrinsic_instr *)store_entry->key;
-         if (work[store->instr.block->index] < iter_count)
-            W[w_end++] = store->instr.block;
-         work[store->instr.block->index] = iter_count;
-      }
-
-      while (w_start != w_end) {
-         nir_block *cur = W[w_start++];
-         struct set_entry *dom_entry;
-         set_foreach(cur->dom_frontier, dom_entry) {
-            nir_block *next = (nir_block *) dom_entry->key;
-
-            /*
-             * If there's more than one return statement, then the end block
-             * can be a join point for some definitions.  However, there are
-             * no instructions in the end block, so nothing would use those
-             * phi nodes.  Of course, we couldn't place those phi nodes
-             * anyways due to the restriction of having no instructions in the
-             * end block...
-             */
-            if (next == state->impl->end_block)
-               continue;
-
-            if (has_already[next->index] < iter_count) {
-               nir_phi_instr *phi = nir_phi_instr_create(state->shader);
-               nir_ssa_dest_init(&phi->instr, &phi->dest,
-                                 glsl_get_vector_elements(node->type), NULL);
-               nir_instr_insert_before_block(next, &phi->instr);
-
-               _mesa_hash_table_insert(state->phi_table, phi, node);
-
-               has_already[next->index] = iter_count;
-               if (work[next->index] < iter_count) {
-                  work[next->index] = iter_count;
-                  W[w_end++] = next;
-               }
-            }
-         }
-      }
-   }
-}
-
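To make the effect of the phi placement concrete: a store on each side of an if forces a phi at the join block once the variable is in SSA form. In source-level terms (illustrative fragment only, not from this patch):

    if (cond)
       x = a;      /* store_var in the then-block */
    else
       x = b;      /* store_var in the else-block */
    y = x + 1.0f;  /* load_var becomes a use of phi(a, b) */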
-/** Implements a pass to lower variable uses to SSA values
- *
- * This pass walks the list of instructions and tries to lower as many
- * local variable load/store operations to SSA defs and uses as it can.
- * The process involves four passes:
- *
- *  1) Iterate over all of the instructions and mark where each local
- *     variable deref is used in a load, store, or copy.  While we're at
- *     it, we keep track of all of the fully-qualified (no wildcards) and
- *     fully-direct references we see and store them in the
- *     direct_deref_nodes hash table.
- *
- *  2) Walk over the list of fully-qualified direct derefs generated in
- *     the previous pass.  For each deref, we determine if it can ever be
- *     aliased, i.e. if there is an indirect reference anywhere that may
- *     refer to it.  If it cannot be aliased, we mark it for lowering to an
- *     SSA value.  At this point, we lower any var_copy instructions that
- *     use the given deref to load/store operations and, if the deref has a
- *     constant initializer, we go ahead and add a load_const value at the
- *     beginning of the function with the initialized value.
- *
- *  3) Walk over the list of derefs we plan to lower to SSA values and
- *     insert phi nodes as needed.
- *
- *  4) Perform "variable renaming" by replacing the load/store instructions
- *     with SSA definitions and SSA uses.
- */
-static bool
-nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
-{
-   struct lower_variables_state state;
-
-   state.shader = impl->function->shader;
-   state.dead_ctx = ralloc_context(state.shader);
-   state.impl = impl;
-
-   state.deref_var_nodes = _mesa_hash_table_create(state.dead_ctx,
-                                                   _mesa_hash_pointer,
-                                                   _mesa_key_pointer_equal);
-   exec_list_make_empty(&state.direct_deref_nodes);
-   state.phi_table = _mesa_hash_table_create(state.dead_ctx,
-                                             _mesa_hash_pointer,
-                                             _mesa_key_pointer_equal);
-
-   /* Build the initial deref structures and direct_deref_nodes table */
-   state.add_to_direct_deref_nodes = true;
-   nir_foreach_block(impl, register_variable_uses_block, &state);
-
-   bool progress = false;
-
-   nir_metadata_require(impl, nir_metadata_block_index);
-
-   /* We're about to iterate through direct_deref_nodes.  Don't modify it. */
-   state.add_to_direct_deref_nodes = false;
-
-   foreach_list_typed_safe(struct deref_node, node, direct_derefs_link,
-                           &state.direct_deref_nodes) {
-      nir_deref_var *deref = node->deref;
-
-      if (deref->var->data.mode != nir_var_local) {
-         exec_node_remove(&node->direct_derefs_link);
-         continue;
-      }
-
-      if (deref_may_be_aliased(deref, &state)) {
-         exec_node_remove(&node->direct_derefs_link);
-         continue;
-      }
-
-      node->lower_to_ssa = true;
-      progress = true;
-
-      if (deref->var->constant_initializer) {
-         nir_load_const_instr *load =
-            nir_deref_get_const_initializer_load(state.shader, deref);
-         nir_ssa_def_init(&load->instr, &load->def,
-                          glsl_get_vector_elements(node->type), NULL);
-         nir_instr_insert_before_cf_list(&impl->body, &load->instr);
-         def_stack_push(node, &load->def, &state);
-      }
-
-      foreach_deref_node_match(deref, lower_copies_to_load_store, &state);
-   }
-
-   if (!progress)
-      return false;
-
-   nir_metadata_require(impl, nir_metadata_dominance);
-
-   /* We may have lowered some copy instructions to load/store
-    * instructions.  The uses from the copy instructions have already been
-    * removed but we need to rescan to ensure that the uses from the newly
-    * added load/store instructions are registered.  We need this
-    * information for phi node insertion below.
-    */
-   nir_foreach_block(impl, register_variable_uses_block, &state);
-
-   insert_phi_nodes(&state);
-   rename_variables_block(nir_start_block(impl), &state);
-
-   nir_metadata_preserve(impl, nir_metadata_block_index |
-                               nir_metadata_dominance);
-
-   ralloc_free(state.dead_ctx);
-
-   return progress;
-}
-
-void
-nir_lower_vars_to_ssa(nir_shader *shader)
-{
-   nir_foreach_function(shader, function) {
-      if (function->impl)
-         nir_lower_vars_to_ssa_impl(function->impl);
-   }
-}
diff --git a/src/glsl/nir/nir_lower_vec_to_movs.c b/src/glsl/nir/nir_lower_vec_to_movs.c
deleted file mode 100644
index 06d627900c6..00000000000
--- a/src/glsl/nir/nir_lower_vec_to_movs.c
+++ /dev/null
@@ -1,310 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Jason Ekstrand (jason@jlekstrand.net)
- *
- */
-
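For orientation before the code: this pass rewrites an SSA vecN such as vec4(a, b, a, c) into writemasked MOVs into a register, grouping channels that read the same source. A scalarized C analogy of the output (hypothetical types; the real pass works on NIR registers and writemasks):

    struct vec4 { float x, y, z, w; };

    static struct vec4
    vec4_via_movs(float a, float b, float c)
    {
       struct vec4 dst;
       dst.x = a; dst.z = a;   /* one MOV, writemask .xz */
       dst.y = b;              /* one MOV, writemask .y  */
       dst.w = c;              /* one MOV, writemask .w  */
       return dst;
    }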
-#include "nir.h"
-
-/*
- * Implements a simple pass that lowers vecN instructions to a series of
- * moves with partial writes.
- */
-
-struct vec_to_movs_state {
-   nir_function_impl *impl;
-   bool progress;
-};
-
-static bool
-src_matches_dest_reg(nir_dest *dest, nir_src *src)
-{
-   if (dest->is_ssa || src->is_ssa)
-      return false;
-
-   return (dest->reg.reg == src->reg.reg &&
-           dest->reg.base_offset == src->reg.base_offset &&
-           !dest->reg.indirect &&
-           !src->reg.indirect);
-}
-
-/**
- * For a given starting writemask channel and corresponding source index in
- * the vec instruction, insert a MOV to the vec instruction's dest of all the
- * writemask channels that get read from the same src reg.
- *
- * Returns the writemask of our MOV, so the parent loop calling this knows
- * which ones have been processed.
- */
-static unsigned
-insert_mov(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
-{
-   assert(start_idx < nir_op_infos[vec->op].num_inputs);
-
-   nir_alu_instr *mov = nir_alu_instr_create(shader, nir_op_imov);
-   nir_alu_src_copy(&mov->src[0], &vec->src[start_idx], mov);
-   nir_alu_dest_copy(&mov->dest, &vec->dest, mov);
-
-   mov->dest.write_mask = (1u << start_idx);
-   mov->src[0].swizzle[start_idx] = vec->src[start_idx].swizzle[0];
-   mov->src[0].negate = vec->src[start_idx].negate;
-   mov->src[0].abs = vec->src[start_idx].abs;
-
-   for (unsigned i = start_idx + 1; i < 4; i++) {
-      if (!(vec->dest.write_mask & (1 << i)))
-         continue;
-
-      if (nir_srcs_equal(vec->src[i].src, vec->src[start_idx].src) &&
-          vec->src[i].negate == vec->src[start_idx].negate &&
-          vec->src[i].abs == vec->src[start_idx].abs) {
-         mov->dest.write_mask |= (1 << i);
-         mov->src[0].swizzle[i] = vec->src[i].swizzle[0];
-      }
-   }
-
-   /* In some situations (if the vecN is involved in a phi-web), we can end
-    * up with a mov from a register to itself.  Some of those channels may end
-    * up doing nothing and there's no reason to have them as part of the mov.
-    */
-   if (src_matches_dest_reg(&mov->dest.dest, &mov->src[0].src) &&
-       !mov->src[0].abs && !mov->src[0].negate) {
-      for (unsigned i = 0; i < 4; i++) {
-         if (mov->src[0].swizzle[i] == i) {
-            mov->dest.write_mask &= ~(1 << i);
-         }
-      }
-   }
-
-   /* Snapshot the writemask before the mov can be freed below. */
-   unsigned write_mask = mov->dest.write_mask;
-
-   /* Only emit the instruction if it actually does something */
-   if (write_mask) {
-      nir_instr_insert_before(&vec->instr, &mov->instr);
-   } else {
-      ralloc_free(mov);
-   }
-
-   return write_mask;
-}
-
-static bool
-has_replicated_dest(nir_alu_instr *alu)
-{
-   return alu->op == nir_op_fdot_replicated2 ||
-          alu->op == nir_op_fdot_replicated3 ||
-          alu->op == nir_op_fdot_replicated4 ||
-          alu->op == nir_op_fdph_replicated;
-}
-
-/* Attempts to coalesce the "move" from the given source of the vec to the
- * destination of the instruction generating the value.  If, for whatever
- * reason, we cannot coalesce the move, it does nothing and returns 0.  We
- * can then call insert_mov as normal.
- */
-static unsigned
-try_coalesce(nir_alu_instr *vec, unsigned start_idx, nir_shader *shader)
-{
-   assert(start_idx < nir_op_infos[vec->op].num_inputs);
-
-   /* We will only even try if the source is SSA */
-   if (!vec->src[start_idx].src.is_ssa)
-      return 0;
-
-   assert(vec->src[start_idx].src.ssa);
-
-   /* If we are going to do a reswizzle, then the vecN operation must be the
-    * only use of the source value.  We also can't have any source modifiers.
-    */
-   nir_foreach_use(vec->src[start_idx].src.ssa, src) {
-      if (src->parent_instr != &vec->instr)
-         return 0;
-
-      nir_alu_src *alu_src = exec_node_data(nir_alu_src, src, src);
-      if (alu_src->abs || alu_src->negate)
-         return 0;
-   }
-
-   if (!list_empty(&vec->src[start_idx].src.ssa->if_uses))
-      return 0;
-
-   if (vec->src[start_idx].src.ssa->parent_instr->type != nir_instr_type_alu)
-      return 0;
-
-   nir_alu_instr *src_alu =
-      nir_instr_as_alu(vec->src[start_idx].src.ssa->parent_instr);
-
-   if (has_replicated_dest(src_alu)) {
-      /* The fdot instruction is special: It replicates its result to all
-       * components.  This means that we can always rewrite its destination
-       * and we don't need to swizzle anything.
-       */
-   } else {
-      /* We only care about being able to re-swizzle the instruction if it is
-       * something that we can reswizzle.  It must be per-component.  The one
-       * exception to this is the fdotN instructions which implicitly splat
-       * their result out to all channels.
-       */
-      if (nir_op_infos[src_alu->op].output_size != 0)
-         return 0;
-
-      /* If we are going to reswizzle the instruction, we can't have any
-       * non-per-component sources either.
-       */
-      for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
-         if (nir_op_infos[src_alu->op].input_sizes[j] != 0)
-            return 0;
-   }
-
-   /* Stash off all of the ALU instruction's swizzles. */
-   uint8_t swizzles[4][4];
-   for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
-      for (unsigned i = 0; i < 4; i++)
-         swizzles[j][i] = src_alu->src[j].swizzle[i];
-
-   unsigned write_mask = 0;
-   for (unsigned i = start_idx; i < 4; i++) {
-      if (!(vec->dest.write_mask & (1 << i)))
-         continue;
-
-      if (!vec->src[i].src.is_ssa ||
-          vec->src[i].src.ssa != &src_alu->dest.dest.ssa)
-         continue;
-
-      /* At this point, the given vec source matches up with the ALU
-       * instruction so we can re-swizzle that component to match.
-       */
-      write_mask |= 1 << i;
-      if (has_replicated_dest(src_alu)) {
-         /* Since the destination is a single replicated value, we don't need
-          * to do any reswizzling
-          */
-      } else {
-         for (unsigned j = 0; j < nir_op_infos[src_alu->op].num_inputs; j++)
-            src_alu->src[j].swizzle[i] = swizzles[j][vec->src[i].swizzle[0]];
-      }
-
-      /* Clear the no longer needed vec source */
-      nir_instr_rewrite_src(&vec->instr, &vec->src[i].src, NIR_SRC_INIT);
-   }
-
-   nir_instr_rewrite_dest(&src_alu->instr, &src_alu->dest.dest, vec->dest.dest);
-   src_alu->dest.write_mask = write_mask;
-
-   return write_mask;
-}
-
-static bool
-lower_vec_to_movs_block(nir_block *block, void *void_state)
-{
-   struct vec_to_movs_state *state = void_state;
-   nir_function_impl *impl = state->impl;
-   nir_shader *shader = impl->function->shader;
-
-   nir_foreach_instr_safe(block, instr) {
-      if (instr->type != nir_instr_type_alu)
-         continue;
-
-      nir_alu_instr *vec = nir_instr_as_alu(instr);
-
-      switch (vec->op) {
-      case nir_op_vec2:
-      case nir_op_vec3:
-      case nir_op_vec4:
-         break;
-      default:
-         continue; /* The loop */
-      }
-
-      if (vec->dest.dest.is_ssa) {
-         /* Since we insert multiple MOVs, we need a register destination.
*/ - nir_register *reg = nir_local_reg_create(impl); - reg->num_components = vec->dest.dest.ssa.num_components; - - nir_ssa_def_rewrite_uses(&vec->dest.dest.ssa, nir_src_for_reg(reg)); - - nir_instr_rewrite_dest(&vec->instr, &vec->dest.dest, - nir_dest_for_reg(reg)); - } - - unsigned finished_write_mask = 0; - - /* First, emit a MOV for all the src channels that are in the - * destination reg, in case other values we're populating in the dest - * might overwrite them. - */ - for (unsigned i = 0; i < 4; i++) { - if (!(vec->dest.write_mask & (1 << i))) - continue; - - if (src_matches_dest_reg(&vec->dest.dest, &vec->src[i].src)) { - finished_write_mask |= insert_mov(vec, i, shader); - break; - } - } - - /* Now, emit MOVs for all the other src channels. */ - for (unsigned i = 0; i < 4; i++) { - if (!(vec->dest.write_mask & (1 << i))) - continue; - - if (!(finished_write_mask & (1 << i))) - finished_write_mask |= try_coalesce(vec, i, shader); - - if (!(finished_write_mask & (1 << i))) - finished_write_mask |= insert_mov(vec, i, shader); - } - - nir_instr_remove(&vec->instr); - ralloc_free(vec); - state->progress = true; - } - - return true; -} - -static bool -nir_lower_vec_to_movs_impl(nir_function_impl *impl) -{ - struct vec_to_movs_state state = { impl, false }; - - nir_foreach_block(impl, lower_vec_to_movs_block, &state); - - if (state.progress) { - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - } - - return state.progress; -} - -bool -nir_lower_vec_to_movs(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(shader, function) { - if (function->impl) - progress = nir_lower_vec_to_movs_impl(function->impl) || progress; - } - - return progress; -} diff --git a/src/glsl/nir/nir_metadata.c b/src/glsl/nir/nir_metadata.c deleted file mode 100644 index 61aae73221e..00000000000 --- a/src/glsl/nir/nir_metadata.c +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - */ - -#include "nir.h" - -/* - * Handles management of the metadata. 
- */
-
-void
-nir_metadata_require(nir_function_impl *impl, nir_metadata required)
-{
-#define NEEDS_UPDATE(X) ((required & ~impl->valid_metadata) & (X))
-
-   if (NEEDS_UPDATE(nir_metadata_block_index))
-      nir_index_blocks(impl);
-   if (NEEDS_UPDATE(nir_metadata_dominance))
-      nir_calc_dominance_impl(impl);
-   if (NEEDS_UPDATE(nir_metadata_live_ssa_defs))
-      nir_live_ssa_defs_impl(impl);
-
-#undef NEEDS_UPDATE
-
-   impl->valid_metadata |= required;
-}
-
-void
-nir_metadata_preserve(nir_function_impl *impl, nir_metadata preserved)
-{
-   impl->valid_metadata &= preserved;
-}
-
-#ifdef DEBUG
-/**
- * Make sure passes properly invalidate metadata (part 1).
- *
- * Call this before running a pass to set a bogus metadata flag, which will
- * only be preserved if the pass forgets to call nir_metadata_preserve().
- */
-void
-nir_metadata_set_validation_flag(nir_shader *shader)
-{
-   nir_foreach_function(shader, function) {
-      if (function->impl) {
-         function->impl->valid_metadata |= nir_metadata_not_properly_reset;
-      }
-   }
-}
-
-/**
- * Make sure passes properly invalidate metadata (part 2).
- *
- * Call this after a pass makes progress to verify that the bogus metadata
- * set by the earlier function was properly thrown away.  Note that passes
- * may not call nir_metadata_preserve() if they don't actually make any
- * changes at all.
- */
-void
-nir_metadata_check_validation_flag(nir_shader *shader)
-{
-   nir_foreach_function(shader, function) {
-      if (function->impl) {
-         assert(!(function->impl->valid_metadata &
-                  nir_metadata_not_properly_reset));
-      }
-   }
-}
-#endif
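A minimal sketch of the contract these helpers enforce, as a hypothetical pass body (the same require/preserve pairing appears throughout the passes in this series):

    static bool
    my_pass_impl(nir_function_impl *impl)
    {
       bool progress = false;

       nir_metadata_require(impl, nir_metadata_dominance);

       /* ... transform the IR, possibly using dominance info ... */

       /* Declare what is still valid; everything else is invalidated. */
       nir_metadata_preserve(impl, nir_metadata_block_index |
                                   nir_metadata_dominance);
       return progress;
    }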
diff --git a/src/glsl/nir/nir_move_vec_src_uses_to_dest.c b/src/glsl/nir/nir_move_vec_src_uses_to_dest.c
deleted file mode 100644
index b5186e6e944..00000000000
--- a/src/glsl/nir/nir_move_vec_src_uses_to_dest.c
+++ /dev/null
@@ -1,197 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Jason Ekstrand (jason@jlekstrand.net)
- *
- */
-
-#include "nir.h"
-
-/*
- * Implements a pass that tries to move uses of vecN sources to their
- * destinations.  This is kind of like an inverse copy-propagation pass.
- * For instance, if you have
- *
- *    ssa_1 = vec4(a, b, c, d)
- *    ssa_2 = fadd(a, b)
- *
- * This will be turned into
- *
- *    ssa_1 = vec4(a, b, c, d)
- *    ssa_2 = fadd(ssa_1.x, ssa_1.y)
- *
- * While this is "worse" because it adds a bunch of unneeded dependencies, it
- * actually makes it much easier for vec4-based backends to coalesce the MOVs
- * that result from the vec4 operation because it doesn't have to worry about
- * quite as many reads.
- */
-
-/* Returns true if the given SSA def dominates the instruction.  An SSA def is
- * considered to *not* dominate the instruction that defines it.
- */
-static bool
-ssa_def_dominates_instr(nir_ssa_def *def, nir_instr *instr)
-{
-   if (instr->index <= def->parent_instr->index) {
-      return false;
-   } else if (def->parent_instr->block == instr->block) {
-      return def->parent_instr->index < instr->index;
-   } else {
-      return nir_block_dominates(def->parent_instr->block, instr->block);
-   }
-}
-
-static bool
-move_vec_src_uses_to_dest_block(nir_block *block, void *shader)
-{
-   nir_foreach_instr(block, instr) {
-      if (instr->type != nir_instr_type_alu)
-         continue;
-
-      nir_alu_instr *vec = nir_instr_as_alu(instr);
-
-      switch (vec->op) {
-      case nir_op_vec2:
-      case nir_op_vec3:
-      case nir_op_vec4:
-         break;
-      default:
-         continue; /* The loop */
-      }
-
-      /* Can't handle non-SSA vec operations */
-      if (!vec->dest.dest.is_ssa)
-         continue;
-
-      /* Can't handle saturation */
-      if (vec->dest.saturate)
-         continue;
-
-      /* First, mark all of the sources we are going to consider for rewriting
-       * to the destination
-       */
-      int srcs_remaining = 0;
-      for (unsigned i = 0; i < nir_op_infos[vec->op].num_inputs; i++) {
-         /* We can't rewrite a source if it's not in SSA form */
-         if (!vec->src[i].src.is_ssa)
-            continue;
-
-         /* We can't rewrite a source if it has modifiers */
-         if (vec->src[i].abs || vec->src[i].negate)
-            continue;
-
-         srcs_remaining |= 1 << i;
-      }
-
-      /* We can't actually do anything with this instruction */
-      if (srcs_remaining == 0)
-         continue;
-
-      for (unsigned i; i = ffs(srcs_remaining) - 1, srcs_remaining;) {
-         int8_t swizzle[4] = { -1, -1, -1, -1 };
-
-         for (unsigned j = i; j < nir_op_infos[vec->op].num_inputs; j++) {
-            if (vec->src[j].src.ssa != vec->src[i].src.ssa)
-               continue;
-
-            /* Mark the given channel as having been handled */
-            srcs_remaining &= ~(1 << j);
-
-            /* Mark the appropriate channel as coming from src j */
-            swizzle[vec->src[j].swizzle[0]] = j;
-         }
-
-         nir_foreach_use_safe(vec->src[i].src.ssa, use) {
-            if (use->parent_instr == &vec->instr)
-               continue;
-
-            /* We need to dominate the use if we are going to rewrite it */
-            if (!ssa_def_dominates_instr(&vec->dest.dest.ssa, use->parent_instr))
-               continue;
-
-            /* For now, we'll just rewrite ALU instructions */
-            if (use->parent_instr->type != nir_instr_type_alu)
-               continue;
-
-            assert(use->is_ssa);
-
-            nir_alu_instr *use_alu = nir_instr_as_alu(use->parent_instr);
-
-            /* Figure out which source we're actually looking at */
-            nir_alu_src *use_alu_src = exec_node_data(nir_alu_src, use, src);
-            unsigned src_idx = use_alu_src - use_alu->src;
-            assert(src_idx < nir_op_infos[use_alu->op].num_inputs);
-
-            bool can_reswizzle = true;
-            for (unsigned j = 0; j < 4; j++) {
-               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
-                  continue;
-
-               if (swizzle[use_alu_src->swizzle[j]] == -1) {
-                  can_reswizzle = false;
-                  break;
-               }
-            }
-
-            if (!can_reswizzle)
-               continue;
-
-            /* At this point, we have determined that the given use can be
-             * reswizzled to actually use the destination of the vecN operation.
- * Go ahead and rewrite it as needed. - */ - nir_instr_rewrite_src(use->parent_instr, use, - nir_src_for_ssa(&vec->dest.dest.ssa)); - for (unsigned j = 0; j < 4; j++) { - if (!nir_alu_instr_channel_used(use_alu, src_idx, j)) - continue; - - use_alu_src->swizzle[j] = swizzle[use_alu_src->swizzle[j]]; - } - } - } - } - - return true; -} - -static void -nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl) -{ - nir_metadata_require(impl, nir_metadata_dominance); - - nir_index_instrs(impl); - nir_foreach_block(impl, move_vec_src_uses_to_dest_block, shader); - - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); -} - -void -nir_move_vec_src_uses_to_dest(nir_shader *shader) -{ - nir_foreach_function(shader, function) { - if (function->impl) - nir_move_vec_src_uses_to_dest_impl(shader, function->impl); - } -} diff --git a/src/glsl/nir/nir_normalize_cubemap_coords.c b/src/glsl/nir/nir_normalize_cubemap_coords.c deleted file mode 100644 index 9c15eb8c15c..00000000000 --- a/src/glsl/nir/nir_normalize_cubemap_coords.c +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand - */ - -#include "nir.h" -#include "nir_builder.h" - -/** - * This file implements a NIR lowering pass to perform the normalization of - * the cubemap coordinates to have the largest magnitude component be -1.0 - * or 1.0. This is based on the old GLSL IR based pass by Eric. 
- */ - -struct normalize_cubemap_state { - nir_builder b; - bool progress; -}; - -static bool -normalize_cubemap_coords_block(nir_block *block, void *void_state) -{ - struct normalize_cubemap_state *state = void_state; - nir_builder *b = &state->b; - - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_tex) - continue; - - nir_tex_instr *tex = nir_instr_as_tex(instr); - if (tex->sampler_dim != GLSL_SAMPLER_DIM_CUBE) - continue; - - b->cursor = nir_before_instr(&tex->instr); - - for (unsigned i = 0; i < tex->num_srcs; i++) { - if (tex->src[i].src_type != nir_tex_src_coord) - continue; - - nir_ssa_def *orig_coord = - nir_ssa_for_src(b, tex->src[i].src, nir_tex_instr_src_size(tex, i)); - assert(orig_coord->num_components >= 3); - - nir_ssa_def *abs = nir_fabs(b, orig_coord); - nir_ssa_def *norm = nir_fmax(b, nir_channel(b, abs, 0), - nir_fmax(b, nir_channel(b, abs, 1), - nir_channel(b, abs, 2))); - - nir_ssa_def *normalized = nir_fmul(b, orig_coord, nir_frcp(b, norm)); - - /* Array indices don't have to be normalized, so make a new vector - * with the coordinate's array index untouched. - */ - if (tex->coord_components == 4) { - normalized = nir_vec4(b, - nir_channel(b, normalized, 0), - nir_channel(b, normalized, 1), - nir_channel(b, normalized, 2), - nir_channel(b, orig_coord, 3)); - } - - nir_instr_rewrite_src(&tex->instr, - &tex->src[i].src, - nir_src_for_ssa(normalized)); - - state->progress = true; - } - } - - return true; -} - -static bool -normalize_cubemap_coords_impl(nir_function_impl *impl) -{ - struct normalize_cubemap_state state; - nir_builder_init(&state.b, impl); - state.progress = false; - - nir_foreach_block(impl, normalize_cubemap_coords_block, &state); - - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - - return state.progress; -} - -bool -nir_normalize_cubemap_coords(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(shader, function) { - if (function->impl) - progress = normalize_cubemap_coords_impl(function->impl) || progress; - } - - return progress; -} diff --git a/src/glsl/nir/nir_opcodes.py b/src/glsl/nir/nir_opcodes.py deleted file mode 100644 index e79810c1991..00000000000 --- a/src/glsl/nir/nir_opcodes.py +++ /dev/null @@ -1,668 +0,0 @@ -#! /usr/bin/env python -# -# Copyright (C) 2014 Connor Abbott -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. 
-#
-# Authors:
-#    Connor Abbott (cwabbott0@gmail.com)
-
-
-# Class that represents all the information we have about the opcode
-# NOTE: this must be kept in sync with nir_op_info
-
-class Opcode(object):
-   """Class that represents all the information we have about the opcode
-   NOTE: this must be kept in sync with nir_op_info
-   """
-   def __init__(self, name, output_size, output_type, input_sizes,
-                input_types, algebraic_properties, const_expr):
-      """Parameters:
-
-      - name is the name of the opcode (prepend nir_op_ for the enum name)
-      - all types are strings that get nir_type_ prepended to them
-      - input_types is a list of types
-      - algebraic_properties is a space-separated string, where nir_op_is_ is
-        prepended before each entry
-      - const_expr is an expression or series of statements that computes the
-        constant value of the opcode given the constant values of its inputs.
-
-      Constant expressions are formed from the variables src0, src1, ...,
-      src(N-1), where N is the number of arguments. The output of the
-      expression should be stored in the dst variable. Per-component input
-      and output variables will be scalars and non-per-component input and
-      output variables will be a struct with fields named x, y, z, and w
-      all of the correct type. Input and output variables can be assumed
-      to already be of the correct type and need no conversion. In
-      particular, the conversion from the C bool type to/from NIR_TRUE and
-      NIR_FALSE happens automatically.
-
-      For per-component instructions, the entire expression will be
-      executed once for each component. For non-per-component
-      instructions, the expression is expected to store the correct values
-      in dst.x, dst.y, etc. If "dst" does not exist anywhere in the
-      constant expression, an assignment to dst will happen automatically
-      and the result will be equivalent to "dst = <expression>" for
-      per-component instructions and "dst.x = dst.y = ... = <expression>"
-      for non-per-component instructions.
- """ - assert isinstance(name, str) - assert isinstance(output_size, int) - assert isinstance(output_type, str) - assert isinstance(input_sizes, list) - assert isinstance(input_sizes[0], int) - assert isinstance(input_types, list) - assert isinstance(input_types[0], str) - assert isinstance(algebraic_properties, str) - assert isinstance(const_expr, str) - assert len(input_sizes) == len(input_types) - assert 0 <= output_size <= 4 - for size in input_sizes: - assert 0 <= size <= 4 - if output_size != 0: - assert size != 0 - self.name = name - self.num_inputs = len(input_sizes) - self.output_size = output_size - self.output_type = output_type - self.input_sizes = input_sizes - self.input_types = input_types - self.algebraic_properties = algebraic_properties - self.const_expr = const_expr - -# helper variables for strings -tfloat = "float" -tint = "int" -tbool = "bool" -tuint = "uint" - -commutative = "commutative " -associative = "associative " - -# global dictionary of opcodes -opcodes = {} - -def opcode(name, output_size, output_type, input_sizes, input_types, - algebraic_properties, const_expr): - assert name not in opcodes - opcodes[name] = Opcode(name, output_size, output_type, input_sizes, - input_types, algebraic_properties, const_expr) - -def unop_convert(name, in_type, out_type, const_expr): - opcode(name, 0, out_type, [0], [in_type], "", const_expr) - -def unop(name, ty, const_expr): - opcode(name, 0, ty, [0], [ty], "", const_expr) - -def unop_horiz(name, output_size, output_type, input_size, input_type, - const_expr): - opcode(name, output_size, output_type, [input_size], [input_type], "", - const_expr) - -def unop_reduce(name, output_size, output_type, input_type, prereduce_expr, - reduce_expr, final_expr): - def prereduce(src): - return "(" + prereduce_expr.format(src=src) + ")" - def final(src): - return final_expr.format(src="(" + src + ")") - def reduce_(src0, src1): - return reduce_expr.format(src0=src0, src1=src1) - src0 = prereduce("src0.x") - src1 = prereduce("src0.y") - src2 = prereduce("src0.z") - src3 = prereduce("src0.w") - unop_horiz(name + "2", output_size, output_type, 2, input_type, - final(reduce_(src0, src1))) - unop_horiz(name + "3", output_size, output_type, 3, input_type, - final(reduce_(reduce_(src0, src1), src2))) - unop_horiz(name + "4", output_size, output_type, 4, input_type, - final(reduce_(reduce_(src0, src1), reduce_(src2, src3)))) - - -# These two move instructions differ in what modifiers they support and what -# the negate modifier means. Otherwise, they are identical. -unop("fmov", tfloat, "src0") -unop("imov", tint, "src0") - -unop("ineg", tint, "-src0") -unop("fneg", tfloat, "-src0") -unop("inot", tint, "~src0") # invert every bit of the integer -unop("fnot", tfloat, "(src0 == 0.0f) ? 1.0f : 0.0f") -unop("fsign", tfloat, "(src0 == 0.0f) ? 0.0f : ((src0 > 0.0f) ? 1.0f : -1.0f)") -unop("isign", tint, "(src0 == 0) ? 0 : ((src0 > 0) ? 1 : -1)") -unop("iabs", tint, "(src0 < 0) ? -src0 : src0") -unop("fabs", tfloat, "fabsf(src0)") -unop("fsat", tfloat, "(src0 > 1.0f) ? 1.0f : ((src0 <= 0.0f) ? 0.0f : src0)") -unop("frcp", tfloat, "1.0f / src0") -unop("frsq", tfloat, "1.0f / sqrtf(src0)") -unop("fsqrt", tfloat, "sqrtf(src0)") -unop("fexp2", tfloat, "exp2f(src0)") -unop("flog2", tfloat, "log2f(src0)") -unop_convert("f2i", tfloat, tint, "src0") # Float-to-integer conversion. -unop_convert("f2u", tfloat, tuint, "src0") # Float-to-unsigned conversion -unop_convert("i2f", tint, tfloat, "src0") # Integer-to-float conversion. 
-# Float-to-boolean conversion
-unop_convert("f2b", tfloat, tbool, "src0 != 0.0f")
-# Boolean-to-float conversion
-unop_convert("b2f", tbool, tfloat, "src0 ? 1.0f : 0.0f")
-# Int-to-boolean conversion
-unop_convert("i2b", tint, tbool, "src0 != 0")
-unop_convert("b2i", tbool, tint, "src0 ? 1 : 0") # Boolean-to-int conversion
-unop_convert("u2f", tuint, tfloat, "src0") # Unsigned-to-float conversion.
-
-# Unary floating-point rounding operations.
-
-
-unop("ftrunc", tfloat, "truncf(src0)")
-unop("fceil", tfloat, "ceilf(src0)")
-unop("ffloor", tfloat, "floorf(src0)")
-unop("ffract", tfloat, "src0 - floorf(src0)")
-unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
-
-
-# Trigonometric operations.
-
-
-unop("fsin", tfloat, "sinf(src0)")
-unop("fcos", tfloat, "cosf(src0)")
-
-
-# Partial derivatives.
-
-
-unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
-unop("fddy", tfloat, "0.0f")
-unop("fddx_fine", tfloat, "0.0f")
-unop("fddy_fine", tfloat, "0.0f")
-unop("fddx_coarse", tfloat, "0.0f")
-unop("fddy_coarse", tfloat, "0.0f")
-
-
-# Floating point pack and unpack operations.
-
-def pack_2x16(fmt):
-   unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
-dst.x = (uint32_t) pack_fmt_1x16(src0.x);
-dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
-""".replace("fmt", fmt))
-
-def pack_4x8(fmt):
-   unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
-dst.x = (uint32_t) pack_fmt_1x8(src0.x);
-dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
-dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
-dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
-""".replace("fmt", fmt))
-
-def unpack_2x16(fmt):
-   unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
-dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
-dst.y = unpack_fmt_1x16((uint16_t)(src0.x >> 16));
-""".replace("fmt", fmt))
-
-def unpack_4x8(fmt):
-   unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
-dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
-dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
-dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
-dst.w = unpack_fmt_1x8((uint8_t)(src0.x >> 24));
-""".replace("fmt", fmt))
-
-
-pack_2x16("snorm")
-pack_4x8("snorm")
-pack_2x16("unorm")
-pack_4x8("unorm")
-pack_2x16("half")
-unpack_2x16("snorm")
-unpack_4x8("snorm")
-unpack_2x16("unorm")
-unpack_4x8("unorm")
-unpack_2x16("half")
-
-
-# Lowered floating point unpacking operations.
-
-
-unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
-           "unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
-unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
-           "unpack_half_1x16((uint16_t)(src0.x >> 16))")
-
-
-# Bit operations, part of ARB_gpu_shader5.
-
-
-unop("bitfield_reverse", tuint, """
-/* we're not winning any awards for speed here, but that's ok */
-dst = 0;
-for (unsigned bit = 0; bit < 32; bit++)
-   dst |= ((src0 >> bit) & 1) << (31 - bit);
-""")
-unop("bit_count", tuint, """
-dst = 0;
-for (unsigned bit = 0; bit < 32; bit++) {
-   if ((src0 >> bit) & 1)
-      dst++;
-}
-""")
-
-unop_convert("ufind_msb", tuint, tint, """
-dst = -1;
-for (int bit = 31; bit >= 0; bit--) {
-   if ((src0 >> bit) & 1) {
-      dst = bit;
-      break;
-   }
-}
-""")
-
-unop("ifind_msb", tint, """
-dst = -1;
-for (int bit = 31; bit >= 0; bit--) {
-   /* If src0 < 0, we're looking for the first 0 bit.
-    * if src0 >= 0, we're looking for the first 1 bit.
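-    * For example, ifind_msb(7) == 2 (bit 2 is the highest 1 bit) and
-    * ifind_msb(-8) == 2 (bit 2 is the highest 0 bit), while ifind_msb(0)
-    * and ifind_msb(-1) both return -1.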
- */ - if ((((src0 >> bit) & 1) && (src0 >= 0)) || - (!((src0 >> bit) & 1) && (src0 < 0))) { - dst = bit; - break; - } -} -""") - -unop("find_lsb", tint, """ -dst = -1; -for (unsigned bit = 0; bit < 32; bit++) { - if ((src0 >> bit) & 1) { - dst = bit; - break; - } -} -""") - - -for i in xrange(1, 5): - for j in xrange(1, 5): - unop_horiz("fnoise{0}_{1}".format(i, j), i, tfloat, j, tfloat, "0.0f") - -def binop_convert(name, out_type, in_type, alg_props, const_expr): - opcode(name, 0, out_type, [0, 0], [in_type, in_type], alg_props, const_expr) - -def binop(name, ty, alg_props, const_expr): - binop_convert(name, ty, ty, alg_props, const_expr) - -def binop_compare(name, ty, alg_props, const_expr): - binop_convert(name, tbool, ty, alg_props, const_expr) - -def binop_horiz(name, out_size, out_type, src1_size, src1_type, src2_size, - src2_type, const_expr): - opcode(name, out_size, out_type, [src1_size, src2_size], [src1_type, src2_type], - "", const_expr) - -def binop_reduce(name, output_size, output_type, src_type, prereduce_expr, - reduce_expr, final_expr): - def final(src): - return final_expr.format(src= "(" + src + ")") - def reduce_(src0, src1): - return reduce_expr.format(src0=src0, src1=src1) - def prereduce(src0, src1): - return "(" + prereduce_expr.format(src0=src0, src1=src1) + ")" - src0 = prereduce("src0.x", "src1.x") - src1 = prereduce("src0.y", "src1.y") - src2 = prereduce("src0.z", "src1.z") - src3 = prereduce("src0.w", "src1.w") - opcode(name + "2", output_size, output_type, - [2, 2], [src_type, src_type], commutative, - final(reduce_(src0, src1))) - opcode(name + "3", output_size, output_type, - [3, 3], [src_type, src_type], commutative, - final(reduce_(reduce_(src0, src1), src2))) - opcode(name + "4", output_size, output_type, - [4, 4], [src_type, src_type], commutative, - final(reduce_(reduce_(src0, src1), reduce_(src2, src3)))) - -binop("fadd", tfloat, commutative + associative, "src0 + src1") -binop("iadd", tint, commutative + associative, "src0 + src1") -binop("fsub", tfloat, "", "src0 - src1") -binop("isub", tint, "", "src0 - src1") - -binop("fmul", tfloat, commutative + associative, "src0 * src1") -# low 32-bits of signed/unsigned integer multiply -binop("imul", tint, commutative + associative, "src0 * src1") -# high 32-bits of signed integer multiply -binop("imul_high", tint, commutative, - "(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)") -# high 32-bits of unsigned integer multiply -binop("umul_high", tuint, commutative, - "(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)") - -binop("fdiv", tfloat, "", "src0 / src1") -binop("idiv", tint, "", "src0 / src1") -binop("udiv", tuint, "", "src0 / src1") - -# returns a boolean representing the carry resulting from the addition of -# the two unsigned arguments. - -binop_convert("uadd_carry", tuint, tuint, commutative, "src0 + src1 < src0") - -# returns a boolean representing the borrow resulting from the subtraction -# of the two unsigned arguments. - -binop_convert("usub_borrow", tuint, tuint, "", "src0 < src1") - -binop("fmod", tfloat, "", "src0 - src1 * floorf(src0 / src1)") -binop("umod", tuint, "", "src1 == 0 ? 
0 : src0 % src1") - -# -# Comparisons -# - - -# these integer-aware comparisons return a boolean (0 or ~0) - -binop_compare("flt", tfloat, "", "src0 < src1") -binop_compare("fge", tfloat, "", "src0 >= src1") -binop_compare("feq", tfloat, commutative, "src0 == src1") -binop_compare("fne", tfloat, commutative, "src0 != src1") -binop_compare("ilt", tint, "", "src0 < src1") -binop_compare("ige", tint, "", "src0 >= src1") -binop_compare("ieq", tint, commutative, "src0 == src1") -binop_compare("ine", tint, commutative, "src0 != src1") -binop_compare("ult", tuint, "", "src0 < src1") -binop_compare("uge", tuint, "", "src0 >= src1") - -# integer-aware GLSL-style comparisons that compare floats and ints - -binop_reduce("ball_fequal", 1, tbool, tfloat, "{src0} == {src1}", - "{src0} && {src1}", "{src}") -binop_reduce("bany_fnequal", 1, tbool, tfloat, "{src0} != {src1}", - "{src0} || {src1}", "{src}") -binop_reduce("ball_iequal", 1, tbool, tint, "{src0} == {src1}", - "{src0} && {src1}", "{src}") -binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}", - "{src0} || {src1}", "{src}") - -# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0 - -binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}", - "{src0} && {src1}", "{src} ? 1.0f : 0.0f") -binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}", - "{src0} || {src1}", "{src} ? 1.0f : 0.0f") - -# These comparisons for integer-less hardware return 1.0 and 0.0 for true -# and false respectively - -binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than -binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal -binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal -binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal - - -binop("ishl", tint, "", "src0 << src1") -binop("ishr", tint, "", "src0 >> src1") -binop("ushr", tuint, "", "src0 >> src1") - -# bitwise logic operators -# -# These are also used as boolean and, or, xor for hardware supporting -# integers. - - -binop("iand", tuint, commutative + associative, "src0 & src1") -binop("ior", tuint, commutative + associative, "src0 | src1") -binop("ixor", tuint, commutative + associative, "src0 ^ src1") - - -# floating point logic operators -# -# These use (src != 0.0) for testing the truth of the input, and output 1.0 -# for true and 0.0 for false - -binop("fand", tfloat, commutative, - "((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f") -binop("for", tfloat, commutative, - "((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f") -binop("fxor", tfloat, commutative, - "(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f") - -binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}", - "{src}") - -binop_reduce("fdot_replicated", 4, tfloat, tfloat, - "{src0} * {src1}", "{src0} + {src1}", "{src}") - -opcode("fdph", 1, tfloat, [3, 4], [tfloat, tfloat], "", - "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w") -opcode("fdph_replicated", 4, tfloat, [3, 4], [tfloat, tfloat], "", - "src0.x * src1.x + src0.y * src1.y + src0.z * src1.z + src1.w") - -binop("fmin", tfloat, "", "fminf(src0, src1)") -binop("imin", tint, commutative + associative, "src1 > src0 ? src0 : src1") -binop("umin", tuint, commutative + associative, "src1 > src0 ? src0 : src1") -binop("fmax", tfloat, "", "fmaxf(src0, src1)") -binop("imax", tint, commutative + associative, "src1 > src0 ? 
src1 : src0") -binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0") - -# Saturated vector add for 4 8bit ints. -binop("usadd_4x8", tint, commutative + associative, """ -dst = 0; -for (int i = 0; i < 32; i += 8) { - dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i; -} -""") - -# Saturated vector subtract for 4 8bit ints. -binop("ussub_4x8", tint, "", """ -dst = 0; -for (int i = 0; i < 32; i += 8) { - int src0_chan = (src0 >> i) & 0xff; - int src1_chan = (src1 >> i) & 0xff; - if (src0_chan > src1_chan) - dst |= (src0_chan - src1_chan) << i; -} -""") - -# vector min for 4 8bit ints. -binop("umin_4x8", tint, commutative + associative, """ -dst = 0; -for (int i = 0; i < 32; i += 8) { - dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; -} -""") - -# vector max for 4 8bit ints. -binop("umax_4x8", tint, commutative + associative, """ -dst = 0; -for (int i = 0; i < 32; i += 8) { - dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i; -} -""") - -# unorm multiply: (a * b) / 255. -binop("umul_unorm_4x8", tint, commutative + associative, """ -dst = 0; -for (int i = 0; i < 32; i += 8) { - int src0_chan = (src0 >> i) & 0xff; - int src1_chan = (src1 >> i) & 0xff; - dst |= ((src0_chan * src1_chan) / 255) << i; -} -""") - -binop("fpow", tfloat, "", "powf(src0, src1)") - -binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat, - "pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)") - -# bfm implements the behavior of the first operation of the SM5 "bfi" assembly -# and that of the "bfi1" i965 instruction. That is, it has undefined behavior -# if either of its arguments are 32. -binop_convert("bfm", tuint, tint, "", """ -int bits = src0, offset = src1; -if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32) - dst = 0; /* undefined */ -else - dst = ((1u << bits) - 1) << offset; -""") - -opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """ -dst = ldexpf(src0, src1); -/* flush denormals to zero. */ -if (!isnormal(dst)) - dst = copysignf(0.0f, src0); -""") - -# Combines the first component of each input to make a 2-component vector. - -binop_horiz("vec2", 2, tuint, 1, tuint, 1, tuint, """ -dst.x = src0.x; -dst.y = src1.x; -""") - -def triop(name, ty, const_expr): - opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], "", const_expr) -def triop_horiz(name, output_size, src1_size, src2_size, src3_size, const_expr): - opcode(name, output_size, tuint, - [src1_size, src2_size, src3_size], - [tuint, tuint, tuint], "", const_expr) - -triop("ffma", tfloat, "src0 * src1 + src2") - -triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2") - -# Conditional Select -# -# A vector conditional select instruction (like ?:, but operating per- -# component on vectors). There are two versions, one for floating point -# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0). - - -triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2") -opcode("bcsel", 0, tuint, [0, 0, 0], - [tbool, tuint, tuint], "", "src0 ? 
src1 : src2") - -# SM5 bfi assembly -triop("bfi", tuint, """ -unsigned mask = src0, insert = src1, base = src2; -if (mask == 0) { - dst = base; -} else { - unsigned tmp = mask; - while (!(tmp & 1)) { - tmp >>= 1; - insert <<= 1; - } - dst = (base & ~mask) | (insert & mask); -} -""") - -# SM5 ubfe/ibfe assembly -opcode("ubfe", 0, tuint, - [0, 0, 0], [tuint, tint, tint], "", """ -unsigned base = src0; -int offset = src1, bits = src2; -if (bits == 0) { - dst = 0; -} else if (bits < 0 || offset < 0) { - dst = 0; /* undefined */ -} else if (offset + bits < 32) { - dst = (base << (32 - bits - offset)) >> (32 - bits); -} else { - dst = base >> offset; -} -""") -opcode("ibfe", 0, tint, - [0, 0, 0], [tint, tint, tint], "", """ -int base = src0; -int offset = src1, bits = src2; -if (bits == 0) { - dst = 0; -} else if (bits < 0 || offset < 0) { - dst = 0; /* undefined */ -} else if (offset + bits < 32) { - dst = (base << (32 - bits - offset)) >> (32 - bits); -} else { - dst = base >> offset; -} -""") - -# GLSL bitfieldExtract() -opcode("ubitfield_extract", 0, tuint, - [0, 0, 0], [tuint, tint, tint], "", """ -unsigned base = src0; -int offset = src1, bits = src2; -if (bits == 0) { - dst = 0; -} else if (bits < 0 || offset < 0 || offset + bits > 32) { - dst = 0; /* undefined per the spec */ -} else { - dst = (base >> offset) & ((1ull << bits) - 1); -} -""") -opcode("ibitfield_extract", 0, tint, - [0, 0, 0], [tint, tint, tint], "", """ -int base = src0; -int offset = src1, bits = src2; -if (bits == 0) { - dst = 0; -} else if (offset < 0 || bits < 0 || offset + bits > 32) { - dst = 0; -} else { - dst = (base << (32 - offset - bits)) >> offset; /* use sign-extending shift */ -} -""") - -# Combines the first component of each input to make a 3-component vector. - -triop_horiz("vec3", 3, 1, 1, 1, """ -dst.x = src0.x; -dst.y = src1.x; -dst.z = src2.x; -""") - -def quadop_horiz(name, output_size, src1_size, src2_size, src3_size, - src4_size, const_expr): - opcode(name, output_size, tuint, - [src1_size, src2_size, src3_size, src4_size], - [tuint, tuint, tuint, tuint], - "", const_expr) - -opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0], - [tuint, tuint, tint, tint], "", """ -unsigned base = src0, insert = src1; -int offset = src2, bits = src3; -if (bits == 0) { - dst = 0; -} else if (offset < 0 || bits < 0 || bits + offset > 32) { - dst = 0; -} else { - unsigned mask = ((1ull << bits) - 1) << offset; - dst = (base & ~mask) | ((insert << bits) & mask); -} -""") - -quadop_horiz("vec4", 4, 1, 1, 1, 1, """ -dst.x = src0.x; -dst.y = src1.x; -dst.z = src2.x; -dst.w = src3.x; -""") - - diff --git a/src/glsl/nir/nir_opcodes_c.py b/src/glsl/nir/nir_opcodes_c.py deleted file mode 100644 index 7049c5be676..00000000000 --- a/src/glsl/nir/nir_opcodes_c.py +++ /dev/null @@ -1,55 +0,0 @@ -#! /usr/bin/env python -# -# Copyright (C) 2014 Connor Abbott -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. 
-# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. -# -# Authors: -# Connor Abbott (cwabbott0@gmail.com) - -from nir_opcodes import opcodes -from mako.template import Template - -template = Template(""" -#include "nir.h" - -const nir_op_info nir_op_infos[nir_num_opcodes] = { -% for name, opcode in sorted(opcodes.iteritems()): -{ - .name = "${name}", - .num_inputs = ${opcode.num_inputs}, - .output_size = ${opcode.output_size}, - .output_type = ${"nir_type_" + opcode.output_type}, - .input_sizes = { - ${ ", ".join(str(size) for size in opcode.input_sizes) } - }, - .input_types = { - ${ ", ".join("nir_type_" + type for type in opcode.input_types) } - }, - .algebraic_properties = - ${ "0" if opcode.algebraic_properties == "" else " | ".join( - "NIR_OP_IS_" + prop.upper() for prop in - opcode.algebraic_properties.strip().split(" ")) } -}, -% endfor -}; -""") - -print template.render(opcodes=opcodes) diff --git a/src/glsl/nir/nir_opcodes_h.py b/src/glsl/nir/nir_opcodes_h.py deleted file mode 100644 index be15a96d236..00000000000 --- a/src/glsl/nir/nir_opcodes_h.py +++ /dev/null @@ -1,47 +0,0 @@ -#! /usr/bin/env python - -template = """\ -/* Copyright (C) 2014 Connor Abbott - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - */ - -#ifndef _NIR_OPCODES_ -#define _NIR_OPCODES_ - -<% opcode_names = sorted(opcodes.iterkeys()) %> - -typedef enum { -% for name in opcode_names: - nir_op_${name}, -% endfor - nir_last_opcode = nir_op_${opcode_names[-1]}, - nir_num_opcodes = nir_last_opcode + 1 -} nir_op; - -#endif /* _NIR_OPCODES_ */""" - -from nir_opcodes import opcodes -from mako.template import Template - -print Template(template).render(opcodes=opcodes) diff --git a/src/glsl/nir/nir_opt_algebraic.py b/src/glsl/nir/nir_opt_algebraic.py deleted file mode 100644 index 7745b76f7ce..00000000000 --- a/src/glsl/nir/nir_opt_algebraic.py +++ /dev/null @@ -1,285 +0,0 @@ -#! 
/usr/bin/env python
-#
-# Copyright (C) 2014 Intel Corporation
-#
-# Permission is hereby granted, free of charge, to any person obtaining a
-# copy of this software and associated documentation files (the "Software"),
-# to deal in the Software without restriction, including without limitation
-# the rights to use, copy, modify, merge, publish, distribute, sublicense,
-# and/or sell copies of the Software, and to permit persons to whom the
-# Software is furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice (including the next
-# paragraph) shall be included in all copies or substantial portions of the
-# Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
-# IN THE SOFTWARE.
-#
-# Authors:
-#    Jason Ekstrand (jason@jlekstrand.net)
-
-import nir_algebraic
-
-# Convenience variables
-a = 'a'
-b = 'b'
-c = 'c'
-d = 'd'
-
-# Written in the form (<search>, <replace>) where <search> is an expression
-# and <replace> is either an expression or a value. An expression is
-# defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>)
-# where each source is either an expression or a value. A value can be
-# either a numeric constant or a string representing a variable name.
-#
-# Variable names are specified as "[#]name[@type]" where "#" indicates that
-# the given variable will only match constants and the type indicates that
-# the given variable will only match values from ALU instructions with the
-# given output type.
-#
-# For constants, you have to be careful to make sure that it is the right
-# type because python is unaware of the source and destination types of the
-# opcodes.
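-
-# As a worked example of this format, the very first entry in the list
-# below,
-#
-#    (('fneg', ('fneg', a)), a),
-#
-# matches any fneg whose source is itself an fneg and replaces the whole
-# expression with the inner value a. An entry may also carry an optional
-# third element giving a condition under which it applies, as in
-#
-#    (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'),
-#
-# which is only used when the backend asks for fsub to be lowered.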
- -optimizations = [ - (('fneg', ('fneg', a)), a), - (('ineg', ('ineg', a)), a), - (('fabs', ('fabs', a)), ('fabs', a)), - (('fabs', ('fneg', a)), ('fabs', a)), - (('iabs', ('iabs', a)), ('iabs', a)), - (('iabs', ('ineg', a)), ('iabs', a)), - (('fadd', a, 0.0), a), - (('iadd', a, 0), a), - (('usadd_4x8', a, 0), a), - (('usadd_4x8', a, ~0), ~0), - (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))), - (('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))), - (('fadd', ('fneg', a), a), 0.0), - (('iadd', ('ineg', a), a), 0), - (('iadd', ('ineg', a), ('iadd', a, b)), b), - (('iadd', a, ('iadd', ('ineg', a), b)), b), - (('fadd', ('fneg', a), ('fadd', a, b)), b), - (('fadd', a, ('fadd', ('fneg', a), b)), b), - (('fmul', a, 0.0), 0.0), - (('imul', a, 0), 0), - (('umul_unorm_4x8', a, 0), 0), - (('umul_unorm_4x8', a, ~0), a), - (('fmul', a, 1.0), a), - (('imul', a, 1), a), - (('fmul', a, -1.0), ('fneg', a)), - (('imul', a, -1), ('ineg', a)), - (('ffma', 0.0, a, b), b), - (('ffma', a, 0.0, b), b), - (('ffma', a, b, 0.0), ('fmul', a, b)), - (('ffma', a, 1.0, b), ('fadd', a, b)), - (('ffma', 1.0, a, b), ('fadd', a, b)), - (('flrp', a, b, 0.0), a), - (('flrp', a, b, 1.0), b), - (('flrp', a, a, b), a), - (('flrp', 0.0, a, b), ('fmul', a, b)), - (('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'), - (('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'), - (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'), - (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'), - (('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'), - (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'), - # Comparison simplifications - (('inot', ('flt', a, b)), ('fge', a, b)), - (('inot', ('fge', a, b)), ('flt', a, b)), - (('inot', ('feq', a, b)), ('fne', a, b)), - (('inot', ('fne', a, b)), ('feq', a, b)), - (('inot', ('ilt', a, b)), ('ige', a, b)), - (('inot', ('ige', a, b)), ('ilt', a, b)), - (('inot', ('ieq', a, b)), ('ine', a, b)), - (('inot', ('ine', a, b)), ('ieq', a, b)), - (('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)), - (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)), - (('bcsel', ('flt', a, b), b, a), ('fmax', a, b)), - (('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)), - (('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)), - (('fmin', a, a), a), - (('fmax', a, a), a), - (('imin', a, a), a), - (('imax', a, a), a), - (('umin', a, a), a), - (('umax', a, a), a), - (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'), - (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'), - (('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'), - (('fsat', ('fsat', a)), ('fsat', a)), - (('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)), - (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))), - (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)), - (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))), - (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)), - (('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'), - (('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'), - (('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'), - (('sne', a, b), ('b2f', ('fne', a, b)), 'options->lower_scmp'), - (('fne', ('fneg', a), a), 
('fne', a, 0.0)), - (('feq', ('fneg', a), a), ('feq', a, 0.0)), - # Emulating booleans - (('imul', ('b2i', a), ('b2i', b)), ('b2i', ('iand', a, b))), - (('fmul', ('b2f', a), ('b2f', b)), ('b2f', ('iand', a, b))), - (('fsat', ('fadd', ('b2f', a), ('b2f', b))), ('b2f', ('ior', a, b))), - (('iand', 'a@bool', 1.0), ('b2f', a)), - (('flt', ('fneg', ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. - (('flt', ('fsub', 0.0, ('b2f', a)), 0), a), # Generated by TGSI KILL_IF. - # Comparison with the same args. Note that these are not done for - # the float versions because NaN always returns false on float - # inequalities. - (('ilt', a, a), False), - (('ige', a, a), True), - (('ieq', a, a), True), - (('ine', a, a), False), - (('ult', a, a), False), - (('uge', a, a), True), - # Logical and bit operations - (('fand', a, 0.0), 0.0), - (('iand', a, a), a), - (('iand', a, ~0), a), - (('iand', a, 0), 0), - (('ior', a, a), a), - (('ior', a, 0), a), - (('fxor', a, a), 0.0), - (('ixor', a, a), 0), - (('inot', ('inot', a)), a), - # DeMorgan's Laws - (('iand', ('inot', a), ('inot', b)), ('inot', ('ior', a, b))), - (('ior', ('inot', a), ('inot', b)), ('inot', ('iand', a, b))), - # Shift optimizations - (('ishl', 0, a), 0), - (('ishl', a, 0), a), - (('ishr', 0, a), 0), - (('ishr', a, 0), a), - (('ushr', 0, a), 0), - (('ushr', a, 0), a), - # Exponential/logarithmic identities - (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a - (('flog2', ('fexp2', a)), a), # lg2(2^a) = a - (('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b) - (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b - (('fpow', a, 1.0), a), - (('fpow', a, 2.0), ('fmul', a, a)), - (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))), - (('fpow', 2.0, a), ('fexp2', a)), - (('fpow', ('fpow', a, 2.2), 0.454545), a), - (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)), - (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))), - (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))), - (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))), - (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))), - (('flog2', ('frcp', a)), ('fneg', ('flog2', a))), - (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))), - (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))), - (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))), - (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))), - (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))), - # Division and reciprocal - (('fdiv', 1.0, a), ('frcp', a)), - (('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'), - (('frcp', ('frcp', a)), a), - (('frcp', ('fsqrt', a)), ('frsq', a)), - (('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'), - (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'), - # Boolean simplifications - (('ieq', 'a@bool', True), a), - (('ine', 'a@bool', True), ('inot', a)), - (('ine', 'a@bool', False), a), - (('ieq', 'a@bool', False), ('inot', 'a')), - (('bcsel', a, True, False), ('ine', a, 0)), - (('bcsel', a, False, True), ('ieq', a, 0)), - (('bcsel', True, b, c), b), - (('bcsel', False, b, c), c), - # The result of this should be hit by constant propagation and, in the - # next round of opt_algebraic, get picked up by one of the above two. 
- (('bcsel', '#a', b, c), ('bcsel', ('ine', 'a', 0), b, c)), - - (('bcsel', a, b, b), b), - (('fcsel', a, b, b), b), - - # Conversions - (('i2b', ('b2i', a)), a), - (('f2i', ('ftrunc', a)), ('f2i', a)), - (('f2u', ('ftrunc', a)), ('f2u', a)), - - # Subtracts - (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)), - (('isub', a, ('isub', 0, b)), ('iadd', a, b)), - (('ussub_4x8', a, 0), a), - (('ussub_4x8', a, ~0), 0), - (('fsub', a, b), ('fadd', a, ('fneg', b)), 'options->lower_sub'), - (('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'), - (('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'), - (('ineg', a), ('isub', 0, a), 'options->lower_negate'), - (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)), - (('iadd', a, ('isub', 0, b)), ('isub', a, b)), - (('fabs', ('fsub', 0.0, a)), ('fabs', a)), - (('iabs', ('isub', 0, a)), ('iabs', a)), - - # Misc. lowering - (('fmod', a, b), ('fsub', a, ('fmul', b, ('ffloor', ('fdiv', a, b)))), 'options->lower_fmod'), - (('uadd_carry', a, b), ('b2i', ('ult', ('iadd', a, b), a)), 'options->lower_uadd_carry'), - (('usub_borrow', a, b), ('b2i', ('ult', a, b)), 'options->lower_usub_borrow'), - - (('bitfield_insert', 'base', 'insert', 'offset', 'bits'), - ('bcsel', ('ilt', 31, 'bits'), 'insert', - ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')), - 'options->lower_bitfield_insert'), - - (('ibitfield_extract', 'value', 'offset', 'bits'), - ('bcsel', ('ilt', 31, 'bits'), 'value', - ('ibfe', 'value', 'offset', 'bits')), - 'options->lower_bitfield_extract'), - - (('ubitfield_extract', 'value', 'offset', 'bits'), - ('bcsel', ('ult', 31, 'bits'), 'value', - ('ubfe', 'value', 'offset', 'bits')), - 'options->lower_bitfield_extract'), -] - -# Add optimizations to handle the case where the result of a ternary is -# compared to a constant. This way we can take things like -# -# (a ? 0 : 1) > 0 -# -# and turn it into -# -# a ? (0 > 0) : (1 > 0) -# -# which constant folding will eat for lunch. The resulting ternary will -# further get cleaned up by the boolean reductions above and we will be -# left with just the original variable "a". -for op in ['flt', 'fge', 'feq', 'fne', - 'ilt', 'ige', 'ieq', 'ine', 'ult', 'uge']: - optimizations += [ - ((op, ('bcsel', 'a', '#b', '#c'), '#d'), - ('bcsel', 'a', (op, 'b', 'd'), (op, 'c', 'd'))), - ((op, '#d', ('bcsel', a, '#b', '#c')), - ('bcsel', 'a', (op, 'd', 'b'), (op, 'd', 'c'))), - ] - -# This section contains "late" optimizations that should be run after the -# regular optimizations have finished. Optimizations should go here if -# they help code generation but do not necessarily produce code that is -# more easily optimizable. 
-late_optimizations = [ - (('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))), - (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))), - (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))), - (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))), - (('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'), - (('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'), - (('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'), - (('fdph', a, b), ('fdph_replicated', a, b), 'options->fdot_replicates'), -] - -print nir_algebraic.AlgebraicPass("nir_opt_algebraic", optimizations).render() -print nir_algebraic.AlgebraicPass("nir_opt_algebraic_late", - late_optimizations).render() diff --git a/src/glsl/nir/nir_opt_constant_folding.c b/src/glsl/nir/nir_opt_constant_folding.c deleted file mode 100644 index 28a73f86f95..00000000000 --- a/src/glsl/nir/nir_opt_constant_folding.c +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "nir_constant_expressions.h" -#include - -/* - * Implements SSA-based constant folding. - */ - -struct constant_fold_state { - void *mem_ctx; - nir_function_impl *impl; - bool progress; -}; - -static bool -constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx) -{ - nir_const_value src[4]; - - if (!instr->dest.dest.is_ssa) - return false; - - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { - if (!instr->src[i].src.is_ssa) - return false; - - nir_instr *src_instr = instr->src[i].src.ssa->parent_instr; - - if (src_instr->type != nir_instr_type_load_const) - return false; - nir_load_const_instr* load_const = nir_instr_as_load_const(src_instr); - - for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i); - j++) { - src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]]; - } - - /* We shouldn't have any source modifiers in the optimization loop. */ - assert(!instr->src[i].abs && !instr->src[i].negate); - } - - /* We shouldn't have any saturate modifiers in the optimization loop. 
*/ - assert(!instr->dest.saturate); - - nir_const_value dest = - nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components, - src); - - nir_load_const_instr *new_instr = - nir_load_const_instr_create(mem_ctx, - instr->dest.dest.ssa.num_components); - - new_instr->value = dest; - - nir_instr_insert_before(&instr->instr, &new_instr->instr); - - nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa, - nir_src_for_ssa(&new_instr->def)); - - nir_instr_remove(&instr->instr); - ralloc_free(instr); - - return true; -} - -static bool -constant_fold_deref(nir_instr *instr, nir_deref_var *deref) -{ - bool progress = false; - - for (nir_deref *tail = deref->deref.child; tail; tail = tail->child) { - if (tail->deref_type != nir_deref_type_array) - continue; - - nir_deref_array *arr = nir_deref_as_array(tail); - - if (arr->deref_array_type == nir_deref_array_type_indirect && - arr->indirect.is_ssa && - arr->indirect.ssa->parent_instr->type == nir_instr_type_load_const) { - nir_load_const_instr *indirect = - nir_instr_as_load_const(arr->indirect.ssa->parent_instr); - - arr->base_offset += indirect->value.u[0]; - - /* Clear out the source */ - nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL)); - - arr->deref_array_type = nir_deref_array_type_direct; - - progress = true; - } - } - - return progress; -} - -static bool -constant_fold_intrinsic_instr(nir_intrinsic_instr *instr) -{ - bool progress = false; - - unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; - for (unsigned i = 0; i < num_vars; i++) { - progress |= constant_fold_deref(&instr->instr, instr->variables[i]); - } - - return progress; -} - -static bool -constant_fold_tex_instr(nir_tex_instr *instr) -{ - if (instr->sampler) - return constant_fold_deref(&instr->instr, instr->sampler); - else - return false; -} - -static bool -constant_fold_block(nir_block *block, void *void_state) -{ - struct constant_fold_state *state = void_state; - - nir_foreach_instr_safe(block, instr) { - switch (instr->type) { - case nir_instr_type_alu: - state->progress |= constant_fold_alu_instr(nir_instr_as_alu(instr), - state->mem_ctx); - break; - case nir_instr_type_intrinsic: - state->progress |= - constant_fold_intrinsic_instr(nir_instr_as_intrinsic(instr)); - break; - case nir_instr_type_tex: - state->progress |= constant_fold_tex_instr(nir_instr_as_tex(instr)); - break; - default: - /* Don't know how to constant fold */ - break; - } - } - - return true; -} - -static bool -nir_opt_constant_folding_impl(nir_function_impl *impl) -{ - struct constant_fold_state state; - - state.mem_ctx = ralloc_parent(impl); - state.impl = impl; - state.progress = false; - - nir_foreach_block(impl, constant_fold_block, &state); - - if (state.progress) - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - - return state.progress; -} - -bool -nir_opt_constant_folding(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(shader, function) { - if (function->impl) - progress |= nir_opt_constant_folding_impl(function->impl); - } - - return progress; -} diff --git a/src/glsl/nir/nir_opt_copy_propagate.c b/src/glsl/nir/nir_opt_copy_propagate.c deleted file mode 100644 index d99f78ddb36..00000000000 --- a/src/glsl/nir/nir_opt_copy_propagate.c +++ /dev/null @@ -1,290 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software 
without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir.h" -#include
-
-/**
- * SSA-based copy propagation
- */
-
-static bool is_move(nir_alu_instr *instr)
-{
-   if (instr->op != nir_op_fmov &&
-       instr->op != nir_op_imov)
-      return false;
-
-   if (instr->dest.saturate)
-      return false;
-
-   /* we handle modifiers in a separate pass */
-
-   if (instr->src[0].abs || instr->src[0].negate)
-      return false;
-
-   if (!instr->src[0].src.is_ssa)
-      return false;
-
-   return true;
-
-}
-
-static bool is_vec(nir_alu_instr *instr)
-{
-   for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
-      if (!instr->src[i].src.is_ssa)
-         return false;
-
-      /* we handle modifiers in a separate pass */
-      if (instr->src[i].abs || instr->src[i].negate)
-         return false;
-   }
-
-   return instr->op == nir_op_vec2 ||
-          instr->op == nir_op_vec3 ||
-          instr->op == nir_op_vec4;
-}
-
-static bool
-is_swizzleless_move(nir_alu_instr *instr)
-{
-   if (is_move(instr)) {
-      for (unsigned i = 0; i < 4; i++) {
-         if (!((instr->dest.write_mask >> i) & 1))
-            break;
-         if (instr->src[0].swizzle[i] != i)
-            return false;
-      }
-      return true;
-   } else if (is_vec(instr)) {
-      nir_ssa_def *def = NULL;
-      for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
-         if (instr->src[i].swizzle[0] != i)
-            return false;
-
-         if (def == NULL) {
-            def = instr->src[i].src.ssa;
-         } else if (instr->src[i].src.ssa != def) {
-            return false;
-         }
-      }
-      return true;
-   } else {
-      return false;
-   }
-}
-
-static bool
-copy_prop_src(nir_src *src, nir_instr *parent_instr, nir_if *parent_if)
-{
-   if (!src->is_ssa) {
-      if (src->reg.indirect)
-         return copy_prop_src(src->reg.indirect, parent_instr, parent_if);
-      return false;
-   }
-
-   nir_instr *src_instr = src->ssa->parent_instr;
-   if (src_instr->type != nir_instr_type_alu)
-      return false;
-
-   nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr);
-   if (!is_swizzleless_move(alu_instr))
-      return false;
-
-   /* Don't let copy propagation land us with a phi that has more
-    * components in its source than it has in its destination. That badly
-    * messes up out-of-ssa.
- */ - if (parent_instr && parent_instr->type == nir_instr_type_phi) { - nir_phi_instr *phi = nir_instr_as_phi(parent_instr); - assert(phi->dest.is_ssa); - if (phi->dest.ssa.num_components != - alu_instr->src[0].src.ssa->num_components) - return false; - } - - if (parent_instr) { - nir_instr_rewrite_src(parent_instr, src, - nir_src_for_ssa(alu_instr->src[0].src.ssa)); - } else { - assert(src == &parent_if->condition); - nir_if_rewrite_condition(parent_if, - nir_src_for_ssa(alu_instr->src[0].src.ssa)); - } - - return true; -} - -static bool -copy_prop_alu_src(nir_alu_instr *parent_alu_instr, unsigned index) -{ - nir_alu_src *src = &parent_alu_instr->src[index]; - if (!src->src.is_ssa) { - if (src->src.reg.indirect) - return copy_prop_src(src->src.reg.indirect, &parent_alu_instr->instr, - NULL); - return false; - } - - nir_instr *src_instr = src->src.ssa->parent_instr; - if (src_instr->type != nir_instr_type_alu) - return false; - - nir_alu_instr *alu_instr = nir_instr_as_alu(src_instr); - if (!is_move(alu_instr) && !is_vec(alu_instr)) - return false; - - nir_ssa_def *def; - unsigned new_swizzle[4] = {0, 0, 0, 0}; - - if (alu_instr->op == nir_op_fmov || - alu_instr->op == nir_op_imov) { - for (unsigned i = 0; i < 4; i++) - new_swizzle[i] = alu_instr->src[0].swizzle[src->swizzle[i]]; - def = alu_instr->src[0].src.ssa; - } else { - def = NULL; - - for (unsigned i = 0; i < 4; i++) { - if (!nir_alu_instr_channel_used(parent_alu_instr, index, i)) - continue; - - nir_ssa_def *new_def = alu_instr->src[src->swizzle[i]].src.ssa; - if (def == NULL) - def = new_def; - else { - if (def != new_def) - return false; - } - new_swizzle[i] = alu_instr->src[src->swizzle[i]].swizzle[0]; - } - } - - for (unsigned i = 0; i < 4; i++) - src->swizzle[i] = new_swizzle[i]; - - nir_instr_rewrite_src(&parent_alu_instr->instr, &src->src, - nir_src_for_ssa(def)); - - return true; -} - -typedef struct { - nir_instr *parent_instr; - bool progress; -} copy_prop_state; - -static bool -copy_prop_src_cb(nir_src *src, void *_state) -{ - copy_prop_state *state = (copy_prop_state *) _state; - while (copy_prop_src(src, state->parent_instr, NULL)) - state->progress = true; - - return true; -} - -static bool -copy_prop_instr(nir_instr *instr) -{ - if (instr->type == nir_instr_type_alu) { - nir_alu_instr *alu_instr = nir_instr_as_alu(instr); - bool progress = false; - - for (unsigned i = 0; i < nir_op_infos[alu_instr->op].num_inputs; i++) - while (copy_prop_alu_src(alu_instr, i)) - progress = true; - - if (!alu_instr->dest.dest.is_ssa && alu_instr->dest.dest.reg.indirect) - while (copy_prop_src(alu_instr->dest.dest.reg.indirect, instr, NULL)) - progress = true; - - return progress; - } - - copy_prop_state state; - state.parent_instr = instr; - state.progress = false; - nir_foreach_src(instr, copy_prop_src_cb, &state); - - return state.progress; -} - -static bool -copy_prop_if(nir_if *if_stmt) -{ - return copy_prop_src(&if_stmt->condition, NULL, if_stmt); -} - -static bool -copy_prop_block(nir_block *block, void *_state) -{ - bool *progress = (bool *) _state; - - nir_foreach_instr(block, instr) { - if (copy_prop_instr(instr)) - *progress = true; - } - - if (block->cf_node.node.next != NULL && /* check that we aren't the end node */ - !nir_cf_node_is_last(&block->cf_node) && - nir_cf_node_next(&block->cf_node)->type == nir_cf_node_if) { - nir_if *if_stmt = nir_cf_node_as_if(nir_cf_node_next(&block->cf_node)); - if (copy_prop_if(if_stmt)) - *progress = true; - } - - return true; -} - -static bool -nir_copy_prop_impl(nir_function_impl 
*impl) -{ - bool progress = false; - - nir_foreach_block(impl, copy_prop_block, &progress); - - if (progress) { - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - } - - return progress; -} - -bool -nir_copy_prop(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(shader, function) { - if (function->impl && nir_copy_prop_impl(function->impl)) - progress = true; - } - - return progress; -} diff --git a/src/glsl/nir/nir_opt_cse.c b/src/glsl/nir/nir_opt_cse.c deleted file mode 100644 index 364fb023dce..00000000000 --- a/src/glsl/nir/nir_opt_cse.c +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir_instr_set.h" - -/* - * Implements common subexpression elimination - */ - -/* - * Visits and CSE's the given block and all its descendants in the dominance - * tree recursively. Note that the instr_set is guaranteed to only ever - * contain instructions that dominate the current block. 
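- * This invariant holds because cse_block() adds each block's instructions
- * to the set on the way down the dominance tree and removes them again once
- * all of the block's dominance children have been visited.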
- */ - -static bool -cse_block(nir_block *block, struct set *instr_set) -{ - bool progress = false; - - nir_foreach_instr_safe(block, instr) { - if (nir_instr_set_add_or_rewrite(instr_set, instr)) { - progress = true; - nir_instr_remove(instr); - } - } - - for (unsigned i = 0; i < block->num_dom_children; i++) { - nir_block *child = block->dom_children[i]; - progress |= cse_block(child, instr_set); - } - - nir_foreach_instr(block, instr) - nir_instr_set_remove(instr_set, instr); - - return progress; -} - -static bool -nir_opt_cse_impl(nir_function_impl *impl) -{ - struct set *instr_set = nir_instr_set_create(NULL); - - nir_metadata_require(impl, nir_metadata_dominance); - - bool progress = cse_block(nir_start_block(impl), instr_set); - - if (progress) - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - - nir_instr_set_destroy(instr_set); - return progress; -} - -bool -nir_opt_cse(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(shader, function) { - if (function->impl) - progress |= nir_opt_cse_impl(function->impl); - } - - return progress; -} - diff --git a/src/glsl/nir/nir_opt_dce.c b/src/glsl/nir/nir_opt_dce.c deleted file mode 100644 index 32436c18b60..00000000000 --- a/src/glsl/nir/nir_opt_dce.c +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- *
- * Authors:
- *    Connor Abbott (cwabbott0@gmail.com)
- *
- */
-
-#include "nir.h"
-
-/* SSA-based mark-and-sweep dead code elimination */
-
-typedef struct {
-   struct exec_node node;
-   nir_instr *instr;
-} worklist_elem;
-
-static void
-worklist_push(struct exec_list *worklist, nir_instr *instr)
-{
-   worklist_elem *elem = ralloc(worklist, worklist_elem);
-   elem->instr = instr;
-   instr->pass_flags = 1;
-   exec_list_push_tail(worklist, &elem->node);
-}
-
-static nir_instr *
-worklist_pop(struct exec_list *worklist)
-{
-   struct exec_node *node = exec_list_pop_head(worklist);
-   worklist_elem *elem = exec_node_data(worklist_elem, node, node);
-   return elem->instr;
-}
-
-static bool
-mark_live_cb(nir_src *src, void *_state)
-{
-   struct exec_list *worklist = (struct exec_list *) _state;
-
-   if (src->is_ssa && !src->ssa->parent_instr->pass_flags) {
-      worklist_push(worklist, src->ssa->parent_instr);
-   }
-
-   return true;
-}
-
-static void
-init_instr(nir_instr *instr, struct exec_list *worklist)
-{
-   nir_alu_instr *alu_instr;
-   nir_intrinsic_instr *intrin_instr;
-   nir_tex_instr *tex_instr;
-
-   /* We use the pass_flags to store the live/dead information. In DCE, we
-    * just treat it as a zero/non-zero boolean for whether or not the
-    * instruction is live.
-    */
-   instr->pass_flags = 0;
-
-   switch (instr->type) {
-   case nir_instr_type_call:
-   case nir_instr_type_jump:
-      worklist_push(worklist, instr);
-      break;
-
-   case nir_instr_type_alu:
-      alu_instr = nir_instr_as_alu(instr);
-      if (!alu_instr->dest.dest.is_ssa)
-         worklist_push(worklist, instr);
-      break;
-
-   case nir_instr_type_intrinsic:
-      intrin_instr = nir_instr_as_intrinsic(instr);
-      if (nir_intrinsic_infos[intrin_instr->intrinsic].flags &
-          NIR_INTRINSIC_CAN_ELIMINATE) {
-         if (nir_intrinsic_infos[intrin_instr->intrinsic].has_dest &&
-             !intrin_instr->dest.is_ssa) {
-            worklist_push(worklist, instr);
-         }
-      } else {
-         worklist_push(worklist, instr);
-      }
-      break;
-
-   case nir_instr_type_tex:
-      tex_instr = nir_instr_as_tex(instr);
-      if (!tex_instr->dest.is_ssa)
-         worklist_push(worklist, instr);
-      break;
-
-   default:
-      break;
-   }
-}
-
-static bool
-init_block_cb(nir_block *block, void *_state)
-{
-   struct exec_list *worklist = (struct exec_list *) _state;
-
-   nir_foreach_instr(block, instr)
-      init_instr(instr, worklist);
-
-   nir_if *following_if = nir_block_get_following_if(block);
-   if (following_if) {
-      if (following_if->condition.is_ssa &&
-          !following_if->condition.ssa->parent_instr->pass_flags)
-         worklist_push(worklist, following_if->condition.ssa->parent_instr);
-   }
-
-   return true;
-}
-
-static bool
-delete_block_cb(nir_block *block, void *_state)
-{
-   bool *progress = (bool *) _state;
-
-   nir_foreach_instr_safe(block, instr) {
-      if (!instr->pass_flags) {
-         nir_instr_remove(instr);
-         *progress = true;
-      }
-   }
-
-   return true;
-}
-
-static bool
-nir_opt_dce_impl(nir_function_impl *impl)
-{
-   struct exec_list *worklist = ralloc(NULL, struct exec_list);
-   exec_list_make_empty(worklist);
-
-   nir_foreach_block(impl, init_block_cb, worklist);
-
-   while (!exec_list_is_empty(worklist)) {
-      nir_instr *instr = worklist_pop(worklist);
-      nir_foreach_src(instr, mark_live_cb, worklist);
-   }
-
-   ralloc_free(worklist);
-
-   bool progress = false;
-   nir_foreach_block(impl, delete_block_cb, &progress);
-
-   if (progress)
-      nir_metadata_preserve(impl, nir_metadata_block_index |
-                                  nir_metadata_dominance);
-
-   return progress;
-}
-
-bool
-nir_opt_dce(nir_shader *shader)
-{
-   bool progress = false;
-   nir_foreach_function(shader, function) {
-      if (function->impl &&
-          nir_opt_dce_impl(function->impl))
-         progress = true;
-   }
-
-   return progress;
-}
diff --git a/src/glsl/nir/nir_opt_dead_cf.c b/src/glsl/nir/nir_opt_dead_cf.c
deleted file mode 100644
index 4cc6798702b..00000000000
--- a/src/glsl/nir/nir_opt_dead_cf.c
+++ /dev/null
@@ -1,358 +0,0 @@
-/*
- * Copyright © 2014 Connor Abbott
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Connor Abbott (cwabbott0@gmail.com)
- *
- */
-
-#include "nir.h"
-#include "nir_control_flow.h"
-
-/*
- * This file implements an optimization that deletes statically
- * unreachable/dead code. In NIR, one way this can happen is if an if
- * statement has a constant condition:
- *
- * if (true) {
- *    ...
- * }
- *
- * We delete the if statement and paste the contents of the always-executed
- * branch into the surrounding control flow, possibly removing more code if
- * the branch had a jump at the end.
- *
- * Another way is that control flow can end in a jump so that code after it
- * never gets executed. In particular, this can happen after optimizing
- * something like:
- *
- * if (true) {
- *    ...
- *    break;
- * }
- * ...
- *
- * We also consider the case where both branches of an if end in a jump, e.g.:
- *
- * if (...) {
- *    break;
- * } else {
- *    continue;
- * }
- * ...
- *
- * Finally, we also handle removing useless loops, i.e. loops with no side
- * effects and without any definitions that are used elsewhere. This case is a
- * little different from the first two in that the code is actually run (it
- * just never does anything), but there are similar issues with needing to
- * be careful with restarting after deleting the cf_node (see dead_cf_list())
- * so this is a convenient place to remove them.
- */
-
-static void
-remove_after_cf_node(nir_cf_node *node)
-{
-   nir_cf_node *end = node;
-   while (!nir_cf_node_is_last(end))
-      end = nir_cf_node_next(end);
-
-   nir_cf_list list;
-   nir_cf_extract(&list, nir_after_cf_node(node), nir_after_cf_node(end));
-   nir_cf_delete(&list);
-}
-
-static void
-opt_constant_if(nir_if *if_stmt, bool condition)
-{
-   /* First, we need to remove any phi nodes after the if by rewriting uses to
-    * point to the correct source.
-    */
-   nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&if_stmt->cf_node));
-   nir_block *last_block =
-      nir_cf_node_as_block(condition ? nir_if_last_then_node(if_stmt)
-                                     : nir_if_last_else_node(if_stmt));
-
-   nir_foreach_instr_safe(after, instr) {
-      if (instr->type != nir_instr_type_phi)
-         break;
-
-      nir_phi_instr *phi = nir_instr_as_phi(instr);
-      nir_ssa_def *def = NULL;
-      nir_foreach_phi_src(phi, phi_src) {
-         if (phi_src->pred != last_block)
-            continue;
-
-         assert(phi_src->src.is_ssa);
-         def = phi_src->src.ssa;
-      }
-
-      assert(def);
-      assert(phi->dest.is_ssa);
-      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
-      nir_instr_remove(instr);
-   }
-
-   /* The control flow list we're about to paste in may include a jump at the
-    * end, and in that case we have to delete the rest of the control flow
-    * list after the if since it's unreachable and the validator will balk if
-    * we don't.
-    */
-
-   if (!exec_list_is_empty(&last_block->instr_list)) {
-      nir_instr *last_instr = nir_block_last_instr(last_block);
-      if (last_instr->type == nir_instr_type_jump)
-         remove_after_cf_node(&if_stmt->cf_node);
-   }
-
-   /* Finally, actually paste in the then or else branch and delete the if. */
-   struct exec_list *cf_list = condition ? &if_stmt->then_list
-                                         : &if_stmt->else_list;
-
-   nir_cf_list list;
-   nir_cf_extract(&list, nir_before_cf_list(cf_list),
-                  nir_after_cf_list(cf_list));
-   nir_cf_reinsert(&list, nir_after_cf_node(&if_stmt->cf_node));
-   nir_cf_node_remove(&if_stmt->cf_node);
-}
-
-static bool
-block_has_no_side_effects(nir_block *block, void *state)
-{
-   (void) state;
-
-   nir_foreach_instr(block, instr) {
-      if (instr->type == nir_instr_type_call)
-         return false;
-
-      /* Return instructions can cause us to skip over other side-effecting
-       * instructions after the loop, so consider them to have side effects
-       * here.
-       */
-
-      if (instr->type == nir_instr_type_jump &&
-          nir_instr_as_jump(instr)->type == nir_jump_return)
-         return false;
-
-      if (instr->type != nir_instr_type_intrinsic)
-         continue;
-
-      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
-      if (!(nir_intrinsic_infos[intrin->intrinsic].flags &
-            NIR_INTRINSIC_CAN_ELIMINATE))
-         return false;
-   }
-
-   return true;
-}
-
-static bool
-def_not_live_out(nir_ssa_def *def, void *state)
-{
-   nir_block *after = state;
-
-   return !BITSET_TEST(after->live_in, def->live_index);
-}
-
-/*
- * Test if a loop is dead. A loop is dead if:
- *
- * 1) It has no side effects (i.e. intrinsics which could possibly affect the
- * state of the program aside from producing an SSA value, indicated by a lack
- * of NIR_INTRINSIC_CAN_ELIMINATE).
- *
- * 2) It has no phi nodes after it, since those indicate values inside the
- * loop being used after the loop.
- *
- * 3) If there are no phi nodes after the loop, then the only way a value
- * defined inside the loop can be used outside the loop is if its definition
- * dominates the block after the loop. If none of the definitions that
- * dominate the loop exit are used outside the loop, then the loop is dead
- * and it can be deleted.
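- *
- * For illustration (a sketch, assuming a value x that is never read
- * after the loop), a loop such as:
- *
- *    loop {
- *       x = x + 1;
- *       if (...) break;
- *    }
- *
- * meets all three criteria: it contains no side-effecting instructions,
- * no phi after the loop consumes a value from inside it, and no
- * definition dominating the loop exit is live out.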
- */ - -static bool -loop_is_dead(nir_loop *loop) -{ - nir_block *before = nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); - nir_block *after = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); - - if (!exec_list_is_empty(&after->instr_list) && - nir_block_first_instr(after)->type == nir_instr_type_phi) - return false; - - if (!nir_foreach_block_in_cf_node(&loop->cf_node, block_has_no_side_effects, - NULL)) - return false; - - nir_function_impl *impl = nir_cf_node_get_function(&loop->cf_node); - nir_metadata_require(impl, nir_metadata_live_ssa_defs | - nir_metadata_dominance); - - for (nir_block *cur = after->imm_dom; cur != before; cur = cur->imm_dom) { - nir_foreach_instr(cur, instr) { - if (!nir_foreach_ssa_def(instr, def_not_live_out, after)) - return false; - } - } - - return true; -} - -static bool -dead_cf_block(nir_block *block) -{ - nir_if *following_if = nir_block_get_following_if(block); - if (following_if) { - nir_const_value *const_value = - nir_src_as_const_value(following_if->condition); - - if (!const_value) - return false; - - opt_constant_if(following_if, const_value->u[0] != 0); - return true; - } - - nir_loop *following_loop = nir_block_get_following_loop(block); - if (!following_loop) - return false; - - if (!loop_is_dead(following_loop)) - return false; - - nir_cf_node_remove(&following_loop->cf_node); - return true; -} - -static bool -ends_in_jump(nir_block *block) -{ - if (exec_list_is_empty(&block->instr_list)) - return false; - - nir_instr *instr = nir_block_last_instr(block); - return instr->type == nir_instr_type_jump; -} - -static bool -dead_cf_list(struct exec_list *list, bool *list_ends_in_jump) -{ - bool progress = false; - *list_ends_in_jump = false; - - nir_cf_node *prev = NULL; - - foreach_list_typed(nir_cf_node, cur, node, list) { - switch (cur->type) { - case nir_cf_node_block: { - nir_block *block = nir_cf_node_as_block(cur); - if (dead_cf_block(block)) { - /* We just deleted the if or loop after this block, so we may have - * deleted the block before or after it -- which one is an - * implementation detail. Therefore, to recover the place we were - * at, we have to use the previous cf_node. 
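-             * For instance, if the list was { block_0, if, block_1 } and
-             * the if was folded away, block_0 and block_1 may have been
-             * merged with the pasted contents, so "cur" can no longer be
-             * trusted; we therefore restart from the node after "prev", or
-             * from the head of the list if there was no previous node.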
-             */
-
-            if (prev) {
-               cur = nir_cf_node_next(prev);
-            } else {
-               cur = exec_node_data(nir_cf_node, exec_list_get_head(list),
-                                    node);
-            }
-
-            block = nir_cf_node_as_block(cur);
-
-            progress = true;
-         }
-
-         if (ends_in_jump(block)) {
-            *list_ends_in_jump = true;
-
-            if (!exec_node_is_tail_sentinel(cur->node.next)) {
-               remove_after_cf_node(cur);
-               return true;
-            }
-         }
-
-         break;
-      }
-
-      case nir_cf_node_if: {
-         nir_if *if_stmt = nir_cf_node_as_if(cur);
-         bool then_ends_in_jump, else_ends_in_jump;
-         progress |= dead_cf_list(&if_stmt->then_list, &then_ends_in_jump);
-         progress |= dead_cf_list(&if_stmt->else_list, &else_ends_in_jump);
-
-         if (then_ends_in_jump && else_ends_in_jump) {
-            *list_ends_in_jump = true;
-            nir_block *next = nir_cf_node_as_block(nir_cf_node_next(cur));
-            if (!exec_list_is_empty(&next->instr_list) ||
-                !exec_node_is_tail_sentinel(next->cf_node.node.next)) {
-               remove_after_cf_node(cur);
-               return true;
-            }
-         }
-
-         break;
-      }
-
-      case nir_cf_node_loop: {
-         nir_loop *loop = nir_cf_node_as_loop(cur);
-         bool dummy;
-         progress |= dead_cf_list(&loop->body, &dummy);
-
-         break;
-      }
-
-      default:
-         unreachable("unknown cf node type");
-      }
-
-      prev = cur;
-   }
-
-   return progress;
-}
-
-static bool
-opt_dead_cf_impl(nir_function_impl *impl)
-{
-   bool dummy;
-   bool progress = dead_cf_list(&impl->body, &dummy);
-
-   if (progress)
-      nir_metadata_preserve(impl, nir_metadata_none);
-
-   return progress;
-}
-
-bool
-nir_opt_dead_cf(nir_shader *shader)
-{
-   bool progress = false;
-
-   nir_foreach_function(shader, function)
-      if (function->impl)
-         progress |= opt_dead_cf_impl(function->impl);
-
-   return progress;
-}
diff --git a/src/glsl/nir/nir_opt_gcm.c b/src/glsl/nir/nir_opt_gcm.c
deleted file mode 100644
index a8779ce5b84..00000000000
--- a/src/glsl/nir/nir_opt_gcm.c
+++ /dev/null
@@ -1,494 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Jason Ekstrand (jason@jlekstrand.net)
- *
- */
-
-#include "nir.h"
-
-/*
- * Implements Global Code Motion. A description of GCM can be found in
- * "Global Code Motion; Global Value Numbering" by Cliff Click.
- * Unfortunately, the algorithm presented in the paper is broken in a
- * number of ways. The algorithm used here differs substantially from the
- * one in the paper but it is, in my opinion, much easier to read and
- * verify correctness.
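- *
- * As a rough sketch of the intended effect (not an example taken from
- * the paper), an invariant computation inside a loop:
- *
- *    loop {
- *       ssa_2 = fmul ssa_0, ssa_1
- *       ...
- *    }
- *
- * can be hoisted out of the loop, while a value consumed on only one
- * side of an if can be sunk into that branch so it is only computed
- * when it is actually used.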
- */ - -struct gcm_block_info { - /* Number of loops this block is inside */ - unsigned loop_depth; - - /* The last instruction inserted into this block. This is used as we - * traverse the instructions and insert them back into the program to - * put them in the right order. - */ - nir_instr *last_instr; -}; - -/* Flags used in the instr->pass_flags field for various instruction states */ -enum { - GCM_INSTR_PINNED = (1 << 0), - GCM_INSTR_SCHEDULED_EARLY = (1 << 1), - GCM_INSTR_SCHEDULED_LATE = (1 << 2), - GCM_INSTR_PLACED = (1 << 3), -}; - -struct gcm_state { - nir_function_impl *impl; - nir_instr *instr; - - /* The list of non-pinned instructions. As we do the late scheduling, - * we pull non-pinned instructions out of their blocks and place them in - * this list. This saves us from having linked-list problems when we go - * to put instructions back in their blocks. - */ - struct exec_list instrs; - - struct gcm_block_info *blocks; -}; - -/* Recursively walks the CFG and builds the block_info structure */ -static void -gcm_build_block_info(struct exec_list *cf_list, struct gcm_state *state, - unsigned loop_depth) -{ - foreach_list_typed(nir_cf_node, node, node, cf_list) { - switch (node->type) { - case nir_cf_node_block: { - nir_block *block = nir_cf_node_as_block(node); - state->blocks[block->index].loop_depth = loop_depth; - break; - } - case nir_cf_node_if: { - nir_if *if_stmt = nir_cf_node_as_if(node); - gcm_build_block_info(&if_stmt->then_list, state, loop_depth); - gcm_build_block_info(&if_stmt->else_list, state, loop_depth); - break; - } - case nir_cf_node_loop: { - nir_loop *loop = nir_cf_node_as_loop(node); - gcm_build_block_info(&loop->body, state, loop_depth + 1); - break; - } - default: - unreachable("Invalid CF node type"); - } - } -} - -/* Walks the instruction list and marks immovable instructions as pinned - * - * This function also serves to initialize the instr->pass_flags field. - * After this is completed, all instructions' pass_flags fields will be set - * to either GCM_INSTR_PINNED or 0. 
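- *
- * Derivatives are pinned, for example, because fddx/fddy are computed
- * from neighboring fragments: hoisting or sinking one into different
- * control flow could change which fragments participate and therefore
- * change its value.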
- */
-static bool
-gcm_pin_instructions_block(nir_block *block, void *void_state)
-{
-   struct gcm_state *state = void_state;
-
-   nir_foreach_instr_safe(block, instr) {
-      switch (instr->type) {
-      case nir_instr_type_alu:
-         switch (nir_instr_as_alu(instr)->op) {
-         case nir_op_fddx:
-         case nir_op_fddy:
-         case nir_op_fddx_fine:
-         case nir_op_fddy_fine:
-         case nir_op_fddx_coarse:
-         case nir_op_fddy_coarse:
-            /* These can only go in uniform control flow; pin them for now */
-            instr->pass_flags = GCM_INSTR_PINNED;
-            break;
-
-         default:
-            instr->pass_flags = 0;
-            break;
-         }
-         break;
-
-      case nir_instr_type_tex:
-         switch (nir_instr_as_tex(instr)->op) {
-         case nir_texop_tex:
-         case nir_texop_txb:
-         case nir_texop_lod:
-            /* These take implicit derivatives so they need to be pinned */
-            instr->pass_flags = GCM_INSTR_PINNED;
-            break;
-
-         default:
-            instr->pass_flags = 0;
-            break;
-         }
-         break;
-
-      case nir_instr_type_load_const:
-         instr->pass_flags = 0;
-         break;
-
-      case nir_instr_type_intrinsic: {
-         const nir_intrinsic_info *info =
-            &nir_intrinsic_infos[nir_instr_as_intrinsic(instr)->intrinsic];
-
-         if ((info->flags & NIR_INTRINSIC_CAN_ELIMINATE) &&
-             (info->flags & NIR_INTRINSIC_CAN_REORDER)) {
-            instr->pass_flags = 0;
-         } else {
-            instr->pass_flags = GCM_INSTR_PINNED;
-         }
-         break;
-      }
-
-      case nir_instr_type_jump:
-      case nir_instr_type_ssa_undef:
-      case nir_instr_type_phi:
-         instr->pass_flags = GCM_INSTR_PINNED;
-         break;
-
-      default:
-         unreachable("Invalid instruction type in GCM");
-      }
-
-      if (!(instr->pass_flags & GCM_INSTR_PINNED)) {
-         /* If this is an unpinned instruction, go ahead and pull it out of
-          * the program and put it on the instrs list. This has a couple
-          * of benefits. First, it makes the scheduling algorithm more
-          * efficient because we can avoid walking over basic blocks and
-          * pinned instructions. Second, it keeps us from causing linked
-          * list confusion when we're trying to put everything in its
-          * proper place at the end of the pass.
-          *
-          * Note that we don't use nir_instr_remove here because that also
-          * cleans up uses and defs and we want to keep that information.
-          */
-         exec_node_remove(&instr->node);
-         exec_list_push_tail(&state->instrs, &instr->node);
-      }
-   }
-
-   return true;
-}
-
-static void
-gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state);
-
-/** Update an instruction's schedule for the given source
- *
- * This function is called iteratively as we walk the sources of an
- * instruction. It ensures that the given source instruction has been
- * scheduled and then updates this instruction's block if the source
- * instruction is lower down the tree.
- */
-static bool
-gcm_schedule_early_src(nir_src *src, void *void_state)
-{
-   struct gcm_state *state = void_state;
-   nir_instr *instr = state->instr;
-
-   assert(src->is_ssa);
-
-   gcm_schedule_early_instr(src->ssa->parent_instr, void_state);
-
-   /* While the index isn't a proper dominance depth, it does have the
-    * property that if A dominates B then A->index <= B->index. Since we
-    * know that this instruction must have been dominated by all of its
-    * sources at some point (even if it's gone through value-numbering),
-    * all of the sources must lie on the same branch of the dominance tree.
-    * Therefore, we can just go ahead and compare indices.
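-    *
-    * For example (a sketch of this invariant): if a source ends up in a
-    * block with index 5 while this instruction currently sits in a block
-    * with index 2, the source's block must be deeper in the dominance
-    * tree, so the instruction is moved down to the source's block.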
-    */
-   if (instr->block->index < src->ssa->parent_instr->block->index)
-      instr->block = src->ssa->parent_instr->block;
-
-   /* We need to restore the state instruction because it may have been
-    * changed through the gcm_schedule_early_instr call above. Since we
-    * may still be iterating through sources and future calls to
-    * gcm_schedule_early_src for the same instruction will still need it.
-    */
-   state->instr = instr;
-
-   return true;
-}
-
-/** Schedules an instruction early
- *
- * This function performs a recursive depth-first search starting at the
- * given instruction and proceeding through the sources to schedule
- * instructions as early as they can possibly go in the dominance tree.
- * The instructions are "scheduled" by updating their instr->block field.
- */
-static void
-gcm_schedule_early_instr(nir_instr *instr, struct gcm_state *state)
-{
-   if (instr->pass_flags & GCM_INSTR_SCHEDULED_EARLY)
-      return;
-
-   instr->pass_flags |= GCM_INSTR_SCHEDULED_EARLY;
-
-   /* Pinned instructions are already scheduled so we don't need to do
-    * anything. Also, bailing here keeps us from ever following the
-    * sources of phi nodes which can be back-edges.
-    */
-   if (instr->pass_flags & GCM_INSTR_PINNED)
-      return;
-
-   /* Start with the instruction at the top. As we iterate over the
-    * sources, it will get moved down as needed.
-    */
-   instr->block = nir_start_block(state->impl);
-   state->instr = instr;
-
-   nir_foreach_src(instr, gcm_schedule_early_src, state);
-}
-
-static void
-gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state);
-
-/** Schedules the instruction associated with the given SSA def late
- *
- * This function works by first walking all of the uses of the given SSA
- * definition, ensuring that they are scheduled, and then computing the LCA
- * (least common ancestor) of its uses. It then schedules this instruction
- * as close to the LCA as possible while trying to stay out of loops.
- */
-static bool
-gcm_schedule_late_def(nir_ssa_def *def, void *void_state)
-{
-   struct gcm_state *state = void_state;
-
-   nir_block *lca = NULL;
-
-   nir_foreach_use(def, use_src) {
-      nir_instr *use_instr = use_src->parent_instr;
-
-      gcm_schedule_late_instr(use_instr, state);
-
-      /* Phi instructions are a bit special. SSA definitions don't have to
-       * dominate the sources of the phi nodes that use them; instead, they
-       * have to dominate the predecessor block corresponding to the phi
-       * source. We handle this by looking through the sources, finding
-       * any that are using this SSA def, and using those blocks instead
-       * of the one the phi lives in.
-       */
-      if (use_instr->type == nir_instr_type_phi) {
-         nir_phi_instr *phi = nir_instr_as_phi(use_instr);
-
-         nir_foreach_phi_src(phi, phi_src) {
-            if (phi_src->src.ssa == def)
-               lca = nir_dominance_lca(lca, phi_src->pred);
-         }
-      } else {
-         lca = nir_dominance_lca(lca, use_instr->block);
-      }
-   }
-
-   nir_foreach_if_use(def, use_src) {
-      nir_if *if_stmt = use_src->parent_if;
-
-      /* For if statements, we consider the block to be the one immediately
-       * preceding the if CF node.
-       */
-      nir_block *pred_block =
-         nir_cf_node_as_block(nir_cf_node_prev(&if_stmt->cf_node));
-
-      lca = nir_dominance_lca(lca, pred_block);
-   }
-
-   /* Some instructions may never be used. We'll just leave them scheduled
-    * early and let dead code clean them up.
-    */
-   if (lca == NULL)
-      return true;
-
-   /* We now have the LCA of all of the uses. If our invariants hold,
-    * this is dominated by the block that we chose when scheduling early.
-    * We now walk up the dominance tree and pick the lowest block that is
-    * as far outside loops as we can get.
-    */
-   nir_block *best = lca;
-   while (lca != def->parent_instr->block) {
-      assert(lca);
-      if (state->blocks[lca->index].loop_depth <
-          state->blocks[best->index].loop_depth)
-         best = lca;
-      lca = lca->imm_dom;
-   }
-   def->parent_instr->block = best;
-
-   return true;
-}
-
-/** Schedules an instruction late
- *
- * This function performs a depth-first search starting at the given
- * instruction and proceeding through its uses to schedule instructions as
- * late as they can reasonably go in the dominance tree. The instructions
- * are "scheduled" by updating their instr->block field.
- *
- * The name of this function is actually a bit of a misnomer as it doesn't
- * schedule them "as late as possible" as the paper implies. Instead, it
- * first finds the latest possible place it can schedule the instruction and
- * then possibly schedules it earlier than that. The actual location is as
- * far down the tree as we can go while trying to stay out of loops.
- */
-static void
-gcm_schedule_late_instr(nir_instr *instr, struct gcm_state *state)
-{
-   if (instr->pass_flags & GCM_INSTR_SCHEDULED_LATE)
-      return;
-
-   instr->pass_flags |= GCM_INSTR_SCHEDULED_LATE;
-
-   /* Pinned instructions are already scheduled so we don't need to do
-    * anything. Also, bailing here keeps us from ever following phi nodes
-    * which can be back-edges.
-    */
-   if (instr->pass_flags & GCM_INSTR_PINNED)
-      return;
-
-   nir_foreach_ssa_def(instr, gcm_schedule_late_def, state);
-}
-
-static void
-gcm_place_instr(nir_instr *instr, struct gcm_state *state);
-
-static bool
-gcm_place_instr_def(nir_ssa_def *def, void *state)
-{
-   nir_foreach_use(def, use_src)
-      gcm_place_instr(use_src->parent_instr, state);
-
-   return false;
-}
-
-/** Places an instruction back into the program
- *
- * The earlier passes of GCM simply choose blocks for each instruction and
- * otherwise leave them alone. This pass actually places the instructions
- * into their chosen blocks.
- *
- * To do so, we use a standard post-order depth-first search linearization
- * algorithm. We walk over the uses of the given instruction and ensure
- * that they are placed and then place this instruction. Because we are
- * working on multiple blocks at a time, we keep track of the last inserted
- * instruction per-block in the state structure's block_info array. When
- * we insert an instruction in a block we insert it before the last
- * instruction inserted in that block rather than the last instruction
- * inserted globally.
- */
-static void
-gcm_place_instr(nir_instr *instr, struct gcm_state *state)
-{
-   if (instr->pass_flags & GCM_INSTR_PLACED)
-      return;
-
-   instr->pass_flags |= GCM_INSTR_PLACED;
-
-   /* Phi nodes are our one source of back-edges. Since right now we are
-    * only doing scheduling within blocks, we don't need to worry about
-    * them since they are always at the top. Just skip them completely.
-    */
-   if (instr->type == nir_instr_type_phi) {
-      assert(instr->pass_flags & GCM_INSTR_PINNED);
-      return;
-   }
-
-   nir_foreach_ssa_def(instr, gcm_place_instr_def, state);
-
-   if (instr->pass_flags & GCM_INSTR_PINNED) {
-      /* Pinned instructions have an implicit dependence on the pinned
-       * instructions that come after them in the block. Since the pinned
-       * instructions will naturally "chain" together, we only need to
-       * explicitly visit one of them.
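-       *
-       * In other words, with pinned instructions P1, P2, and P3 in one
-       * block, placing P1 only needs to explicitly visit P2; placing P2
-       * visits P3 in turn, so the entire chain ends up placed.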
- */ - for (nir_instr *after = nir_instr_next(instr); - after; - after = nir_instr_next(after)) { - if (after->pass_flags & GCM_INSTR_PINNED) { - gcm_place_instr(after, state); - break; - } - } - } - - struct gcm_block_info *block_info = &state->blocks[instr->block->index]; - if (!(instr->pass_flags & GCM_INSTR_PINNED)) { - exec_node_remove(&instr->node); - - if (block_info->last_instr) { - exec_node_insert_node_before(&block_info->last_instr->node, - &instr->node); - } else { - /* Schedule it at the end of the block */ - nir_instr *jump_instr = nir_block_last_instr(instr->block); - if (jump_instr && jump_instr->type == nir_instr_type_jump) { - exec_node_insert_node_before(&jump_instr->node, &instr->node); - } else { - exec_list_push_tail(&instr->block->instr_list, &instr->node); - } - } - } - - block_info->last_instr = instr; -} - -static void -opt_gcm_impl(nir_function_impl *impl) -{ - struct gcm_state state; - - state.impl = impl; - state.instr = NULL; - exec_list_make_empty(&state.instrs); - state.blocks = rzalloc_array(NULL, struct gcm_block_info, impl->num_blocks); - - nir_metadata_require(impl, nir_metadata_block_index | - nir_metadata_dominance); - - gcm_build_block_info(&impl->body, &state, 0); - nir_foreach_block(impl, gcm_pin_instructions_block, &state); - - foreach_list_typed(nir_instr, instr, node, &state.instrs) - gcm_schedule_early_instr(instr, &state); - - foreach_list_typed(nir_instr, instr, node, &state.instrs) - gcm_schedule_late_instr(instr, &state); - - while (!exec_list_is_empty(&state.instrs)) { - nir_instr *instr = exec_node_data(nir_instr, - state.instrs.tail_pred, node); - gcm_place_instr(instr, &state); - } - - ralloc_free(state.blocks); -} - -void -nir_opt_gcm(nir_shader *shader) -{ - nir_foreach_function(shader, function) { - if (function->impl) - opt_gcm_impl(function->impl); - } -} diff --git a/src/glsl/nir/nir_opt_global_to_local.c b/src/glsl/nir/nir_opt_global_to_local.c deleted file mode 100644 index bccb45b6237..00000000000 --- a/src/glsl/nir/nir_opt_global_to_local.c +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- *
- * Authors:
- *    Connor Abbott (cwabbott0@gmail.com)
- *
- */
-
-#include "nir.h"
-
-static bool
-global_to_local(nir_register *reg)
-{
-   nir_function_impl *impl = NULL;
-
-   assert(reg->is_global);
-
-   nir_foreach_def(reg, def_dest) {
-      nir_instr *instr = def_dest->reg.parent_instr;
-      nir_function_impl *instr_impl =
-         nir_cf_node_get_function(&instr->block->cf_node);
-      if (impl != NULL) {
-         if (impl != instr_impl)
-            return false;
-      } else {
-         impl = instr_impl;
-      }
-   }
-
-   nir_foreach_use(reg, use_src) {
-      nir_instr *instr = use_src->parent_instr;
-      nir_function_impl *instr_impl =
-         nir_cf_node_get_function(&instr->block->cf_node);
-      if (impl != NULL) {
-         if (impl != instr_impl)
-            return false;
-      } else {
-         impl = instr_impl;
-      }
-   }
-
-   nir_foreach_if_use(reg, use_src) {
-      nir_if *if_stmt = use_src->parent_if;
-      nir_function_impl *if_impl = nir_cf_node_get_function(&if_stmt->cf_node);
-      if (impl != NULL) {
-         if (impl != if_impl)
-            return false;
-      } else {
-         impl = if_impl;
-      }
-   }
-
-   if (impl == NULL) {
-      /* this register is never used/defined, delete it */
-      nir_reg_remove(reg);
-      return true;
-   }
-
-   /*
-    * if we've gotten to this point, the register is always used/defined in
-    * the same implementation so we can move it to be local to that
-    * implementation.
-    */
-
-   exec_node_remove(&reg->node);
-   exec_list_push_tail(&impl->registers, &reg->node);
-   reg->index = impl->reg_alloc++;
-   reg->is_global = false;
-   return true;
-}
-
-bool
-nir_opt_global_to_local(nir_shader *shader)
-{
-   bool progress = false;
-
-   foreach_list_typed_safe(nir_register, reg, node, &shader->registers) {
-      if (global_to_local(reg))
-         progress = true;
-   }
-
-   return progress;
-}
diff --git a/src/glsl/nir/nir_opt_peephole_select.c b/src/glsl/nir/nir_opt_peephole_select.c
deleted file mode 100644
index 0fc658df861..00000000000
--- a/src/glsl/nir/nir_opt_peephole_select.c
+++ /dev/null
@@ -1,256 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Jason Ekstrand (jason@jlekstrand.net)
- *
- */
-
-#include "nir.h"
-#include "nir_control_flow.h"
-
-/*
- * Implements a small peephole optimization that looks for
- *
- * if (cond) {
- *
- * } else {
- *
- * }
- * phi
- * ...
- * phi
- *
- * and replaces it with a series of selects. It can also handle the case
- * where, instead of being empty, the if may contain some move operations
- * whose only use is one of the following phi nodes.
This happens all the - * time when the SSA form comes from a conditional assignment with a - * swizzle. - */ - -struct peephole_select_state { - void *mem_ctx; - bool progress; -}; - -static bool -block_check_for_allowed_instrs(nir_block *block) -{ - nir_foreach_instr(block, instr) { - switch (instr->type) { - case nir_instr_type_intrinsic: { - nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); - - switch (intrin->intrinsic) { - case nir_intrinsic_load_var: - switch (intrin->variables[0]->var->data.mode) { - case nir_var_shader_in: - case nir_var_uniform: - break; - - default: - return false; - } - break; - - default: - return false; - } - - break; - } - - case nir_instr_type_load_const: - break; - - case nir_instr_type_alu: { - nir_alu_instr *mov = nir_instr_as_alu(instr); - switch (mov->op) { - case nir_op_fmov: - case nir_op_imov: - case nir_op_fneg: - case nir_op_ineg: - case nir_op_fabs: - case nir_op_iabs: - case nir_op_vec2: - case nir_op_vec3: - case nir_op_vec4: - /* It must be a move-like operation. */ - break; - default: - return false; - } - - /* Can't handle saturate */ - if (mov->dest.saturate) - return false; - - /* It must be SSA */ - if (!mov->dest.dest.is_ssa) - return false; - - /* It cannot have any if-uses */ - if (!list_empty(&mov->dest.dest.ssa.if_uses)) - return false; - - /* The only uses of this definition must be phi's in the successor */ - nir_foreach_use(&mov->dest.dest.ssa, use) { - if (use->parent_instr->type != nir_instr_type_phi || - use->parent_instr->block != block->successors[0]) - return false; - } - break; - } - - default: - return false; - } - } - - return true; -} - -static bool -nir_opt_peephole_select_block(nir_block *block, void *void_state) -{ - struct peephole_select_state *state = void_state; - - /* If the block is empty, then it certainly doesn't have any phi nodes, - * so we can skip it. This also ensures that we do an early skip on the - * end block of the function which isn't actually attached to the CFG. - */ - if (exec_list_is_empty(&block->instr_list)) - return true; - - if (nir_cf_node_is_first(&block->cf_node)) - return true; - - nir_cf_node *prev_node = nir_cf_node_prev(&block->cf_node); - if (prev_node->type != nir_cf_node_if) - return true; - - nir_if *if_stmt = nir_cf_node_as_if(prev_node); - nir_cf_node *then_node = nir_if_first_then_node(if_stmt); - nir_cf_node *else_node = nir_if_first_else_node(if_stmt); - - /* We can only have one block in each side ... */ - if (nir_if_last_then_node(if_stmt) != then_node || - nir_if_last_else_node(if_stmt) != else_node) - return true; - - nir_block *then_block = nir_cf_node_as_block(then_node); - nir_block *else_block = nir_cf_node_as_block(else_node); - - /* ... and those blocks must only contain "allowed" instructions. */ - if (!block_check_for_allowed_instrs(then_block) || - !block_check_for_allowed_instrs(else_block)) - return true; - - /* At this point, we know that the previous CFG node is an if-then - * statement containing only moves to phi nodes in this block. We can - * just remove that entire CF node and replace all of the phi nodes with - * selects. - */ - - nir_block *prev_block = nir_cf_node_as_block(nir_cf_node_prev(prev_node)); - assert(prev_block->cf_node.type == nir_cf_node_block); - - /* First, we move the remaining instructions from the blocks to the - * block before. 
We have already guaranteed that this is safe by - * calling block_check_for_allowed_instrs() - */ - nir_foreach_instr_safe(then_block, instr) { - exec_node_remove(&instr->node); - instr->block = prev_block; - exec_list_push_tail(&prev_block->instr_list, &instr->node); - } - - nir_foreach_instr_safe(else_block, instr) { - exec_node_remove(&instr->node); - instr->block = prev_block; - exec_list_push_tail(&prev_block->instr_list, &instr->node); - } - - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi = nir_instr_as_phi(instr); - nir_alu_instr *sel = nir_alu_instr_create(state->mem_ctx, nir_op_bcsel); - nir_src_copy(&sel->src[0].src, &if_stmt->condition, sel); - /* Splat the condition to all channels */ - memset(sel->src[0].swizzle, 0, sizeof sel->src[0].swizzle); - - assert(exec_list_length(&phi->srcs) == 2); - nir_foreach_phi_src(phi, src) { - assert(src->pred == then_block || src->pred == else_block); - assert(src->src.is_ssa); - - unsigned idx = src->pred == then_block ? 1 : 2; - nir_src_copy(&sel->src[idx].src, &src->src, sel); - } - - nir_ssa_dest_init(&sel->instr, &sel->dest.dest, - phi->dest.ssa.num_components, phi->dest.ssa.name); - sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1; - - nir_ssa_def_rewrite_uses(&phi->dest.ssa, - nir_src_for_ssa(&sel->dest.dest.ssa)); - - nir_instr_insert_before(&phi->instr, &sel->instr); - nir_instr_remove(&phi->instr); - } - - nir_cf_node_remove(&if_stmt->cf_node); - state->progress = true; - - return true; -} - -static bool -nir_opt_peephole_select_impl(nir_function_impl *impl) -{ - struct peephole_select_state state; - - state.mem_ctx = ralloc_parent(impl); - state.progress = false; - - nir_foreach_block(impl, nir_opt_peephole_select_block, &state); - - if (state.progress) - nir_metadata_preserve(impl, nir_metadata_none); - - return state.progress; -} - -bool -nir_opt_peephole_select(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(shader, function) { - if (function->impl) - progress |= nir_opt_peephole_select_impl(function->impl); - } - - return progress; -} diff --git a/src/glsl/nir/nir_opt_remove_phis.c b/src/glsl/nir/nir_opt_remove_phis.c deleted file mode 100644 index 646183707bd..00000000000 --- a/src/glsl/nir/nir_opt_remove_phis.c +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright © 2015 Connor Abbott - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
- *
- * Authors:
- *    Connor Abbott (cwabbott0@gmail.com)
- *
- */
-
-#include "nir.h"
-
-/*
- * This is a pass for removing phi nodes that look like:
- * a = phi(b, b, b, ...)
- *
- * Note that we can't ignore undef sources here, or else we may create a
- * situation where the definition of b doesn't dominate all of its uses. We're
- * allowed to do this since the definition of b must dominate all of the
- * phi node's predecessors, which means it must dominate the phi node as well
- * as all of the phi node's uses. In essence, the phi node acts as a copy
- * instruction. b can't be another phi node in the same block, since the only
- * time when phi nodes can source other phi nodes defined in the same block is
- * at the loop header, and in that case one of the sources of the phi has to
- * be from before the loop and that source can't be b.
- */
-
-static bool
-remove_phis_block(nir_block *block, void *state)
-{
-   bool *progress = state;
-
-   nir_foreach_instr_safe(block, instr) {
-      if (instr->type != nir_instr_type_phi)
-         break;
-
-      nir_phi_instr *phi = nir_instr_as_phi(instr);
-
-      nir_ssa_def *def = NULL;
-      bool srcs_same = true;
-
-      nir_foreach_phi_src(phi, src) {
-         assert(src->src.is_ssa);
-
-         /* For phi nodes at the beginning of loops, we may encounter some
-          * sources from backedges that point back to the destination of the
-          * same phi, i.e. something like:
-          *
-          * a = phi(a, b, ...)
-          *
-          * We can safely ignore these sources, since if all of the normal
-          * sources point to the same definition, then that definition must
-          * still dominate the phi node, and the phi will still always take
-          * the value of that definition.
-          */
-         if (src->src.ssa == &phi->dest.ssa)
-            continue;
-
-         if (def == NULL) {
-            def = src->src.ssa;
-         } else {
-            if (src->src.ssa != def) {
-               srcs_same = false;
-               break;
-            }
-         }
-      }
-
-      if (!srcs_same)
-         continue;
-
-      /* We must have found at least one definition, since there must be at
-       * least one forward edge.
-       */
-      assert(def != NULL);
-
-      assert(phi->dest.is_ssa);
-      nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
-      nir_instr_remove(instr);
-
-      *progress = true;
-   }
-
-   return true;
-}
-
-static bool
-remove_phis_impl(nir_function_impl *impl)
-{
-   bool progress = false;
-
-   nir_foreach_block(impl, remove_phis_block, &progress);
-
-   if (progress) {
-      nir_metadata_preserve(impl, nir_metadata_block_index |
-                                  nir_metadata_dominance);
-   }
-
-   return progress;
-}
-
-bool
-nir_opt_remove_phis(nir_shader *shader)
-{
-   bool progress = false;
-
-   nir_foreach_function(shader, function)
-      if (function->impl)
-         progress = remove_phis_impl(function->impl) || progress;
-
-   return progress;
-}
diff --git a/src/glsl/nir/nir_opt_undef.c b/src/glsl/nir/nir_opt_undef.c
deleted file mode 100644
index 374564d34c5..00000000000
--- a/src/glsl/nir/nir_opt_undef.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright © 2015 Broadcom
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "nir.h" - -/** @file nir_opt_undef.c - * - * Handles optimization of operations involving ssa_undef. For now, we just - * make sure that csels between undef and some other value just give the other - * value (on the assumption that the condition's going to be choosing the - * defined value). This reduces work after if flattening when each side of - * the if is defining a variable. - * - * Some day, we may find some use for making other operations consuming an - * undef arg output undef, but I don't know of any cases currently. - */ - -static bool -opt_undef_alu(nir_alu_instr *instr) -{ - if (instr->op != nir_op_bcsel && instr->op != nir_op_fcsel) - return false; - - assert(instr->dest.dest.is_ssa); - - for (int i = 1; i <= 2; i++) { - if (!instr->src[i].src.is_ssa) - continue; - - nir_instr *parent = instr->src[i].src.ssa->parent_instr; - if (parent->type != nir_instr_type_ssa_undef) - continue; - - /* We can't just use nir_alu_src_copy, because we need the def/use - * updated. - */ - nir_instr_rewrite_src(&instr->instr, &instr->src[0].src, - instr->src[i == 1 ? 2 : 1].src); - nir_alu_src_copy(&instr->src[0], &instr->src[i == 1 ? 2 : 1], - ralloc_parent(instr)); - - nir_src empty_src; - memset(&empty_src, 0, sizeof(empty_src)); - nir_instr_rewrite_src(&instr->instr, &instr->src[1].src, empty_src); - nir_instr_rewrite_src(&instr->instr, &instr->src[2].src, empty_src); - instr->op = nir_op_imov; - - return true; - } - - return false; -} - -static bool -opt_undef_block(nir_block *block, void *data) -{ - bool *progress = data; - - nir_foreach_instr_safe(block, instr) { - if (instr->type == nir_instr_type_alu) - if (opt_undef_alu(nir_instr_as_alu(instr))) - (*progress) = true; - } - - return true; -} - -bool -nir_opt_undef(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(shader, function) { - if (function->impl) { - nir_foreach_block(function->impl, opt_undef_block, &progress); - if (progress) - nir_metadata_preserve(function->impl, - nir_metadata_block_index | - nir_metadata_dominance); - } - } - - return progress; -} diff --git a/src/glsl/nir/nir_print.c b/src/glsl/nir/nir_print.c deleted file mode 100644 index 48ecb48a620..00000000000 --- a/src/glsl/nir/nir_print.c +++ /dev/null @@ -1,1069 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Connor Abbott (cwabbott0@gmail.com)
- *
- */
-
-#include "nir.h"
-#include "compiler/shader_enums.h"
-#include <stdlib.h>
-#include <stdio.h>
-
-static void
-print_tabs(unsigned num_tabs, FILE *fp)
-{
-   for (unsigned i = 0; i < num_tabs; i++)
-      fprintf(fp, "\t");
-}
-
-typedef struct {
-   FILE *fp;
-   nir_shader *shader;
-   /** map from nir_variable -> printable name */
-   struct hash_table *ht;
-
-   /** set of names used so far for nir_variables */
-   struct set *syms;
-
-   /* an index used to make new non-conflicting names */
-   unsigned index;
-} print_state;
-
-static void
-print_register(nir_register *reg, print_state *state)
-{
-   FILE *fp = state->fp;
-   if (reg->name != NULL)
-      fprintf(fp, "/* %s */ ", reg->name);
-   if (reg->is_global)
-      fprintf(fp, "gr%u", reg->index);
-   else
-      fprintf(fp, "r%u", reg->index);
-}
-
-static const char *sizes[] = { "error", "vec1", "vec2", "vec3", "vec4" };
-
-static void
-print_register_decl(nir_register *reg, print_state *state)
-{
-   FILE *fp = state->fp;
-   fprintf(fp, "decl_reg %s ", sizes[reg->num_components]);
-   if (reg->is_packed)
-      fprintf(fp, "(packed) ");
-   print_register(reg, state);
-   if (reg->num_array_elems != 0)
-      fprintf(fp, "[%u]", reg->num_array_elems);
-   fprintf(fp, "\n");
-}
-
-static void
-print_ssa_def(nir_ssa_def *def, print_state *state)
-{
-   FILE *fp = state->fp;
-   if (def->name != NULL)
-      fprintf(fp, "/* %s */ ", def->name);
-   fprintf(fp, "%s ssa_%u", sizes[def->num_components], def->index);
-}
-
-static void
-print_ssa_use(nir_ssa_def *def, print_state *state)
-{
-   FILE *fp = state->fp;
-   if (def->name != NULL)
-      fprintf(fp, "/* %s */ ", def->name);
-   fprintf(fp, "ssa_%u", def->index);
-}
-
-static void print_src(nir_src *src, print_state *state);
-
-static void
-print_reg_src(nir_reg_src *src, print_state *state)
-{
-   FILE *fp = state->fp;
-   print_register(src->reg, state);
-   if (src->reg->num_array_elems != 0) {
-      fprintf(fp, "[%u", src->base_offset);
-      if (src->indirect != NULL) {
-         fprintf(fp, " + ");
-         print_src(src->indirect, state);
-      }
-      fprintf(fp, "]");
-   }
-}
-
-static void
-print_reg_dest(nir_reg_dest *dest, print_state *state)
-{
-   FILE *fp = state->fp;
-   print_register(dest->reg, state);
-   if (dest->reg->num_array_elems != 0) {
-      fprintf(fp, "[%u", dest->base_offset);
-      if (dest->indirect != NULL) {
-         fprintf(fp, " + ");
-         print_src(dest->indirect, state);
-      }
-      fprintf(fp, "]");
-   }
-}
-
-static void
-print_src(nir_src *src, print_state *state)
-{
-   if (src->is_ssa)
-      print_ssa_use(src->ssa, state);
-   else
-      print_reg_src(&src->reg, state);
-}
-
-static void
-print_dest(nir_dest *dest, print_state *state)
-{
-   if (dest->is_ssa)
-      print_ssa_def(&dest->ssa, state);
-   else
-      print_reg_dest(&dest->reg, state);
-}
-
-static void
-print_alu_src(nir_alu_instr *instr, unsigned src, print_state *state)
-{
-   FILE *fp = state->fp;
-
-   if (instr->src[src].negate)
-      fprintf(fp, "-");
-   if (instr->src[src].abs)
-      fprintf(fp, "abs(");
-
-   print_src(&instr->src[src].src, state);
-
-   bool print_swizzle = false;
-   for (unsigned i = 0; i < 4; i++) {
-      if
(!nir_alu_instr_channel_used(instr, src, i)) - continue; - - if (instr->src[src].swizzle[i] != i) { - print_swizzle = true; - break; - } - } - - if (print_swizzle) { - fprintf(fp, "."); - for (unsigned i = 0; i < 4; i++) { - if (!nir_alu_instr_channel_used(instr, src, i)) - continue; - - fprintf(fp, "%c", "xyzw"[instr->src[src].swizzle[i]]); - } - } - - if (instr->src[src].abs) - fprintf(fp, ")"); -} - -static void -print_alu_dest(nir_alu_dest *dest, print_state *state) -{ - FILE *fp = state->fp; - /* we're going to print the saturate modifier later, after the opcode */ - - print_dest(&dest->dest, state); - - if (!dest->dest.is_ssa && - dest->write_mask != (1 << dest->dest.reg.reg->num_components) - 1) { - fprintf(fp, "."); - for (unsigned i = 0; i < 4; i++) - if ((dest->write_mask >> i) & 1) - fprintf(fp, "%c", "xyzw"[i]); - } -} - -static void -print_alu_instr(nir_alu_instr *instr, print_state *state) -{ - FILE *fp = state->fp; - - print_alu_dest(&instr->dest, state); - - fprintf(fp, " = %s", nir_op_infos[instr->op].name); - if (instr->dest.saturate) - fprintf(fp, ".sat"); - fprintf(fp, " "); - - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { - if (i != 0) - fprintf(fp, ", "); - - print_alu_src(instr, i, state); - } -} - -static void -print_constant(nir_constant *c, const struct glsl_type *type, print_state *state) -{ - FILE *fp = state->fp; - unsigned total_elems = glsl_get_components(type); - unsigned i; - - switch (glsl_get_base_type(type)) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: - for (i = 0; i < total_elems; i++) { - if (i > 0) fprintf(fp, ", "); - fprintf(fp, "0x%08x", c->value.u[i]); - } - break; - - case GLSL_TYPE_FLOAT: - for (i = 0; i < total_elems; i++) { - if (i > 0) fprintf(fp, ", "); - fprintf(fp, "%f", c->value.f[i]); - } - break; - - case GLSL_TYPE_STRUCT: - for (i = 0; i < c->num_elements; i++) { - if (i > 0) fprintf(fp, ", "); - fprintf(fp, "{ "); - print_constant(c->elements[i], glsl_get_struct_field(type, i), state); - fprintf(fp, " }"); - } - break; - - case GLSL_TYPE_ARRAY: - for (i = 0; i < c->num_elements; i++) { - if (i > 0) fprintf(fp, ", "); - fprintf(fp, "{ "); - print_constant(c->elements[i], glsl_get_array_element(type), state); - fprintf(fp, " }"); - } - break; - - default: - unreachable("not reached"); - } -} - -static void -print_var_decl(nir_variable *var, print_state *state) -{ - FILE *fp = state->fp; - - fprintf(fp, "decl_var "); - - const char *const cent = (var->data.centroid) ? "centroid " : ""; - const char *const samp = (var->data.sample) ? "sample " : ""; - const char *const patch = (var->data.patch) ? "patch " : ""; - const char *const inv = (var->data.invariant) ? 
"invariant " : ""; - const char *const mode[] = { "shader_in ", "shader_out ", "", "", - "uniform ", "shader_storage", "system " }; - - fprintf(fp, "%s%s%s%s%s%s ", - cent, samp, patch, inv, mode[var->data.mode], - glsl_interp_qualifier_name(var->data.interpolation)); - - glsl_print_type(var->type, fp); - - struct set_entry *entry = NULL; - if (state->syms) - entry = _mesa_set_search(state->syms, var->name); - - char *name; - - if (entry != NULL) { - /* we have a collision with another name, append an @ + a unique index */ - name = ralloc_asprintf(state->syms, "%s@%u", var->name, state->index++); - } else { - name = var->name; - } - - fprintf(fp, " %s", name); - - if (var->data.mode == nir_var_shader_in || - var->data.mode == nir_var_shader_out || - var->data.mode == nir_var_uniform || - var->data.mode == nir_var_shader_storage) { - const char *loc = NULL; - char buf[4]; - - switch (state->shader->stage) { - case MESA_SHADER_VERTEX: - if (var->data.mode == nir_var_shader_in) - loc = gl_vert_attrib_name(var->data.location); - else if (var->data.mode == nir_var_shader_out) - loc = gl_varying_slot_name(var->data.location); - break; - case MESA_SHADER_GEOMETRY: - if ((var->data.mode == nir_var_shader_in) || - (var->data.mode == nir_var_shader_out)) - loc = gl_varying_slot_name(var->data.location); - break; - case MESA_SHADER_FRAGMENT: - if (var->data.mode == nir_var_shader_in) - loc = gl_varying_slot_name(var->data.location); - else if (var->data.mode == nir_var_shader_out) - loc = gl_frag_result_name(var->data.location); - break; - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_COMPUTE: - default: - /* TODO */ - break; - } - - if (!loc) { - snprintf(buf, sizeof(buf), "%u", var->data.location); - loc = buf; - } - - fprintf(fp, " (%s, %u)", loc, var->data.driver_location); - } - - if (var->constant_initializer) { - fprintf(fp, " = { "); - print_constant(var->constant_initializer, var->type, state); - fprintf(fp, " }"); - } - - fprintf(fp, "\n"); - - if (state->syms) { - _mesa_set_add(state->syms, name); - _mesa_hash_table_insert(state->ht, var, name); - } -} - -static void -print_var(nir_variable *var, print_state *state) -{ - FILE *fp = state->fp; - const char *name; - if (state->ht) { - struct hash_entry *entry = _mesa_hash_table_search(state->ht, var); - - assert(entry != NULL); - name = entry->data; - } else { - name = var->name; - } - - fprintf(fp, "%s", name); -} - -static void -print_deref_var(nir_deref_var *deref, print_state *state) -{ - print_var(deref->var, state); -} - -static void -print_deref_array(nir_deref_array *deref, print_state *state) -{ - FILE *fp = state->fp; - fprintf(fp, "["); - switch (deref->deref_array_type) { - case nir_deref_array_type_direct: - fprintf(fp, "%u", deref->base_offset); - break; - case nir_deref_array_type_indirect: - if (deref->base_offset != 0) - fprintf(fp, "%u + ", deref->base_offset); - print_src(&deref->indirect, state); - break; - case nir_deref_array_type_wildcard: - fprintf(fp, "*"); - break; - } - fprintf(fp, "]"); -} - -static void -print_deref_struct(nir_deref_struct *deref, const struct glsl_type *parent_type, - print_state *state) -{ - FILE *fp = state->fp; - fprintf(fp, ".%s", glsl_get_struct_elem_name(parent_type, deref->index)); -} - -static void -print_deref(nir_deref_var *deref, print_state *state) -{ - nir_deref *tail = &deref->deref; - nir_deref *pretail = NULL; - while (tail != NULL) { - switch (tail->deref_type) { - case nir_deref_type_var: - assert(pretail == NULL); - assert(tail == &deref->deref); 
- print_deref_var(deref, state); - break; - - case nir_deref_type_array: - assert(pretail != NULL); - print_deref_array(nir_deref_as_array(tail), state); - break; - - case nir_deref_type_struct: - assert(pretail != NULL); - print_deref_struct(nir_deref_as_struct(tail), - pretail->type, state); - break; - - default: - unreachable("Invalid deref type"); - } - - pretail = tail; - tail = pretail->child; - } -} - -static void -print_intrinsic_instr(nir_intrinsic_instr *instr, print_state *state) -{ - unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; - FILE *fp = state->fp; - - if (nir_intrinsic_infos[instr->intrinsic].has_dest) { - print_dest(&instr->dest, state); - fprintf(fp, " = "); - } - - fprintf(fp, "intrinsic %s (", nir_intrinsic_infos[instr->intrinsic].name); - - for (unsigned i = 0; i < num_srcs; i++) { - if (i != 0) - fprintf(fp, ", "); - - print_src(&instr->src[i], state); - } - - fprintf(fp, ") ("); - - unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; - - for (unsigned i = 0; i < num_vars; i++) { - if (i != 0) - fprintf(fp, ", "); - - print_deref(instr->variables[i], state); - } - - fprintf(fp, ") ("); - - unsigned num_indices = nir_intrinsic_infos[instr->intrinsic].num_indices; - - for (unsigned i = 0; i < num_indices; i++) { - if (i != 0) - fprintf(fp, ", "); - - fprintf(fp, "%d", instr->const_index[i]); - } - - fprintf(fp, ")"); - - if (!state->shader) - return; - - struct exec_list *var_list = NULL; - - switch (instr->intrinsic) { - case nir_intrinsic_load_uniform: - var_list = &state->shader->uniforms; - break; - case nir_intrinsic_load_input: - case nir_intrinsic_load_per_vertex_input: - var_list = &state->shader->inputs; - break; - case nir_intrinsic_load_output: - case nir_intrinsic_store_output: - case nir_intrinsic_store_per_vertex_output: - var_list = &state->shader->outputs; - break; - default: - return; - } - - nir_foreach_variable(var, var_list) { - if ((var->data.driver_location == instr->const_index[0]) && - var->name) { - fprintf(fp, "\t/* %s */", var->name); - break; - } - } -} - -static void -print_tex_instr(nir_tex_instr *instr, print_state *state) -{ - FILE *fp = state->fp; - - print_dest(&instr->dest, state); - - fprintf(fp, " = "); - - switch (instr->op) { - case nir_texop_tex: - fprintf(fp, "tex "); - break; - case nir_texop_txb: - fprintf(fp, "txb "); - break; - case nir_texop_txl: - fprintf(fp, "txl "); - break; - case nir_texop_txd: - fprintf(fp, "txd "); - break; - case nir_texop_txf: - fprintf(fp, "txf "); - break; - case nir_texop_txf_ms: - fprintf(fp, "txf_ms "); - break; - case nir_texop_txs: - fprintf(fp, "txs "); - break; - case nir_texop_lod: - fprintf(fp, "lod "); - break; - case nir_texop_tg4: - fprintf(fp, "tg4 "); - break; - case nir_texop_query_levels: - fprintf(fp, "query_levels "); - break; - case nir_texop_texture_samples: - fprintf(fp, "texture_samples "); - break; - case nir_texop_samples_identical: - fprintf(fp, "samples_identical "); - break; - default: - unreachable("Invalid texture operation"); - break; - } - - for (unsigned i = 0; i < instr->num_srcs; i++) { - print_src(&instr->src[i].src, state); - - fprintf(fp, " "); - - switch(instr->src[i].src_type) { - case nir_tex_src_coord: - fprintf(fp, "(coord)"); - break; - case nir_tex_src_projector: - fprintf(fp, "(projector)"); - break; - case nir_tex_src_comparitor: - fprintf(fp, "(comparitor)"); - break; - case nir_tex_src_offset: - fprintf(fp, "(offset)"); - break; - case nir_tex_src_bias: - fprintf(fp, "(bias)"); - break; - case 
nir_tex_src_lod: - fprintf(fp, "(lod)"); - break; - case nir_tex_src_ms_index: - fprintf(fp, "(ms_index)"); - break; - case nir_tex_src_ddx: - fprintf(fp, "(ddx)"); - break; - case nir_tex_src_ddy: - fprintf(fp, "(ddy)"); - break; - case nir_tex_src_sampler_offset: - fprintf(fp, "(sampler_offset)"); - break; - - default: - unreachable("Invalid texture source type"); - break; - } - - fprintf(fp, ", "); - } - - bool has_nonzero_offset = false; - for (unsigned i = 0; i < 4; i++) { - if (instr->const_offset[i] != 0) { - has_nonzero_offset = true; - break; - } - } - - if (has_nonzero_offset) { - fprintf(fp, "[%i %i %i %i] (offset), ", - instr->const_offset[0], instr->const_offset[1], - instr->const_offset[2], instr->const_offset[3]); - } - - if (instr->op == nir_texop_tg4) { - fprintf(fp, "%u (gather_component), ", instr->component); - } - - if (instr->sampler) { - print_deref(instr->sampler, state); - } else { - fprintf(fp, "%u", instr->sampler_index); - } - - fprintf(fp, " (sampler)"); -} - -static void -print_call_instr(nir_call_instr *instr, print_state *state) -{ - FILE *fp = state->fp; - - fprintf(fp, "call %s ", instr->callee->name); - - for (unsigned i = 0; i < instr->num_params; i++) { - if (i != 0) - fprintf(fp, ", "); - - print_deref(instr->params[i], state); - } - - if (instr->return_deref != NULL) { - if (instr->num_params != 0) - fprintf(fp, ", "); - fprintf(fp, "returning "); - print_deref(instr->return_deref, state); - } -} - -static void -print_load_const_instr(nir_load_const_instr *instr, print_state *state) -{ - FILE *fp = state->fp; - - print_ssa_def(&instr->def, state); - - fprintf(fp, " = load_const ("); - - for (unsigned i = 0; i < instr->def.num_components; i++) { - if (i != 0) - fprintf(fp, ", "); - - /* - * we don't really know the type of the constant (if it will be used as a - * float or an int), so just print the raw constant in hex for fidelity - * and then print the float in a comment for readability. 
- */ - - fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]); - } - - fprintf(fp, ")"); -} - -static void -print_jump_instr(nir_jump_instr *instr, print_state *state) -{ - FILE *fp = state->fp; - - switch (instr->type) { - case nir_jump_break: - fprintf(fp, "break"); - break; - - case nir_jump_continue: - fprintf(fp, "continue"); - break; - - case nir_jump_return: - fprintf(fp, "return"); - break; - } -} - -static void -print_ssa_undef_instr(nir_ssa_undef_instr* instr, print_state *state) -{ - FILE *fp = state->fp; - print_ssa_def(&instr->def, state); - fprintf(fp, " = undefined"); -} - -static void -print_phi_instr(nir_phi_instr *instr, print_state *state) -{ - FILE *fp = state->fp; - print_dest(&instr->dest, state); - fprintf(fp, " = phi "); - nir_foreach_phi_src(instr, src) { - if (&src->node != exec_list_get_head(&instr->srcs)) - fprintf(fp, ", "); - - fprintf(fp, "block_%u: ", src->pred->index); - print_src(&src->src, state); - } -} - -static void -print_parallel_copy_instr(nir_parallel_copy_instr *instr, print_state *state) -{ - FILE *fp = state->fp; - nir_foreach_parallel_copy_entry(instr, entry) { - if (&entry->node != exec_list_get_head(&instr->entries)) - fprintf(fp, "; "); - - print_dest(&entry->dest, state); - fprintf(fp, " = "); - print_src(&entry->src, state); - } -} - -static void -print_instr(const nir_instr *instr, print_state *state, unsigned tabs) -{ - FILE *fp = state->fp; - print_tabs(tabs, fp); - - switch (instr->type) { - case nir_instr_type_alu: - print_alu_instr(nir_instr_as_alu(instr), state); - break; - - case nir_instr_type_call: - print_call_instr(nir_instr_as_call(instr), state); - break; - - case nir_instr_type_intrinsic: - print_intrinsic_instr(nir_instr_as_intrinsic(instr), state); - break; - - case nir_instr_type_tex: - print_tex_instr(nir_instr_as_tex(instr), state); - break; - - case nir_instr_type_load_const: - print_load_const_instr(nir_instr_as_load_const(instr), state); - break; - - case nir_instr_type_jump: - print_jump_instr(nir_instr_as_jump(instr), state); - break; - - case nir_instr_type_ssa_undef: - print_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state); - break; - - case nir_instr_type_phi: - print_phi_instr(nir_instr_as_phi(instr), state); - break; - - case nir_instr_type_parallel_copy: - print_parallel_copy_instr(nir_instr_as_parallel_copy(instr), state); - break; - - default: - unreachable("Invalid instruction type"); - break; - } -} - -static int -compare_block_index(const void *p1, const void *p2) -{ - const nir_block *block1 = *((const nir_block **) p1); - const nir_block *block2 = *((const nir_block **) p2); - - return (int) block1->index - (int) block2->index; -} - -static void print_cf_node(nir_cf_node *node, print_state *state, - unsigned tabs); - -static void -print_block(nir_block *block, print_state *state, unsigned tabs) -{ - FILE *fp = state->fp; - - print_tabs(tabs, fp); - fprintf(fp, "block block_%u:\n", block->index); - - /* sort the predecessors by index so we consistently print the same thing */ - - nir_block **preds = - malloc(block->predecessors->entries * sizeof(nir_block *)); - - struct set_entry *entry; - unsigned i = 0; - set_foreach(block->predecessors, entry) { - preds[i++] = (nir_block *) entry->key; - } - - qsort(preds, block->predecessors->entries, sizeof(nir_block *), - compare_block_index); - - print_tabs(tabs, fp); - fprintf(fp, "/* preds: "); - for (unsigned i = 0; i < block->predecessors->entries; i++) { - fprintf(fp, "block_%u ", preds[i]->index); - } - fprintf(fp, "*/\n"); - - 
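[Editor's note: the hex-plus-float convention above is easy to see in isolation. A standalone sketch of the same bits-two-ways trick, not part of the patch; it prints "0x3f800000 (1.000000)":

#include <stdio.h>
#include <stdint.h>

int main(void)
{
   /* Raw hex for fidelity, float interpretation for readability,
    * exactly as the printer above does with nir_const_value. */
   union { uint32_t u; float f; } v = { .u = 0x3f800000 };
   printf("0x%08x (%f)\n", v.u, v.f);
   return 0;
}
]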
free(preds); - - nir_foreach_instr(block, instr) { - print_instr(instr, state, tabs); - fprintf(fp, "\n"); - } - - print_tabs(tabs, fp); - fprintf(fp, "/* succs: "); - for (unsigned i = 0; i < 2; i++) - if (block->successors[i]) { - fprintf(fp, "block_%u ", block->successors[i]->index); - } - fprintf(fp, "*/\n"); -} - -static void -print_if(nir_if *if_stmt, print_state *state, unsigned tabs) -{ - FILE *fp = state->fp; - - print_tabs(tabs, fp); - fprintf(fp, "if "); - print_src(&if_stmt->condition, state); - fprintf(fp, " {\n"); - foreach_list_typed(nir_cf_node, node, node, &if_stmt->then_list) { - print_cf_node(node, state, tabs + 1); - } - print_tabs(tabs, fp); - fprintf(fp, "} else {\n"); - foreach_list_typed(nir_cf_node, node, node, &if_stmt->else_list) { - print_cf_node(node, state, tabs + 1); - } - print_tabs(tabs, fp); - fprintf(fp, "}\n"); -} - -static void -print_loop(nir_loop *loop, print_state *state, unsigned tabs) -{ - FILE *fp = state->fp; - - print_tabs(tabs, fp); - fprintf(fp, "loop {\n"); - foreach_list_typed(nir_cf_node, node, node, &loop->body) { - print_cf_node(node, state, tabs + 1); - } - print_tabs(tabs, fp); - fprintf(fp, "}\n"); -} - -static void -print_cf_node(nir_cf_node *node, print_state *state, unsigned int tabs) -{ - switch (node->type) { - case nir_cf_node_block: - print_block(nir_cf_node_as_block(node), state, tabs); - break; - - case nir_cf_node_if: - print_if(nir_cf_node_as_if(node), state, tabs); - break; - - case nir_cf_node_loop: - print_loop(nir_cf_node_as_loop(node), state, tabs); - break; - - default: - unreachable("Invalid CFG node type"); - } -} - -static void -print_function_impl(nir_function_impl *impl, print_state *state) -{ - FILE *fp = state->fp; - - fprintf(fp, "\nimpl %s ", impl->function->name); - - for (unsigned i = 0; i < impl->num_params; i++) { - if (i != 0) - fprintf(fp, ", "); - - print_var(impl->params[i], state); - } - - if (impl->return_var != NULL) { - if (impl->num_params != 0) - fprintf(fp, ", "); - fprintf(fp, "returning "); - print_var(impl->return_var, state); - } - - fprintf(fp, "{\n"); - - nir_foreach_variable(var, &impl->locals) { - fprintf(fp, "\t"); - print_var_decl(var, state); - } - - foreach_list_typed(nir_register, reg, node, &impl->registers) { - fprintf(fp, "\t"); - print_register_decl(reg, state); - } - - nir_index_blocks(impl); - - foreach_list_typed(nir_cf_node, node, node, &impl->body) { - print_cf_node(node, state, 1); - } - - fprintf(fp, "\tblock block_%u:\n}\n\n", impl->end_block->index); -} - -static void -print_function(nir_function *function, print_state *state) -{ - FILE *fp = state->fp; - - fprintf(fp, "decl_function %s ", function->name); - - for (unsigned i = 0; i < function->num_params; i++) { - if (i != 0) - fprintf(fp, ", "); - - switch (function->params[i].param_type) { - case nir_parameter_in: - fprintf(fp, "in "); - break; - case nir_parameter_out: - fprintf(fp, "out "); - break; - case nir_parameter_inout: - fprintf(fp, "inout "); - break; - default: - unreachable("Invalid parameter type"); - } - - glsl_print_type(function->params[i].type, fp); - } - - if (function->return_type != NULL) { - if (function->num_params != 0) - fprintf(fp, ", "); - fprintf(fp, "returning "); - glsl_print_type(function->return_type, fp); - } - - fprintf(fp, "\n"); - - if (function->impl != NULL) { - print_function_impl(function->impl, state); - return; - } -} - -static void -init_print_state(print_state *state, nir_shader *shader, FILE *fp) -{ - state->fp = fp; - state->shader = shader; - state->ht = 
_mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - state->syms = _mesa_set_create(NULL, _mesa_key_hash_string, - _mesa_key_string_equal); - state->index = 0; -} - -static void -destroy_print_state(print_state *state) -{ - _mesa_hash_table_destroy(state->ht, NULL); - _mesa_set_destroy(state->syms, NULL); -} - -void -nir_print_shader(nir_shader *shader, FILE *fp) -{ - print_state state; - init_print_state(&state, shader, fp); - - fprintf(fp, "shader: %s\n", gl_shader_stage_name(shader->stage)); - - if (shader->info.name) - fprintf(fp, "name: %s\n", shader->info.name); - - if (shader->info.label) - fprintf(fp, "label: %s\n", shader->info.label); - - fprintf(fp, "inputs: %u\n", shader->num_inputs); - fprintf(fp, "outputs: %u\n", shader->num_outputs); - fprintf(fp, "uniforms: %u\n", shader->num_uniforms); - - nir_foreach_variable(var, &shader->uniforms) { - print_var_decl(var, &state); - } - - nir_foreach_variable(var, &shader->inputs) { - print_var_decl(var, &state); - } - - nir_foreach_variable(var, &shader->outputs) { - print_var_decl(var, &state); - } - - nir_foreach_variable(var, &shader->globals) { - print_var_decl(var, &state); - } - - nir_foreach_variable(var, &shader->system_values) { - print_var_decl(var, &state); - } - - foreach_list_typed(nir_register, reg, node, &shader->registers) { - print_register_decl(reg, &state); - } - - foreach_list_typed(nir_function, func, node, &shader->functions) { - print_function(func, &state); - } - - destroy_print_state(&state); -} - -void -nir_print_instr(const nir_instr *instr, FILE *fp) -{ - print_state state = { - .fp = fp, - }; - print_instr(instr, &state, 0); - -} diff --git a/src/glsl/nir/nir_remove_dead_variables.c b/src/glsl/nir/nir_remove_dead_variables.c deleted file mode 100644 index db754e56b1c..00000000000 --- a/src/glsl/nir/nir_remove_dead_variables.c +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. 
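[Editor's note: the two public entry points defined above, nir_print_shader() and nir_print_instr(), are the usual debugging hooks. A minimal usage sketch, assuming `shader` and `instr` come from the surrounding pass:

#include <stdio.h>
#include "nir.h"

static void
dump_for_debug(nir_shader *shader, const nir_instr *instr)
{
   nir_print_shader(shader, stderr);   /* whole shader, with declarations */
   nir_print_instr(instr, stderr);     /* one instruction, no trailing newline */
   fprintf(stderr, "\n");
}
]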
- * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir.h" - -static void -add_var_use_intrinsic(nir_intrinsic_instr *instr, struct set *live) -{ - unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; - for (unsigned i = 0; i < num_vars; i++) { - nir_variable *var = instr->variables[i]->var; - _mesa_set_add(live, var); - } -} - -static void -add_var_use_call(nir_call_instr *instr, struct set *live) -{ - if (instr->return_deref != NULL) { - nir_variable *var = instr->return_deref->var; - _mesa_set_add(live, var); - } - - for (unsigned i = 0; i < instr->num_params; i++) { - nir_variable *var = instr->params[i]->var; - _mesa_set_add(live, var); - } -} - -static void -add_var_use_tex(nir_tex_instr *instr, struct set *live) -{ - if (instr->sampler != NULL) { - nir_variable *var = instr->sampler->var; - _mesa_set_add(live, var); - } -} - -static bool -add_var_use_block(nir_block *block, void *state) -{ - struct set *live = state; - - nir_foreach_instr(block, instr) { - switch(instr->type) { - case nir_instr_type_intrinsic: - add_var_use_intrinsic(nir_instr_as_intrinsic(instr), live); - break; - - case nir_instr_type_call: - add_var_use_call(nir_instr_as_call(instr), live); - break; - - case nir_instr_type_tex: - add_var_use_tex(nir_instr_as_tex(instr), live); - break; - - default: - break; - } - } - - return true; -} - -static void -add_var_use_shader(nir_shader *shader, struct set *live) -{ - nir_foreach_function(shader, function) { - if (function->impl) { - nir_foreach_block(function->impl, add_var_use_block, live); - } - } -} - -static bool -remove_dead_vars(struct exec_list *var_list, struct set *live) -{ - bool progress = false; - - foreach_list_typed_safe(nir_variable, var, node, var_list) { - struct set_entry *entry = _mesa_set_search(live, var); - if (entry == NULL) { - exec_node_remove(&var->node); - ralloc_free(var); - progress = true; - } - } - - return progress; -} - -bool -nir_remove_dead_variables(nir_shader *shader) -{ - bool progress = false; - struct set *live = - _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - - add_var_use_shader(shader, live); - - progress = remove_dead_vars(&shader->globals, live) || progress; - - nir_foreach_function(shader, function) { - if (function->impl) { - if (remove_dead_vars(&function->impl->locals, live)) { - nir_metadata_preserve(function->impl, nir_metadata_block_index | - nir_metadata_dominance | - nir_metadata_live_ssa_defs); - progress = true; - } - } - } - - _mesa_set_destroy(live, NULL); - return progress; -} diff --git a/src/glsl/nir/nir_search.c b/src/glsl/nir/nir_search.c deleted file mode 100644 index 56d7e8162f3..00000000000 --- a/src/glsl/nir/nir_search.c +++ /dev/null @@ -1,379 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
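[Editor's note: like most NIR passes, nir_remove_dead_variables() above returns whether it made progress, so callers iterate it together with other cleanup passes to a fixed point. A sketch of the common idiom; the commented-out pass is a placeholder for whatever else the caller runs:

bool progress;
do {
   progress = false;
   progress |= nir_remove_dead_variables(shader);
   /* progress |= nir_opt_dce(shader); ... other cleanup passes ... */
} while (progress);
]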
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "nir_search.h" - -struct match_state { - unsigned variables_seen; - nir_alu_src variables[NIR_SEARCH_MAX_VARIABLES]; -}; - -static bool -match_expression(const nir_search_expression *expr, nir_alu_instr *instr, - unsigned num_components, const uint8_t *swizzle, - struct match_state *state); - -static const uint8_t identity_swizzle[] = { 0, 1, 2, 3 }; - -static bool alu_instr_is_bool(nir_alu_instr *instr); - -static bool -src_is_bool(nir_src src) -{ - if (!src.is_ssa) - return false; - if (src.ssa->parent_instr->type != nir_instr_type_alu) - return false; - return alu_instr_is_bool(nir_instr_as_alu(src.ssa->parent_instr)); -} - -static bool -alu_instr_is_bool(nir_alu_instr *instr) -{ - switch (instr->op) { - case nir_op_iand: - case nir_op_ior: - case nir_op_ixor: - return src_is_bool(instr->src[0].src) && src_is_bool(instr->src[1].src); - case nir_op_inot: - return src_is_bool(instr->src[0].src); - default: - return nir_op_infos[instr->op].output_type == nir_type_bool; - } -} - -static bool -match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src, - unsigned num_components, const uint8_t *swizzle, - struct match_state *state) -{ - uint8_t new_swizzle[4]; - - /* If the source is an explicitly sized source, then we need to reset - * both the number of components and the swizzle. 
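[Editor's note: a worked example of how src_is_bool() and alu_instr_is_bool() above cooperate, with illustrative SSA values:

/*
 *   ssa_1 = flt a, b           output_type is nir_type_bool  -> bool
 *   ssa_2 = ilt c, d           output_type is nir_type_bool  -> bool
 *   ssa_3 = iand ssa_1, ssa_2  both sources are bool         -> bool
 *   ssa_4 = iand ssa_3, e      'e' is not provably boolean   -> not bool
 */
]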
- */ - if (nir_op_infos[instr->op].input_sizes[src] != 0) { - num_components = nir_op_infos[instr->op].input_sizes[src]; - swizzle = identity_swizzle; - } - - for (unsigned i = 0; i < num_components; ++i) - new_swizzle[i] = instr->src[src].swizzle[swizzle[i]]; - - switch (value->type) { - case nir_search_value_expression: - if (!instr->src[src].src.is_ssa) - return false; - - if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu) - return false; - - return match_expression(nir_search_value_as_expression(value), - nir_instr_as_alu(instr->src[src].src.ssa->parent_instr), - num_components, new_swizzle, state); - - case nir_search_value_variable: { - nir_search_variable *var = nir_search_value_as_variable(value); - assert(var->variable < NIR_SEARCH_MAX_VARIABLES); - - if (state->variables_seen & (1 << var->variable)) { - if (!nir_srcs_equal(state->variables[var->variable].src, - instr->src[src].src)) - return false; - - assert(!instr->src[src].abs && !instr->src[src].negate); - - for (unsigned i = 0; i < num_components; ++i) { - if (state->variables[var->variable].swizzle[i] != new_swizzle[i]) - return false; - } - - return true; - } else { - if (var->is_constant && - instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const) - return false; - - if (var->type != nir_type_invalid) { - if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_alu) - return false; - - nir_alu_instr *src_alu = - nir_instr_as_alu(instr->src[src].src.ssa->parent_instr); - - if (nir_op_infos[src_alu->op].output_type != var->type && - !(var->type == nir_type_bool && alu_instr_is_bool(src_alu))) - return false; - } - - state->variables_seen |= (1 << var->variable); - state->variables[var->variable].src = instr->src[src].src; - state->variables[var->variable].abs = false; - state->variables[var->variable].negate = false; - - for (unsigned i = 0; i < 4; ++i) { - if (i < num_components) - state->variables[var->variable].swizzle[i] = new_swizzle[i]; - else - state->variables[var->variable].swizzle[i] = 0; - } - - return true; - } - } - - case nir_search_value_constant: { - nir_search_constant *const_val = nir_search_value_as_constant(value); - - if (!instr->src[src].src.is_ssa) - return false; - - if (instr->src[src].src.ssa->parent_instr->type != nir_instr_type_load_const) - return false; - - nir_load_const_instr *load = - nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr); - - switch (nir_op_infos[instr->op].input_types[src]) { - case nir_type_float: - for (unsigned i = 0; i < num_components; ++i) { - if (load->value.f[new_swizzle[i]] != const_val->data.f) - return false; - } - return true; - case nir_type_int: - case nir_type_uint: - case nir_type_bool: - for (unsigned i = 0; i < num_components; ++i) { - if (load->value.i[new_swizzle[i]] != const_val->data.i) - return false; - } - return true; - default: - unreachable("Invalid alu source type"); - } - } - - default: - unreachable("Invalid search value type"); - } -} - -static bool -match_expression(const nir_search_expression *expr, nir_alu_instr *instr, - unsigned num_components, const uint8_t *swizzle, - struct match_state *state) -{ - if (instr->op != expr->opcode) - return false; - - assert(!instr->dest.saturate); - assert(nir_op_infos[instr->op].num_inputs > 0); - - /* If we have an explicitly sized destination, we can only handle the - * identity swizzle. While dot(vec3(a, b, c).zxy) is a valid - * expression, we don't have the information right now to propagate that - * swizzle through. 
We can only properly propagate swizzles if the - * instruction is vectorized. - */ - if (nir_op_infos[instr->op].output_size != 0) { - for (unsigned i = 0; i < num_components; i++) { - if (swizzle[i] != i) - return false; - } - } - - /* Stash off the current variables_seen bitmask. This way we can - * restore it prior to matching in the commutative case below. - */ - unsigned variables_seen_stash = state->variables_seen; - - bool matched = true; - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { - if (!match_value(expr->srcs[i], instr, i, num_components, - swizzle, state)) { - matched = false; - break; - } - } - - if (matched) - return true; - - if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) { - assert(nir_op_infos[instr->op].num_inputs == 2); - - /* Restore the variables_seen bitmask. If we don't do this, then we - * could end up with an erroneous failure due to variables found in the - * first match attempt above not matching those in the second. - */ - state->variables_seen = variables_seen_stash; - - if (!match_value(expr->srcs[0], instr, 1, num_components, - swizzle, state)) - return false; - - return match_value(expr->srcs[1], instr, 0, num_components, - swizzle, state); - } else { - return false; - } -} - -static nir_alu_src -construct_value(const nir_search_value *value, nir_alu_type type, - unsigned num_components, struct match_state *state, - nir_instr *instr, void *mem_ctx) -{ - switch (value->type) { - case nir_search_value_expression: { - const nir_search_expression *expr = nir_search_value_as_expression(value); - - if (nir_op_infos[expr->opcode].output_size != 0) - num_components = nir_op_infos[expr->opcode].output_size; - - nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode); - nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL); - alu->dest.write_mask = (1 << num_components) - 1; - alu->dest.saturate = false; - - for (unsigned i = 0; i < nir_op_infos[expr->opcode].num_inputs; i++) { - /* If the source is an explicitly sized source, then we need to reset - * the number of components to match. 
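[Editor's note: the search trees consumed by match_expression() above are static structures, in practice emitted by nir_algebraic.py. A hand-written sketch of the pattern fadd(a, 0.0) -> a, using the struct layouts from nir_search.h (removed further below in this patch); names are illustrative:

static const nir_search_variable var_a = {
   { nir_search_value_variable },
   0,                /* variable index */
   false,            /* is_constant */
   nir_type_invalid, /* type: match anything */
};

static const nir_search_constant const_zero = {
   { nir_search_value_constant },
   { .f = 0.0f },
};

static const nir_search_expression fadd_a_zero = {
   { nir_search_value_expression },
   nir_op_fadd,
   { &var_a.value, &const_zero.value },
};

/* Applying it is then one call (mem_ctx assumed):
 *    nir_replace_instr(alu, &fadd_a_zero, &var_a.value, mem_ctx);
 */
]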
- */
-         if (nir_op_infos[alu->op].input_sizes[i] != 0)
-            num_components = nir_op_infos[alu->op].input_sizes[i];
-
-         alu->src[i] = construct_value(expr->srcs[i],
-                                       nir_op_infos[alu->op].input_types[i],
-                                       num_components,
-                                       state, instr, mem_ctx);
-      }
-
-      nir_instr_insert_before(instr, &alu->instr);
-
-      nir_alu_src val;
-      val.src = nir_src_for_ssa(&alu->dest.dest.ssa);
-      val.negate = false;
-      val.abs = false;
-      memcpy(val.swizzle, identity_swizzle, sizeof val.swizzle);
-
-      return val;
-   }
-
-   case nir_search_value_variable: {
-      const nir_search_variable *var = nir_search_value_as_variable(value);
-      assert(state->variables_seen & (1 << var->variable));
-
-      nir_alu_src val = { NIR_SRC_INIT };
-      nir_alu_src_copy(&val, &state->variables[var->variable], mem_ctx);
-
-      assert(!var->is_constant);
-
-      return val;
-   }
-
-   case nir_search_value_constant: {
-      const nir_search_constant *c = nir_search_value_as_constant(value);
-      nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
-
-      switch (type) {
-      case nir_type_float:
-         load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
-         load->value.f[0] = c->data.f;
-         break;
-      case nir_type_int:
-         load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
-         load->value.i[0] = c->data.i;
-         break;
-      case nir_type_uint:
-      case nir_type_bool:
-         load->value.u[0] = c->data.u;
-         break;
-      default:
-         unreachable("Invalid alu source type");
-      }
-
-      nir_instr_insert_before(instr, &load->instr);
-
-      nir_alu_src val;
-      val.src = nir_src_for_ssa(&load->def);
-      val.negate = false;
-      val.abs = false;
-      memset(val.swizzle, 0, sizeof val.swizzle);
-
-      return val;
-   }
-
-   default:
-      unreachable("Invalid search value type");
-   }
-}
-
-nir_alu_instr *
-nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
-                  const nir_search_value *replace, void *mem_ctx)
-{
-   uint8_t swizzle[4] = { 0, 0, 0, 0 };
-
-   for (unsigned i = 0; i < instr->dest.dest.ssa.num_components; ++i)
-      swizzle[i] = i;
-
-   assert(instr->dest.dest.is_ssa);
-
-   struct match_state state;
-   state.variables_seen = 0;
-
-   if (!match_expression(search, instr, instr->dest.dest.ssa.num_components,
-                         swizzle, &state))
-      return NULL;
-
-   /* Inserting a mov may be unnecessary.  However, it's much easier to
-    * simply let copy propagation clean this up than to try to go through
-    * and rewrite swizzles ourselves.
-    */
-   nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
-   mov->dest.write_mask = instr->dest.write_mask;
-   nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
-                     instr->dest.dest.ssa.num_components, NULL);
-
-   mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type,
-                                 instr->dest.dest.ssa.num_components, &state,
-                                 &instr->instr, mem_ctx);
-   nir_instr_insert_before(&instr->instr, &mov->instr);
-
-   nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
-                            nir_src_for_ssa(&mov->dest.dest.ssa));
-
-   /* We know this one has no more uses because we just rewrote them all,
-    * so we can remove it.  The rest of the matched expression, however, we
-    * don't know so much about.  We'll just let dead code clean them up.
- */
-   nir_instr_remove(&instr->instr);
-
-   return mov;
-}
diff --git a/src/glsl/nir/nir_search.h b/src/glsl/nir/nir_search.h
deleted file mode 100644
index 7d47792945e..00000000000
--- a/src/glsl/nir/nir_search.h
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Jason Ekstrand (jason@jlekstrand.net)
- *
- */
-
-#ifndef _NIR_SEARCH_
-#define _NIR_SEARCH_
-
-#include "nir.h"
-
-#define NIR_SEARCH_MAX_VARIABLES 16
-
-typedef enum {
-   nir_search_value_expression,
-   nir_search_value_variable,
-   nir_search_value_constant,
-} nir_search_value_type;
-
-typedef struct {
-   nir_search_value_type type;
-} nir_search_value;
-
-typedef struct {
-   nir_search_value value;
-
-   /** The variable index; must be less than NIR_SEARCH_MAX_VARIABLES */
-   unsigned variable;
-
-   /** Indicates that the given variable must be a constant
-    *
-    * This is only allowed in search expressions and indicates that the
-    * given variable is only allowed to match constant values.
-    */
-   bool is_constant;
-
-   /** Indicates that the given variable must have a certain type
-    *
-    * This is only allowed in search expressions and indicates that the
-    * given variable is only allowed to match values that come from an ALU
-    * instruction with the given output type.  A type of nir_type_invalid
-    * means it can match any type.
-    *
-    * Note: A variable that is both constant and has a type other than
-    * nir_type_invalid will never match anything.
- */
-   nir_alu_type type;
-} nir_search_variable;
-
-typedef struct {
-   nir_search_value value;
-
-   union {
-      uint32_t u;
-      int32_t i;
-      float f;
-   } data;
-} nir_search_constant;
-
-typedef struct {
-   nir_search_value value;
-
-   nir_op opcode;
-   const nir_search_value *srcs[4];
-} nir_search_expression;
-
-NIR_DEFINE_CAST(nir_search_value_as_variable, nir_search_value,
-                nir_search_variable, value)
-NIR_DEFINE_CAST(nir_search_value_as_constant, nir_search_value,
-                nir_search_constant, value)
-NIR_DEFINE_CAST(nir_search_value_as_expression, nir_search_value,
-                nir_search_expression, value)
-
-nir_alu_instr *
-nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
-                  const nir_search_value *replace, void *mem_ctx);
-
-#endif /* _NIR_SEARCH_ */
diff --git a/src/glsl/nir/nir_split_var_copies.c b/src/glsl/nir/nir_split_var_copies.c
deleted file mode 100644
index 6fdaefa32c8..00000000000
--- a/src/glsl/nir/nir_split_var_copies.c
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Jason Ekstrand (jason@jlekstrand.net)
- *
- */
-
-#include "nir.h"
-
-/*
- * Implements "copy splitting" which is similar to structure splitting only
- * it works on copy operations rather than the datatypes themselves.  The
- * GLSL language allows you to copy an entire structure (which may contain
- * arrays or other structures) from one variable to another in a single
- * assignment.  Normally, in a language such as C this would be handled by
- * a "structure splitting" pass that breaks up the structures.
- * Unfortunately for us, structures used in inputs or outputs can't be
- * split.  Therefore, regardless of what we do, we have to be able to copy
- * to/from structures.
- *
- * The primary purpose of structure splitting is to allow you to better
- * optimize variable access and lower things to registers where you can.
- * The primary issue here is that, if you lower the copy to a bunch of
- * loads and stores, you lose a lot of information about the copy
- * operation that you would like to keep around.  To solve this problem, we
- * have a "copy splitting" pass that, instead of splitting the structures
- * or lowering the copy into loads and stores, splits the copy operation
- * into a bunch of copy operations, one for each leaf of the structure tree.
- * If an intermediate array is encountered, it is referenced with a - * wildcard reference to indicate that the entire array is to be copied. - * - * As things become direct, array copies may be able to be losslessly - * lowered to having fewer and fewer wildcards. However, until that - * happens we want to keep the information about the arrays intact. - * - * Prior to the copy splitting pass, there are no wildcard references but - * there may be incomplete references where the tail of the deref chain is - * an array or a structure and not a specific element. After the copy - * splitting pass has completed, every variable deref will be a full-length - * dereference pointing to a single leaf in the structure type tree with - * possibly a few wildcard array dereferences. - */ - -struct split_var_copies_state { - void *mem_ctx; - void *dead_ctx; - bool progress; -}; - -/* Recursively constructs deref chains to split a copy instruction into - * multiple (if needed) copy instructions with full-length deref chains. - * External callers of this function should pass the tail and head of the - * deref chains found as the source and destination of the copy instruction - * into this function. - * - * \param old_copy The copy instruction we are splitting - * \param dest_head The head of the destination deref chain we are building - * \param src_head The head of the source deref chain we are building - * \param dest_tail The tail of the destination deref chain we are building - * \param src_tail The tail of the source deref chain we are building - * \param state The current split_var_copies_state object - */ -static void -split_var_copy_instr(nir_intrinsic_instr *old_copy, - nir_deref *dest_head, nir_deref *src_head, - nir_deref *dest_tail, nir_deref *src_tail, - struct split_var_copies_state *state) -{ - assert(src_tail->type == dest_tail->type); - - /* Make sure these really are the tails of the deref chains */ - assert(dest_tail->child == NULL); - assert(src_tail->child == NULL); - - switch (glsl_get_base_type(src_tail->type)) { - case GLSL_TYPE_ARRAY: { - /* Make a wildcard dereference */ - nir_deref_array *deref = nir_deref_array_create(state->dead_ctx); - deref->deref.type = glsl_get_array_element(src_tail->type); - deref->deref_array_type = nir_deref_array_type_wildcard; - - /* Set the tail of both as the newly created wildcard deref. It is - * safe to use the same wildcard in both places because a) we will be - * copying it before we put it in an actual instruction and b) - * everything that will potentially add another link in the deref - * chain will also add the same thing to both chains. - */ - src_tail->child = &deref->deref; - dest_tail->child = &deref->deref; - - split_var_copy_instr(old_copy, dest_head, src_head, - dest_tail->child, src_tail->child, state); - - /* Set it back to the way we found it */ - src_tail->child = NULL; - dest_tail->child = NULL; - break; - } - - case GLSL_TYPE_STRUCT: - /* This is the only part that actually does any interesting - * splitting. For array types, we just use wildcards and resolve - * them later. For structure types, we need to emit one copy - * instruction for every structure element. Because we may have - * structs inside structs, we just recurse and let the next level - * take care of any additional structures. 
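[Editor's note: the effect of the splitting described above is easiest to see on a small example. For variables of type struct { vec4 v[4]; float f; }, one whole-struct copy becomes one copy per leaf, with the array left whole behind a wildcard (pseudo-IR; the names are illustrative):

/*
 *   before:  copy_var dst, src
 *
 *   after:   copy_var dst.v[*], src.v[*]   <- array stays whole, wildcard
 *            copy_var dst.f, src.f
 */
]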
- */ - for (unsigned i = 0; i < glsl_get_length(src_tail->type); i++) { - nir_deref_struct *deref = nir_deref_struct_create(state->dead_ctx, i); - deref->deref.type = glsl_get_struct_field(src_tail->type, i); - - /* Set the tail of both as the newly created structure deref. It - * is safe to use the same wildcard in both places because a) we - * will be copying it before we put it in an actual instruction - * and b) everything that will potentially add another link in the - * deref chain will also add the same thing to both chains. - */ - src_tail->child = &deref->deref; - dest_tail->child = &deref->deref; - - split_var_copy_instr(old_copy, dest_head, src_head, - dest_tail->child, src_tail->child, state); - } - /* Set it back to the way we found it */ - src_tail->child = NULL; - dest_tail->child = NULL; - break; - - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(src_tail->type)) { - nir_deref_array *deref = nir_deref_array_create(state->dead_ctx); - deref->deref.type = glsl_get_column_type(src_tail->type); - deref->deref_array_type = nir_deref_array_type_wildcard; - - /* Set the tail of both as the newly created wildcard deref. It - * is safe to use the same wildcard in both places because a) we - * will be copying it before we put it in an actual instruction - * and b) everything that will potentially add another link in the - * deref chain will also add the same thing to both chains. - */ - src_tail->child = &deref->deref; - dest_tail->child = &deref->deref; - - split_var_copy_instr(old_copy, dest_head, src_head, - dest_tail->child, src_tail->child, state); - - /* Set it back to the way we found it */ - src_tail->child = NULL; - dest_tail->child = NULL; - } else { - /* At this point, we have fully built our deref chains and can - * actually add the new copy instruction. - */ - nir_intrinsic_instr *new_copy = - nir_intrinsic_instr_create(state->mem_ctx, nir_intrinsic_copy_var); - - /* We need to make copies because a) this deref chain actually - * belongs to the copy instruction and b) the deref chains may - * have some of the same links due to the way we constructed them - */ - nir_deref *src = nir_copy_deref(new_copy, src_head); - nir_deref *dest = nir_copy_deref(new_copy, dest_head); - - new_copy->variables[0] = nir_deref_as_var(dest); - new_copy->variables[1] = nir_deref_as_var(src); - - /* Emit the copy instruction after the old instruction. We'll - * remove the old one later. 
- */ - nir_instr_insert_after(&old_copy->instr, &new_copy->instr); - state->progress = true; - } - break; - - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_ATOMIC_UINT: - case GLSL_TYPE_INTERFACE: - default: - unreachable("Cannot copy these types"); - } -} - -static bool -split_var_copies_block(nir_block *block, void *void_state) -{ - struct split_var_copies_state *state = void_state; - - nir_foreach_instr_safe(block, instr) { - if (instr->type != nir_instr_type_intrinsic) - continue; - - nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); - if (intrinsic->intrinsic != nir_intrinsic_copy_var) - continue; - - nir_deref *dest_head = &intrinsic->variables[0]->deref; - nir_deref *src_head = &intrinsic->variables[1]->deref; - nir_deref *dest_tail = nir_deref_tail(dest_head); - nir_deref *src_tail = nir_deref_tail(src_head); - - switch (glsl_get_base_type(src_tail->type)) { - case GLSL_TYPE_ARRAY: - case GLSL_TYPE_STRUCT: - split_var_copy_instr(intrinsic, dest_head, src_head, - dest_tail, src_tail, state); - nir_instr_remove(&intrinsic->instr); - ralloc_steal(state->dead_ctx, instr); - break; - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_INT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_BOOL: - if (glsl_type_is_matrix(src_tail->type)) { - split_var_copy_instr(intrinsic, dest_head, src_head, - dest_tail, src_tail, state); - nir_instr_remove(&intrinsic->instr); - ralloc_steal(state->dead_ctx, instr); - } - break; - default: - unreachable("Invalid type"); - break; - } - } - - return true; -} - -static bool -split_var_copies_impl(nir_function_impl *impl) -{ - struct split_var_copies_state state; - - state.mem_ctx = ralloc_parent(impl); - state.dead_ctx = ralloc_context(NULL); - state.progress = false; - - nir_foreach_block(impl, split_var_copies_block, &state); - - ralloc_free(state.dead_ctx); - - if (state.progress) { - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - } - - return state.progress; -} - -bool -nir_split_var_copies(nir_shader *shader) -{ - bool progress = false; - - nir_foreach_function(shader, function) { - if (function->impl) - progress = split_var_copies_impl(function->impl) || progress; - } - - return progress; -} diff --git a/src/glsl/nir/nir_sweep.c b/src/glsl/nir/nir_sweep.c deleted file mode 100644 index 0710bdba7c7..00000000000 --- a/src/glsl/nir/nir_sweep.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "nir.h" - -/** - * \file nir_sweep.c - * - * The nir_sweep() pass performs a mark and sweep pass over a nir_shader's associated - * memory - anything still connected to the program will be kept, and any dead memory - * we dropped on the floor will be freed. - * - * The expectation is that drivers should call this when finished compiling the shader - * (after any optimization, lowering, and so on). However, it's also fine to call it - * earlier, and even many times, trading CPU cycles for memory savings. - */ - -#define steal_list(mem_ctx, type, list) \ - foreach_list_typed(type, obj, node, list) { ralloc_steal(mem_ctx, obj); } - -static void sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node); - -static bool -sweep_src_indirect(nir_src *src, void *nir) -{ - if (!src->is_ssa && src->reg.indirect) - ralloc_steal(nir, src->reg.indirect); - - return true; -} - -static bool -sweep_dest_indirect(nir_dest *dest, void *nir) -{ - if (!dest->is_ssa && dest->reg.indirect) - ralloc_steal(nir, dest->reg.indirect); - - return true; -} - -static void -sweep_block(nir_shader *nir, nir_block *block) -{ - ralloc_steal(nir, block); - - nir_foreach_instr(block, instr) { - ralloc_steal(nir, instr); - - nir_foreach_src(instr, sweep_src_indirect, nir); - nir_foreach_dest(instr, sweep_dest_indirect, nir); - } -} - -static void -sweep_if(nir_shader *nir, nir_if *iff) -{ - ralloc_steal(nir, iff); - - foreach_list_typed(nir_cf_node, cf_node, node, &iff->then_list) { - sweep_cf_node(nir, cf_node); - } - - foreach_list_typed(nir_cf_node, cf_node, node, &iff->else_list) { - sweep_cf_node(nir, cf_node); - } -} - -static void -sweep_loop(nir_shader *nir, nir_loop *loop) -{ - ralloc_steal(nir, loop); - - foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { - sweep_cf_node(nir, cf_node); - } -} - -static void -sweep_cf_node(nir_shader *nir, nir_cf_node *cf_node) -{ - switch (cf_node->type) { - case nir_cf_node_block: - sweep_block(nir, nir_cf_node_as_block(cf_node)); - break; - case nir_cf_node_if: - sweep_if(nir, nir_cf_node_as_if(cf_node)); - break; - case nir_cf_node_loop: - sweep_loop(nir, nir_cf_node_as_loop(cf_node)); - break; - default: - unreachable("Invalid CF node type"); - } -} - -static void -sweep_impl(nir_shader *nir, nir_function_impl *impl) -{ - ralloc_steal(nir, impl); - - ralloc_steal(nir, impl->params); - ralloc_steal(nir, impl->return_var); - steal_list(nir, nir_variable, &impl->locals); - steal_list(nir, nir_register, &impl->registers); - - foreach_list_typed(nir_cf_node, cf_node, node, &impl->body) { - sweep_cf_node(nir, cf_node); - } - - sweep_block(nir, impl->end_block); - - /* Wipe out all the metadata, if any. */ - nir_metadata_preserve(impl, nir_metadata_none); -} - -static void -sweep_function(nir_shader *nir, nir_function *f) -{ - ralloc_steal(nir, f); - ralloc_steal(nir, f->params); - - if (f->impl) - sweep_impl(nir, f->impl); -} - -void -nir_sweep(nir_shader *nir) -{ - void *rubbish = ralloc_context(NULL); - - /* First, move ownership of all the memory to a temporary context; assume dead. */ - ralloc_adopt(rubbish, nir); - - ralloc_steal(nir, (char *)nir->info.name); - if (nir->info.label) - ralloc_steal(nir, (char *)nir->info.label); - - /* Variables and registers are not dead. Steal them back. 
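[Editor's note: distilled, the mark-and-sweep trick described above is just three ralloc calls, all of which appear in the surrounding code. A sketch in isolation; `ctx` and `live_obj` are placeholders:

void *rubbish = ralloc_context(NULL);
ralloc_adopt(rubbish, ctx);    /* everything under ctx is now presumed dead */
ralloc_steal(ctx, live_obj);   /* mark: reparent each live allocation back  */
ralloc_free(rubbish);          /* sweep: free whatever was not stolen back  */
]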
*/
-   steal_list(nir, nir_variable, &nir->uniforms);
-   steal_list(nir, nir_variable, &nir->inputs);
-   steal_list(nir, nir_variable, &nir->outputs);
-   steal_list(nir, nir_variable, &nir->globals);
-   steal_list(nir, nir_variable, &nir->system_values);
-   steal_list(nir, nir_register, &nir->registers);
-
-   /* Recurse into functions, stealing their contents back. */
-   foreach_list_typed(nir_function, func, node, &nir->functions) {
-      sweep_function(nir, func);
-   }
-
-   /* Free everything we didn't steal back. */
-   ralloc_free(rubbish);
-}
diff --git a/src/glsl/nir/nir_to_ssa.c b/src/glsl/nir/nir_to_ssa.c
deleted file mode 100644
index 44a50547738..00000000000
--- a/src/glsl/nir/nir_to_ssa.c
+++ /dev/null
@@ -1,536 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- * Authors:
- *    Connor Abbott (cwabbott0@gmail.com)
- *
- */
-
-#include "nir.h"
-#include <stdlib.h>
-#include <assert.h>
-
-/*
- * Implements the classic to-SSA algorithm described by Cytron et al. in
- * "Efficiently Computing Static Single Assignment Form and the Control
- * Dependence Graph."
- */
-
-/* inserts a phi node of the form reg = phi(reg, reg, reg, ...) */
-
-static void
-insert_trivial_phi(nir_register *reg, nir_block *block, void *mem_ctx)
-{
-   nir_phi_instr *instr = nir_phi_instr_create(mem_ctx);
-
-   instr->dest.reg.reg = reg;
-   struct set_entry *entry;
-   set_foreach(block->predecessors, entry) {
-      nir_block *pred = (nir_block *) entry->key;
-
-      nir_phi_src *src = ralloc(instr, nir_phi_src);
-      src->pred = pred;
-      src->src.is_ssa = false;
-      src->src.reg.base_offset = 0;
-      src->src.reg.indirect = NULL;
-      src->src.reg.reg = reg;
-      exec_list_push_tail(&instr->srcs, &src->node);
-   }
-
-   nir_instr_insert_before_block(block, &instr->instr);
-}
-
-static void
-insert_phi_nodes(nir_function_impl *impl)
-{
-   void *mem_ctx = ralloc_parent(impl);
-
-   unsigned *work = calloc(impl->num_blocks, sizeof(unsigned));
-   unsigned *has_already = calloc(impl->num_blocks, sizeof(unsigned));
-
-   /*
-    * Since the work flags already prevent us from inserting a node that has
-    * ever been inserted into W, we don't need to use a set to represent W.
-    * Also, since no block can ever be inserted into W more than once, we know
-    * that the maximum size of W is the number of basic blocks in the
-    * function.  So all we need to handle W is an array and a pointer to the
-    * next element to be inserted and the next element to be removed.
- */ - nir_block **W = malloc(impl->num_blocks * sizeof(nir_block *)); - unsigned w_start, w_end; - - unsigned iter_count = 0; - - nir_index_blocks(impl); - - foreach_list_typed(nir_register, reg, node, &impl->registers) { - if (reg->num_array_elems != 0) - continue; - - w_start = w_end = 0; - iter_count++; - - nir_foreach_def(reg, dest) { - nir_instr *def = dest->reg.parent_instr; - if (work[def->block->index] < iter_count) - W[w_end++] = def->block; - work[def->block->index] = iter_count; - } - - while (w_start != w_end) { - nir_block *cur = W[w_start++]; - struct set_entry *entry; - set_foreach(cur->dom_frontier, entry) { - nir_block *next = (nir_block *) entry->key; - - /* - * If there's more than one return statement, then the end block - * can be a join point for some definitions. However, there are - * no instructions in the end block, so nothing would use those - * phi nodes. Of course, we couldn't place those phi nodes - * anyways due to the restriction of having no instructions in the - * end block... - */ - if (next == impl->end_block) - continue; - - if (has_already[next->index] < iter_count) { - insert_trivial_phi(reg, next, mem_ctx); - has_already[next->index] = iter_count; - if (work[next->index] < iter_count) { - work[next->index] = iter_count; - W[w_end++] = next; - } - } - } - } - } - - free(work); - free(has_already); - free(W); -} - -typedef struct { - nir_ssa_def **stack; - int index; - unsigned num_defs; /** < used to add indices to debug names */ -#ifndef NDEBUG - unsigned stack_size; -#endif -} reg_state; - -typedef struct { - reg_state *states; - void *mem_ctx; - nir_instr *parent_instr; - nir_if *parent_if; - nir_function_impl *impl; - - /* map from SSA value -> original register */ - struct hash_table *ssa_map; -} rewrite_state; - -static nir_ssa_def *get_ssa_src(nir_register *reg, rewrite_state *state) -{ - unsigned index = reg->index; - - if (state->states[index].index == -1) { - /* - * We're using an undefined register, create a new undefined SSA value - * to preserve the information that this source is undefined - */ - nir_ssa_undef_instr *instr = - nir_ssa_undef_instr_create(state->mem_ctx, reg->num_components); - - /* - * We could just insert the undefined instruction before the instruction - * we're rewriting, but we could be rewriting a phi source in which case - * we can't do that, so do the next easiest thing - insert it at the - * beginning of the program. In the end, it doesn't really matter where - * the undefined instructions are because they're going to be ignored - * in the backend. 
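[Editor's note: a worked example of the dominance-frontier walk in insert_phi_nodes() above, with illustrative block numbers:

/*
 *    block_0:  r = ...; if (...)
 *    block_1:  r = ...;          (then branch)
 *    block_2:                    (else branch, r untouched)
 *    block_3:  use r;            (join)
 *
 * Blocks 0 and 1 define r, so W starts as {0, 1}.  block_0 dominates
 * everything, so its frontier is empty; block_3 is in the dominance
 * frontier of block_1 (and of block_2), so a trivial r = phi(r, r) is
 * inserted at block_3 and block_3 joins W.  block_3 contributes no new
 * frontier blocks, so insertion terminates.
 */
]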
- */
-      nir_instr_insert_before_cf_list(&state->impl->body, &instr->instr);
-      return &instr->def;
-   }
-
-   return state->states[index].stack[state->states[index].index];
-}
-
-static bool
-rewrite_use(nir_src *src, void *_state)
-{
-   rewrite_state *state = (rewrite_state *) _state;
-
-   if (src->is_ssa)
-      return true;
-
-   unsigned index = src->reg.reg->index;
-
-   if (state->states[index].stack == NULL)
-      return true;
-
-   nir_ssa_def *def = get_ssa_src(src->reg.reg, state);
-   if (state->parent_instr)
-      nir_instr_rewrite_src(state->parent_instr, src, nir_src_for_ssa(def));
-   else
-      nir_if_rewrite_condition(state->parent_if, nir_src_for_ssa(def));
-
-   return true;
-}
-
-static bool
-rewrite_def_forwards(nir_dest *dest, void *_state)
-{
-   rewrite_state *state = (rewrite_state *) _state;
-
-   if (dest->is_ssa)
-      return true;
-
-   nir_register *reg = dest->reg.reg;
-   unsigned index = reg->index;
-
-   if (state->states[index].stack == NULL)
-      return true;
-
-   char *name = NULL;
-   if (dest->reg.reg->name)
-      name = ralloc_asprintf(state->mem_ctx, "%s_%u", dest->reg.reg->name,
-                             state->states[index].num_defs);
-
-   list_del(&dest->reg.def_link);
-   nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name);
-
-   /* push our SSA destination on the stack */
-   state->states[index].index++;
-   assert(state->states[index].index < state->states[index].stack_size);
-   state->states[index].stack[state->states[index].index] = &dest->ssa;
-   state->states[index].num_defs++;
-
-   _mesa_hash_table_insert(state->ssa_map, &dest->ssa, reg);
-
-   return true;
-}
-
-static void
-rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state)
-{
-   state->parent_instr = &instr->instr;
-
-   nir_foreach_src(&instr->instr, rewrite_use, state);
-
-   if (instr->dest.dest.is_ssa)
-      return;
-
-   nir_register *reg = instr->dest.dest.reg.reg;
-   unsigned index = reg->index;
-
-   if (state->states[index].stack == NULL)
-      return;
-
-   unsigned write_mask = instr->dest.write_mask;
-   if (write_mask != (1 << instr->dest.dest.reg.reg->num_components) - 1) {
-      /*
-       * Calculate the number of components of the final instruction: for
-       * per-component operations it is the number of enabled channels in
-       * the write mask, and for non-per-component operations it is the
-       * number of output components of the instruction.
-       */
-      unsigned num_components;
-      if (nir_op_infos[instr->op].output_size == 0) {
-         unsigned temp = (write_mask & 0x5) + ((write_mask >> 1) & 0x5);
-         num_components = (temp & 0x3) + ((temp >> 2) & 0x3);
-      } else {
-         num_components = nir_op_infos[instr->op].output_size;
-      }
-
-      char *name = NULL;
-      if (instr->dest.dest.reg.reg->name)
-         name = ralloc_asprintf(state->mem_ctx, "%s_%u",
-                                reg->name, state->states[index].num_defs);
-
-      instr->dest.write_mask = (1 << num_components) - 1;
-      list_del(&instr->dest.dest.reg.def_link);
-      nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name);
-
-      if (nir_op_infos[instr->op].output_size == 0) {
-         /*
-          * When we change the output writemask, we need to change the
-          * swizzles for per-component inputs too
-          */
-         for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
-            if (nir_op_infos[instr->op].input_sizes[i] != 0)
-               continue;
-
-            unsigned new_swizzle[4] = {0, 0, 0, 0};
-
-            /*
-             * We keep two indices:
-             * 1. The index of the original (non-SSA) component
-             * 2. The index of the post-SSA, compacted, component
-             *
-             * We need to map the swizzle component at index 1 to the swizzle
-             * component at index 2.
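[Editor's note: the two-step pairwise popcount used above is compact enough to miss. Isolated for clarity as a standalone sketch; it is only valid for 4-bit write masks:

#include <stdio.h>

static unsigned
popcount4(unsigned write_mask)
{
   /* sum adjacent bit pairs, then sum the two pair-sums */
   unsigned temp = (write_mask & 0x5) + ((write_mask >> 1) & 0x5);
   return (temp & 0x3) + ((temp >> 2) & 0x3);
}

int main(void)
{
   printf("%u\n", popcount4(0xa)); /* 0b1010 has two bits set -> prints 2 */
   return 0;
}
]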
- */ - - unsigned ssa_index = 0; - for (unsigned index = 0; index < 4; index++) { - if (!((write_mask >> index) & 1)) - continue; - - new_swizzle[ssa_index] = instr->src[i].swizzle[index]; - ssa_index++; - } - - for (unsigned j = 0; j < 4; j++) - instr->src[i].swizzle[j] = new_swizzle[j]; - } - } - - nir_op op; - switch (reg->num_components) { - case 2: op = nir_op_vec2; break; - case 3: op = nir_op_vec3; break; - case 4: op = nir_op_vec4; break; - default: unreachable("not reached"); - } - - nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, op); - - vec->dest.dest.reg.reg = reg; - vec->dest.write_mask = (1 << reg->num_components) - 1; - - nir_ssa_def *old_src = get_ssa_src(reg, state); - nir_ssa_def *new_src = &instr->dest.dest.ssa; - - unsigned ssa_index = 0; - for (unsigned i = 0; i < reg->num_components; i++) { - vec->src[i].src.is_ssa = true; - if ((write_mask >> i) & 1) { - vec->src[i].src.ssa = new_src; - if (nir_op_infos[instr->op].output_size == 0) - vec->src[i].swizzle[0] = ssa_index; - else - vec->src[i].swizzle[0] = i; - ssa_index++; - } else { - vec->src[i].src.ssa = old_src; - vec->src[i].swizzle[0] = i; - } - } - - nir_instr_insert_after(&instr->instr, &vec->instr); - - state->parent_instr = &vec->instr; - rewrite_def_forwards(&vec->dest.dest, state); - } else { - rewrite_def_forwards(&instr->dest.dest, state); - } -} - -static void -rewrite_phi_instr(nir_phi_instr *instr, rewrite_state *state) -{ - state->parent_instr = &instr->instr; - rewrite_def_forwards(&instr->dest, state); -} - -static void -rewrite_instr_forward(nir_instr *instr, rewrite_state *state) -{ - if (instr->type == nir_instr_type_alu) { - rewrite_alu_instr_forward(nir_instr_as_alu(instr), state); - return; - } - - if (instr->type == nir_instr_type_phi) { - rewrite_phi_instr(nir_instr_as_phi(instr), state); - return; - } - - state->parent_instr = instr; - - nir_foreach_src(instr, rewrite_use, state); - nir_foreach_dest(instr, rewrite_def_forwards, state); -} - -static void -rewrite_phi_sources(nir_block *block, nir_block *pred, rewrite_state *state) -{ - nir_foreach_instr(block, instr) { - if (instr->type != nir_instr_type_phi) - break; - - nir_phi_instr *phi_instr = nir_instr_as_phi(instr); - - state->parent_instr = instr; - - nir_foreach_phi_src(phi_instr, src) { - if (src->pred == pred) { - rewrite_use(&src->src, state); - break; - } - } - } -} - -static bool -rewrite_def_backwards(nir_dest *dest, void *_state) -{ - rewrite_state *state = (rewrite_state *) _state; - - if (!dest->is_ssa) - return true; - - struct hash_entry *entry = - _mesa_hash_table_search(state->ssa_map, &dest->ssa); - - if (!entry) - return true; - - nir_register *reg = (nir_register *) entry->data; - unsigned index = reg->index; - - state->states[index].index--; - assert(state->states[index].index >= -1); - - return true; -} - -static void -rewrite_instr_backwards(nir_instr *instr, rewrite_state *state) -{ - nir_foreach_dest(instr, rewrite_def_backwards, state); -} - -static void -rewrite_block(nir_block *block, rewrite_state *state) -{ - /* This will skip over any instructions after the current one, which is - * what we want because those instructions (vector gather, conditional - * select) will already be in SSA form. 
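[Editor's note: a worked example of the swizzle compaction above. A write mask of 0b0101 keeps components 0 and 2; with an original source swizzle (w, w, y, y), the surviving entries swizzle[0] = w and swizzle[2] = y are packed into slots 0 and 1, giving the compacted swizzle (w, y, 0, 0).]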
- */ - nir_foreach_instr_safe(block, instr) { - rewrite_instr_forward(instr, state); - } - - if (block != state->impl->end_block && - !nir_cf_node_is_last(&block->cf_node) && - nir_cf_node_next(&block->cf_node)->type == nir_cf_node_if) { - nir_if *if_stmt = nir_cf_node_as_if(nir_cf_node_next(&block->cf_node)); - state->parent_instr = NULL; - state->parent_if = if_stmt; - rewrite_use(&if_stmt->condition, state); - } - - if (block->successors[0]) - rewrite_phi_sources(block->successors[0], block, state); - if (block->successors[1]) - rewrite_phi_sources(block->successors[1], block, state); - - for (unsigned i = 0; i < block->num_dom_children; i++) - rewrite_block(block->dom_children[i], state); - - nir_foreach_instr_reverse(block, instr) { - rewrite_instr_backwards(instr, state); - } -} - -static void -remove_unused_regs(nir_function_impl *impl, rewrite_state *state) -{ - foreach_list_typed_safe(nir_register, reg, node, &impl->registers) { - if (state->states[reg->index].stack != NULL) - exec_node_remove(&reg->node); - } -} - -static void -init_rewrite_state(nir_function_impl *impl, rewrite_state *state) -{ - state->impl = impl; - state->mem_ctx = ralloc_parent(impl); - state->ssa_map = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - state->states = ralloc_array(NULL, reg_state, impl->reg_alloc); - - foreach_list_typed(nir_register, reg, node, &impl->registers) { - assert(reg->index < impl->reg_alloc); - if (reg->num_array_elems > 0) { - state->states[reg->index].stack = NULL; - } else { - /* - * Calculate a conservative estimate of the stack size based on the - * number of definitions there are. Note that this function *must* be - * called after phi nodes are inserted so we can count phi node - * definitions too. - */ - unsigned stack_size = list_length(&reg->defs); - - state->states[reg->index].stack = ralloc_array(state->states, - nir_ssa_def *, - stack_size); -#ifndef NDEBUG - state->states[reg->index].stack_size = stack_size; -#endif - state->states[reg->index].index = -1; - state->states[reg->index].num_defs = 0; - } - } -} - -static void -destroy_rewrite_state(rewrite_state *state) -{ - _mesa_hash_table_destroy(state->ssa_map, NULL); - ralloc_free(state->states); -} - -void -nir_convert_to_ssa_impl(nir_function_impl *impl) -{ - nir_metadata_require(impl, nir_metadata_dominance); - - insert_phi_nodes(impl); - - rewrite_state state; - init_rewrite_state(impl, &state); - - rewrite_block(nir_start_block(impl), &state); - - remove_unused_regs(impl, &state); - - nir_metadata_preserve(impl, nir_metadata_block_index | - nir_metadata_dominance); - - destroy_rewrite_state(&state); -} - -void -nir_convert_to_ssa(nir_shader *shader) -{ - nir_foreach_function(shader, function) { - if (function->impl) - nir_convert_to_ssa_impl(function->impl); - } -} diff --git a/src/glsl/nir/nir_validate.c b/src/glsl/nir/nir_validate.c deleted file mode 100644 index e4db68db3c0..00000000000 --- a/src/glsl/nir/nir_validate.c +++ /dev/null @@ -1,1071 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and
this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Connor Abbott (cwabbott0@gmail.com) - * - */ - -#include "nir.h" -#include <assert.h> - -/* - * This file checks for invalid IR indicating a bug somewhere in the compiler. - */ - -/* Since this file is just a pile of asserts, don't bother compiling it if - * we're not building a debug build. - */ -#ifdef DEBUG - -/* - * Per-register validation state. - */ - -typedef struct { - /* - * equivalent to the uses and defs in nir_register, but built up by the - * validator. At the end, we verify that the sets have the same entries. - */ - struct set *uses, *if_uses, *defs; - nir_function_impl *where_defined; /* NULL for global registers */ -} reg_validate_state; - -typedef struct { - /* - * equivalent to the uses in nir_ssa_def, but built up by the validator. - * At the end, we verify that the sets have the same entries. - */ - struct set *uses, *if_uses; - nir_function_impl *where_defined; -} ssa_def_validate_state; - -typedef struct { - /* map of register -> validation state (struct above) */ - struct hash_table *regs; - - /* the current shader being validated */ - nir_shader *shader; - - /* the current instruction being validated */ - nir_instr *instr; - - /* the current basic block being validated */ - nir_block *block; - - /* the current if statement being validated */ - nir_if *if_stmt; - - /* the current loop being visited */ - nir_loop *loop; - - /* the parent of the current cf node being visited */ - nir_cf_node *parent_node; - - /* the current function implementation being validated */ - nir_function_impl *impl; - - /* map of SSA value -> function implementation where it is defined */ - struct hash_table *ssa_defs; - - /* bitset of ssa definitions we have found; used to check uniqueness */ - BITSET_WORD *ssa_defs_found; - - /* bitset of registers we have currently found; used to check uniqueness */ - BITSET_WORD *regs_found; - - /* map of local variable -> function implementation where it is defined */ - struct hash_table *var_defs; -} validate_state; - -static void validate_src(nir_src *src, validate_state *state); - -static void -validate_reg_src(nir_src *src, validate_state *state) -{ - assert(src->reg.reg != NULL); - - struct hash_entry *entry; - entry = _mesa_hash_table_search(state->regs, src->reg.reg); - assert(entry); - - reg_validate_state *reg_state = (reg_validate_state *) entry->data; - - if (state->instr) { - _mesa_set_add(reg_state->uses, src); - } else { - assert(state->if_stmt); - _mesa_set_add(reg_state->if_uses, src); - } - - if (!src->reg.reg->is_global) { - assert(reg_state->where_defined == state->impl && - "using a register declared in a different function"); - } - - assert((src->reg.reg->num_array_elems == 0 || - src->reg.base_offset < src->reg.reg->num_array_elems) && - "definitely out-of-bounds array access"); - - if (src->reg.indirect) { - assert(src->reg.reg->num_array_elems != 0); - assert((src->reg.indirect->is_ssa || -
src->reg.indirect->reg.indirect == NULL) && - "only one level of indirection allowed"); - validate_src(src->reg.indirect, state); - } -} - -static void -validate_ssa_src(nir_src *src, validate_state *state) -{ - assert(src->ssa != NULL); - - struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, src->ssa); - - assert(entry); - - ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; - - assert(def_state->where_defined == state->impl && - "using an SSA value defined in a different function"); - - if (state->instr) { - _mesa_set_add(def_state->uses, src); - } else { - assert(state->if_stmt); - _mesa_set_add(def_state->if_uses, src); - } - - /* TODO validate that the use is dominated by the definition */ -} - -static void -validate_src(nir_src *src, validate_state *state) -{ - if (state->instr) - assert(src->parent_instr == state->instr); - else - assert(src->parent_if == state->if_stmt); - - if (src->is_ssa) - validate_ssa_src(src, state); - else - validate_reg_src(src, state); -} - -static void -validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state) -{ - nir_alu_src *src = &instr->src[index]; - - unsigned num_components; - if (src->src.is_ssa) - num_components = src->src.ssa->num_components; - else { - if (src->src.reg.reg->is_packed) - num_components = 4; /* can't check anything */ - else - num_components = src->src.reg.reg->num_components; - } - for (unsigned i = 0; i < 4; i++) { - assert(src->swizzle[i] < 4); - - if (nir_alu_instr_channel_used(instr, index, i)) - assert(src->swizzle[i] < num_components); - } - - validate_src(&src->src, state); -} - -static void -validate_reg_dest(nir_reg_dest *dest, validate_state *state) -{ - assert(dest->reg != NULL); - - assert(dest->parent_instr == state->instr); - - struct hash_entry *entry2; - entry2 = _mesa_hash_table_search(state->regs, dest->reg); - - assert(entry2); - - reg_validate_state *reg_state = (reg_validate_state *) entry2->data; - _mesa_set_add(reg_state->defs, dest); - - if (!dest->reg->is_global) { - assert(reg_state->where_defined == state->impl && - "writing to a register declared in a different function"); - } - - assert((dest->reg->num_array_elems == 0 || - dest->base_offset < dest->reg->num_array_elems) && - "definitely out-of-bounds array access"); - - if (dest->indirect) { - assert(dest->reg->num_array_elems != 0); - assert((dest->indirect->is_ssa || dest->indirect->reg.indirect == NULL) && - "only one level of indirection allowed"); - validate_src(dest->indirect, state); - } -} - -static void -validate_ssa_def(nir_ssa_def *def, validate_state *state) -{ - assert(def->index < state->impl->ssa_alloc); - assert(!BITSET_TEST(state->ssa_defs_found, def->index)); - BITSET_SET(state->ssa_defs_found, def->index); - - assert(def->parent_instr == state->instr); - - assert(def->num_components <= 4); - - list_validate(&def->uses); - list_validate(&def->if_uses); - - ssa_def_validate_state *def_state = ralloc(state->ssa_defs, - ssa_def_validate_state); - def_state->where_defined = state->impl; - def_state->uses = _mesa_set_create(def_state, _mesa_hash_pointer, - _mesa_key_pointer_equal); - def_state->if_uses = _mesa_set_create(def_state, _mesa_hash_pointer, - _mesa_key_pointer_equal); - _mesa_hash_table_insert(state->ssa_defs, def, def_state); -} - -static void -validate_dest(nir_dest *dest, validate_state *state) -{ - if (dest->is_ssa) - validate_ssa_def(&dest->ssa, state); - else - validate_reg_dest(&dest->reg, state); -} - -static void -validate_alu_dest(nir_alu_dest *dest, 
validate_state *state) -{ - unsigned dest_size = - dest->dest.is_ssa ? dest->dest.ssa.num_components - : dest->dest.reg.reg->num_components; - bool is_packed = !dest->dest.is_ssa && dest->dest.reg.reg->is_packed; - /* - * validate that the instruction doesn't write to components not in the - * register/SSA value - */ - assert(is_packed || !(dest->write_mask & ~((1 << dest_size) - 1))); - - /* validate that saturate is only ever used on instructions with - * destinations of type float - */ - nir_alu_instr *alu = nir_instr_as_alu(state->instr); - assert(nir_op_infos[alu->op].output_type == nir_type_float || - !dest->saturate); - - validate_dest(&dest->dest, state); -} - -static void -validate_alu_instr(nir_alu_instr *instr, validate_state *state) -{ - assert(instr->op < nir_num_opcodes); - - for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) { - validate_alu_src(instr, i, state); - } - - validate_alu_dest(&instr->dest, state); -} - -static void -validate_deref_chain(nir_deref *deref, validate_state *state) -{ - assert(deref->child == NULL || ralloc_parent(deref->child) == deref); - - nir_deref *parent = NULL; - while (deref != NULL) { - switch (deref->deref_type) { - case nir_deref_type_array: - assert(deref->type == glsl_get_array_element(parent->type)); - if (nir_deref_as_array(deref)->deref_array_type == - nir_deref_array_type_indirect) - validate_src(&nir_deref_as_array(deref)->indirect, state); - break; - - case nir_deref_type_struct: - assert(deref->type == - glsl_get_struct_field(parent->type, - nir_deref_as_struct(deref)->index)); - break; - - case nir_deref_type_var: - break; - - default: - assert(!"Invalid deref type"); - break; - } - - parent = deref; - deref = deref->child; - } -} - -static void -validate_var_use(nir_variable *var, validate_state *state) -{ - if (var->data.mode == nir_var_local) { - struct hash_entry *entry = _mesa_hash_table_search(state->var_defs, var); - - assert(entry); - assert((nir_function_impl *) entry->data == state->impl); - } -} - -static void -validate_deref_var(void *parent_mem_ctx, nir_deref_var *deref, validate_state *state) -{ - assert(deref != NULL); - assert(ralloc_parent(deref) == parent_mem_ctx); - assert(deref->deref.type == deref->var->type); - - validate_var_use(deref->var, state); - - validate_deref_chain(&deref->deref, state); -} - -static void -validate_intrinsic_instr(nir_intrinsic_instr *instr, validate_state *state) -{ - unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; - for (unsigned i = 0; i < num_srcs; i++) { - unsigned components_read = - nir_intrinsic_infos[instr->intrinsic].src_components[i]; - if (components_read == 0) - components_read = instr->num_components; - - assert(components_read > 0); - - if (instr->src[i].is_ssa) { - assert(components_read <= instr->src[i].ssa->num_components); - } else if (!instr->src[i].reg.reg->is_packed) { - assert(components_read <= instr->src[i].reg.reg->num_components); - } - - validate_src(&instr->src[i], state); - } - - unsigned num_vars = nir_intrinsic_infos[instr->intrinsic].num_variables; - for (unsigned i = 0; i < num_vars; i++) { - validate_deref_var(instr, instr->variables[i], state); - } - - if (nir_intrinsic_infos[instr->intrinsic].has_dest) { - unsigned components_written = - nir_intrinsic_infos[instr->intrinsic].dest_components; - if (components_written == 0) - components_written = instr->num_components; - - assert(components_written > 0); - - if (instr->dest.is_ssa) { - assert(components_written <= instr->dest.ssa.num_components); - } else if 
(!instr->dest.reg.reg->is_packed) { - assert(components_written <= instr->dest.reg.reg->num_components); - } - - validate_dest(&instr->dest, state); - } - - switch (instr->intrinsic) { - case nir_intrinsic_load_var: { - const struct glsl_type *type = - nir_deref_tail(&instr->variables[0]->deref)->type; - assert(glsl_type_is_vector_or_scalar(type) || - (instr->variables[0]->var->data.mode == nir_var_uniform && - glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE)); - assert(instr->num_components == glsl_get_vector_elements(type)); - break; - } - case nir_intrinsic_store_var: { - const struct glsl_type *type = - nir_deref_tail(&instr->variables[0]->deref)->type; - assert(glsl_type_is_vector_or_scalar(type) || - (instr->variables[0]->var->data.mode == nir_var_uniform && - glsl_get_base_type(type) == GLSL_TYPE_SUBROUTINE)); - assert(instr->num_components == glsl_get_vector_elements(type)); - assert(instr->variables[0]->var->data.mode != nir_var_shader_in && - instr->variables[0]->var->data.mode != nir_var_uniform && - instr->variables[0]->var->data.mode != nir_var_shader_storage); - assert((instr->const_index[0] & ~((1 << instr->num_components) - 1)) == 0); - break; - } - case nir_intrinsic_copy_var: - assert(nir_deref_tail(&instr->variables[0]->deref)->type == - nir_deref_tail(&instr->variables[1]->deref)->type); - assert(instr->variables[0]->var->data.mode != nir_var_shader_in && - instr->variables[0]->var->data.mode != nir_var_uniform && - instr->variables[0]->var->data.mode != nir_var_shader_storage); - break; - default: - break; - } -} - -static void -validate_tex_instr(nir_tex_instr *instr, validate_state *state) -{ - bool src_type_seen[nir_num_tex_src_types]; - for (unsigned i = 0; i < nir_num_tex_src_types; i++) - src_type_seen[i] = false; - - for (unsigned i = 0; i < instr->num_srcs; i++) { - assert(!src_type_seen[instr->src[i].src_type]); - src_type_seen[instr->src[i].src_type] = true; - validate_src(&instr->src[i].src, state); - } - - if (instr->sampler != NULL) - validate_deref_var(instr, instr->sampler, state); - - validate_dest(&instr->dest, state); -} - -static void -validate_call_instr(nir_call_instr *instr, validate_state *state) -{ - if (instr->return_deref == NULL) - assert(glsl_type_is_void(instr->callee->return_type)); - else - assert(instr->return_deref->deref.type == instr->callee->return_type); - - assert(instr->num_params == instr->callee->num_params); - - for (unsigned i = 0; i < instr->num_params; i++) { - assert(instr->callee->params[i].type == instr->params[i]->deref.type); - validate_deref_var(instr, instr->params[i], state); - } - - validate_deref_var(instr, instr->return_deref, state); -} - -static void -validate_load_const_instr(nir_load_const_instr *instr, validate_state *state) -{ - validate_ssa_def(&instr->def, state); -} - -static void -validate_ssa_undef_instr(nir_ssa_undef_instr *instr, validate_state *state) -{ - validate_ssa_def(&instr->def, state); -} - -static void -validate_phi_instr(nir_phi_instr *instr, validate_state *state) -{ - /* - * don't validate the sources until we get to them from their predecessor - * basic blocks, to avoid validating an SSA use before its definition. 
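- * - * For example, a phi at the top of a loop body has a source coming - * through the back edge that is defined below the phi in program order; - * it only gets validated from validate_phi_srcs() once its predecessor - * block has been processed.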
- */ - - validate_dest(&instr->dest, state); - - exec_list_validate(&instr->srcs); - assert(exec_list_length(&instr->srcs) == - state->block->predecessors->entries); -} - -static void -validate_instr(nir_instr *instr, validate_state *state) -{ - assert(instr->block == state->block); - - state->instr = instr; - - switch (instr->type) { - case nir_instr_type_alu: - validate_alu_instr(nir_instr_as_alu(instr), state); - break; - - case nir_instr_type_call: - validate_call_instr(nir_instr_as_call(instr), state); - break; - - case nir_instr_type_intrinsic: - validate_intrinsic_instr(nir_instr_as_intrinsic(instr), state); - break; - - case nir_instr_type_tex: - validate_tex_instr(nir_instr_as_tex(instr), state); - break; - - case nir_instr_type_load_const: - validate_load_const_instr(nir_instr_as_load_const(instr), state); - break; - - case nir_instr_type_phi: - validate_phi_instr(nir_instr_as_phi(instr), state); - break; - - case nir_instr_type_ssa_undef: - validate_ssa_undef_instr(nir_instr_as_ssa_undef(instr), state); - break; - - case nir_instr_type_jump: - break; - - default: - assert(!"Invalid instruction type"); - break; - } - - state->instr = NULL; -} - -static void -validate_phi_src(nir_phi_instr *instr, nir_block *pred, validate_state *state) -{ - state->instr = &instr->instr; - - assert(instr->dest.is_ssa); - - exec_list_validate(&instr->srcs); - nir_foreach_phi_src(instr, src) { - if (src->pred == pred) { - assert(src->src.is_ssa); - assert(src->src.ssa->num_components == - instr->dest.ssa.num_components); - - validate_src(&src->src, state); - state->instr = NULL; - return; - } - } - - abort(); -} - -static void -validate_phi_srcs(nir_block *block, nir_block *succ, validate_state *state) -{ - nir_foreach_instr(succ, instr) { - if (instr->type != nir_instr_type_phi) - break; - - validate_phi_src(nir_instr_as_phi(instr), block, state); - } -} - -static void validate_cf_node(nir_cf_node *node, validate_state *state); - -static void -validate_block(nir_block *block, validate_state *state) -{ - assert(block->cf_node.parent == state->parent_node); - - state->block = block; - - exec_list_validate(&block->instr_list); - nir_foreach_instr(block, instr) { - if (instr->type == nir_instr_type_phi) { - assert(instr == nir_block_first_instr(block) || - nir_instr_prev(instr)->type == nir_instr_type_phi); - } - - if (instr->type == nir_instr_type_jump) { - assert(instr == nir_block_last_instr(block)); - } - - validate_instr(instr, state); - } - - assert(block->successors[0] != NULL); - assert(block->successors[0] != block->successors[1]); - - for (unsigned i = 0; i < 2; i++) { - if (block->successors[i] != NULL) { - struct set_entry *entry = - _mesa_set_search(block->successors[i]->predecessors, block); - assert(entry); - - validate_phi_srcs(block, block->successors[i], state); - } - } - - struct set_entry *entry; - set_foreach(block->predecessors, entry) { - const nir_block *pred = entry->key; - assert(pred->successors[0] == block || - pred->successors[1] == block); - } - - if (!exec_list_is_empty(&block->instr_list) && - nir_block_last_instr(block)->type == nir_instr_type_jump) { - assert(block->successors[1] == NULL); - nir_jump_instr *jump = nir_instr_as_jump(nir_block_last_instr(block)); - switch (jump->type) { - case nir_jump_break: { - nir_block *after = - nir_cf_node_as_block(nir_cf_node_next(&state->loop->cf_node)); - assert(block->successors[0] == after); - break; - } - - case nir_jump_continue: { - nir_block *first = - nir_cf_node_as_block(nir_loop_first_cf_node(state->loop)); -
assert(block->successors[0] == first); - break; - } - - case nir_jump_return: - assert(block->successors[0] == state->impl->end_block); - break; - - default: - unreachable("bad jump type"); - } - } else { - nir_cf_node *next = nir_cf_node_next(&block->cf_node); - if (next == NULL) { - switch (state->parent_node->type) { - case nir_cf_node_loop: { - nir_block *first = - nir_cf_node_as_block(nir_loop_first_cf_node(state->loop)); - assert(block->successors[0] == first); - /* due to the hack for infinite loops, block->successors[1] may - * point to the block after the loop. - */ - break; - } - - case nir_cf_node_if: { - nir_block *after = - nir_cf_node_as_block(nir_cf_node_next(state->parent_node)); - assert(block->successors[0] == after); - assert(block->successors[1] == NULL); - break; - } - - case nir_cf_node_function: - assert(block->successors[0] == state->impl->end_block); - assert(block->successors[1] == NULL); - break; - - default: - unreachable("unknown control flow node type"); - } - } else { - if (next->type == nir_cf_node_if) { - nir_if *if_stmt = nir_cf_node_as_if(next); - assert(&block->successors[0]->cf_node == - nir_if_first_then_node(if_stmt)); - assert(&block->successors[1]->cf_node == - nir_if_first_else_node(if_stmt)); - } else { - assert(next->type == nir_cf_node_loop); - nir_loop *loop = nir_cf_node_as_loop(next); - assert(&block->successors[0]->cf_node == - nir_loop_first_cf_node(loop)); - assert(block->successors[1] == NULL); - } - } - } -} - -static void -validate_if(nir_if *if_stmt, validate_state *state) -{ - state->if_stmt = if_stmt; - - assert(!exec_node_is_head_sentinel(if_stmt->cf_node.node.prev)); - nir_cf_node *prev_node = nir_cf_node_prev(&if_stmt->cf_node); - assert(prev_node->type == nir_cf_node_block); - - assert(!exec_node_is_tail_sentinel(if_stmt->cf_node.node.next)); - nir_cf_node *next_node = nir_cf_node_next(&if_stmt->cf_node); - assert(next_node->type == nir_cf_node_block); - - validate_src(&if_stmt->condition, state); - - assert(!exec_list_is_empty(&if_stmt->then_list)); - assert(!exec_list_is_empty(&if_stmt->else_list)); - - nir_cf_node *old_parent = state->parent_node; - state->parent_node = &if_stmt->cf_node; - - exec_list_validate(&if_stmt->then_list); - foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->then_list) { - validate_cf_node(cf_node, state); - } - - exec_list_validate(&if_stmt->else_list); - foreach_list_typed(nir_cf_node, cf_node, node, &if_stmt->else_list) { - validate_cf_node(cf_node, state); - } - - state->parent_node = old_parent; - state->if_stmt = NULL; -} - -static void -validate_loop(nir_loop *loop, validate_state *state) -{ - assert(!exec_node_is_head_sentinel(loop->cf_node.node.prev)); - nir_cf_node *prev_node = nir_cf_node_prev(&loop->cf_node); - assert(prev_node->type == nir_cf_node_block); - - assert(!exec_node_is_tail_sentinel(loop->cf_node.node.next)); - nir_cf_node *next_node = nir_cf_node_next(&loop->cf_node); - assert(next_node->type == nir_cf_node_block); - - assert(!exec_list_is_empty(&loop->body)); - - nir_cf_node *old_parent = state->parent_node; - state->parent_node = &loop->cf_node; - nir_loop *old_loop = state->loop; - state->loop = loop; - - exec_list_validate(&loop->body); - foreach_list_typed(nir_cf_node, cf_node, node, &loop->body) { - validate_cf_node(cf_node, state); - } - - state->parent_node = old_parent; - state->loop = old_loop; -} - -static void -validate_cf_node(nir_cf_node *node, validate_state *state) -{ - assert(node->parent == state->parent_node); - - switch (node->type) { - case 
nir_cf_node_block: - validate_block(nir_cf_node_as_block(node), state); - break; - - case nir_cf_node_if: - validate_if(nir_cf_node_as_if(node), state); - break; - - case nir_cf_node_loop: - validate_loop(nir_cf_node_as_loop(node), state); - break; - - default: - unreachable("Invalid CF node type"); - } -} - -static void -prevalidate_reg_decl(nir_register *reg, bool is_global, validate_state *state) -{ - assert(reg->is_global == is_global); - - if (is_global) - assert(reg->index < state->shader->reg_alloc); - else - assert(reg->index < state->impl->reg_alloc); - assert(!BITSET_TEST(state->regs_found, reg->index)); - BITSET_SET(state->regs_found, reg->index); - - list_validate(&reg->uses); - list_validate(&reg->defs); - list_validate(&reg->if_uses); - - reg_validate_state *reg_state = ralloc(state->regs, reg_validate_state); - reg_state->uses = _mesa_set_create(reg_state, _mesa_hash_pointer, - _mesa_key_pointer_equal); - reg_state->if_uses = _mesa_set_create(reg_state, _mesa_hash_pointer, - _mesa_key_pointer_equal); - reg_state->defs = _mesa_set_create(reg_state, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - reg_state->where_defined = is_global ? NULL : state->impl; - - _mesa_hash_table_insert(state->regs, reg, reg_state); -} - -static void -postvalidate_reg_decl(nir_register *reg, validate_state *state) -{ - struct hash_entry *entry = _mesa_hash_table_search(state->regs, reg); - - reg_validate_state *reg_state = (reg_validate_state *) entry->data; - - nir_foreach_use(reg, src) { - struct set_entry *entry = _mesa_set_search(reg_state->uses, src); - assert(entry); - _mesa_set_remove(reg_state->uses, entry); - } - - if (reg_state->uses->entries != 0) { - printf("extra entries in register uses:\n"); - struct set_entry *entry; - set_foreach(reg_state->uses, entry) - printf("%p\n", entry->key); - - abort(); - } - - nir_foreach_if_use(reg, src) { - struct set_entry *entry = _mesa_set_search(reg_state->if_uses, src); - assert(entry); - _mesa_set_remove(reg_state->if_uses, entry); - } - - if (reg_state->if_uses->entries != 0) { - printf("extra entries in register if_uses:\n"); - struct set_entry *entry; - set_foreach(reg_state->if_uses, entry) - printf("%p\n", entry->key); - - abort(); - } - - nir_foreach_def(reg, src) { - struct set_entry *entry = _mesa_set_search(reg_state->defs, src); - assert(entry); - _mesa_set_remove(reg_state->defs, entry); - } - - if (reg_state->defs->entries != 0) { - printf("extra entries in register defs:\n"); - struct set_entry *entry; - set_foreach(reg_state->defs, entry) - printf("%p\n", entry->key); - - abort(); - } -} - -static void -validate_var_decl(nir_variable *var, bool is_global, validate_state *state) -{ - assert(is_global != (var->data.mode == nir_var_local)); - - /* - * TODO validate some things ir_validate.cpp does (requires more GLSL type - * support) - */ - - if (!is_global) { - _mesa_hash_table_insert(state->var_defs, var, state->impl); - } -} - -static bool -postvalidate_ssa_def(nir_ssa_def *def, void *void_state) -{ - validate_state *state = void_state; - - struct hash_entry *entry = _mesa_hash_table_search(state->ssa_defs, def); - ssa_def_validate_state *def_state = (ssa_def_validate_state *)entry->data; - - nir_foreach_use(def, src) { - struct set_entry *entry = _mesa_set_search(def_state->uses, src); - assert(entry); - _mesa_set_remove(def_state->uses, entry); - } - - if (def_state->uses->entries != 0) { - printf("extra entries in SSA def uses:\n"); - struct set_entry *entry; - set_foreach(def_state->uses, entry) - printf("%p\n", entry->key); - -
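- /* Entries still left in the set were seen as uses while walking the - * IR but are missing from the def's use list; dump them before - * aborting. */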
abort(); - } - - nir_foreach_if_use(def, src) { - struct set_entry *entry = _mesa_set_search(def_state->if_uses, src); - assert(entry); - _mesa_set_remove(def_state->if_uses, entry); - } - - if (def_state->if_uses->entries != 0) { - printf("extra entries in SSA def if_uses:\n"); - struct set_entry *entry; - set_foreach(def_state->if_uses, entry) - printf("%p\n", entry->key); - - abort(); - } - - return true; -} - -static bool -postvalidate_ssa_defs_block(nir_block *block, void *state) -{ - nir_foreach_instr(block, instr) - nir_foreach_ssa_def(instr, postvalidate_ssa_def, state); - - return true; -} - -static void -validate_function_impl(nir_function_impl *impl, validate_state *state) -{ - assert(impl->function->impl == impl); - assert(impl->cf_node.parent == NULL); - - assert(impl->num_params == impl->function->num_params); - for (unsigned i = 0; i < impl->num_params; i++) - assert(impl->params[i]->type == impl->function->params[i].type); - - if (glsl_type_is_void(impl->function->return_type)) - assert(impl->return_var == NULL); - else - assert(impl->return_var->type == impl->function->return_type); - - assert(exec_list_is_empty(&impl->end_block->instr_list)); - assert(impl->end_block->successors[0] == NULL); - assert(impl->end_block->successors[1] == NULL); - - state->impl = impl; - state->parent_node = &impl->cf_node; - - exec_list_validate(&impl->locals); - nir_foreach_variable(var, &impl->locals) { - validate_var_decl(var, false, state); - } - - state->regs_found = realloc(state->regs_found, - BITSET_WORDS(impl->reg_alloc) * - sizeof(BITSET_WORD)); - memset(state->regs_found, 0, BITSET_WORDS(impl->reg_alloc) * - sizeof(BITSET_WORD)); - exec_list_validate(&impl->registers); - foreach_list_typed(nir_register, reg, node, &impl->registers) { - prevalidate_reg_decl(reg, false, state); - } - - state->ssa_defs_found = realloc(state->ssa_defs_found, - BITSET_WORDS(impl->ssa_alloc) * - sizeof(BITSET_WORD)); - memset(state->ssa_defs_found, 0, BITSET_WORDS(impl->ssa_alloc) * - sizeof(BITSET_WORD)); - exec_list_validate(&impl->body); - foreach_list_typed(nir_cf_node, node, node, &impl->body) { - validate_cf_node(node, state); - } - - foreach_list_typed(nir_register, reg, node, &impl->registers) { - postvalidate_reg_decl(reg, state); - } - - nir_foreach_block(impl, postvalidate_ssa_defs_block, state); -} - -static void -validate_function(nir_function *func, validate_state *state) -{ - if (func->impl != NULL) { - assert(func->impl->function == func); - validate_function_impl(func->impl, state); - } -} - -static void -init_validate_state(validate_state *state) -{ - state->regs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - state->ssa_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - state->ssa_defs_found = NULL; - state->regs_found = NULL; - state->var_defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - state->loop = NULL; -} - -static void -destroy_validate_state(validate_state *state) -{ - _mesa_hash_table_destroy(state->regs, NULL); - _mesa_hash_table_destroy(state->ssa_defs, NULL); - free(state->ssa_defs_found); - free(state->regs_found); - _mesa_hash_table_destroy(state->var_defs, NULL); -} - -void -nir_validate_shader(nir_shader *shader) -{ - validate_state state; - init_validate_state(&state); - - state.shader = shader; - - exec_list_validate(&shader->uniforms); - nir_foreach_variable(var, &shader->uniforms) { - validate_var_decl(var, true, &state); - } - -
exec_list_validate(&shader->inputs); - nir_foreach_variable(var, &shader->inputs) { - validate_var_decl(var, true, &state); - } - - exec_list_validate(&shader->outputs); - nir_foreach_variable(var, &shader->outputs) { - validate_var_decl(var, true, &state); - } - - exec_list_validate(&shader->globals); - nir_foreach_variable(var, &shader->globals) { - validate_var_decl(var, true, &state); - } - - exec_list_validate(&shader->system_values); - nir_foreach_variable(var, &shader->system_values) { - validate_var_decl(var, true, &state); - } - - state.regs_found = realloc(state.regs_found, - BITSET_WORDS(shader->reg_alloc) * - sizeof(BITSET_WORD)); - memset(state.regs_found, 0, BITSET_WORDS(shader->reg_alloc) * - sizeof(BITSET_WORD)); - exec_list_validate(&shader->registers); - foreach_list_typed(nir_register, reg, node, &shader->registers) { - prevalidate_reg_decl(reg, true, &state); - } - - exec_list_validate(&shader->functions); - foreach_list_typed(nir_function, func, node, &shader->functions) { - validate_function(func, &state); - } - - foreach_list_typed(nir_register, reg, node, &shader->registers) { - postvalidate_reg_decl(reg, &state); - } - - destroy_validate_state(&state); -} - -#endif /* DEBUG */ diff --git a/src/glsl/nir/nir_vla.h b/src/glsl/nir/nir_vla.h deleted file mode 100644 index 753783316a2..00000000000 --- a/src/glsl/nir/nir_vla.h +++ /dev/null @@ -1,54 +0,0 @@ -/************************************************************************** - * - * Copyright 2015 VMware, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#pragma once - - -#include "c99_alloca.h" - - -/* Declare a variable length array, with no initialization */ -#define NIR_VLA(_type, _name, _length) \ - _type *_name = alloca((_length) * sizeof *_name) - - -/* Declare a variable length array, and initialize it with the given byte. - * - * _length is evaluated twice, so expressions with side-effects must be - * avoided. - */ -#define NIR_VLA_FILL(_type, _name, _length, _byte) \ - _type *_name = memset(alloca((_length) * sizeof *_name), _byte, (_length) * sizeof *_name) - - -/* Declare a variable length array, and zero it. - * - * Just like NIR_VLA_FILL, _length is evaluated twice, so expressions with - * side-effects must be avoided.
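- * - * Illustrative usage (not from the original header): - * NIR_VLA_ZERO(nir_block *, preds, n); - * declares nir_block **preds pointing at n zeroed pointers on the - * stack, valid until the enclosing function returns.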
- */ -#define NIR_VLA_ZERO(_type, _name, _length) \ - NIR_VLA_FILL(_type, _name, _length, 0) diff --git a/src/glsl/nir/nir_worklist.c b/src/glsl/nir/nir_worklist.c deleted file mode 100644 index 3087a1d2354..00000000000 --- a/src/glsl/nir/nir_worklist.c +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#include "nir_worklist.h" - -void -nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks, - void *mem_ctx) -{ - w->size = num_blocks; - w->count = 0; - w->start = 0; - - w->blocks_present = rzalloc_array(mem_ctx, BITSET_WORD, - BITSET_WORDS(num_blocks)); - w->blocks = ralloc_array(mem_ctx, nir_block *, num_blocks); -} - -void -nir_block_worklist_fini(nir_block_worklist *w) -{ - ralloc_free(w->blocks_present); - ralloc_free(w->blocks); -} - -static bool -worklist_add_block(nir_block *block, void *w) -{ - nir_block_worklist_push_tail(w, block); - - return true; -} - -void -nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl) -{ - nir_foreach_block(impl, worklist_add_block, w); -} - -void -nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block) -{ - /* Pushing a block we already have is a no-op */ - if (BITSET_TEST(w->blocks_present, block->index)) - return; - - assert(w->count < w->size); - - if (w->start == 0) - w->start = w->size - 1; - else - w->start--; - - w->count++; - - w->blocks[w->start] = block; - BITSET_SET(w->blocks_present, block->index); -} - -nir_block * -nir_block_worklist_peek_head(const nir_block_worklist *w) -{ - assert(w->count > 0); - - return w->blocks[w->start]; -} - -nir_block * -nir_block_worklist_pop_head(nir_block_worklist *w) -{ - assert(w->count > 0); - - unsigned head = w->start; - - w->start = (w->start + 1) % w->size; - w->count--; - - BITSET_CLEAR(w->blocks_present, w->blocks[head]->index); - return w->blocks[head]; -} - -void -nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block) -{ - /* Pushing a block we already have is a no-op */ - if (BITSET_TEST(w->blocks_present, block->index)) - return; - - assert(w->count < w->size); - - w->count++; - - unsigned tail = (w->start + w->count - 1) % w->size; - - w->blocks[tail] = block; - BITSET_SET(w->blocks_present, block->index); -} - -nir_block * -nir_block_worklist_peek_tail(const nir_block_worklist *w) -{ - assert(w->count > 0); - - unsigned tail = 
(w->start + w->count - 1) % w->size; - - return w->blocks[tail]; -} - -nir_block * -nir_block_worklist_pop_tail(nir_block_worklist *w) -{ - assert(w->count > 0); - - unsigned tail = (w->start + w->count - 1) % w->size; - - w->count--; - - BITSET_CLEAR(w->blocks_present, w->blocks[tail]->index); - return w->blocks[tail]; -} diff --git a/src/glsl/nir/nir_worklist.h b/src/glsl/nir/nir_worklist.h deleted file mode 100644 index 829bff24a55..00000000000 --- a/src/glsl/nir/nir_worklist.h +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - * - * Authors: - * Jason Ekstrand (jason@jlekstrand.net) - * - */ - -#pragma once - -#ifndef _NIR_WORKLIST_ -#define _NIR_WORKLIST_ - -#include "nir.h" - -#ifdef __cplusplus extern "C" { -#endif - -/** Represents a double-ended queue of unique blocks - * - * The worklist data structure guarantees that each block is in the queue at - * most once. Pushing a block onto either end of the queue is a no-op if - * the block is already in the queue. In order for this to work, the - * caller must ensure that the blocks are properly indexed.
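- * - * A minimal usage sketch (illustrative only, assuming block indices are - * up to date, e.g. after nir_index_blocks()): - * - * nir_block_worklist w; - * nir_block_worklist_init(&w, impl->num_blocks, mem_ctx); - * nir_block_worklist_add_all(&w, impl); - * while (!nir_block_worklist_is_empty(&w)) { - * nir_block *block = nir_block_worklist_pop_head(&w); - * // ... process block, re-pushing blocks whose data changed ... - * } - * nir_block_worklist_fini(&w);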
- */ -typedef struct { - /* The total size of the worklist */ - unsigned size; - - /* The number of blocks currently in the worklist */ - unsigned count; - - /* The offset in the array of blocks at which the list starts */ - unsigned start; - - /* A bitset of all of the blocks currently present in the worklist */ - BITSET_WORD *blocks_present; - - /* The actual worklist */ - nir_block **blocks; -} nir_block_worklist; - -void nir_block_worklist_init(nir_block_worklist *w, unsigned num_blocks, - void *mem_ctx); -void nir_block_worklist_fini(nir_block_worklist *w); - -void nir_block_worklist_add_all(nir_block_worklist *w, nir_function_impl *impl); - -static inline bool -nir_block_worklist_is_empty(const nir_block_worklist *w) -{ - return w->count == 0; -} - -void nir_block_worklist_push_head(nir_block_worklist *w, nir_block *block); - -nir_block *nir_block_worklist_peek_head(const nir_block_worklist *w); - -nir_block *nir_block_worklist_pop_head(nir_block_worklist *w); - -void nir_block_worklist_push_tail(nir_block_worklist *w, nir_block *block); - -nir_block *nir_block_worklist_peek_tail(const nir_block_worklist *w); - -nir_block *nir_block_worklist_pop_tail(nir_block_worklist *w); - -#ifdef __cplusplus } /* extern "C" */ -#endif - -#endif /* _NIR_WORKLIST_ */ diff --git a/src/glsl/nir/tests/control_flow_tests.cpp b/src/glsl/nir/tests/control_flow_tests.cpp deleted file mode 100644 index b9379ef3b06..00000000000 --- a/src/glsl/nir/tests/control_flow_tests.cpp +++ /dev/null @@ -1,148 +0,0 @@ -/* - * Copyright © 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ -#include <gtest/gtest.h> -#include "nir.h" -#include "nir_builder.h" - -class nir_cf_test : public ::testing::Test { -protected: - nir_cf_test(); - ~nir_cf_test(); - - nir_builder b; -}; - -nir_cf_test::nir_cf_test() -{ - static const nir_shader_compiler_options options = { }; - nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, &options); -} - -nir_cf_test::~nir_cf_test() -{ - ralloc_free(b.shader); -} - -TEST_F(nir_cf_test, delete_break_in_loop) -{ - /* Create IR: - * - * while (...)
{ break; } - */ - nir_loop *loop = nir_loop_create(b.shader); - nir_cf_node_insert(nir_after_cf_list(&b.impl->body), &loop->cf_node); - - b.cursor = nir_after_cf_list(&loop->body); - - nir_jump_instr *jump = nir_jump_instr_create(b.shader, nir_jump_break); - nir_builder_instr_insert(&b, &jump->instr); - - /* At this point, we should have: - * - * impl main { - * block block_0: - * // preds: - * // succs: block_1 - * loop { - * block block_1: - * // preds: block_0 - * break - * // succs: block_2 - * } - * block block_2: - * // preds: block_1 - * // succs: block_3 - * block block_3: - * } - */ - nir_block *block_0 = nir_start_block(b.impl); - nir_block *block_1 = nir_cf_node_as_block(nir_loop_first_cf_node(loop)); - nir_block *block_2 = nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); - nir_block *block_3 = b.impl->end_block; - ASSERT_EQ(nir_cf_node_block, block_0->cf_node.type); - ASSERT_EQ(nir_cf_node_block, block_1->cf_node.type); - ASSERT_EQ(nir_cf_node_block, block_2->cf_node.type); - ASSERT_EQ(nir_cf_node_block, block_3->cf_node.type); - - /* Verify the successors and predecessors. */ - EXPECT_EQ(block_1, block_0->successors[0]); - EXPECT_EQ(NULL, block_0->successors[1]); - EXPECT_EQ(block_2, block_1->successors[0]); - EXPECT_EQ(NULL, block_1->successors[1]); - EXPECT_EQ(block_3, block_2->successors[0]); - EXPECT_EQ(NULL, block_2->successors[1]); - EXPECT_EQ(NULL, block_3->successors[0]); - EXPECT_EQ(NULL, block_3->successors[1]); - EXPECT_EQ(0, block_0->predecessors->entries); - EXPECT_EQ(1, block_1->predecessors->entries); - EXPECT_EQ(1, block_2->predecessors->entries); - EXPECT_EQ(1, block_3->predecessors->entries); - EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0)); - EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1)); - EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2)); - - nir_print_shader(b.shader, stderr); - - /* Now remove the break. */ - nir_instr_remove(&jump->instr); - - nir_print_shader(b.shader, stderr); - - /* At this point, we should have: - * - * impl main { - * block block_0: - * // preds: - * // succs: block_1 - * loop { - * block block_1: - * // preds: block_0 block_1 - * // succs: block_1 - * } - * block block_2: - * // preds: block_1 - * // succs: block_3 - * block block_3: - * } - * - * Re-verify the predecessors and successors. 
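- * - * Note that block_1 keeps block_2 as a second, "fake" successor below: - * as noted in validate_block() earlier in this patch ("the hack for - * infinite loops"), an infinite loop gets an edge to the block after - * the loop so that dominance information stays well-formed.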
- */ - EXPECT_EQ(block_1, block_0->successors[0]); - EXPECT_EQ(NULL, block_0->successors[1]); - EXPECT_EQ(block_1, block_1->successors[0]); /* back to itself */ - EXPECT_EQ(block_2, block_1->successors[1]); /* fake successor */ - EXPECT_EQ(block_3, block_2->successors[0]); - EXPECT_EQ(NULL, block_2->successors[1]); - EXPECT_EQ(NULL, block_3->successors[0]); - EXPECT_EQ(NULL, block_3->successors[1]); - EXPECT_EQ(0, block_0->predecessors->entries); - EXPECT_EQ(2, block_1->predecessors->entries); - EXPECT_EQ(1, block_2->predecessors->entries); - EXPECT_EQ(1, block_3->predecessors->entries); - EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_0)); - EXPECT_TRUE(_mesa_set_search(block_1->predecessors, block_1)); - EXPECT_TRUE(_mesa_set_search(block_2->predecessors, block_1)); - EXPECT_TRUE(_mesa_set_search(block_3->predecessors, block_2)); - - nir_metadata_require(b.impl, nir_metadata_dominance); -} diff --git a/src/mesa/Android.libmesa_dricore.mk b/src/mesa/Android.libmesa_dricore.mk index cd31e148222..ba952da4ad4 100644 --- a/src/mesa/Android.libmesa_dricore.mk +++ b/src/mesa/Android.libmesa_dricore.mk @@ -60,7 +60,7 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa/main \ $(MESA_TOP)/src/glsl \ - $(MESA_TOP)/src/glsl/nir \ + $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary diff --git a/src/mesa/Android.libmesa_glsl_utils.mk b/src/mesa/Android.libmesa_glsl_utils.mk index 9e150eaa3c0..894ae20a239 100644 --- a/src/mesa/Android.libmesa_glsl_utils.mk +++ b/src/mesa/Android.libmesa_glsl_utils.mk @@ -37,7 +37,7 @@ LOCAL_MODULE := libmesa_glsl_utils LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/glsl \ - $(MESA_TOP)/src/glsl/nir \ + $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary @@ -63,7 +63,7 @@ LOCAL_CFLAGS := -D_POSIX_C_SOURCE=199309L LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/glsl \ - $(MESA_TOP)/src/glsl/nir \ + $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/gallium/include \ $(MESA_TOP)/src/gallium/auxiliary diff --git a/src/mesa/Android.libmesa_st_mesa.mk b/src/mesa/Android.libmesa_st_mesa.mk index 427a35f4f6e..ee8887b73ec 100644 --- a/src/mesa/Android.libmesa_st_mesa.mk +++ b/src/mesa/Android.libmesa_st_mesa.mk @@ -55,7 +55,7 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa/main \ $(MESA_TOP)/src/glsl \ - $(MESA_TOP)/src/glsl/nir \ + $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/gallium/auxiliary \ $(MESA_TOP)/src/gallium/include diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources index 13a05f53b22..386d6aa3815 100644 --- a/src/mesa/Makefile.sources +++ b/src/mesa/Makefile.sources @@ -624,9 +624,8 @@ INCLUDE_DIRS = \ -I$(top_srcdir)/include \ -I$(top_srcdir)/src \ -I$(top_srcdir)/src/glsl \ - -I$(top_srcdir)/src/glsl/nir \ + -I$(top_builddir)/src/compiler/nir \ -I$(top_builddir)/src/glsl \ - -I$(top_builddir)/src/glsl/nir \ -I$(top_srcdir)/src/glsl/glcpp \ -I$(top_srcdir)/src/mesa \ -I$(top_builddir)/src/mesa \ diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index ad687bf59a4..0db5a51e725 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -33,8 +33,7 @@ AM_CFLAGS = \ -I$(top_srcdir)/src/mesa/drivers/dri/common \ -I$(top_srcdir)/src/mesa/drivers/dri/intel/server \ -I$(top_srcdir)/src/gtest/include \ - -I$(top_srcdir)/src/glsl/nir \ - -I$(top_builddir)/src/glsl/nir \ + -I$(top_builddir)/src/compiler/nir \ 
-I$(top_builddir)/src/mesa/drivers/dri/common \ $(DEFINES) \ $(VISIBILITY_CFLAGS) \ diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index 9a54c2dd0be..b75ebfedb3d 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -31,7 +31,7 @@ #include "brw_ir_fs.h" #include "brw_fs_builder.h" #include "glsl/ir.h" -#include "glsl/nir/nir.h" +#include "compiler/nir/nir.h" struct bblock_t; namespace { diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 935529a6003..287f935d539 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -23,8 +23,8 @@ #include "brw_nir.h" #include "brw_shader.h" -#include "glsl/nir/glsl_to_nir.h" -#include "glsl/nir/nir_builder.h" +#include "compiler/nir/glsl_to_nir.h" +#include "compiler/nir/nir_builder.h" #include "program/prog_to_nir.h" static bool diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 78b139b991d..079d8b25174 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -25,7 +25,7 @@ #include "brw_context.h" #include "brw_reg.h" -#include "glsl/nir/nir.h" +#include "compiler/nir/nir.h" #ifdef __cplusplus extern "C" { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index ddfd87d31c9..4e00fd0ec0d 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -32,7 +32,7 @@ #endif #include "glsl/ir.h" -#include "glsl/nir/nir.h" +#include "compiler/nir/nir.h" #ifdef __cplusplus diff --git a/src/mesa/drivers/dri/i965/intel_asm_annotation.c b/src/mesa/drivers/dri/i965/intel_asm_annotation.c index 59cc863268f..459ea60690a 100644 --- a/src/mesa/drivers/dri/i965/intel_asm_annotation.c +++ b/src/mesa/drivers/dri/i965/intel_asm_annotation.c @@ -25,7 +25,7 @@ #include "brw_eu.h" #include "intel_debug.h" #include "intel_asm_annotation.h" -#include "glsl/nir/nir.h" +#include "compiler/nir/nir.h" void dump_assembly(void *assembly, int num_annotations, struct annotation *annotation, diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c index bca783aea86..ee7c1d7bc2c 100644 --- a/src/mesa/drivers/dri/i965/intel_screen.c +++ b/src/mesa/drivers/dri/i965/intel_screen.c @@ -36,7 +36,7 @@ #include "swrast/s_renderbuffer.h" #include "util/ralloc.h" #include "brw_shader.h" -#include "glsl/nir/nir.h" +#include "compiler/nir/nir.h" #include "utils.h" #include "xmlpool.h" diff --git a/src/mesa/program/Android.mk b/src/mesa/program/Android.mk index cc67f8aeadd..0bc97a6b3d4 100644 --- a/src/mesa/program/Android.mk +++ b/src/mesa/program/Android.mk @@ -75,7 +75,7 @@ LOCAL_C_INCLUDES := \ $(MESA_TOP)/src/mapi \ $(MESA_TOP)/src/mesa \ $(MESA_TOP)/src/glsl \ - $(MESA_TOP)/src/glsl/nir \ + $(MESA_TOP)/src/compiler/nir \ $(MESA_TOP)/src/gallium/auxiliary \ $(MESA_TOP)/src/gallium/include diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index ce6f6997d2f..a2ebb1ba77c 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -23,8 +23,8 @@ * IN THE SOFTWARE. 
*/ -#include "nir/nir.h" -#include "nir/nir_builder.h" +#include "compiler/nir/nir.h" +#include "compiler/nir/nir_builder.h" #include "glsl/list.h" #include "main/imports.h" #include "util/ralloc.h" -- cgit v1.2.3 From eb63640c1d38a200a7b1540405051d3ff79d0d8a Mon Sep 17 00:00:00 2001 From: Emil Velikov Date: Mon, 18 Jan 2016 12:16:48 +0200 Subject: glsl: move to compiler/ Signed-off-by: Emil Velikov Acked-by: Matt Turner Acked-by: Jose Fonseca --- configure.ac | 1 - src/Makefile.am | 2 +- src/SConscript | 1 - src/compiler/Makefile.am | 201 +- src/compiler/Makefile.sources | 147 + src/compiler/SConscript | 2 + src/compiler/glsl/.gitignore | 10 + src/compiler/glsl/Android.gen.mk | 76 + src/compiler/glsl/Android.mk | 76 + src/compiler/glsl/Makefile.am | 228 + src/compiler/glsl/Makefile.sources | 222 + src/compiler/glsl/README | 228 + src/compiler/glsl/SConscript | 122 + src/compiler/glsl/TODO | 12 + src/compiler/glsl/ast.h | 1204 ++++ src/compiler/glsl/ast_array_index.cpp | 333 + src/compiler/glsl/ast_expr.cpp | 95 + src/compiler/glsl/ast_function.cpp | 2098 ++++++ src/compiler/glsl/ast_to_hir.cpp | 7583 ++++++++++++++++++++ src/compiler/glsl/ast_type.cpp | 548 ++ src/compiler/glsl/blob.c | 323 + src/compiler/glsl/blob.h | 289 + src/compiler/glsl/builtin_functions.cpp | 5502 ++++++++++++++ src/compiler/glsl/builtin_types.cpp | 394 + src/compiler/glsl/builtin_variables.cpp | 1394 ++++ src/compiler/glsl/glcpp/.gitignore | 6 + src/compiler/glsl/glcpp/README | 30 + src/compiler/glsl/glcpp/glcpp-lex.l | 577 ++ src/compiler/glsl/glcpp/glcpp-parse.y | 2557 +++++++ src/compiler/glsl/glcpp/glcpp.c | 182 + src/compiler/glsl/glcpp/glcpp.h | 251 + src/compiler/glsl/glcpp/pp.c | 241 + src/compiler/glsl/glcpp/tests/.gitignore | 4 + .../glsl/glcpp/tests/000-content-with-spaces.c | 1 + .../glcpp/tests/000-content-with-spaces.c.expected | 1 + src/compiler/glsl/glcpp/tests/001-define.c | 2 + .../glsl/glcpp/tests/001-define.c.expected | 2 + src/compiler/glsl/glcpp/tests/002-define-chain.c | 3 + .../glsl/glcpp/tests/002-define-chain.c.expected | 3 + .../glsl/glcpp/tests/003-define-chain-reverse.c | 3 + .../tests/003-define-chain-reverse.c.expected | 3 + .../glsl/glcpp/tests/004-define-recursive.c | 6 + .../glcpp/tests/004-define-recursive.c.expected | 6 + .../glsl/glcpp/tests/005-define-composite-chain.c | 3 + .../tests/005-define-composite-chain.c.expected | 3 + .../tests/006-define-composite-chain-reverse.c | 3 + .../006-define-composite-chain-reverse.c.expected | 3 + .../glcpp/tests/007-define-composite-recursive.c | 6 + .../007-define-composite-recursive.c.expected | 6 + src/compiler/glsl/glcpp/tests/008-define-empty.c | 2 + .../glsl/glcpp/tests/008-define-empty.c.expected | 2 + src/compiler/glsl/glcpp/tests/009-undef.c | 4 + src/compiler/glsl/glcpp/tests/009-undef.c.expected | 4 + .../glsl/glcpp/tests/010-undef-re-define.c | 6 + .../glcpp/tests/010-undef-re-define.c.expected | 6 + .../glsl/glcpp/tests/011-define-func-empty.c | 2 + .../glcpp/tests/011-define-func-empty.c.expected | 2 + .../glsl/glcpp/tests/012-define-func-no-args.c | 2 + .../glcpp/tests/012-define-func-no-args.c.expected | 2 + .../glcpp/tests/013-define-func-1-arg-unused.c | 2 + .../tests/013-define-func-1-arg-unused.c.expected | 2 + .../glcpp/tests/014-define-func-2-arg-unused.c | 2 + .../tests/014-define-func-2-arg-unused.c.expected | 2 + .../glcpp/tests/015-define-object-with-parens.c | 4 + .../tests/015-define-object-with-parens.c.expected | 4 + .../glsl/glcpp/tests/016-define-func-1-arg.c | 2 + 
.../glcpp/tests/016-define-func-1-arg.c.expected | 2 + .../glsl/glcpp/tests/017-define-func-2-args.c | 2 + .../glcpp/tests/017-define-func-2-args.c.expected | 2 + .../tests/018-define-func-macro-as-parameter.c | 3 + .../018-define-func-macro-as-parameter.c.expected | 3 + .../glsl/glcpp/tests/019-define-func-1-arg-multi.c | 2 + .../tests/019-define-func-1-arg-multi.c.expected | 2 + .../glsl/glcpp/tests/020-define-func-2-arg-multi.c | 2 + .../tests/020-define-func-2-arg-multi.c.expected | 2 + .../glsl/glcpp/tests/021-define-func-compose.c | 3 + .../glcpp/tests/021-define-func-compose.c.expected | 3 + .../glcpp/tests/022-define-func-arg-with-parens.c | 2 + .../022-define-func-arg-with-parens.c.expected | 2 + .../glsl/glcpp/tests/023-define-extra-whitespace.c | 8 + .../tests/023-define-extra-whitespace.c.expected | 8 + .../tests/024-define-chain-to-self-recursion.c | 3 + .../024-define-chain-to-self-recursion.c.expected | 3 + .../glsl/glcpp/tests/025-func-macro-as-non-macro.c | 2 + .../tests/025-func-macro-as-non-macro.c.expected | 2 + .../glcpp/tests/026-define-func-extra-newlines.c | 6 + .../026-define-func-extra-newlines.c.expected | 3 + .../glcpp/tests/027-define-chain-obj-to-func.c | 3 + .../tests/027-define-chain-obj-to-func.c.expected | 3 + .../glcpp/tests/028-define-chain-obj-to-non-func.c | 3 + .../028-define-chain-obj-to-non-func.c.expected | 3 + .../tests/029-define-chain-obj-to-func-with-args.c | 3 + ...9-define-chain-obj-to-func-with-args.c.expected | 3 + .../tests/030-define-chain-obj-to-func-compose.c | 4 + ...030-define-chain-obj-to-func-compose.c.expected | 4 + .../tests/031-define-chain-func-to-func-compose.c | 4 + ...31-define-chain-func-to-func-compose.c.expected | 4 + .../glcpp/tests/032-define-func-self-recurse.c | 2 + .../tests/032-define-func-self-recurse.c.expected | 2 + .../glcpp/tests/033-define-func-self-compose.c | 2 + .../tests/033-define-func-self-compose.c.expected | 2 + .../tests/034-define-func-self-compose-non-func.c | 2 + ...34-define-func-self-compose-non-func.c.expected | 2 + ...nc-self-compose-non-func-multi-token-argument.c | 2 + ...ompose-non-func-multi-token-argument.c.expected | 2 + ...36-define-func-non-macro-multi-token-argument.c | 3 + ...-func-non-macro-multi-token-argument.c.expected | 3 + .../glcpp/tests/037-finalize-unexpanded-macro.c | 3 + .../tests/037-finalize-unexpanded-macro.c.expected | 3 + .../glsl/glcpp/tests/038-func-arg-with-commas.c | 2 + .../tests/038-func-arg-with-commas.c.expected | 2 + .../tests/039-func-arg-obj-macro-with-comma.c | 24 + .../039-func-arg-obj-macro-with-comma.c.expected | 26 + src/compiler/glsl/glcpp/tests/040-token-pasting.c | 2 + .../glsl/glcpp/tests/040-token-pasting.c.expected | 2 + src/compiler/glsl/glcpp/tests/041-if-0.c | 5 + src/compiler/glsl/glcpp/tests/041-if-0.c.expected | 5 + src/compiler/glsl/glcpp/tests/042-if-1.c | 5 + src/compiler/glsl/glcpp/tests/042-if-1.c.expected | 5 + src/compiler/glsl/glcpp/tests/043-if-0-else.c | 7 + .../glsl/glcpp/tests/043-if-0-else.c.expected | 7 + src/compiler/glsl/glcpp/tests/044-if-1-else.c | 7 + .../glsl/glcpp/tests/044-if-1-else.c.expected | 7 + src/compiler/glsl/glcpp/tests/045-if-0-elif.c | 11 + .../glsl/glcpp/tests/045-if-0-elif.c.expected | 11 + src/compiler/glsl/glcpp/tests/046-if-1-elsif.c | 11 + .../glsl/glcpp/tests/046-if-1-elsif.c.expected | 11 + src/compiler/glsl/glcpp/tests/047-if-elif-else.c | 11 + .../glsl/glcpp/tests/047-if-elif-else.c.expected | 11 + src/compiler/glsl/glcpp/tests/048-if-nested.c | 11 + .../glsl/glcpp/tests/048-if-nested.c.expected | 11 
+ .../glcpp/tests/049-if-expression-precedence.c | 5 + .../tests/049-if-expression-precedence.c.expected | 5 + src/compiler/glsl/glcpp/tests/050-if-defined.c | 17 + .../glsl/glcpp/tests/050-if-defined.c.expected | 17 + src/compiler/glsl/glcpp/tests/051-if-relational.c | 35 + .../glsl/glcpp/tests/051-if-relational.c.expected | 35 + src/compiler/glsl/glcpp/tests/052-if-bitwise.c | 20 + .../glsl/glcpp/tests/052-if-bitwise.c.expected | 20 + .../glsl/glcpp/tests/053-if-divide-and-shift.c | 15 + .../glcpp/tests/053-if-divide-and-shift.c.expected | 15 + src/compiler/glsl/glcpp/tests/054-if-with-macros.c | 34 + .../glsl/glcpp/tests/054-if-with-macros.c.expected | 34 + .../055-define-chain-obj-to-func-parens-in-text.c | 3 + ...ine-chain-obj-to-func-parens-in-text.c.expected | 3 + .../glcpp/tests/056-macro-argument-with-comma.c | 4 + .../tests/056-macro-argument-with-comma.c.expected | 4 + .../glsl/glcpp/tests/057-empty-arguments.c | 6 + .../glcpp/tests/057-empty-arguments.c.expected | 6 + .../tests/058-token-pasting-empty-arguments.c | 5 + .../058-token-pasting-empty-arguments.c.expected | 5 + .../glsl/glcpp/tests/059-token-pasting-integer.c | 4 + .../tests/059-token-pasting-integer.c.expected | 4 + .../060-left-paren-in-macro-right-paren-in-text.c | 3 + ...t-paren-in-macro-right-paren-in-text.c.expected | 3 + .../tests/061-define-chain-obj-to-func-multi.c | 5 + .../061-define-chain-obj-to-func-multi.c.expected | 5 + .../glsl/glcpp/tests/062-if-0-skips-garbage.c | 5 + .../glcpp/tests/062-if-0-skips-garbage.c.expected | 5 + src/compiler/glsl/glcpp/tests/063-comments.c | 20 + .../glsl/glcpp/tests/063-comments.c.expected | 20 + src/compiler/glsl/glcpp/tests/064-version.c | 2 + .../glsl/glcpp/tests/064-version.c.expected | 2 + .../glsl/glcpp/tests/065-if-defined-parens.c | 17 + .../glcpp/tests/065-if-defined-parens.c.expected | 17 + .../glsl/glcpp/tests/066-if-nospace-expression.c | 3 + .../tests/066-if-nospace-expression.c.expected | 3 + .../glsl/glcpp/tests/067-nested-ifdef-ifndef.c | 40 + .../glcpp/tests/067-nested-ifdef-ifndef.c.expected | 40 + .../glsl/glcpp/tests/068-accidental-pasting.c | 11 + .../glcpp/tests/068-accidental-pasting.c.expected | 11 + .../glsl/glcpp/tests/069-repeated-argument.c | 2 + .../glcpp/tests/069-repeated-argument.c.expected | 2 + .../tests/070-undefined-macro-in-expression.c | 5 + .../070-undefined-macro-in-expression.c.expected | 5 + src/compiler/glsl/glcpp/tests/071-punctuator.c | 1 + .../glsl/glcpp/tests/071-punctuator.c.expected | 1 + .../glsl/glcpp/tests/072-token-pasting-same-line.c | 2 + .../tests/072-token-pasting-same-line.c.expected | 2 + src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c | 4 + .../glsl/glcpp/tests/073-if-in-ifdef.c.expected | 4 + src/compiler/glsl/glcpp/tests/074-elif-undef.c | 3 + .../glsl/glcpp/tests/074-elif-undef.c.expected | 3 + .../glsl/glcpp/tests/075-elif-elif-undef.c | 4 + .../glcpp/tests/075-elif-elif-undef.c.expected | 4 + .../glsl/glcpp/tests/076-elif-undef-nested.c | 5 + .../glcpp/tests/076-elif-undef-nested.c.expected | 5 + .../glsl/glcpp/tests/077-else-without-if.c | 1 + .../glcpp/tests/077-else-without-if.c.expected | 3 + .../glsl/glcpp/tests/078-elif-without-if.c | 1 + .../glcpp/tests/078-elif-without-if.c.expected | 3 + .../glsl/glcpp/tests/079-endif-without-if.c | 1 + .../glcpp/tests/079-endif-without-if.c.expected | 3 + .../glsl/glcpp/tests/080-if-without-expression.c | 4 + .../tests/080-if-without-expression.c.expected | 5 + .../glsl/glcpp/tests/081-elif-without-expression.c | 3 + 
.../tests/081-elif-without-expression.c.expected | 4 + src/compiler/glsl/glcpp/tests/082-invalid-paste.c | 7 + .../glsl/glcpp/tests/082-invalid-paste.c.expected | 19 + .../glsl/glcpp/tests/083-unterminated-if.c | 2 + .../glcpp/tests/083-unterminated-if.c.expected | 4 + .../glsl/glcpp/tests/084-unbalanced-parentheses.c | 2 + .../tests/084-unbalanced-parentheses.c.expected | 2 + .../glcpp/tests/085-incorrect-argument-count.c | 5 + .../tests/085-incorrect-argument-count.c.expected | 11 + .../glsl/glcpp/tests/086-reserved-macro-names.c | 3 + .../tests/086-reserved-macro-names.c.expected | 9 + src/compiler/glsl/glcpp/tests/087-if-comments.c | 5 + .../glsl/glcpp/tests/087-if-comments.c.expected | 5 + .../glcpp/tests/088-redefine-macro-legitimate.c | 5 + .../tests/088-redefine-macro-legitimate.c.expected | 5 + .../glsl/glcpp/tests/089-redefine-macro-error.c | 17 + .../tests/089-redefine-macro-error.c.expected | 29 + src/compiler/glsl/glcpp/tests/090-hash-error.c | 1 + .../glsl/glcpp/tests/090-hash-error.c.expected | 1 + src/compiler/glsl/glcpp/tests/091-hash-line.c | 14 + .../glsl/glcpp/tests/091-hash-line.c.expected | 14 + .../glsl/glcpp/tests/092-redefine-macro-error-2.c | 5 + .../tests/092-redefine-macro-error-2.c.expected | 9 + src/compiler/glsl/glcpp/tests/093-divide-by-zero.c | 2 + .../glsl/glcpp/tests/093-divide-by-zero.c.expected | 3 + .../glcpp/tests/094-divide-by-zero-short-circuit.c | 13 + .../094-divide-by-zero-short-circuit.c.expected | 14 + .../glsl/glcpp/tests/095-recursive-define.c | 3 + .../glcpp/tests/095-recursive-define.c.expected | 3 + src/compiler/glsl/glcpp/tests/096-paste-twice.c | 3 + .../glsl/glcpp/tests/096-paste-twice.c.expected | 3 + .../tests/097-paste-with-non-function-macro.c | 3 + .../097-paste-with-non-function-macro.c.expected | 3 + src/compiler/glsl/glcpp/tests/098-elif-undefined.c | 7 + .../glsl/glcpp/tests/098-elif-undefined.c.expected | 7 + src/compiler/glsl/glcpp/tests/099-c99-example.c | 17 + .../glsl/glcpp/tests/099-c99-example.c.expected | 16 + .../glsl/glcpp/tests/100-macro-with-colon.c | 7 + .../glcpp/tests/100-macro-with-colon.c.expected | 7 + .../glsl/glcpp/tests/101-macros-used-twice.c | 16 + .../glcpp/tests/101-macros-used-twice.c.expected | 16 + .../glsl/glcpp/tests/102-garbage-after-endif.c | 2 + .../glcpp/tests/102-garbage-after-endif.c.expected | 2 + .../glsl/glcpp/tests/103-garbage-after-else-0.c | 3 + .../tests/103-garbage-after-else-0.c.expected | 4 + .../glcpp/tests/104-hash-line-followed-by-code.c | 2 + .../104-hash-line-followed-by-code.c.expected | 2 + .../glsl/glcpp/tests/105-multiline-hash-line.c | 5 + .../glcpp/tests/105-multiline-hash-line.c.expected | 5 + .../glsl/glcpp/tests/106-multiline-hash-if.c | 6 + .../glcpp/tests/106-multiline-hash-if.c.expected | 6 + .../glsl/glcpp/tests/107-multiline-hash-elif.c | 7 + .../glcpp/tests/107-multiline-hash-elif.c.expected | 7 + .../glcpp/tests/108-no-space-after-hash-version.c | 1 + .../108-no-space-after-hash-version.c.expected | 1 + .../glcpp/tests/109-no-space-after-hash-line.c | 1 + .../tests/109-no-space-after-hash-line.c.expected | 1 + .../tests/110-no-space-digits-after-hash-elif.c | 3 + .../110-no-space-digits-after-hash-elif.c.expected | 3 + .../tests/111-no-space-operator-after-hash-if.c | 19 + .../111-no-space-operator-after-hash-if.c.expected | 19 + .../tests/112-no-space-operator-after-hash-elif.c | 24 + ...12-no-space-operator-after-hash-elif.c.expected | 24 + .../glsl/glcpp/tests/113-line-and-file-macros.c | 7 + .../tests/113-line-and-file-macros.c.expected | 7 + 
.../glsl/glcpp/tests/114-paste-integer-tokens.c | 7 + .../tests/114-paste-integer-tokens.c.expected | 7 + .../glsl/glcpp/tests/115-line-continuations.c | 9 + .../glcpp/tests/115-line-continuations.c.expected | 9 + .../glcpp/tests/116-disable-line-continuations.c | 13 + .../116-disable-line-continuations.c.expected | 13 + ...e-continuation-and-non-continuation-backslash.c | 12 + ...ation-and-non-continuation-backslash.c.expected | 12 + .../glsl/glcpp/tests/118-comment-becomes-space.c | 4 + .../tests/118-comment-becomes-space.c.expected | 4 + .../glsl/glcpp/tests/119-elif-after-else.c | 6 + .../glcpp/tests/119-elif-after-else.c.expected | 7 + src/compiler/glsl/glcpp/tests/120-undef-builtin.c | 3 + .../glsl/glcpp/tests/120-undef-builtin.c.expected | 6 + .../glsl/glcpp/tests/121-comment-bug-72686.c | 2 + .../glcpp/tests/121-comment-bug-72686.c.expected | 2 + .../glsl/glcpp/tests/122-redefine-whitespace.c | 16 + .../glcpp/tests/122-redefine-whitespace.c.expected | 22 + .../glsl/glcpp/tests/123-garbage-after-else-1.c | 3 + .../tests/123-garbage-after-else-1.c.expected | 4 + .../glsl/glcpp/tests/124-preprocessing-numbers.c | 37 + .../tests/124-preprocessing-numbers.c.expected | 37 + .../glcpp/tests/125-es-short-circuit-undefined.c | 27 + .../125-es-short-circuit-undefined.c.expected | 29 + .../glsl/glcpp/tests/126-garbage-after-directive.c | 5 + .../tests/126-garbage-after-directive.c.expected | 7 + src/compiler/glsl/glcpp/tests/127-pragma-empty.c | 3 + .../glsl/glcpp/tests/127-pragma-empty.c.expected | 3 + .../glsl/glcpp/tests/128-space-before-hash.c | 21 + .../glcpp/tests/128-space-before-hash.c.expected | 21 + .../glsl/glcpp/tests/129-define-non-identifier.c | 1 + .../tests/129-define-non-identifier.c.expected | 2 + src/compiler/glsl/glcpp/tests/130-define-comment.c | 2 + .../glsl/glcpp/tests/130-define-comment.c.expected | 2 + .../glsl/glcpp/tests/131-eof-without-newline.c | 1 + .../glcpp/tests/131-eof-without-newline.c.expected | 1 + .../glcpp/tests/132-eof-without-newline-define.c | 1 + .../132-eof-without-newline-define.c.expected | 1 + .../glcpp/tests/133-eof-without-newline-comment.c | 1 + .../133-eof-without-newline-comment.c.expected | 2 + .../glsl/glcpp/tests/134-hash-comment-directive.c | 22 + .../tests/134-hash-comment-directive.c.expected | 22 + .../glsl/glcpp/tests/135-duplicate-parameter.c | 2 + .../glcpp/tests/135-duplicate-parameter.c.expected | 4 + .../glcpp/tests/136-plus-plus-and-minus-minus.c | 8 + .../tests/136-plus-plus-and-minus-minus.c.expected | 8 + .../glcpp/tests/137-expand-macro-after-period.c | 4 + .../tests/137-expand-macro-after-period.c.expected | 4 + .../glcpp/tests/138-multi-line-comment-in-if-0.c | 7 + .../138-multi-line-comment-in-if-0.c.expected | 7 + .../glcpp/tests/139-define-without-macro-name.c | 5 + .../tests/139-define-without-macro-name.c.expected | 5 + src/compiler/glsl/glcpp/tests/140-null-directive.c | 9 + .../glsl/glcpp/tests/140-null-directive.c.expected | 9 + .../glsl/glcpp/tests/141-pragma-and-__LINE__.c | 6 + .../glcpp/tests/141-pragma-and-__LINE__.c.expected | 6 + .../glsl/glcpp/tests/142-defined-within-macro.c | 94 + .../tests/142-defined-within-macro.c.expected | 94 + src/compiler/glsl/glcpp/tests/143-multiple-else.c | 6 + .../glsl/glcpp/tests/143-multiple-else.c.expected | 7 + src/compiler/glsl/glcpp/tests/glcpp-test | 110 + src/compiler/glsl/glcpp/tests/glcpp-test-cr-lf | 141 + src/compiler/glsl/glsl_lexer.ll | 635 ++ src/compiler/glsl/glsl_parser.yy | 2855 ++++++++ src/compiler/glsl/glsl_parser_extras.cpp | 1952 +++++ 
src/compiler/glsl/glsl_parser_extras.h | 752 ++ src/compiler/glsl/glsl_symbol_table.cpp | 280 + src/compiler/glsl/glsl_symbol_table.h | 110 + src/compiler/glsl/hir_field_selection.cpp | 81 + src/compiler/glsl/ir.cpp | 2039 ++++++ src/compiler/glsl/ir.h | 2632 +++++++ src/compiler/glsl/ir_basic_block.cpp | 99 + src/compiler/glsl/ir_basic_block.h | 28 + src/compiler/glsl/ir_builder.cpp | 612 ++ src/compiler/glsl/ir_builder.h | 230 + src/compiler/glsl/ir_clone.cpp | 440 ++ src/compiler/glsl/ir_constant_expression.cpp | 2092 ++++++ src/compiler/glsl/ir_equals.cpp | 211 + src/compiler/glsl/ir_expression_flattening.cpp | 86 + src/compiler/glsl/ir_expression_flattening.h | 38 + src/compiler/glsl/ir_function.cpp | 404 ++ src/compiler/glsl/ir_function_can_inline.cpp | 75 + src/compiler/glsl/ir_function_detect_recursion.cpp | 358 + src/compiler/glsl/ir_function_inlining.h | 30 + src/compiler/glsl/ir_hierarchical_visitor.cpp | 383 + src/compiler/glsl/ir_hierarchical_visitor.h | 209 + src/compiler/glsl/ir_hv_accept.cpp | 439 ++ src/compiler/glsl/ir_import_prototypes.cpp | 125 + src/compiler/glsl/ir_optimization.h | 147 + src/compiler/glsl/ir_print_visitor.cpp | 604 ++ src/compiler/glsl/ir_print_visitor.h | 96 + src/compiler/glsl/ir_reader.cpp | 1167 +++ src/compiler/glsl/ir_reader.h | 34 + src/compiler/glsl/ir_rvalue_visitor.cpp | 316 + src/compiler/glsl/ir_rvalue_visitor.h | 83 + src/compiler/glsl/ir_set_program_inouts.cpp | 453 ++ src/compiler/glsl/ir_uniform.h | 216 + src/compiler/glsl/ir_validate.cpp | 930 +++ src/compiler/glsl/ir_variable_refcount.cpp | 153 + src/compiler/glsl/ir_variable_refcount.h | 80 + src/compiler/glsl/ir_visitor.h | 93 + src/compiler/glsl/link_atomics.cpp | 346 + src/compiler/glsl/link_functions.cpp | 348 + src/compiler/glsl/link_interface_blocks.cpp | 357 + .../glsl/link_uniform_block_active_visitor.cpp | 296 + .../glsl/link_uniform_block_active_visitor.h | 74 + src/compiler/glsl/link_uniform_blocks.cpp | 472 ++ src/compiler/glsl/link_uniform_initializers.cpp | 355 + src/compiler/glsl/link_uniforms.cpp | 1330 ++++ src/compiler/glsl/link_varyings.cpp | 1888 +++++ src/compiler/glsl/link_varyings.h | 299 + src/compiler/glsl/linker.cpp | 4676 ++++++++++++ src/compiler/glsl/linker.h | 205 + src/compiler/glsl/list.h | 700 ++ src/compiler/glsl/loop_analysis.cpp | 640 ++ src/compiler/glsl/loop_analysis.h | 259 + src/compiler/glsl/loop_controls.cpp | 246 + src/compiler/glsl/loop_unroll.cpp | 432 ++ src/compiler/glsl/lower_buffer_access.cpp | 490 ++ src/compiler/glsl/lower_buffer_access.h | 65 + src/compiler/glsl/lower_clip_distance.cpp | 574 ++ .../glsl/lower_const_arrays_to_uniforms.cpp | 109 + src/compiler/glsl/lower_discard.cpp | 201 + src/compiler/glsl/lower_discard_flow.cpp | 155 + src/compiler/glsl/lower_if_to_cond_assign.cpp | 252 + src/compiler/glsl/lower_instructions.cpp | 1061 +++ src/compiler/glsl/lower_jumps.cpp | 1022 +++ src/compiler/glsl/lower_mat_op_to_vec.cpp | 438 ++ src/compiler/glsl/lower_named_interface_blocks.cpp | 280 + src/compiler/glsl/lower_noise.cpp | 71 + src/compiler/glsl/lower_offset_array.cpp | 91 + src/compiler/glsl/lower_output_reads.cpp | 178 + src/compiler/glsl/lower_packed_varyings.cpp | 749 ++ src/compiler/glsl/lower_packing_builtins.cpp | 1412 ++++ src/compiler/glsl/lower_shared_reference.cpp | 496 ++ src/compiler/glsl/lower_subroutine.cpp | 123 + src/compiler/glsl/lower_tess_level.cpp | 459 ++ src/compiler/glsl/lower_texture_projection.cpp | 103 + src/compiler/glsl/lower_ubo_reference.cpp | 1042 +++ 
.../glsl/lower_variable_index_to_cond_assign.cpp | 585 ++ .../glsl/lower_vec_index_to_cond_assign.cpp | 239 + src/compiler/glsl/lower_vec_index_to_swizzle.cpp | 171 + src/compiler/glsl/lower_vector.cpp | 228 + src/compiler/glsl/lower_vector_derefs.cpp | 104 + src/compiler/glsl/lower_vector_insert.cpp | 146 + src/compiler/glsl/lower_vertex_id.cpp | 144 + src/compiler/glsl/main.cpp | 431 ++ src/compiler/glsl/opt_algebraic.cpp | 984 +++ src/compiler/glsl/opt_array_splitting.cpp | 408 ++ src/compiler/glsl/opt_conditional_discard.cpp | 81 + src/compiler/glsl/opt_constant_folding.cpp | 190 + src/compiler/glsl/opt_constant_propagation.cpp | 524 ++ src/compiler/glsl/opt_constant_variable.cpp | 218 + src/compiler/glsl/opt_copy_propagation.cpp | 352 + .../glsl/opt_copy_propagation_elements.cpp | 509 ++ src/compiler/glsl/opt_dead_builtin_variables.cpp | 103 + src/compiler/glsl/opt_dead_builtin_varyings.cpp | 606 ++ src/compiler/glsl/opt_dead_code.cpp | 197 + src/compiler/glsl/opt_dead_code_local.cpp | 336 + src/compiler/glsl/opt_dead_functions.cpp | 152 + src/compiler/glsl/opt_flatten_nested_if_blocks.cpp | 103 + src/compiler/glsl/opt_flip_matrices.cpp | 123 + src/compiler/glsl/opt_function_inlining.cpp | 360 + src/compiler/glsl/opt_if_simplification.cpp | 126 + src/compiler/glsl/opt_minmax.cpp | 488 ++ src/compiler/glsl/opt_noop_swizzle.cpp | 83 + src/compiler/glsl/opt_rebalance_tree.cpp | 321 + src/compiler/glsl/opt_redundant_jumps.cpp | 124 + src/compiler/glsl/opt_structure_splitting.cpp | 367 + src/compiler/glsl/opt_swizzle_swizzle.cpp | 97 + src/compiler/glsl/opt_tree_grafting.cpp | 404 ++ src/compiler/glsl/opt_vectorize.cpp | 407 ++ src/compiler/glsl/program.h | 55 + src/compiler/glsl/s_expression.cpp | 218 + src/compiler/glsl/s_expression.h | 180 + src/compiler/glsl/standalone_scaffolding.cpp | 221 + src/compiler/glsl/standalone_scaffolding.h | 90 + src/compiler/glsl/test.cpp | 78 + src/compiler/glsl/test_optpass.cpp | 276 + src/compiler/glsl/test_optpass.h | 30 + src/compiler/glsl/tests/.gitignore | 5 + src/compiler/glsl/tests/blob_test.c | 320 + src/compiler/glsl/tests/builtin_variable_test.cpp | 393 + src/compiler/glsl/tests/compare_ir | 59 + .../glsl/tests/copy_constant_to_storage_tests.cpp | 300 + src/compiler/glsl/tests/general_ir_test.cpp | 80 + .../glsl/tests/invalidate_locations_test.cpp | 196 + src/compiler/glsl/tests/lower_jumps/.gitignore | 3 + .../glsl/tests/lower_jumps/create_test_cases.py | 643 ++ src/compiler/glsl/tests/optimization-test | 42 + src/compiler/glsl/tests/sampler_types_test.cpp | 100 + .../glsl/tests/set_uniform_initializer_tests.cpp | 594 ++ src/compiler/glsl/tests/sexps.py | 103 + .../glsl/tests/uniform_initializer_utils.cpp | 255 + .../glsl/tests/uniform_initializer_utils.h | 48 + src/compiler/glsl/tests/varyings_test.cpp | 349 + src/compiler/glsl_types.cpp | 2 +- src/compiler/nir/Makefile.sources | 71 + src/compiler/nir/glsl_to_nir.cpp | 6 +- src/compiler/nir/glsl_to_nir.h | 2 +- src/compiler/nir/nir.h | 2 +- src/compiler/nir/nir_lower_atomics.c | 2 +- src/compiler/nir/nir_lower_samplers.c | 2 +- src/compiler/nir_types.cpp | 2 +- src/gallium/auxiliary/nir/tgsi_to_nir.c | 2 +- src/glsl/.gitignore | 10 - src/glsl/Android.gen.mk | 76 - src/glsl/Android.mk | 76 - src/glsl/Makefile.am | 228 - src/glsl/Makefile.sources | 222 - src/glsl/README | 228 - src/glsl/SConscript | 122 - src/glsl/TODO | 12 - src/glsl/ast.h | 1204 ---- src/glsl/ast_array_index.cpp | 333 - src/glsl/ast_expr.cpp | 95 - src/glsl/ast_function.cpp | 2098 ------ src/glsl/ast_to_hir.cpp | 7583 
-------------------- src/glsl/ast_type.cpp | 548 -- src/glsl/blob.c | 323 - src/glsl/blob.h | 289 - src/glsl/builtin_functions.cpp | 5502 -------------- src/glsl/builtin_types.cpp | 394 - src/glsl/builtin_variables.cpp | 1394 ---- src/glsl/glcpp/.gitignore | 6 - src/glsl/glcpp/README | 30 - src/glsl/glcpp/glcpp-lex.l | 577 -- src/glsl/glcpp/glcpp-parse.y | 2557 ------- src/glsl/glcpp/glcpp.c | 182 - src/glsl/glcpp/glcpp.h | 251 - src/glsl/glcpp/pp.c | 241 - src/glsl/glcpp/tests/.gitignore | 4 - src/glsl/glcpp/tests/000-content-with-spaces.c | 1 - .../glcpp/tests/000-content-with-spaces.c.expected | 1 - src/glsl/glcpp/tests/001-define.c | 2 - src/glsl/glcpp/tests/001-define.c.expected | 2 - src/glsl/glcpp/tests/002-define-chain.c | 3 - src/glsl/glcpp/tests/002-define-chain.c.expected | 3 - src/glsl/glcpp/tests/003-define-chain-reverse.c | 3 - .../tests/003-define-chain-reverse.c.expected | 3 - src/glsl/glcpp/tests/004-define-recursive.c | 6 - .../glcpp/tests/004-define-recursive.c.expected | 6 - src/glsl/glcpp/tests/005-define-composite-chain.c | 3 - .../tests/005-define-composite-chain.c.expected | 3 - .../tests/006-define-composite-chain-reverse.c | 3 - .../006-define-composite-chain-reverse.c.expected | 3 - .../glcpp/tests/007-define-composite-recursive.c | 6 - .../007-define-composite-recursive.c.expected | 6 - src/glsl/glcpp/tests/008-define-empty.c | 2 - src/glsl/glcpp/tests/008-define-empty.c.expected | 2 - src/glsl/glcpp/tests/009-undef.c | 4 - src/glsl/glcpp/tests/009-undef.c.expected | 4 - src/glsl/glcpp/tests/010-undef-re-define.c | 6 - .../glcpp/tests/010-undef-re-define.c.expected | 6 - src/glsl/glcpp/tests/011-define-func-empty.c | 2 - .../glcpp/tests/011-define-func-empty.c.expected | 2 - src/glsl/glcpp/tests/012-define-func-no-args.c | 2 - .../glcpp/tests/012-define-func-no-args.c.expected | 2 - .../glcpp/tests/013-define-func-1-arg-unused.c | 2 - .../tests/013-define-func-1-arg-unused.c.expected | 2 - .../glcpp/tests/014-define-func-2-arg-unused.c | 2 - .../tests/014-define-func-2-arg-unused.c.expected | 2 - .../glcpp/tests/015-define-object-with-parens.c | 4 - .../tests/015-define-object-with-parens.c.expected | 4 - src/glsl/glcpp/tests/016-define-func-1-arg.c | 2 - .../glcpp/tests/016-define-func-1-arg.c.expected | 2 - src/glsl/glcpp/tests/017-define-func-2-args.c | 2 - .../glcpp/tests/017-define-func-2-args.c.expected | 2 - .../tests/018-define-func-macro-as-parameter.c | 3 - .../018-define-func-macro-as-parameter.c.expected | 3 - src/glsl/glcpp/tests/019-define-func-1-arg-multi.c | 2 - .../tests/019-define-func-1-arg-multi.c.expected | 2 - src/glsl/glcpp/tests/020-define-func-2-arg-multi.c | 2 - .../tests/020-define-func-2-arg-multi.c.expected | 2 - src/glsl/glcpp/tests/021-define-func-compose.c | 3 - .../glcpp/tests/021-define-func-compose.c.expected | 3 - .../glcpp/tests/022-define-func-arg-with-parens.c | 2 - .../022-define-func-arg-with-parens.c.expected | 2 - src/glsl/glcpp/tests/023-define-extra-whitespace.c | 8 - .../tests/023-define-extra-whitespace.c.expected | 8 - .../tests/024-define-chain-to-self-recursion.c | 3 - .../024-define-chain-to-self-recursion.c.expected | 3 - src/glsl/glcpp/tests/025-func-macro-as-non-macro.c | 2 - .../tests/025-func-macro-as-non-macro.c.expected | 2 - .../glcpp/tests/026-define-func-extra-newlines.c | 6 - .../026-define-func-extra-newlines.c.expected | 3 - .../glcpp/tests/027-define-chain-obj-to-func.c | 3 - .../tests/027-define-chain-obj-to-func.c.expected | 3 - .../glcpp/tests/028-define-chain-obj-to-non-func.c | 3 - 
.../028-define-chain-obj-to-non-func.c.expected | 3 - .../tests/029-define-chain-obj-to-func-with-args.c | 3 - ...9-define-chain-obj-to-func-with-args.c.expected | 3 - .../tests/030-define-chain-obj-to-func-compose.c | 4 - ...030-define-chain-obj-to-func-compose.c.expected | 4 - .../tests/031-define-chain-func-to-func-compose.c | 4 - ...31-define-chain-func-to-func-compose.c.expected | 4 - .../glcpp/tests/032-define-func-self-recurse.c | 2 - .../tests/032-define-func-self-recurse.c.expected | 2 - .../glcpp/tests/033-define-func-self-compose.c | 2 - .../tests/033-define-func-self-compose.c.expected | 2 - .../tests/034-define-func-self-compose-non-func.c | 2 - ...34-define-func-self-compose-non-func.c.expected | 2 - ...nc-self-compose-non-func-multi-token-argument.c | 2 - ...ompose-non-func-multi-token-argument.c.expected | 2 - ...36-define-func-non-macro-multi-token-argument.c | 3 - ...-func-non-macro-multi-token-argument.c.expected | 3 - .../glcpp/tests/037-finalize-unexpanded-macro.c | 3 - .../tests/037-finalize-unexpanded-macro.c.expected | 3 - src/glsl/glcpp/tests/038-func-arg-with-commas.c | 2 - .../tests/038-func-arg-with-commas.c.expected | 2 - .../tests/039-func-arg-obj-macro-with-comma.c | 24 - .../039-func-arg-obj-macro-with-comma.c.expected | 26 - src/glsl/glcpp/tests/040-token-pasting.c | 2 - src/glsl/glcpp/tests/040-token-pasting.c.expected | 2 - src/glsl/glcpp/tests/041-if-0.c | 5 - src/glsl/glcpp/tests/041-if-0.c.expected | 5 - src/glsl/glcpp/tests/042-if-1.c | 5 - src/glsl/glcpp/tests/042-if-1.c.expected | 5 - src/glsl/glcpp/tests/043-if-0-else.c | 7 - src/glsl/glcpp/tests/043-if-0-else.c.expected | 7 - src/glsl/glcpp/tests/044-if-1-else.c | 7 - src/glsl/glcpp/tests/044-if-1-else.c.expected | 7 - src/glsl/glcpp/tests/045-if-0-elif.c | 11 - src/glsl/glcpp/tests/045-if-0-elif.c.expected | 11 - src/glsl/glcpp/tests/046-if-1-elsif.c | 11 - src/glsl/glcpp/tests/046-if-1-elsif.c.expected | 11 - src/glsl/glcpp/tests/047-if-elif-else.c | 11 - src/glsl/glcpp/tests/047-if-elif-else.c.expected | 11 - src/glsl/glcpp/tests/048-if-nested.c | 11 - src/glsl/glcpp/tests/048-if-nested.c.expected | 11 - .../glcpp/tests/049-if-expression-precedence.c | 5 - .../tests/049-if-expression-precedence.c.expected | 5 - src/glsl/glcpp/tests/050-if-defined.c | 17 - src/glsl/glcpp/tests/050-if-defined.c.expected | 17 - src/glsl/glcpp/tests/051-if-relational.c | 35 - src/glsl/glcpp/tests/051-if-relational.c.expected | 35 - src/glsl/glcpp/tests/052-if-bitwise.c | 20 - src/glsl/glcpp/tests/052-if-bitwise.c.expected | 20 - src/glsl/glcpp/tests/053-if-divide-and-shift.c | 15 - .../glcpp/tests/053-if-divide-and-shift.c.expected | 15 - src/glsl/glcpp/tests/054-if-with-macros.c | 34 - src/glsl/glcpp/tests/054-if-with-macros.c.expected | 34 - .../055-define-chain-obj-to-func-parens-in-text.c | 3 - ...ine-chain-obj-to-func-parens-in-text.c.expected | 3 - .../glcpp/tests/056-macro-argument-with-comma.c | 4 - .../tests/056-macro-argument-with-comma.c.expected | 4 - src/glsl/glcpp/tests/057-empty-arguments.c | 6 - .../glcpp/tests/057-empty-arguments.c.expected | 6 - .../tests/058-token-pasting-empty-arguments.c | 5 - .../058-token-pasting-empty-arguments.c.expected | 5 - src/glsl/glcpp/tests/059-token-pasting-integer.c | 4 - .../tests/059-token-pasting-integer.c.expected | 4 - .../060-left-paren-in-macro-right-paren-in-text.c | 3 - ...t-paren-in-macro-right-paren-in-text.c.expected | 3 - .../tests/061-define-chain-obj-to-func-multi.c | 5 - .../061-define-chain-obj-to-func-multi.c.expected | 5 - 
src/glsl/glcpp/tests/062-if-0-skips-garbage.c | 5 - .../glcpp/tests/062-if-0-skips-garbage.c.expected | 5 - src/glsl/glcpp/tests/063-comments.c | 20 - src/glsl/glcpp/tests/063-comments.c.expected | 20 - src/glsl/glcpp/tests/064-version.c | 2 - src/glsl/glcpp/tests/064-version.c.expected | 2 - src/glsl/glcpp/tests/065-if-defined-parens.c | 17 - .../glcpp/tests/065-if-defined-parens.c.expected | 17 - src/glsl/glcpp/tests/066-if-nospace-expression.c | 3 - .../tests/066-if-nospace-expression.c.expected | 3 - src/glsl/glcpp/tests/067-nested-ifdef-ifndef.c | 40 - .../glcpp/tests/067-nested-ifdef-ifndef.c.expected | 40 - src/glsl/glcpp/tests/068-accidental-pasting.c | 11 - .../glcpp/tests/068-accidental-pasting.c.expected | 11 - src/glsl/glcpp/tests/069-repeated-argument.c | 2 - .../glcpp/tests/069-repeated-argument.c.expected | 2 - .../tests/070-undefined-macro-in-expression.c | 5 - .../070-undefined-macro-in-expression.c.expected | 5 - src/glsl/glcpp/tests/071-punctuator.c | 1 - src/glsl/glcpp/tests/071-punctuator.c.expected | 1 - src/glsl/glcpp/tests/072-token-pasting-same-line.c | 2 - .../tests/072-token-pasting-same-line.c.expected | 2 - src/glsl/glcpp/tests/073-if-in-ifdef.c | 4 - src/glsl/glcpp/tests/073-if-in-ifdef.c.expected | 4 - src/glsl/glcpp/tests/074-elif-undef.c | 3 - src/glsl/glcpp/tests/074-elif-undef.c.expected | 3 - src/glsl/glcpp/tests/075-elif-elif-undef.c | 4 - .../glcpp/tests/075-elif-elif-undef.c.expected | 4 - src/glsl/glcpp/tests/076-elif-undef-nested.c | 5 - .../glcpp/tests/076-elif-undef-nested.c.expected | 5 - src/glsl/glcpp/tests/077-else-without-if.c | 1 - .../glcpp/tests/077-else-without-if.c.expected | 3 - src/glsl/glcpp/tests/078-elif-without-if.c | 1 - .../glcpp/tests/078-elif-without-if.c.expected | 3 - src/glsl/glcpp/tests/079-endif-without-if.c | 1 - .../glcpp/tests/079-endif-without-if.c.expected | 3 - src/glsl/glcpp/tests/080-if-without-expression.c | 4 - .../tests/080-if-without-expression.c.expected | 5 - src/glsl/glcpp/tests/081-elif-without-expression.c | 3 - .../tests/081-elif-without-expression.c.expected | 4 - src/glsl/glcpp/tests/082-invalid-paste.c | 7 - src/glsl/glcpp/tests/082-invalid-paste.c.expected | 19 - src/glsl/glcpp/tests/083-unterminated-if.c | 2 - .../glcpp/tests/083-unterminated-if.c.expected | 4 - src/glsl/glcpp/tests/084-unbalanced-parentheses.c | 2 - .../tests/084-unbalanced-parentheses.c.expected | 2 - .../glcpp/tests/085-incorrect-argument-count.c | 5 - .../tests/085-incorrect-argument-count.c.expected | 11 - src/glsl/glcpp/tests/086-reserved-macro-names.c | 3 - .../tests/086-reserved-macro-names.c.expected | 9 - src/glsl/glcpp/tests/087-if-comments.c | 5 - src/glsl/glcpp/tests/087-if-comments.c.expected | 5 - .../glcpp/tests/088-redefine-macro-legitimate.c | 5 - .../tests/088-redefine-macro-legitimate.c.expected | 5 - src/glsl/glcpp/tests/089-redefine-macro-error.c | 17 - .../tests/089-redefine-macro-error.c.expected | 29 - src/glsl/glcpp/tests/090-hash-error.c | 1 - src/glsl/glcpp/tests/090-hash-error.c.expected | 1 - src/glsl/glcpp/tests/091-hash-line.c | 14 - src/glsl/glcpp/tests/091-hash-line.c.expected | 14 - src/glsl/glcpp/tests/092-redefine-macro-error-2.c | 5 - .../tests/092-redefine-macro-error-2.c.expected | 9 - src/glsl/glcpp/tests/093-divide-by-zero.c | 2 - src/glsl/glcpp/tests/093-divide-by-zero.c.expected | 3 - .../glcpp/tests/094-divide-by-zero-short-circuit.c | 13 - .../094-divide-by-zero-short-circuit.c.expected | 14 - src/glsl/glcpp/tests/095-recursive-define.c | 3 - 
.../glcpp/tests/095-recursive-define.c.expected | 3 - src/glsl/glcpp/tests/096-paste-twice.c | 3 - src/glsl/glcpp/tests/096-paste-twice.c.expected | 3 - .../tests/097-paste-with-non-function-macro.c | 3 - .../097-paste-with-non-function-macro.c.expected | 3 - src/glsl/glcpp/tests/098-elif-undefined.c | 7 - src/glsl/glcpp/tests/098-elif-undefined.c.expected | 7 - src/glsl/glcpp/tests/099-c99-example.c | 17 - src/glsl/glcpp/tests/099-c99-example.c.expected | 16 - src/glsl/glcpp/tests/100-macro-with-colon.c | 7 - .../glcpp/tests/100-macro-with-colon.c.expected | 7 - src/glsl/glcpp/tests/101-macros-used-twice.c | 16 - .../glcpp/tests/101-macros-used-twice.c.expected | 16 - src/glsl/glcpp/tests/102-garbage-after-endif.c | 2 - .../glcpp/tests/102-garbage-after-endif.c.expected | 2 - src/glsl/glcpp/tests/103-garbage-after-else-0.c | 3 - .../tests/103-garbage-after-else-0.c.expected | 4 - .../glcpp/tests/104-hash-line-followed-by-code.c | 2 - .../104-hash-line-followed-by-code.c.expected | 2 - src/glsl/glcpp/tests/105-multiline-hash-line.c | 5 - .../glcpp/tests/105-multiline-hash-line.c.expected | 5 - src/glsl/glcpp/tests/106-multiline-hash-if.c | 6 - .../glcpp/tests/106-multiline-hash-if.c.expected | 6 - src/glsl/glcpp/tests/107-multiline-hash-elif.c | 7 - .../glcpp/tests/107-multiline-hash-elif.c.expected | 7 - .../glcpp/tests/108-no-space-after-hash-version.c | 1 - .../108-no-space-after-hash-version.c.expected | 1 - .../glcpp/tests/109-no-space-after-hash-line.c | 1 - .../tests/109-no-space-after-hash-line.c.expected | 1 - .../tests/110-no-space-digits-after-hash-elif.c | 3 - .../110-no-space-digits-after-hash-elif.c.expected | 3 - .../tests/111-no-space-operator-after-hash-if.c | 19 - .../111-no-space-operator-after-hash-if.c.expected | 19 - .../tests/112-no-space-operator-after-hash-elif.c | 24 - ...12-no-space-operator-after-hash-elif.c.expected | 24 - src/glsl/glcpp/tests/113-line-and-file-macros.c | 7 - .../tests/113-line-and-file-macros.c.expected | 7 - src/glsl/glcpp/tests/114-paste-integer-tokens.c | 7 - .../tests/114-paste-integer-tokens.c.expected | 7 - src/glsl/glcpp/tests/115-line-continuations.c | 9 - .../glcpp/tests/115-line-continuations.c.expected | 9 - .../glcpp/tests/116-disable-line-continuations.c | 13 - .../116-disable-line-continuations.c.expected | 13 - ...e-continuation-and-non-continuation-backslash.c | 12 - ...ation-and-non-continuation-backslash.c.expected | 12 - src/glsl/glcpp/tests/118-comment-becomes-space.c | 4 - .../tests/118-comment-becomes-space.c.expected | 4 - src/glsl/glcpp/tests/119-elif-after-else.c | 6 - .../glcpp/tests/119-elif-after-else.c.expected | 7 - src/glsl/glcpp/tests/120-undef-builtin.c | 3 - src/glsl/glcpp/tests/120-undef-builtin.c.expected | 6 - src/glsl/glcpp/tests/121-comment-bug-72686.c | 2 - .../glcpp/tests/121-comment-bug-72686.c.expected | 2 - src/glsl/glcpp/tests/122-redefine-whitespace.c | 16 - .../glcpp/tests/122-redefine-whitespace.c.expected | 22 - src/glsl/glcpp/tests/123-garbage-after-else-1.c | 3 - .../tests/123-garbage-after-else-1.c.expected | 4 - src/glsl/glcpp/tests/124-preprocessing-numbers.c | 37 - .../tests/124-preprocessing-numbers.c.expected | 37 - .../glcpp/tests/125-es-short-circuit-undefined.c | 27 - .../125-es-short-circuit-undefined.c.expected | 29 - src/glsl/glcpp/tests/126-garbage-after-directive.c | 5 - .../tests/126-garbage-after-directive.c.expected | 7 - src/glsl/glcpp/tests/127-pragma-empty.c | 3 - src/glsl/glcpp/tests/127-pragma-empty.c.expected | 3 - src/glsl/glcpp/tests/128-space-before-hash.c | 21 - 
.../glcpp/tests/128-space-before-hash.c.expected | 21 - src/glsl/glcpp/tests/129-define-non-identifier.c | 1 - .../tests/129-define-non-identifier.c.expected | 2 - src/glsl/glcpp/tests/130-define-comment.c | 2 - src/glsl/glcpp/tests/130-define-comment.c.expected | 2 - src/glsl/glcpp/tests/131-eof-without-newline.c | 1 - .../glcpp/tests/131-eof-without-newline.c.expected | 1 - .../glcpp/tests/132-eof-without-newline-define.c | 1 - .../132-eof-without-newline-define.c.expected | 1 - .../glcpp/tests/133-eof-without-newline-comment.c | 1 - .../133-eof-without-newline-comment.c.expected | 2 - src/glsl/glcpp/tests/134-hash-comment-directive.c | 22 - .../tests/134-hash-comment-directive.c.expected | 22 - src/glsl/glcpp/tests/135-duplicate-parameter.c | 2 - .../glcpp/tests/135-duplicate-parameter.c.expected | 4 - .../glcpp/tests/136-plus-plus-and-minus-minus.c | 8 - .../tests/136-plus-plus-and-minus-minus.c.expected | 8 - .../glcpp/tests/137-expand-macro-after-period.c | 4 - .../tests/137-expand-macro-after-period.c.expected | 4 - .../glcpp/tests/138-multi-line-comment-in-if-0.c | 7 - .../138-multi-line-comment-in-if-0.c.expected | 7 - .../glcpp/tests/139-define-without-macro-name.c | 5 - .../tests/139-define-without-macro-name.c.expected | 5 - src/glsl/glcpp/tests/140-null-directive.c | 9 - src/glsl/glcpp/tests/140-null-directive.c.expected | 9 - src/glsl/glcpp/tests/141-pragma-and-__LINE__.c | 6 - .../glcpp/tests/141-pragma-and-__LINE__.c.expected | 6 - src/glsl/glcpp/tests/142-defined-within-macro.c | 94 - .../tests/142-defined-within-macro.c.expected | 94 - src/glsl/glcpp/tests/143-multiple-else.c | 6 - src/glsl/glcpp/tests/143-multiple-else.c.expected | 7 - src/glsl/glcpp/tests/glcpp-test | 110 - src/glsl/glcpp/tests/glcpp-test-cr-lf | 141 - src/glsl/glsl_lexer.ll | 635 -- src/glsl/glsl_parser.yy | 2855 -------- src/glsl/glsl_parser_extras.cpp | 1952 ----- src/glsl/glsl_parser_extras.h | 752 -- src/glsl/glsl_symbol_table.cpp | 280 - src/glsl/glsl_symbol_table.h | 110 - src/glsl/hir_field_selection.cpp | 81 - src/glsl/ir.cpp | 2039 ------ src/glsl/ir.h | 2632 ------- src/glsl/ir_basic_block.cpp | 99 - src/glsl/ir_basic_block.h | 28 - src/glsl/ir_builder.cpp | 612 -- src/glsl/ir_builder.h | 230 - src/glsl/ir_clone.cpp | 440 -- src/glsl/ir_constant_expression.cpp | 2092 ------ src/glsl/ir_equals.cpp | 211 - src/glsl/ir_expression_flattening.cpp | 86 - src/glsl/ir_expression_flattening.h | 38 - src/glsl/ir_function.cpp | 404 -- src/glsl/ir_function_can_inline.cpp | 75 - src/glsl/ir_function_detect_recursion.cpp | 358 - src/glsl/ir_function_inlining.h | 30 - src/glsl/ir_hierarchical_visitor.cpp | 383 - src/glsl/ir_hierarchical_visitor.h | 209 - src/glsl/ir_hv_accept.cpp | 439 -- src/glsl/ir_import_prototypes.cpp | 125 - src/glsl/ir_optimization.h | 147 - src/glsl/ir_print_visitor.cpp | 604 -- src/glsl/ir_print_visitor.h | 96 - src/glsl/ir_reader.cpp | 1167 --- src/glsl/ir_reader.h | 34 - src/glsl/ir_rvalue_visitor.cpp | 316 - src/glsl/ir_rvalue_visitor.h | 83 - src/glsl/ir_set_program_inouts.cpp | 453 -- src/glsl/ir_uniform.h | 216 - src/glsl/ir_validate.cpp | 930 --- src/glsl/ir_variable_refcount.cpp | 153 - src/glsl/ir_variable_refcount.h | 80 - src/glsl/ir_visitor.h | 93 - src/glsl/link_atomics.cpp | 346 - src/glsl/link_functions.cpp | 348 - src/glsl/link_interface_blocks.cpp | 357 - src/glsl/link_uniform_block_active_visitor.cpp | 296 - src/glsl/link_uniform_block_active_visitor.h | 74 - src/glsl/link_uniform_blocks.cpp | 472 -- src/glsl/link_uniform_initializers.cpp | 355 - 
src/glsl/link_uniforms.cpp | 1330 ---- src/glsl/link_varyings.cpp | 1888 ----- src/glsl/link_varyings.h | 299 - src/glsl/linker.cpp | 4676 ------------ src/glsl/linker.h | 205 - src/glsl/list.h | 700 -- src/glsl/loop_analysis.cpp | 640 -- src/glsl/loop_analysis.h | 259 - src/glsl/loop_controls.cpp | 246 - src/glsl/loop_unroll.cpp | 432 -- src/glsl/lower_buffer_access.cpp | 490 -- src/glsl/lower_buffer_access.h | 65 - src/glsl/lower_clip_distance.cpp | 574 -- src/glsl/lower_const_arrays_to_uniforms.cpp | 109 - src/glsl/lower_discard.cpp | 201 - src/glsl/lower_discard_flow.cpp | 155 - src/glsl/lower_if_to_cond_assign.cpp | 252 - src/glsl/lower_instructions.cpp | 1061 --- src/glsl/lower_jumps.cpp | 1022 --- src/glsl/lower_mat_op_to_vec.cpp | 438 -- src/glsl/lower_named_interface_blocks.cpp | 280 - src/glsl/lower_noise.cpp | 71 - src/glsl/lower_offset_array.cpp | 91 - src/glsl/lower_output_reads.cpp | 178 - src/glsl/lower_packed_varyings.cpp | 749 -- src/glsl/lower_packing_builtins.cpp | 1412 ---- src/glsl/lower_shared_reference.cpp | 496 -- src/glsl/lower_subroutine.cpp | 123 - src/glsl/lower_tess_level.cpp | 459 -- src/glsl/lower_texture_projection.cpp | 103 - src/glsl/lower_ubo_reference.cpp | 1042 --- src/glsl/lower_variable_index_to_cond_assign.cpp | 585 -- src/glsl/lower_vec_index_to_cond_assign.cpp | 239 - src/glsl/lower_vec_index_to_swizzle.cpp | 171 - src/glsl/lower_vector.cpp | 228 - src/glsl/lower_vector_derefs.cpp | 104 - src/glsl/lower_vector_insert.cpp | 146 - src/glsl/lower_vertex_id.cpp | 144 - src/glsl/main.cpp | 431 -- src/glsl/opt_algebraic.cpp | 984 --- src/glsl/opt_array_splitting.cpp | 408 -- src/glsl/opt_conditional_discard.cpp | 81 - src/glsl/opt_constant_folding.cpp | 190 - src/glsl/opt_constant_propagation.cpp | 524 -- src/glsl/opt_constant_variable.cpp | 218 - src/glsl/opt_copy_propagation.cpp | 352 - src/glsl/opt_copy_propagation_elements.cpp | 509 -- src/glsl/opt_dead_builtin_variables.cpp | 103 - src/glsl/opt_dead_builtin_varyings.cpp | 606 -- src/glsl/opt_dead_code.cpp | 197 - src/glsl/opt_dead_code_local.cpp | 336 - src/glsl/opt_dead_functions.cpp | 152 - src/glsl/opt_flatten_nested_if_blocks.cpp | 103 - src/glsl/opt_flip_matrices.cpp | 123 - src/glsl/opt_function_inlining.cpp | 360 - src/glsl/opt_if_simplification.cpp | 126 - src/glsl/opt_minmax.cpp | 488 -- src/glsl/opt_noop_swizzle.cpp | 83 - src/glsl/opt_rebalance_tree.cpp | 321 - src/glsl/opt_redundant_jumps.cpp | 124 - src/glsl/opt_structure_splitting.cpp | 367 - src/glsl/opt_swizzle_swizzle.cpp | 97 - src/glsl/opt_tree_grafting.cpp | 404 -- src/glsl/opt_vectorize.cpp | 407 -- src/glsl/program.h | 55 - src/glsl/s_expression.cpp | 218 - src/glsl/s_expression.h | 180 - src/glsl/standalone_scaffolding.cpp | 221 - src/glsl/standalone_scaffolding.h | 90 - src/glsl/test.cpp | 78 - src/glsl/test_optpass.cpp | 276 - src/glsl/test_optpass.h | 30 - src/glsl/tests/.gitignore | 5 - src/glsl/tests/blob_test.c | 320 - src/glsl/tests/builtin_variable_test.cpp | 393 - src/glsl/tests/compare_ir | 59 - src/glsl/tests/copy_constant_to_storage_tests.cpp | 300 - src/glsl/tests/general_ir_test.cpp | 80 - src/glsl/tests/invalidate_locations_test.cpp | 196 - src/glsl/tests/lower_jumps/.gitignore | 3 - src/glsl/tests/lower_jumps/create_test_cases.py | 643 -- src/glsl/tests/optimization-test | 42 - src/glsl/tests/sampler_types_test.cpp | 100 - src/glsl/tests/set_uniform_initializer_tests.cpp | 594 -- src/glsl/tests/sexps.py | 103 - src/glsl/tests/uniform_initializer_utils.cpp | 255 - src/glsl/tests/uniform_initializer_utils.h | 48 
- src/glsl/tests/varyings_test.cpp | 349 - src/mesa/Makefile.am | 4 +- src/mesa/Makefile.sources | 3 - src/mesa/drivers/dri/i965/brw_cs.c | 2 +- .../drivers/dri/i965/brw_cubemap_normalize.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs.h | 2 +- .../dri/i965/brw_fs_channel_expressions.cpp | 4 +- src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 2 +- .../drivers/dri/i965/brw_fs_vector_splitting.cpp | 4 +- src/mesa/drivers/dri/i965/brw_gs.c | 2 +- src/mesa/drivers/dri/i965/brw_link.cpp | 2 +- .../dri/i965/brw_lower_texture_gradients.cpp | 4 +- .../dri/i965/brw_lower_unnormalized_offset.cpp | 4 +- src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp | 2 +- src/mesa/drivers/dri/i965/brw_program.c | 2 +- src/mesa/drivers/dri/i965/brw_vec4.h | 2 +- src/mesa/drivers/dri/i965/gen7_cs_state.c | 2 +- src/mesa/drivers/dri/i965/intel_resolve_map.h | 2 +- src/mesa/main/context.c | 2 +- src/mesa/main/ff_fragment_shader.cpp | 8 +- src/mesa/main/pipelineobj.c | 4 +- src/mesa/main/program_resource.c | 2 +- src/mesa/main/shader_query.cpp | 6 +- src/mesa/main/shaderapi.c | 8 +- src/mesa/main/uniform_query.cpp | 8 +- src/mesa/main/uniforms.c | 2 +- src/mesa/main/uniforms.h | 2 +- src/mesa/program/ir_to_mesa.cpp | 18 +- src/mesa/program/prog_to_nir.c | 2 +- src/mesa/program/sampler.cpp | 8 +- src/mesa/state_tracker/st_draw.c | 2 +- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 4 +- 948 files changed, 85721 insertions(+), 85307 deletions(-) create mode 100644 src/compiler/glsl/.gitignore create mode 100644 src/compiler/glsl/Android.gen.mk create mode 100644 src/compiler/glsl/Android.mk create mode 100644 src/compiler/glsl/Makefile.am create mode 100644 src/compiler/glsl/Makefile.sources create mode 100644 src/compiler/glsl/README create mode 100644 src/compiler/glsl/SConscript create mode 100644 src/compiler/glsl/TODO create mode 100644 src/compiler/glsl/ast.h create mode 100644 src/compiler/glsl/ast_array_index.cpp create mode 100644 src/compiler/glsl/ast_expr.cpp create mode 100644 src/compiler/glsl/ast_function.cpp create mode 100644 src/compiler/glsl/ast_to_hir.cpp create mode 100644 src/compiler/glsl/ast_type.cpp create mode 100644 src/compiler/glsl/blob.c create mode 100644 src/compiler/glsl/blob.h create mode 100644 src/compiler/glsl/builtin_functions.cpp create mode 100644 src/compiler/glsl/builtin_types.cpp create mode 100644 src/compiler/glsl/builtin_variables.cpp create mode 100644 src/compiler/glsl/glcpp/.gitignore create mode 100644 src/compiler/glsl/glcpp/README create mode 100644 src/compiler/glsl/glcpp/glcpp-lex.l create mode 100644 src/compiler/glsl/glcpp/glcpp-parse.y create mode 100644 src/compiler/glsl/glcpp/glcpp.c create mode 100644 src/compiler/glsl/glcpp/glcpp.h create mode 100644 src/compiler/glsl/glcpp/pp.c create mode 100644 src/compiler/glsl/glcpp/tests/.gitignore create mode 100644 src/compiler/glsl/glcpp/tests/000-content-with-spaces.c create mode 100644 src/compiler/glsl/glcpp/tests/000-content-with-spaces.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/001-define.c create mode 100644 src/compiler/glsl/glcpp/tests/001-define.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/002-define-chain.c create mode 100644 src/compiler/glsl/glcpp/tests/002-define-chain.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c create mode 100644 src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/004-define-recursive.c create mode 100644 src/compiler/glsl/glcpp/tests/004-define-recursive.c.expected 
create mode 100644 src/compiler/glsl/glcpp/tests/005-define-composite-chain.c create mode 100644 src/compiler/glsl/glcpp/tests/005-define-composite-chain.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c create mode 100644 src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c create mode 100644 src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/008-define-empty.c create mode 100644 src/compiler/glsl/glcpp/tests/008-define-empty.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/009-undef.c create mode 100644 src/compiler/glsl/glcpp/tests/009-undef.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/010-undef-re-define.c create mode 100644 src/compiler/glsl/glcpp/tests/010-undef-re-define.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/011-define-func-empty.c create mode 100644 src/compiler/glsl/glcpp/tests/011-define-func-empty.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/012-define-func-no-args.c create mode 100644 src/compiler/glsl/glcpp/tests/012-define-func-no-args.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c create mode 100644 src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c create mode 100644 src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c create mode 100644 src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c create mode 100644 src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/017-define-func-2-args.c create mode 100644 src/compiler/glsl/glcpp/tests/017-define-func-2-args.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c create mode 100644 src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c create mode 100644 src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c create mode 100644 src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/021-define-func-compose.c create mode 100644 src/compiler/glsl/glcpp/tests/021-define-func-compose.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c create mode 100644 src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c create mode 100644 src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c create mode 100644 src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c create mode 100644 src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c create mode 100644 
src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c create mode 100644 src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c create mode 100644 src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c create mode 100644 src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c create mode 100644 src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c create mode 100644 src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c create mode 100644 src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c create mode 100644 src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c create mode 100644 src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c create mode 100644 src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c create mode 100644 src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c create mode 100644 src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c create mode 100644 src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c create mode 100644 src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/040-token-pasting.c create mode 100644 src/compiler/glsl/glcpp/tests/040-token-pasting.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/041-if-0.c create mode 100644 src/compiler/glsl/glcpp/tests/041-if-0.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/042-if-1.c create mode 100644 src/compiler/glsl/glcpp/tests/042-if-1.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/043-if-0-else.c create mode 100644 src/compiler/glsl/glcpp/tests/043-if-0-else.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/044-if-1-else.c create mode 100644 src/compiler/glsl/glcpp/tests/044-if-1-else.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/045-if-0-elif.c create mode 100644 src/compiler/glsl/glcpp/tests/045-if-0-elif.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/046-if-1-elsif.c create mode 100644 src/compiler/glsl/glcpp/tests/046-if-1-elsif.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/047-if-elif-else.c create mode 100644 
src/compiler/glsl/glcpp/tests/047-if-elif-else.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/048-if-nested.c create mode 100644 src/compiler/glsl/glcpp/tests/048-if-nested.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c create mode 100644 src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/050-if-defined.c create mode 100644 src/compiler/glsl/glcpp/tests/050-if-defined.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/051-if-relational.c create mode 100644 src/compiler/glsl/glcpp/tests/051-if-relational.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/052-if-bitwise.c create mode 100644 src/compiler/glsl/glcpp/tests/052-if-bitwise.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c create mode 100644 src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/054-if-with-macros.c create mode 100644 src/compiler/glsl/glcpp/tests/054-if-with-macros.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c create mode 100644 src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c create mode 100644 src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/057-empty-arguments.c create mode 100644 src/compiler/glsl/glcpp/tests/057-empty-arguments.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c create mode 100644 src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c create mode 100644 src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c create mode 100644 src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c create mode 100644 src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c create mode 100644 src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/063-comments.c create mode 100644 src/compiler/glsl/glcpp/tests/063-comments.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/064-version.c create mode 100644 src/compiler/glsl/glcpp/tests/064-version.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/065-if-defined-parens.c create mode 100644 src/compiler/glsl/glcpp/tests/065-if-defined-parens.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c create mode 100644 src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c create mode 100644 src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/068-accidental-pasting.c create mode 100644 src/compiler/glsl/glcpp/tests/068-accidental-pasting.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/069-repeated-argument.c create mode 100644 
src/compiler/glsl/glcpp/tests/069-repeated-argument.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c create mode 100644 src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/071-punctuator.c create mode 100644 src/compiler/glsl/glcpp/tests/071-punctuator.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c create mode 100644 src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c create mode 100644 src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/074-elif-undef.c create mode 100644 src/compiler/glsl/glcpp/tests/074-elif-undef.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c create mode 100644 src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c create mode 100644 src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/077-else-without-if.c create mode 100644 src/compiler/glsl/glcpp/tests/077-else-without-if.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/078-elif-without-if.c create mode 100644 src/compiler/glsl/glcpp/tests/078-elif-without-if.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/079-endif-without-if.c create mode 100644 src/compiler/glsl/glcpp/tests/079-endif-without-if.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/080-if-without-expression.c create mode 100644 src/compiler/glsl/glcpp/tests/080-if-without-expression.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/081-elif-without-expression.c create mode 100644 src/compiler/glsl/glcpp/tests/081-elif-without-expression.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/082-invalid-paste.c create mode 100644 src/compiler/glsl/glcpp/tests/082-invalid-paste.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/083-unterminated-if.c create mode 100644 src/compiler/glsl/glcpp/tests/083-unterminated-if.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c create mode 100644 src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c create mode 100644 src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c create mode 100644 src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/087-if-comments.c create mode 100644 src/compiler/glsl/glcpp/tests/087-if-comments.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c create mode 100644 src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c create mode 100644 src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/090-hash-error.c create mode 100644 src/compiler/glsl/glcpp/tests/090-hash-error.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/091-hash-line.c create mode 100644 src/compiler/glsl/glcpp/tests/091-hash-line.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c 
create mode 100644 src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/093-divide-by-zero.c create mode 100644 src/compiler/glsl/glcpp/tests/093-divide-by-zero.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c create mode 100644 src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/095-recursive-define.c create mode 100644 src/compiler/glsl/glcpp/tests/095-recursive-define.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/096-paste-twice.c create mode 100644 src/compiler/glsl/glcpp/tests/096-paste-twice.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c create mode 100644 src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/098-elif-undefined.c create mode 100644 src/compiler/glsl/glcpp/tests/098-elif-undefined.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/099-c99-example.c create mode 100644 src/compiler/glsl/glcpp/tests/099-c99-example.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/100-macro-with-colon.c create mode 100644 src/compiler/glsl/glcpp/tests/100-macro-with-colon.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/101-macros-used-twice.c create mode 100644 src/compiler/glsl/glcpp/tests/101-macros-used-twice.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c create mode 100644 src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c create mode 100644 src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c create mode 100644 src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c create mode 100644 src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c create mode 100644 src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c create mode 100644 src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c create mode 100644 src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c create mode 100644 src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c create mode 100644 src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c create mode 100644 src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c create mode 100644 src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c create mode 100644 src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c.expected create mode 100644 
src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c create mode 100644 src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/115-line-continuations.c create mode 100644 src/compiler/glsl/glcpp/tests/115-line-continuations.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c create mode 100644 src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c create mode 100644 src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c create mode 100644 src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/119-elif-after-else.c create mode 100644 src/compiler/glsl/glcpp/tests/119-elif-after-else.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/120-undef-builtin.c create mode 100644 src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c create mode 100644 src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c create mode 100644 src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c create mode 100644 src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c create mode 100644 src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c create mode 100644 src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c create mode 100644 src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/127-pragma-empty.c create mode 100644 src/compiler/glsl/glcpp/tests/127-pragma-empty.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/128-space-before-hash.c create mode 100644 src/compiler/glsl/glcpp/tests/128-space-before-hash.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/129-define-non-identifier.c create mode 100644 src/compiler/glsl/glcpp/tests/129-define-non-identifier.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/130-define-comment.c create mode 100644 src/compiler/glsl/glcpp/tests/130-define-comment.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/131-eof-without-newline.c create mode 100644 src/compiler/glsl/glcpp/tests/131-eof-without-newline.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c create mode 100644 src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c create mode 100644 src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c create mode 100644 src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c create mode 100644 
src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c create mode 100644 src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c create mode 100644 src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c create mode 100644 src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c create mode 100644 src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/140-null-directive.c create mode 100644 src/compiler/glsl/glcpp/tests/140-null-directive.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c create mode 100644 src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/142-defined-within-macro.c create mode 100644 src/compiler/glsl/glcpp/tests/142-defined-within-macro.c.expected create mode 100644 src/compiler/glsl/glcpp/tests/143-multiple-else.c create mode 100644 src/compiler/glsl/glcpp/tests/143-multiple-else.c.expected create mode 100755 src/compiler/glsl/glcpp/tests/glcpp-test create mode 100755 src/compiler/glsl/glcpp/tests/glcpp-test-cr-lf create mode 100644 src/compiler/glsl/glsl_lexer.ll create mode 100644 src/compiler/glsl/glsl_parser.yy create mode 100644 src/compiler/glsl/glsl_parser_extras.cpp create mode 100644 src/compiler/glsl/glsl_parser_extras.h create mode 100644 src/compiler/glsl/glsl_symbol_table.cpp create mode 100644 src/compiler/glsl/glsl_symbol_table.h create mode 100644 src/compiler/glsl/hir_field_selection.cpp create mode 100644 src/compiler/glsl/ir.cpp create mode 100644 src/compiler/glsl/ir.h create mode 100644 src/compiler/glsl/ir_basic_block.cpp create mode 100644 src/compiler/glsl/ir_basic_block.h create mode 100644 src/compiler/glsl/ir_builder.cpp create mode 100644 src/compiler/glsl/ir_builder.h create mode 100644 src/compiler/glsl/ir_clone.cpp create mode 100644 src/compiler/glsl/ir_constant_expression.cpp create mode 100644 src/compiler/glsl/ir_equals.cpp create mode 100644 src/compiler/glsl/ir_expression_flattening.cpp create mode 100644 src/compiler/glsl/ir_expression_flattening.h create mode 100644 src/compiler/glsl/ir_function.cpp create mode 100644 src/compiler/glsl/ir_function_can_inline.cpp create mode 100644 src/compiler/glsl/ir_function_detect_recursion.cpp create mode 100644 src/compiler/glsl/ir_function_inlining.h create mode 100644 src/compiler/glsl/ir_hierarchical_visitor.cpp create mode 100644 src/compiler/glsl/ir_hierarchical_visitor.h create mode 100644 src/compiler/glsl/ir_hv_accept.cpp create mode 100644 src/compiler/glsl/ir_import_prototypes.cpp create mode 100644 src/compiler/glsl/ir_optimization.h create mode 100644 src/compiler/glsl/ir_print_visitor.cpp create mode 100644 src/compiler/glsl/ir_print_visitor.h create mode 100644 src/compiler/glsl/ir_reader.cpp create mode 100644 src/compiler/glsl/ir_reader.h create mode 100644 src/compiler/glsl/ir_rvalue_visitor.cpp create mode 100644 src/compiler/glsl/ir_rvalue_visitor.h create mode 100644 src/compiler/glsl/ir_set_program_inouts.cpp create mode 100644 src/compiler/glsl/ir_uniform.h create mode 100644 src/compiler/glsl/ir_validate.cpp create mode 100644 
src/compiler/glsl/ir_variable_refcount.cpp create mode 100644 src/compiler/glsl/ir_variable_refcount.h create mode 100644 src/compiler/glsl/ir_visitor.h create mode 100644 src/compiler/glsl/link_atomics.cpp create mode 100644 src/compiler/glsl/link_functions.cpp create mode 100644 src/compiler/glsl/link_interface_blocks.cpp create mode 100644 src/compiler/glsl/link_uniform_block_active_visitor.cpp create mode 100644 src/compiler/glsl/link_uniform_block_active_visitor.h create mode 100644 src/compiler/glsl/link_uniform_blocks.cpp create mode 100644 src/compiler/glsl/link_uniform_initializers.cpp create mode 100644 src/compiler/glsl/link_uniforms.cpp create mode 100644 src/compiler/glsl/link_varyings.cpp create mode 100644 src/compiler/glsl/link_varyings.h create mode 100644 src/compiler/glsl/linker.cpp create mode 100644 src/compiler/glsl/linker.h create mode 100644 src/compiler/glsl/list.h create mode 100644 src/compiler/glsl/loop_analysis.cpp create mode 100644 src/compiler/glsl/loop_analysis.h create mode 100644 src/compiler/glsl/loop_controls.cpp create mode 100644 src/compiler/glsl/loop_unroll.cpp create mode 100644 src/compiler/glsl/lower_buffer_access.cpp create mode 100644 src/compiler/glsl/lower_buffer_access.h create mode 100644 src/compiler/glsl/lower_clip_distance.cpp create mode 100644 src/compiler/glsl/lower_const_arrays_to_uniforms.cpp create mode 100644 src/compiler/glsl/lower_discard.cpp create mode 100644 src/compiler/glsl/lower_discard_flow.cpp create mode 100644 src/compiler/glsl/lower_if_to_cond_assign.cpp create mode 100644 src/compiler/glsl/lower_instructions.cpp create mode 100644 src/compiler/glsl/lower_jumps.cpp create mode 100644 src/compiler/glsl/lower_mat_op_to_vec.cpp create mode 100644 src/compiler/glsl/lower_named_interface_blocks.cpp create mode 100644 src/compiler/glsl/lower_noise.cpp create mode 100644 src/compiler/glsl/lower_offset_array.cpp create mode 100644 src/compiler/glsl/lower_output_reads.cpp create mode 100644 src/compiler/glsl/lower_packed_varyings.cpp create mode 100644 src/compiler/glsl/lower_packing_builtins.cpp create mode 100644 src/compiler/glsl/lower_shared_reference.cpp create mode 100644 src/compiler/glsl/lower_subroutine.cpp create mode 100644 src/compiler/glsl/lower_tess_level.cpp create mode 100644 src/compiler/glsl/lower_texture_projection.cpp create mode 100644 src/compiler/glsl/lower_ubo_reference.cpp create mode 100644 src/compiler/glsl/lower_variable_index_to_cond_assign.cpp create mode 100644 src/compiler/glsl/lower_vec_index_to_cond_assign.cpp create mode 100644 src/compiler/glsl/lower_vec_index_to_swizzle.cpp create mode 100644 src/compiler/glsl/lower_vector.cpp create mode 100644 src/compiler/glsl/lower_vector_derefs.cpp create mode 100644 src/compiler/glsl/lower_vector_insert.cpp create mode 100644 src/compiler/glsl/lower_vertex_id.cpp create mode 100644 src/compiler/glsl/main.cpp create mode 100644 src/compiler/glsl/opt_algebraic.cpp create mode 100644 src/compiler/glsl/opt_array_splitting.cpp create mode 100644 src/compiler/glsl/opt_conditional_discard.cpp create mode 100644 src/compiler/glsl/opt_constant_folding.cpp create mode 100644 src/compiler/glsl/opt_constant_propagation.cpp create mode 100644 src/compiler/glsl/opt_constant_variable.cpp create mode 100644 src/compiler/glsl/opt_copy_propagation.cpp create mode 100644 src/compiler/glsl/opt_copy_propagation_elements.cpp create mode 100644 src/compiler/glsl/opt_dead_builtin_variables.cpp create mode 100644 src/compiler/glsl/opt_dead_builtin_varyings.cpp create mode 
100644 src/compiler/glsl/opt_dead_code.cpp create mode 100644 src/compiler/glsl/opt_dead_code_local.cpp create mode 100644 src/compiler/glsl/opt_dead_functions.cpp create mode 100644 src/compiler/glsl/opt_flatten_nested_if_blocks.cpp create mode 100644 src/compiler/glsl/opt_flip_matrices.cpp create mode 100644 src/compiler/glsl/opt_function_inlining.cpp create mode 100644 src/compiler/glsl/opt_if_simplification.cpp create mode 100644 src/compiler/glsl/opt_minmax.cpp create mode 100644 src/compiler/glsl/opt_noop_swizzle.cpp create mode 100644 src/compiler/glsl/opt_rebalance_tree.cpp create mode 100644 src/compiler/glsl/opt_redundant_jumps.cpp create mode 100644 src/compiler/glsl/opt_structure_splitting.cpp create mode 100644 src/compiler/glsl/opt_swizzle_swizzle.cpp create mode 100644 src/compiler/glsl/opt_tree_grafting.cpp create mode 100644 src/compiler/glsl/opt_vectorize.cpp create mode 100644 src/compiler/glsl/program.h create mode 100644 src/compiler/glsl/s_expression.cpp create mode 100644 src/compiler/glsl/s_expression.h create mode 100644 src/compiler/glsl/standalone_scaffolding.cpp create mode 100644 src/compiler/glsl/standalone_scaffolding.h create mode 100644 src/compiler/glsl/test.cpp create mode 100644 src/compiler/glsl/test_optpass.cpp create mode 100644 src/compiler/glsl/test_optpass.h create mode 100644 src/compiler/glsl/tests/.gitignore create mode 100644 src/compiler/glsl/tests/blob_test.c create mode 100644 src/compiler/glsl/tests/builtin_variable_test.cpp create mode 100755 src/compiler/glsl/tests/compare_ir create mode 100644 src/compiler/glsl/tests/copy_constant_to_storage_tests.cpp create mode 100644 src/compiler/glsl/tests/general_ir_test.cpp create mode 100644 src/compiler/glsl/tests/invalidate_locations_test.cpp create mode 100644 src/compiler/glsl/tests/lower_jumps/.gitignore create mode 100644 src/compiler/glsl/tests/lower_jumps/create_test_cases.py create mode 100755 src/compiler/glsl/tests/optimization-test create mode 100644 src/compiler/glsl/tests/sampler_types_test.cpp create mode 100644 src/compiler/glsl/tests/set_uniform_initializer_tests.cpp create mode 100644 src/compiler/glsl/tests/sexps.py create mode 100644 src/compiler/glsl/tests/uniform_initializer_utils.cpp create mode 100644 src/compiler/glsl/tests/uniform_initializer_utils.h create mode 100644 src/compiler/glsl/tests/varyings_test.cpp create mode 100644 src/compiler/nir/Makefile.sources delete mode 100644 src/glsl/.gitignore delete mode 100644 src/glsl/Android.gen.mk delete mode 100644 src/glsl/Android.mk delete mode 100644 src/glsl/Makefile.am delete mode 100644 src/glsl/Makefile.sources delete mode 100644 src/glsl/README delete mode 100644 src/glsl/SConscript delete mode 100644 src/glsl/TODO delete mode 100644 src/glsl/ast.h delete mode 100644 src/glsl/ast_array_index.cpp delete mode 100644 src/glsl/ast_expr.cpp delete mode 100644 src/glsl/ast_function.cpp delete mode 100644 src/glsl/ast_to_hir.cpp delete mode 100644 src/glsl/ast_type.cpp delete mode 100644 src/glsl/blob.c delete mode 100644 src/glsl/blob.h delete mode 100644 src/glsl/builtin_functions.cpp delete mode 100644 src/glsl/builtin_types.cpp delete mode 100644 src/glsl/builtin_variables.cpp delete mode 100644 src/glsl/glcpp/.gitignore delete mode 100644 src/glsl/glcpp/README delete mode 100644 src/glsl/glcpp/glcpp-lex.l delete mode 100644 src/glsl/glcpp/glcpp-parse.y delete mode 100644 src/glsl/glcpp/glcpp.c delete mode 100644 src/glsl/glcpp/glcpp.h delete mode 100644 src/glsl/glcpp/pp.c delete mode 100644 
src/glsl/glcpp/tests/.gitignore delete mode 100644 src/glsl/glcpp/tests/000-content-with-spaces.c delete mode 100644 src/glsl/glcpp/tests/000-content-with-spaces.c.expected delete mode 100644 src/glsl/glcpp/tests/001-define.c delete mode 100644 src/glsl/glcpp/tests/001-define.c.expected delete mode 100644 src/glsl/glcpp/tests/002-define-chain.c delete mode 100644 src/glsl/glcpp/tests/002-define-chain.c.expected delete mode 100644 src/glsl/glcpp/tests/003-define-chain-reverse.c delete mode 100644 src/glsl/glcpp/tests/003-define-chain-reverse.c.expected delete mode 100644 src/glsl/glcpp/tests/004-define-recursive.c delete mode 100644 src/glsl/glcpp/tests/004-define-recursive.c.expected delete mode 100644 src/glsl/glcpp/tests/005-define-composite-chain.c delete mode 100644 src/glsl/glcpp/tests/005-define-composite-chain.c.expected delete mode 100644 src/glsl/glcpp/tests/006-define-composite-chain-reverse.c delete mode 100644 src/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected delete mode 100644 src/glsl/glcpp/tests/007-define-composite-recursive.c delete mode 100644 src/glsl/glcpp/tests/007-define-composite-recursive.c.expected delete mode 100644 src/glsl/glcpp/tests/008-define-empty.c delete mode 100644 src/glsl/glcpp/tests/008-define-empty.c.expected delete mode 100644 src/glsl/glcpp/tests/009-undef.c delete mode 100644 src/glsl/glcpp/tests/009-undef.c.expected delete mode 100644 src/glsl/glcpp/tests/010-undef-re-define.c delete mode 100644 src/glsl/glcpp/tests/010-undef-re-define.c.expected delete mode 100644 src/glsl/glcpp/tests/011-define-func-empty.c delete mode 100644 src/glsl/glcpp/tests/011-define-func-empty.c.expected delete mode 100644 src/glsl/glcpp/tests/012-define-func-no-args.c delete mode 100644 src/glsl/glcpp/tests/012-define-func-no-args.c.expected delete mode 100644 src/glsl/glcpp/tests/013-define-func-1-arg-unused.c delete mode 100644 src/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected delete mode 100644 src/glsl/glcpp/tests/014-define-func-2-arg-unused.c delete mode 100644 src/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected delete mode 100644 src/glsl/glcpp/tests/015-define-object-with-parens.c delete mode 100644 src/glsl/glcpp/tests/015-define-object-with-parens.c.expected delete mode 100644 src/glsl/glcpp/tests/016-define-func-1-arg.c delete mode 100644 src/glsl/glcpp/tests/016-define-func-1-arg.c.expected delete mode 100644 src/glsl/glcpp/tests/017-define-func-2-args.c delete mode 100644 src/glsl/glcpp/tests/017-define-func-2-args.c.expected delete mode 100644 src/glsl/glcpp/tests/018-define-func-macro-as-parameter.c delete mode 100644 src/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected delete mode 100644 src/glsl/glcpp/tests/019-define-func-1-arg-multi.c delete mode 100644 src/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected delete mode 100644 src/glsl/glcpp/tests/020-define-func-2-arg-multi.c delete mode 100644 src/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected delete mode 100644 src/glsl/glcpp/tests/021-define-func-compose.c delete mode 100644 src/glsl/glcpp/tests/021-define-func-compose.c.expected delete mode 100644 src/glsl/glcpp/tests/022-define-func-arg-with-parens.c delete mode 100644 src/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected delete mode 100644 src/glsl/glcpp/tests/023-define-extra-whitespace.c delete mode 100644 src/glsl/glcpp/tests/023-define-extra-whitespace.c.expected delete mode 100644 src/glsl/glcpp/tests/024-define-chain-to-self-recursion.c delete mode 100644 
src/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected delete mode 100644 src/glsl/glcpp/tests/025-func-macro-as-non-macro.c delete mode 100644 src/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected delete mode 100644 src/glsl/glcpp/tests/026-define-func-extra-newlines.c delete mode 100644 src/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected delete mode 100644 src/glsl/glcpp/tests/027-define-chain-obj-to-func.c delete mode 100644 src/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected delete mode 100644 src/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c delete mode 100644 src/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected delete mode 100644 src/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c delete mode 100644 src/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected delete mode 100644 src/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c delete mode 100644 src/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected delete mode 100644 src/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c delete mode 100644 src/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected delete mode 100644 src/glsl/glcpp/tests/032-define-func-self-recurse.c delete mode 100644 src/glsl/glcpp/tests/032-define-func-self-recurse.c.expected delete mode 100644 src/glsl/glcpp/tests/033-define-func-self-compose.c delete mode 100644 src/glsl/glcpp/tests/033-define-func-self-compose.c.expected delete mode 100644 src/glsl/glcpp/tests/034-define-func-self-compose-non-func.c delete mode 100644 src/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected delete mode 100644 src/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c delete mode 100644 src/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected delete mode 100644 src/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c delete mode 100644 src/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected delete mode 100644 src/glsl/glcpp/tests/037-finalize-unexpanded-macro.c delete mode 100644 src/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected delete mode 100644 src/glsl/glcpp/tests/038-func-arg-with-commas.c delete mode 100644 src/glsl/glcpp/tests/038-func-arg-with-commas.c.expected delete mode 100644 src/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c delete mode 100644 src/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected delete mode 100644 src/glsl/glcpp/tests/040-token-pasting.c delete mode 100644 src/glsl/glcpp/tests/040-token-pasting.c.expected delete mode 100644 src/glsl/glcpp/tests/041-if-0.c delete mode 100644 src/glsl/glcpp/tests/041-if-0.c.expected delete mode 100644 src/glsl/glcpp/tests/042-if-1.c delete mode 100644 src/glsl/glcpp/tests/042-if-1.c.expected delete mode 100644 src/glsl/glcpp/tests/043-if-0-else.c delete mode 100644 src/glsl/glcpp/tests/043-if-0-else.c.expected delete mode 100644 src/glsl/glcpp/tests/044-if-1-else.c delete mode 100644 src/glsl/glcpp/tests/044-if-1-else.c.expected delete mode 100644 src/glsl/glcpp/tests/045-if-0-elif.c delete mode 100644 src/glsl/glcpp/tests/045-if-0-elif.c.expected delete mode 100644 src/glsl/glcpp/tests/046-if-1-elsif.c delete mode 100644 src/glsl/glcpp/tests/046-if-1-elsif.c.expected delete mode 100644 src/glsl/glcpp/tests/047-if-elif-else.c delete mode 100644 src/glsl/glcpp/tests/047-if-elif-else.c.expected delete mode 100644 src/glsl/glcpp/tests/048-if-nested.c delete mode 100644 
src/glsl/glcpp/tests/048-if-nested.c.expected delete mode 100644 src/glsl/glcpp/tests/049-if-expression-precedence.c delete mode 100644 src/glsl/glcpp/tests/049-if-expression-precedence.c.expected delete mode 100644 src/glsl/glcpp/tests/050-if-defined.c delete mode 100644 src/glsl/glcpp/tests/050-if-defined.c.expected delete mode 100644 src/glsl/glcpp/tests/051-if-relational.c delete mode 100644 src/glsl/glcpp/tests/051-if-relational.c.expected delete mode 100644 src/glsl/glcpp/tests/052-if-bitwise.c delete mode 100644 src/glsl/glcpp/tests/052-if-bitwise.c.expected delete mode 100644 src/glsl/glcpp/tests/053-if-divide-and-shift.c delete mode 100644 src/glsl/glcpp/tests/053-if-divide-and-shift.c.expected delete mode 100644 src/glsl/glcpp/tests/054-if-with-macros.c delete mode 100644 src/glsl/glcpp/tests/054-if-with-macros.c.expected delete mode 100644 src/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c delete mode 100644 src/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected delete mode 100644 src/glsl/glcpp/tests/056-macro-argument-with-comma.c delete mode 100644 src/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected delete mode 100644 src/glsl/glcpp/tests/057-empty-arguments.c delete mode 100644 src/glsl/glcpp/tests/057-empty-arguments.c.expected delete mode 100644 src/glsl/glcpp/tests/058-token-pasting-empty-arguments.c delete mode 100644 src/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected delete mode 100644 src/glsl/glcpp/tests/059-token-pasting-integer.c delete mode 100644 src/glsl/glcpp/tests/059-token-pasting-integer.c.expected delete mode 100644 src/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c delete mode 100644 src/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected delete mode 100644 src/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c delete mode 100644 src/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected delete mode 100644 src/glsl/glcpp/tests/062-if-0-skips-garbage.c delete mode 100644 src/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected delete mode 100644 src/glsl/glcpp/tests/063-comments.c delete mode 100644 src/glsl/glcpp/tests/063-comments.c.expected delete mode 100644 src/glsl/glcpp/tests/064-version.c delete mode 100644 src/glsl/glcpp/tests/064-version.c.expected delete mode 100644 src/glsl/glcpp/tests/065-if-defined-parens.c delete mode 100644 src/glsl/glcpp/tests/065-if-defined-parens.c.expected delete mode 100644 src/glsl/glcpp/tests/066-if-nospace-expression.c delete mode 100644 src/glsl/glcpp/tests/066-if-nospace-expression.c.expected delete mode 100644 src/glsl/glcpp/tests/067-nested-ifdef-ifndef.c delete mode 100644 src/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected delete mode 100644 src/glsl/glcpp/tests/068-accidental-pasting.c delete mode 100644 src/glsl/glcpp/tests/068-accidental-pasting.c.expected delete mode 100644 src/glsl/glcpp/tests/069-repeated-argument.c delete mode 100644 src/glsl/glcpp/tests/069-repeated-argument.c.expected delete mode 100644 src/glsl/glcpp/tests/070-undefined-macro-in-expression.c delete mode 100644 src/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected delete mode 100644 src/glsl/glcpp/tests/071-punctuator.c delete mode 100644 src/glsl/glcpp/tests/071-punctuator.c.expected delete mode 100644 src/glsl/glcpp/tests/072-token-pasting-same-line.c delete mode 100644 src/glsl/glcpp/tests/072-token-pasting-same-line.c.expected delete mode 100644 src/glsl/glcpp/tests/073-if-in-ifdef.c delete mode 100644 
src/glsl/glcpp/tests/073-if-in-ifdef.c.expected delete mode 100644 src/glsl/glcpp/tests/074-elif-undef.c delete mode 100644 src/glsl/glcpp/tests/074-elif-undef.c.expected delete mode 100644 src/glsl/glcpp/tests/075-elif-elif-undef.c delete mode 100644 src/glsl/glcpp/tests/075-elif-elif-undef.c.expected delete mode 100644 src/glsl/glcpp/tests/076-elif-undef-nested.c delete mode 100644 src/glsl/glcpp/tests/076-elif-undef-nested.c.expected delete mode 100644 src/glsl/glcpp/tests/077-else-without-if.c delete mode 100644 src/glsl/glcpp/tests/077-else-without-if.c.expected delete mode 100644 src/glsl/glcpp/tests/078-elif-without-if.c delete mode 100644 src/glsl/glcpp/tests/078-elif-without-if.c.expected delete mode 100644 src/glsl/glcpp/tests/079-endif-without-if.c delete mode 100644 src/glsl/glcpp/tests/079-endif-without-if.c.expected delete mode 100644 src/glsl/glcpp/tests/080-if-without-expression.c delete mode 100644 src/glsl/glcpp/tests/080-if-without-expression.c.expected delete mode 100644 src/glsl/glcpp/tests/081-elif-without-expression.c delete mode 100644 src/glsl/glcpp/tests/081-elif-without-expression.c.expected delete mode 100644 src/glsl/glcpp/tests/082-invalid-paste.c delete mode 100644 src/glsl/glcpp/tests/082-invalid-paste.c.expected delete mode 100644 src/glsl/glcpp/tests/083-unterminated-if.c delete mode 100644 src/glsl/glcpp/tests/083-unterminated-if.c.expected delete mode 100644 src/glsl/glcpp/tests/084-unbalanced-parentheses.c delete mode 100644 src/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected delete mode 100644 src/glsl/glcpp/tests/085-incorrect-argument-count.c delete mode 100644 src/glsl/glcpp/tests/085-incorrect-argument-count.c.expected delete mode 100644 src/glsl/glcpp/tests/086-reserved-macro-names.c delete mode 100644 src/glsl/glcpp/tests/086-reserved-macro-names.c.expected delete mode 100644 src/glsl/glcpp/tests/087-if-comments.c delete mode 100644 src/glsl/glcpp/tests/087-if-comments.c.expected delete mode 100644 src/glsl/glcpp/tests/088-redefine-macro-legitimate.c delete mode 100644 src/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected delete mode 100644 src/glsl/glcpp/tests/089-redefine-macro-error.c delete mode 100644 src/glsl/glcpp/tests/089-redefine-macro-error.c.expected delete mode 100644 src/glsl/glcpp/tests/090-hash-error.c delete mode 100644 src/glsl/glcpp/tests/090-hash-error.c.expected delete mode 100644 src/glsl/glcpp/tests/091-hash-line.c delete mode 100644 src/glsl/glcpp/tests/091-hash-line.c.expected delete mode 100644 src/glsl/glcpp/tests/092-redefine-macro-error-2.c delete mode 100644 src/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected delete mode 100644 src/glsl/glcpp/tests/093-divide-by-zero.c delete mode 100644 src/glsl/glcpp/tests/093-divide-by-zero.c.expected delete mode 100644 src/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c delete mode 100644 src/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected delete mode 100644 src/glsl/glcpp/tests/095-recursive-define.c delete mode 100644 src/glsl/glcpp/tests/095-recursive-define.c.expected delete mode 100644 src/glsl/glcpp/tests/096-paste-twice.c delete mode 100644 src/glsl/glcpp/tests/096-paste-twice.c.expected delete mode 100644 src/glsl/glcpp/tests/097-paste-with-non-function-macro.c delete mode 100644 src/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected delete mode 100644 src/glsl/glcpp/tests/098-elif-undefined.c delete mode 100644 src/glsl/glcpp/tests/098-elif-undefined.c.expected delete mode 100644 src/glsl/glcpp/tests/099-c99-example.c 
delete mode 100644 src/glsl/glcpp/tests/099-c99-example.c.expected delete mode 100644 src/glsl/glcpp/tests/100-macro-with-colon.c delete mode 100644 src/glsl/glcpp/tests/100-macro-with-colon.c.expected delete mode 100644 src/glsl/glcpp/tests/101-macros-used-twice.c delete mode 100644 src/glsl/glcpp/tests/101-macros-used-twice.c.expected delete mode 100644 src/glsl/glcpp/tests/102-garbage-after-endif.c delete mode 100644 src/glsl/glcpp/tests/102-garbage-after-endif.c.expected delete mode 100644 src/glsl/glcpp/tests/103-garbage-after-else-0.c delete mode 100644 src/glsl/glcpp/tests/103-garbage-after-else-0.c.expected delete mode 100644 src/glsl/glcpp/tests/104-hash-line-followed-by-code.c delete mode 100644 src/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected delete mode 100644 src/glsl/glcpp/tests/105-multiline-hash-line.c delete mode 100644 src/glsl/glcpp/tests/105-multiline-hash-line.c.expected delete mode 100644 src/glsl/glcpp/tests/106-multiline-hash-if.c delete mode 100644 src/glsl/glcpp/tests/106-multiline-hash-if.c.expected delete mode 100644 src/glsl/glcpp/tests/107-multiline-hash-elif.c delete mode 100644 src/glsl/glcpp/tests/107-multiline-hash-elif.c.expected delete mode 100644 src/glsl/glcpp/tests/108-no-space-after-hash-version.c delete mode 100644 src/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected delete mode 100644 src/glsl/glcpp/tests/109-no-space-after-hash-line.c delete mode 100644 src/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected delete mode 100644 src/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c delete mode 100644 src/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected delete mode 100644 src/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c delete mode 100644 src/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected delete mode 100644 src/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c delete mode 100644 src/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected delete mode 100644 src/glsl/glcpp/tests/113-line-and-file-macros.c delete mode 100644 src/glsl/glcpp/tests/113-line-and-file-macros.c.expected delete mode 100644 src/glsl/glcpp/tests/114-paste-integer-tokens.c delete mode 100644 src/glsl/glcpp/tests/114-paste-integer-tokens.c.expected delete mode 100644 src/glsl/glcpp/tests/115-line-continuations.c delete mode 100644 src/glsl/glcpp/tests/115-line-continuations.c.expected delete mode 100644 src/glsl/glcpp/tests/116-disable-line-continuations.c delete mode 100644 src/glsl/glcpp/tests/116-disable-line-continuations.c.expected delete mode 100644 src/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c delete mode 100644 src/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected delete mode 100644 src/glsl/glcpp/tests/118-comment-becomes-space.c delete mode 100644 src/glsl/glcpp/tests/118-comment-becomes-space.c.expected delete mode 100644 src/glsl/glcpp/tests/119-elif-after-else.c delete mode 100644 src/glsl/glcpp/tests/119-elif-after-else.c.expected delete mode 100644 src/glsl/glcpp/tests/120-undef-builtin.c delete mode 100644 src/glsl/glcpp/tests/120-undef-builtin.c.expected delete mode 100644 src/glsl/glcpp/tests/121-comment-bug-72686.c delete mode 100644 src/glsl/glcpp/tests/121-comment-bug-72686.c.expected delete mode 100644 src/glsl/glcpp/tests/122-redefine-whitespace.c delete mode 100644 src/glsl/glcpp/tests/122-redefine-whitespace.c.expected delete mode 100644 src/glsl/glcpp/tests/123-garbage-after-else-1.c delete mode 100644 
src/glsl/glcpp/tests/123-garbage-after-else-1.c.expected delete mode 100644 src/glsl/glcpp/tests/124-preprocessing-numbers.c delete mode 100644 src/glsl/glcpp/tests/124-preprocessing-numbers.c.expected delete mode 100644 src/glsl/glcpp/tests/125-es-short-circuit-undefined.c delete mode 100644 src/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected delete mode 100644 src/glsl/glcpp/tests/126-garbage-after-directive.c delete mode 100644 src/glsl/glcpp/tests/126-garbage-after-directive.c.expected delete mode 100644 src/glsl/glcpp/tests/127-pragma-empty.c delete mode 100644 src/glsl/glcpp/tests/127-pragma-empty.c.expected delete mode 100644 src/glsl/glcpp/tests/128-space-before-hash.c delete mode 100644 src/glsl/glcpp/tests/128-space-before-hash.c.expected delete mode 100644 src/glsl/glcpp/tests/129-define-non-identifier.c delete mode 100644 src/glsl/glcpp/tests/129-define-non-identifier.c.expected delete mode 100644 src/glsl/glcpp/tests/130-define-comment.c delete mode 100644 src/glsl/glcpp/tests/130-define-comment.c.expected delete mode 100644 src/glsl/glcpp/tests/131-eof-without-newline.c delete mode 100644 src/glsl/glcpp/tests/131-eof-without-newline.c.expected delete mode 100644 src/glsl/glcpp/tests/132-eof-without-newline-define.c delete mode 100644 src/glsl/glcpp/tests/132-eof-without-newline-define.c.expected delete mode 100644 src/glsl/glcpp/tests/133-eof-without-newline-comment.c delete mode 100644 src/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected delete mode 100644 src/glsl/glcpp/tests/134-hash-comment-directive.c delete mode 100644 src/glsl/glcpp/tests/134-hash-comment-directive.c.expected delete mode 100644 src/glsl/glcpp/tests/135-duplicate-parameter.c delete mode 100644 src/glsl/glcpp/tests/135-duplicate-parameter.c.expected delete mode 100644 src/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c delete mode 100644 src/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected delete mode 100644 src/glsl/glcpp/tests/137-expand-macro-after-period.c delete mode 100644 src/glsl/glcpp/tests/137-expand-macro-after-period.c.expected delete mode 100644 src/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c delete mode 100644 src/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected delete mode 100644 src/glsl/glcpp/tests/139-define-without-macro-name.c delete mode 100644 src/glsl/glcpp/tests/139-define-without-macro-name.c.expected delete mode 100644 src/glsl/glcpp/tests/140-null-directive.c delete mode 100644 src/glsl/glcpp/tests/140-null-directive.c.expected delete mode 100644 src/glsl/glcpp/tests/141-pragma-and-__LINE__.c delete mode 100644 src/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected delete mode 100644 src/glsl/glcpp/tests/142-defined-within-macro.c delete mode 100644 src/glsl/glcpp/tests/142-defined-within-macro.c.expected delete mode 100644 src/glsl/glcpp/tests/143-multiple-else.c delete mode 100644 src/glsl/glcpp/tests/143-multiple-else.c.expected delete mode 100755 src/glsl/glcpp/tests/glcpp-test delete mode 100755 src/glsl/glcpp/tests/glcpp-test-cr-lf delete mode 100644 src/glsl/glsl_lexer.ll delete mode 100644 src/glsl/glsl_parser.yy delete mode 100644 src/glsl/glsl_parser_extras.cpp delete mode 100644 src/glsl/glsl_parser_extras.h delete mode 100644 src/glsl/glsl_symbol_table.cpp delete mode 100644 src/glsl/glsl_symbol_table.h delete mode 100644 src/glsl/hir_field_selection.cpp delete mode 100644 src/glsl/ir.cpp delete mode 100644 src/glsl/ir.h delete mode 100644 src/glsl/ir_basic_block.cpp delete mode 100644 src/glsl/ir_basic_block.h delete 
mode 100644 src/glsl/ir_builder.cpp delete mode 100644 src/glsl/ir_builder.h delete mode 100644 src/glsl/ir_clone.cpp delete mode 100644 src/glsl/ir_constant_expression.cpp delete mode 100644 src/glsl/ir_equals.cpp delete mode 100644 src/glsl/ir_expression_flattening.cpp delete mode 100644 src/glsl/ir_expression_flattening.h delete mode 100644 src/glsl/ir_function.cpp delete mode 100644 src/glsl/ir_function_can_inline.cpp delete mode 100644 src/glsl/ir_function_detect_recursion.cpp delete mode 100644 src/glsl/ir_function_inlining.h delete mode 100644 src/glsl/ir_hierarchical_visitor.cpp delete mode 100644 src/glsl/ir_hierarchical_visitor.h delete mode 100644 src/glsl/ir_hv_accept.cpp delete mode 100644 src/glsl/ir_import_prototypes.cpp delete mode 100644 src/glsl/ir_optimization.h delete mode 100644 src/glsl/ir_print_visitor.cpp delete mode 100644 src/glsl/ir_print_visitor.h delete mode 100644 src/glsl/ir_reader.cpp delete mode 100644 src/glsl/ir_reader.h delete mode 100644 src/glsl/ir_rvalue_visitor.cpp delete mode 100644 src/glsl/ir_rvalue_visitor.h delete mode 100644 src/glsl/ir_set_program_inouts.cpp delete mode 100644 src/glsl/ir_uniform.h delete mode 100644 src/glsl/ir_validate.cpp delete mode 100644 src/glsl/ir_variable_refcount.cpp delete mode 100644 src/glsl/ir_variable_refcount.h delete mode 100644 src/glsl/ir_visitor.h delete mode 100644 src/glsl/link_atomics.cpp delete mode 100644 src/glsl/link_functions.cpp delete mode 100644 src/glsl/link_interface_blocks.cpp delete mode 100644 src/glsl/link_uniform_block_active_visitor.cpp delete mode 100644 src/glsl/link_uniform_block_active_visitor.h delete mode 100644 src/glsl/link_uniform_blocks.cpp delete mode 100644 src/glsl/link_uniform_initializers.cpp delete mode 100644 src/glsl/link_uniforms.cpp delete mode 100644 src/glsl/link_varyings.cpp delete mode 100644 src/glsl/link_varyings.h delete mode 100644 src/glsl/linker.cpp delete mode 100644 src/glsl/linker.h delete mode 100644 src/glsl/list.h delete mode 100644 src/glsl/loop_analysis.cpp delete mode 100644 src/glsl/loop_analysis.h delete mode 100644 src/glsl/loop_controls.cpp delete mode 100644 src/glsl/loop_unroll.cpp delete mode 100644 src/glsl/lower_buffer_access.cpp delete mode 100644 src/glsl/lower_buffer_access.h delete mode 100644 src/glsl/lower_clip_distance.cpp delete mode 100644 src/glsl/lower_const_arrays_to_uniforms.cpp delete mode 100644 src/glsl/lower_discard.cpp delete mode 100644 src/glsl/lower_discard_flow.cpp delete mode 100644 src/glsl/lower_if_to_cond_assign.cpp delete mode 100644 src/glsl/lower_instructions.cpp delete mode 100644 src/glsl/lower_jumps.cpp delete mode 100644 src/glsl/lower_mat_op_to_vec.cpp delete mode 100644 src/glsl/lower_named_interface_blocks.cpp delete mode 100644 src/glsl/lower_noise.cpp delete mode 100644 src/glsl/lower_offset_array.cpp delete mode 100644 src/glsl/lower_output_reads.cpp delete mode 100644 src/glsl/lower_packed_varyings.cpp delete mode 100644 src/glsl/lower_packing_builtins.cpp delete mode 100644 src/glsl/lower_shared_reference.cpp delete mode 100644 src/glsl/lower_subroutine.cpp delete mode 100644 src/glsl/lower_tess_level.cpp delete mode 100644 src/glsl/lower_texture_projection.cpp delete mode 100644 src/glsl/lower_ubo_reference.cpp delete mode 100644 src/glsl/lower_variable_index_to_cond_assign.cpp delete mode 100644 src/glsl/lower_vec_index_to_cond_assign.cpp delete mode 100644 src/glsl/lower_vec_index_to_swizzle.cpp delete mode 100644 src/glsl/lower_vector.cpp delete mode 100644 src/glsl/lower_vector_derefs.cpp delete 
mode 100644 src/glsl/lower_vector_insert.cpp delete mode 100644 src/glsl/lower_vertex_id.cpp delete mode 100644 src/glsl/main.cpp delete mode 100644 src/glsl/opt_algebraic.cpp delete mode 100644 src/glsl/opt_array_splitting.cpp delete mode 100644 src/glsl/opt_conditional_discard.cpp delete mode 100644 src/glsl/opt_constant_folding.cpp delete mode 100644 src/glsl/opt_constant_propagation.cpp delete mode 100644 src/glsl/opt_constant_variable.cpp delete mode 100644 src/glsl/opt_copy_propagation.cpp delete mode 100644 src/glsl/opt_copy_propagation_elements.cpp delete mode 100644 src/glsl/opt_dead_builtin_variables.cpp delete mode 100644 src/glsl/opt_dead_builtin_varyings.cpp delete mode 100644 src/glsl/opt_dead_code.cpp delete mode 100644 src/glsl/opt_dead_code_local.cpp delete mode 100644 src/glsl/opt_dead_functions.cpp delete mode 100644 src/glsl/opt_flatten_nested_if_blocks.cpp delete mode 100644 src/glsl/opt_flip_matrices.cpp delete mode 100644 src/glsl/opt_function_inlining.cpp delete mode 100644 src/glsl/opt_if_simplification.cpp delete mode 100644 src/glsl/opt_minmax.cpp delete mode 100644 src/glsl/opt_noop_swizzle.cpp delete mode 100644 src/glsl/opt_rebalance_tree.cpp delete mode 100644 src/glsl/opt_redundant_jumps.cpp delete mode 100644 src/glsl/opt_structure_splitting.cpp delete mode 100644 src/glsl/opt_swizzle_swizzle.cpp delete mode 100644 src/glsl/opt_tree_grafting.cpp delete mode 100644 src/glsl/opt_vectorize.cpp delete mode 100644 src/glsl/program.h delete mode 100644 src/glsl/s_expression.cpp delete mode 100644 src/glsl/s_expression.h delete mode 100644 src/glsl/standalone_scaffolding.cpp delete mode 100644 src/glsl/standalone_scaffolding.h delete mode 100644 src/glsl/test.cpp delete mode 100644 src/glsl/test_optpass.cpp delete mode 100644 src/glsl/test_optpass.h delete mode 100644 src/glsl/tests/.gitignore delete mode 100644 src/glsl/tests/blob_test.c delete mode 100644 src/glsl/tests/builtin_variable_test.cpp delete mode 100755 src/glsl/tests/compare_ir delete mode 100644 src/glsl/tests/copy_constant_to_storage_tests.cpp delete mode 100644 src/glsl/tests/general_ir_test.cpp delete mode 100644 src/glsl/tests/invalidate_locations_test.cpp delete mode 100644 src/glsl/tests/lower_jumps/.gitignore delete mode 100644 src/glsl/tests/lower_jumps/create_test_cases.py delete mode 100755 src/glsl/tests/optimization-test delete mode 100644 src/glsl/tests/sampler_types_test.cpp delete mode 100644 src/glsl/tests/set_uniform_initializer_tests.cpp delete mode 100644 src/glsl/tests/sexps.py delete mode 100644 src/glsl/tests/uniform_initializer_utils.cpp delete mode 100644 src/glsl/tests/uniform_initializer_utils.h delete mode 100644 src/glsl/tests/varyings_test.cpp (limited to 'src/mesa') diff --git a/configure.ac b/configure.ac index dc1e995b8d5..4172eaeefd9 100644 --- a/configure.ac +++ b/configure.ac @@ -2506,7 +2506,6 @@ AC_CONFIG_FILES([Makefile src/gallium/winsys/virgl/vtest/Makefile src/gbm/Makefile src/gbm/main/gbm.pc - src/glsl/Makefile src/glx/Makefile src/glx/apple/Makefile src/glx/tests/Makefile diff --git a/src/Makefile.am b/src/Makefile.am index a0ec40146b0..9b23cf58f4f 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -25,7 +25,7 @@ SUBDIRS = . 
gtest util mapi/glapi/gen mapi SUBDIRS += compiler if NEED_OPENGL_COMMON -SUBDIRS += glsl mesa +SUBDIRS += mesa endif SUBDIRS += loader diff --git a/src/SConscript b/src/SConscript index 8acf9b04ab6..4ba0a32d669 100644 --- a/src/SConscript +++ b/src/SConscript @@ -6,7 +6,6 @@ if env['platform'] == 'windows': SConscript('util/SConscript') SConscript('compiler/SConscript') -SConscript('glsl/SConscript') if env['hostonly']: # We are just compiling the things necessary on the host for cross diff --git a/src/compiler/Makefile.am b/src/compiler/Makefile.am index 1e3778df8d5..0bc8e48efa6 100644 --- a/src/compiler/Makefile.am +++ b/src/compiler/Makefile.am @@ -1,4 +1,5 @@ # +# Copyright © 2012 Jon TURNEY # Copyright (C) 2015 Intel Corporation # # Permission is hereby granted, free of charge, to any person obtaining a @@ -27,6 +28,9 @@ AM_CPPFLAGS = \ -I$(top_srcdir)/src \ -I$(top_srcdir)/src/mapi \ -I$(top_srcdir)/src/mesa/ \ + -I$(top_builddir)/src/compiler/glsl\ + -I$(top_srcdir)/src/compiler/glsl\ + -I$(top_srcdir)/src/compiler/glsl/glcpp\ -I$(top_srcdir)/src/gallium/include \ -I$(top_srcdir)/src/gallium/auxiliary \ -I$(top_srcdir)/src/gtest/include \ @@ -51,6 +55,201 @@ CLEANFILES = EXTRA_DIST = SConscript +EXTRA_DIST += glsl/tests glsl/glcpp/tests glsl/README \ + glsl/TODO glsl/glcpp/README \ + glsl/glsl_lexer.ll \ + glsl/glsl_parser.yy \ + glsl/glcpp/glcpp-lex.l \ + glsl/glcpp/glcpp-parse.y \ + glsl/Makefile.sources \ + glsl/SConscript + +TESTS += glsl/glcpp/tests/glcpp-test \ + glsl/glcpp/tests/glcpp-test-cr-lf \ + glsl/tests/blob-test \ + glsl/tests/general-ir-test \ + glsl/tests/optimization-test \ + glsl/tests/sampler-types-test \ + glsl/tests/uniform-initializer-test + +TESTS_ENVIRONMENT= \ + export PYTHON2=$(PYTHON2); \ + export PYTHON_FLAGS=$(PYTHON_FLAGS); + +check_PROGRAMS += \ + glsl/glcpp/glcpp \ + glsl/glsl_test \ + glsl/tests/blob-test \ + glsl/tests/general-ir-test \ + glsl/tests/sampler-types-test \ + glsl/tests/uniform-initializer-test + +noinst_PROGRAMS = glsl_compiler + +glsl_tests_blob_test_SOURCES = \ + glsl/tests/blob_test.c +glsl_tests_blob_test_LDADD = \ + glsl/libglsl.la + +glsl_tests_general_ir_test_SOURCES = \ + glsl/standalone_scaffolding.cpp \ + glsl/tests/builtin_variable_test.cpp \ + glsl/tests/invalidate_locations_test.cpp \ + glsl/tests/general_ir_test.cpp \ + glsl/tests/varyings_test.cpp +glsl_tests_general_ir_test_CFLAGS = \ + $(PTHREAD_CFLAGS) +glsl_tests_general_ir_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +glsl_tests_uniform_initializer_test_SOURCES = \ + glsl/tests/copy_constant_to_storage_tests.cpp \ + glsl/tests/set_uniform_initializer_tests.cpp \ + glsl/tests/uniform_initializer_utils.cpp \ + glsl/tests/uniform_initializer_utils.h +glsl_tests_uniform_initializer_test_CFLAGS = \ + $(PTHREAD_CFLAGS) +glsl_tests_uniform_initializer_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +glsl_tests_sampler_types_test_SOURCES = \ + glsl/tests/sampler_types_test.cpp +glsl_tests_sampler_types_test_CFLAGS = \ + $(PTHREAD_CFLAGS) +glsl_tests_sampler_types_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +noinst_LTLIBRARIES += glsl/libglsl.la glsl/libglcpp.la + +glsl_libglcpp_la_LIBADD = \ + $(top_builddir)/src/util/libmesautil.la +glsl_libglcpp_la_SOURCES = \ + glsl/glcpp/glcpp-lex.c \ + 
glsl/glcpp/glcpp-parse.c \ + glsl/glcpp/glcpp-parse.h \ + $(LIBGLCPP_FILES) + +glsl_glcpp_glcpp_SOURCES = \ + glsl/glcpp/glcpp.c +glsl_glcpp_glcpp_LDADD = \ + glsl/libglcpp.la \ + $(top_builddir)/src/libglsl_util.la \ + -lm + +glsl_libglsl_la_LIBADD = \ + nir/libnir.la \ + glsl/libglcpp.la + +glsl_libglsl_la_SOURCES = \ + glsl/glsl_lexer.cpp \ + glsl/glsl_parser.cpp \ + glsl/glsl_parser.h \ + $(LIBGLSL_FILES) + + +glsl_compiler_SOURCES = \ + $(GLSL_COMPILER_CXX_FILES) + +glsl_compiler_LDADD = \ + glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(PTHREAD_LIBS) + +glsl_glsl_test_SOURCES = \ + glsl/standalone_scaffolding.cpp \ + glsl/test.cpp \ + glsl/test_optpass.cpp \ + glsl/test_optpass.h + +glsl_glsl_test_LDADD = \ + glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +# We write our own rules for yacc and lex below. We'd rather use automake, +# but automake makes it especially difficult for a number of reasons: +# +# * < automake-1.12 generates .h files from .yy and .ypp files, but +# >=automake-1.12 generates .hh and .hpp files respectively. There's no +# good way of making a project that uses C++ yacc files compatible with +# both versions of automake. Strong work automake developers. +# +# * Since we're generating code from .l/.y files in a subdirectory (glcpp/) +# we'd like the resulting generated code to also go in glcpp/ for purposes +# of distribution. Automake gives no way to do this. +# +# * Since we're building multiple yacc parsers into one library (and via one +# Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes +# automake to name the resulting generated code as _filename.c. +# Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file. + +# In order to make build output print "LEX" and "YACC", we reproduce the +# automake variables below. + +AM_V_LEX = $(am__v_LEX_$(V)) +am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY)) +am__v_LEX_0 = @echo " LEX " $@; +am__v_LEX_1 = + +AM_V_YACC = $(am__v_YACC_$(V)) +am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY)) +am__v_YACC_0 = @echo " YACC " $@; +am__v_YACC_1 = + +MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) +YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS) +LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS) + +glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy + $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl/glsl_parser.h $(srcdir)/glsl/glsl_parser.yy + +glsl/glsl_lexer.cpp: glsl/glsl_lexer.ll + $(LEX_GEN) -o $@ $(srcdir)/glsl/glsl_lexer.ll + +glsl/glcpp/glcpp-parse.c glsl/glcpp/glcpp-parse.h: glsl/glcpp/glcpp-parse.y + $(MKDIR_GEN) + $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glsl/glcpp/glcpp-parse.h $(srcdir)/glsl/glcpp/glcpp-parse.y + +glsl/glcpp/glcpp-lex.c: glsl/glcpp/glcpp-lex.l + $(MKDIR_GEN) + $(LEX_GEN) -o $@ $(srcdir)/glsl/glcpp/glcpp-lex.l + +# Only the parsers (specifically the header files generated at the same time) +# need to be in BUILT_SOURCES. Though if we list the parser headers YACC is +# called for the .c/.cpp file and the .h files. By listing the .c/.cpp files +# YACC is only executed once for each parser. The rest of the generated code +# will be created at the appropriate times according to standard automake +# dependency rules. 
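+#
+# For example: the parser rule above,
+#
+#   glsl/glsl_parser.cpp glsl/glsl_parser.h: glsl/glsl_parser.yy
+#
+# is an ordinary multi-target rule, which make expands to one rule per
+# target with a shared recipe. Listing glsl/glsl_parser.h here as well
+# would let make pursue the header as a separate goal and potentially
+# invoke YACC a second time (most visibly with parallel make) for a
+# file the first invocation already wrote.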
+BUILT_SOURCES += \ + glsl/glsl_parser.cpp \ + glsl/glsl_lexer.cpp \ + glsl/glcpp/glcpp-parse.c \ + glsl/glcpp/glcpp-lex.c +CLEANFILES += \ + glsl/glcpp/glcpp-parse.h \ + glsl/glsl_parser.h \ + glsl/glsl_parser.cpp \ + glsl/glsl_lexer.cpp \ + glsl/glcpp/glcpp-parse.c \ + glsl/glcpp/glcpp-lex.c + +clean-local: + $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr + +dist-hook: + $(RM) glsl/glcpp/tests/*.out + $(RM) glsl/glcpp/tests/subtest*/*.out + noinst_LTLIBRARIES += nir/libnir.la nir_libnir_la_CPPFLAGS = \ @@ -101,7 +300,7 @@ nir_tests_control_flow_tests_CFLAGS = \ $(PTHREAD_CFLAGS) nir_tests_control_flow_tests_LDADD = \ $(top_builddir)/src/gtest/libgtest.la \ - $(top_builddir)/src/compiler/nir/libnir.la \ + nir/libnir.la \ $(top_builddir)/src/util/libmesautil.la \ $(PTHREAD_LIBS) diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources index fe12e419afb..c9780d6d6f7 100644 --- a/src/compiler/Makefile.sources +++ b/src/compiler/Makefile.sources @@ -7,6 +7,153 @@ LIBCOMPILER_FILES = \ shader_enums.c \ shader_enums.h +# libglsl + +LIBGLSL_FILES = \ + glsl/ast.h \ + glsl/ast_array_index.cpp \ + glsl/ast_expr.cpp \ + glsl/ast_function.cpp \ + glsl/ast_to_hir.cpp \ + glsl/ast_type.cpp \ + glsl/blob.c \ + glsl/blob.h \ + glsl/builtin_functions.cpp \ + glsl/builtin_types.cpp \ + glsl/builtin_variables.cpp \ + glsl/glsl_parser_extras.cpp \ + glsl/glsl_parser_extras.h \ + glsl/glsl_symbol_table.cpp \ + glsl/glsl_symbol_table.h \ + glsl/hir_field_selection.cpp \ + glsl/ir_basic_block.cpp \ + glsl/ir_basic_block.h \ + glsl/ir_builder.cpp \ + glsl/ir_builder.h \ + glsl/ir_clone.cpp \ + glsl/ir_constant_expression.cpp \ + glsl/ir.cpp \ + glsl/ir.h \ + glsl/ir_equals.cpp \ + glsl/ir_expression_flattening.cpp \ + glsl/ir_expression_flattening.h \ + glsl/ir_function_can_inline.cpp \ + glsl/ir_function_detect_recursion.cpp \ + glsl/ir_function_inlining.h \ + glsl/ir_function.cpp \ + glsl/ir_hierarchical_visitor.cpp \ + glsl/ir_hierarchical_visitor.h \ + glsl/ir_hv_accept.cpp \ + glsl/ir_import_prototypes.cpp \ + glsl/ir_optimization.h \ + glsl/ir_print_visitor.cpp \ + glsl/ir_print_visitor.h \ + glsl/ir_reader.cpp \ + glsl/ir_reader.h \ + glsl/ir_rvalue_visitor.cpp \ + glsl/ir_rvalue_visitor.h \ + glsl/ir_set_program_inouts.cpp \ + glsl/ir_uniform.h \ + glsl/ir_validate.cpp \ + glsl/ir_variable_refcount.cpp \ + glsl/ir_variable_refcount.h \ + glsl/ir_visitor.h \ + glsl/linker.cpp \ + glsl/linker.h \ + glsl/link_atomics.cpp \ + glsl/link_functions.cpp \ + glsl/link_interface_blocks.cpp \ + glsl/link_uniforms.cpp \ + glsl/link_uniform_initializers.cpp \ + glsl/link_uniform_block_active_visitor.cpp \ + glsl/link_uniform_block_active_visitor.h \ + glsl/link_uniform_blocks.cpp \ + glsl/link_varyings.cpp \ + glsl/link_varyings.h \ + glsl/list.h \ + glsl/loop_analysis.cpp \ + glsl/loop_analysis.h \ + glsl/loop_controls.cpp \ + glsl/loop_unroll.cpp \ + glsl/lower_buffer_access.cpp \ + glsl/lower_buffer_access.h \ + glsl/lower_clip_distance.cpp \ + glsl/lower_const_arrays_to_uniforms.cpp \ + glsl/lower_discard.cpp \ + glsl/lower_discard_flow.cpp \ + glsl/lower_if_to_cond_assign.cpp \ + glsl/lower_instructions.cpp \ + glsl/lower_jumps.cpp \ + glsl/lower_mat_op_to_vec.cpp \ + glsl/lower_noise.cpp \ + glsl/lower_offset_array.cpp \ + glsl/lower_packed_varyings.cpp \ + glsl/lower_named_interface_blocks.cpp \ + glsl/lower_packing_builtins.cpp \ + glsl/lower_subroutine.cpp \ + glsl/lower_tess_level.cpp \ + glsl/lower_texture_projection.cpp \ + glsl/lower_variable_index_to_cond_assign.cpp \ + 
glsl/lower_vec_index_to_cond_assign.cpp \ + glsl/lower_vec_index_to_swizzle.cpp \ + glsl/lower_vector.cpp \ + glsl/lower_vector_derefs.cpp \ + glsl/lower_vector_insert.cpp \ + glsl/lower_vertex_id.cpp \ + glsl/lower_output_reads.cpp \ + glsl/lower_shared_reference.cpp \ + glsl/lower_ubo_reference.cpp \ + glsl/opt_algebraic.cpp \ + glsl/opt_array_splitting.cpp \ + glsl/opt_conditional_discard.cpp \ + glsl/opt_constant_folding.cpp \ + glsl/opt_constant_propagation.cpp \ + glsl/opt_constant_variable.cpp \ + glsl/opt_copy_propagation.cpp \ + glsl/opt_copy_propagation_elements.cpp \ + glsl/opt_dead_builtin_variables.cpp \ + glsl/opt_dead_builtin_varyings.cpp \ + glsl/opt_dead_code.cpp \ + glsl/opt_dead_code_local.cpp \ + glsl/opt_dead_functions.cpp \ + glsl/opt_flatten_nested_if_blocks.cpp \ + glsl/opt_flip_matrices.cpp \ + glsl/opt_function_inlining.cpp \ + glsl/opt_if_simplification.cpp \ + glsl/opt_minmax.cpp \ + glsl/opt_noop_swizzle.cpp \ + glsl/opt_rebalance_tree.cpp \ + glsl/opt_redundant_jumps.cpp \ + glsl/opt_structure_splitting.cpp \ + glsl/opt_swizzle_swizzle.cpp \ + glsl/opt_tree_grafting.cpp \ + glsl/opt_vectorize.cpp \ + glsl/program.h \ + glsl/s_expression.cpp \ + glsl/s_expression.h + +# glsl_compiler + +GLSL_COMPILER_CXX_FILES = \ + glsl/standalone_scaffolding.cpp \ + glsl/standalone_scaffolding.h \ + glsl/main.cpp + +# libglsl generated sources +LIBGLSL_GENERATED_CXX_FILES = \ + glsl/glsl_lexer.cpp \ + glsl/glsl_parser.cpp + +# libglcpp + +LIBGLCPP_FILES = \ + glsl/glcpp/glcpp.h \ + glsl/glcpp/pp.c + +LIBGLCPP_GENERATED_FILES = \ + glsl/glcpp/glcpp-lex.c \ + glsl/glcpp/glcpp-parse.c + NIR_GENERATED_FILES = \ nir/nir_builder_opcodes.h \ nir/nir_constant_expressions.c \ diff --git a/src/compiler/SConscript b/src/compiler/SConscript index 412da0e62bd..8d71b82bee0 100644 --- a/src/compiler/SConscript +++ b/src/compiler/SConscript @@ -20,3 +20,5 @@ compiler = env.ConvenienceLibrary( source = sources ) Export('compiler') + +SConscript('glsl/SConscript') diff --git a/src/compiler/glsl/.gitignore b/src/compiler/glsl/.gitignore new file mode 100644 index 00000000000..dda423f83db --- /dev/null +++ b/src/compiler/glsl/.gitignore @@ -0,0 +1,10 @@ +glsl_compiler +glsl_lexer.cpp +glsl_parser.cpp +glsl_parser.h +glsl_parser.output +glsl_test +subtest-cr/ +subtest-lf/ +subtest-cr-lf/ +subtest-lf-cr/ diff --git a/src/compiler/glsl/Android.gen.mk b/src/compiler/glsl/Android.gen.mk new file mode 100644 index 00000000000..c5741b40bc5 --- /dev/null +++ b/src/compiler/glsl/Android.gen.mk @@ -0,0 +1,76 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# included by glsl Android.mk for source generation + +ifeq ($(LOCAL_MODULE_CLASS),) +LOCAL_MODULE_CLASS := STATIC_LIBRARIES +endif + +intermediates := $(call local-generated-sources-dir) + +LOCAL_SRC_FILES := $(LOCAL_SRC_FILES) + +LOCAL_C_INCLUDES += \ + $(intermediates)/glcpp \ + $(MESA_TOP)/src/glsl/glcpp \ + +LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ + $(LIBGLCPP_GENERATED_FILES) \ + $(LIBGLSL_GENERATED_CXX_FILES)) + +define local-l-or-ll-to-c-or-cpp + @mkdir -p $(dir $@) + @echo "Mesa Lex: $(PRIVATE_MODULE) <= $<" + $(hide) $(LEX) --nounistd -o$@ $< +endef + +define glsl_local-y-to-c-and-h + @mkdir -p $(dir $@) + @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" + $(hide) $(YACC) -o $@ -p "glcpp_parser_" $< +endef + +define local-yy-to-cpp-and-h + @mkdir -p $(dir $@) + @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" + $(hide) $(YACC) -p "_mesa_glsl_" -o $@ $< + touch $(@:$1=$(YACC_HEADER_SUFFIX)) + echo '#ifndef '$(@F:$1=_h) > $(@:$1=.h) + echo '#define '$(@F:$1=_h) >> $(@:$1=.h) + cat $(@:$1=$(YACC_HEADER_SUFFIX)) >> $(@:$1=.h) + echo '#endif' >> $(@:$1=.h) + rm -f $(@:$1=$(YACC_HEADER_SUFFIX)) +endef + +$(intermediates)/glsl_lexer.cpp: $(LOCAL_PATH)/glsl_lexer.ll + $(call local-l-or-ll-to-c-or-cpp) + +$(intermediates)/glsl_parser.cpp: $(LOCAL_PATH)/glsl_parser.yy + $(call local-yy-to-cpp-and-h,.cpp) + +$(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l + $(call local-l-or-ll-to-c-or-cpp) + +$(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y + $(call glsl_local-y-to-c-and-h) diff --git a/src/compiler/glsl/Android.mk b/src/compiler/glsl/Android.mk new file mode 100644 index 00000000000..9cbb9a339a1 --- /dev/null +++ b/src/compiler/glsl/Android.mk @@ -0,0 +1,76 @@ +# Mesa 3-D graphics library +# +# Copyright (C) 2010-2011 Chia-I Wu +# Copyright (C) 2010-2011 LunarG Inc. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included +# in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. 
+ +# Android.mk for glsl + +LOCAL_PATH := $(call my-dir) + +include $(LOCAL_PATH)/Makefile.sources + +# --------------------------------------- +# Build libmesa_glsl +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(LIBGLCPP_FILES) \ + $(LIBGLSL_FILES) \ + $(NIR_FILES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary + +LOCAL_STATIC_LIBRARIES := libmesa_compiler + +LOCAL_MODULE := libmesa_glsl + +include $(LOCAL_PATH)/Android.gen.mk +include $(MESA_COMMON_MK) +include $(BUILD_STATIC_LIBRARY) + +# --------------------------------------- +# Build glsl_compiler +# --------------------------------------- + +include $(CLEAR_VARS) + +LOCAL_SRC_FILES := \ + $(GLSL_COMPILER_CXX_FILES) + +LOCAL_C_INCLUDES := \ + $(MESA_TOP)/src/mapi \ + $(MESA_TOP)/src/mesa \ + $(MESA_TOP)/src/gallium/include \ + $(MESA_TOP)/src/gallium/auxiliary + +LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_glsl_utils libmesa_util + +LOCAL_MODULE_TAGS := eng +LOCAL_MODULE := glsl_compiler + +include $(MESA_COMMON_MK) +include $(BUILD_EXECUTABLE) diff --git a/src/compiler/glsl/Makefile.am b/src/compiler/glsl/Makefile.am new file mode 100644 index 00000000000..9954b812403 --- /dev/null +++ b/src/compiler/glsl/Makefile.am @@ -0,0 +1,228 @@ +# Copyright © 2012 Jon TURNEY +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS +# IN THE SOFTWARE. 
+ +AM_CPPFLAGS = \ + -I$(top_srcdir)/include \ + -I$(top_srcdir)/src \ + -I$(top_srcdir)/src/mapi \ + -I$(top_srcdir)/src/mesa/ \ + -I$(top_srcdir)/src/gallium/include \ + -I$(top_srcdir)/src/gallium/auxiliary \ + -I$(top_srcdir)/src/glsl/glcpp \ + -I$(top_srcdir)/src/gtest/include \ + $(DEFINES) +AM_CFLAGS = \ + $(VISIBILITY_CFLAGS) \ + $(MSVC2013_COMPAT_CFLAGS) +AM_CXXFLAGS = \ + $(VISIBILITY_CXXFLAGS) \ + $(MSVC2013_COMPAT_CXXFLAGS) + +EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \ + glsl_lexer.ll \ + glsl_parser.yy \ + glcpp/glcpp-lex.l \ + glcpp/glcpp-parse.y \ + SConscript + +include Makefile.sources + +TESTS = glcpp/tests/glcpp-test \ + glcpp/tests/glcpp-test-cr-lf \ + tests/blob-test \ + tests/general-ir-test \ + tests/optimization-test \ + tests/sampler-types-test \ + tests/uniform-initializer-test + +TESTS_ENVIRONMENT= \ + export PYTHON2=$(PYTHON2); \ + export PYTHON_FLAGS=$(PYTHON_FLAGS); + +noinst_LTLIBRARIES = libglsl.la libglcpp.la +check_PROGRAMS = \ + glcpp/glcpp \ + glsl_test \ + tests/blob-test \ + tests/general-ir-test \ + tests/sampler-types-test \ + tests/uniform-initializer-test + +noinst_PROGRAMS = glsl_compiler + +tests_blob_test_SOURCES = \ + tests/blob_test.c +tests_blob_test_LDADD = \ + $(top_builddir)/src/glsl/libglsl.la + +tests_general_ir_test_SOURCES = \ + standalone_scaffolding.cpp \ + tests/builtin_variable_test.cpp \ + tests/invalidate_locations_test.cpp \ + tests/general_ir_test.cpp \ + tests/varyings_test.cpp +tests_general_ir_test_CFLAGS = \ + $(PTHREAD_CFLAGS) +tests_general_ir_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +tests_uniform_initializer_test_SOURCES = \ + tests/copy_constant_to_storage_tests.cpp \ + tests/set_uniform_initializer_tests.cpp \ + tests/uniform_initializer_utils.cpp \ + tests/uniform_initializer_utils.h +tests_uniform_initializer_test_CFLAGS = \ + $(PTHREAD_CFLAGS) +tests_uniform_initializer_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +tests_sampler_types_test_SOURCES = \ + tests/sampler_types_test.cpp +tests_sampler_types_test_CFLAGS = \ + $(PTHREAD_CFLAGS) +tests_sampler_types_test_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(top_builddir)/src/glsl/libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +libglcpp_la_LIBADD = \ + $(top_builddir)/src/util/libmesautil.la +libglcpp_la_SOURCES = \ + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c \ + glcpp/glcpp-parse.h \ + $(LIBGLCPP_FILES) + +glcpp_glcpp_SOURCES = \ + glcpp/glcpp.c +glcpp_glcpp_LDADD = \ + libglcpp.la \ + $(top_builddir)/src/libglsl_util.la \ + -lm + +libglsl_la_LIBADD = \ + $(top_builddir)/src/compiler/nir/libnir.la \ + libglcpp.la + +libglsl_la_SOURCES = \ + glsl_lexer.cpp \ + glsl_parser.cpp \ + glsl_parser.h \ + $(LIBGLSL_FILES) + + +glsl_compiler_SOURCES = \ + $(GLSL_COMPILER_CXX_FILES) + +glsl_compiler_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(top_builddir)/src/util/libmesautil.la \ + $(PTHREAD_LIBS) + +glsl_test_SOURCES = \ + standalone_scaffolding.cpp \ + test.cpp \ + test_optpass.cpp \ + test_optpass.h + +glsl_test_LDADD = \ + libglsl.la \ + $(top_builddir)/src/libglsl_util.la \ + $(PTHREAD_LIBS) + +# We write our own rules for yacc and lex below. 
We'd rather use automake, +# but automake makes it especially difficult for a number of reasons: +# +# * < automake-1.12 generates .h files from .yy and .ypp files, but +# >=automake-1.12 generates .hh and .hpp files respectively. There's no +# good way of making a project that uses C++ yacc files compatible with +# both versions of automake. Strong work automake developers. +# +# * Since we're generating code from .l/.y files in a subdirectory (glcpp/) +# we'd like the resulting generated code to also go in glcpp/ for purposes +# of distribution. Automake gives no way to do this. +# +# * Since we're building multiple yacc parsers into one library (and via one +# Makefile) we have to use per-target YFLAGS. Using per-target YFLAGS causes +# automake to name the resulting generated code as _filename.c. +# Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file. + +# In order to make build output print "LEX" and "YACC", we reproduce the +# automake variables below. + +AM_V_LEX = $(am__v_LEX_$(V)) +am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY)) +am__v_LEX_0 = @echo " LEX " $@; +am__v_LEX_1 = + +AM_V_YACC = $(am__v_YACC_$(V)) +am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY)) +am__v_YACC_0 = @echo " YACC " $@; +am__v_YACC_1 = + +MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D) +YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS) +LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS) + +glsl_parser.cpp glsl_parser.h: glsl_parser.yy + $(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy + +glsl_lexer.cpp: glsl_lexer.ll + $(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll + +glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y + $(MKDIR_GEN) + $(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y + +glcpp/glcpp-lex.c: glcpp/glcpp-lex.l + $(MKDIR_GEN) + $(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l + +# Only the parsers (specifically the header files generated at the same time) +# need to be in BUILT_SOURCES. Though if we list the parser headers YACC is +# called for the .c/.cpp file and the .h files. By listing the .c/.cpp files +# YACC is only executed once for each parser. The rest of the generated code +# will be created at the appropriate times according to standard automake +# dependency rules. 
+BUILT_SOURCES = \ + glsl_parser.cpp \ + glsl_lexer.cpp \ + glcpp/glcpp-parse.c \ + glcpp/glcpp-lex.c +CLEANFILES = \ + glcpp/glcpp-parse.h \ + glsl_parser.h \ + $(BUILT_SOURCES) + +clean-local: + $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr + +dist-hook: + $(RM) glcpp/tests/*.out + $(RM) glcpp/tests/subtest*/*.out diff --git a/src/compiler/glsl/Makefile.sources b/src/compiler/glsl/Makefile.sources new file mode 100644 index 00000000000..08b40c5cc8f --- /dev/null +++ b/src/compiler/glsl/Makefile.sources @@ -0,0 +1,222 @@ +# shared source lists for Makefile, SConscript, and Android.mk + +# libglcpp + +LIBGLCPP_FILES = \ + glcpp/glcpp.h \ + glcpp/pp.c + +LIBGLCPP_GENERATED_FILES = \ + glcpp/glcpp-lex.c \ + glcpp/glcpp-parse.c + +NIR_GENERATED_FILES = \ + nir/nir_builder_opcodes.h \ + nir/nir_constant_expressions.c \ + nir/nir_opcodes.c \ + nir/nir_opcodes.h \ + nir/nir_opt_algebraic.c + +NIR_FILES = \ + nir/nir.c \ + nir/nir.h \ + nir/nir_array.h \ + nir/nir_builder.h \ + nir/nir_clone.c \ + nir/nir_constant_expressions.h \ + nir/nir_control_flow.c \ + nir/nir_control_flow.h \ + nir/nir_control_flow_private.h \ + nir/nir_dominance.c \ + nir/nir_from_ssa.c \ + nir/nir_gs_count_vertices.c \ + nir/nir_intrinsics.c \ + nir/nir_intrinsics.h \ + nir/nir_instr_set.c \ + nir/nir_instr_set.h \ + nir/nir_liveness.c \ + nir/nir_lower_alu_to_scalar.c \ + nir/nir_lower_atomics.c \ + nir/nir_lower_clip.c \ + nir/nir_lower_global_vars_to_local.c \ + nir/nir_lower_gs_intrinsics.c \ + nir/nir_lower_load_const_to_scalar.c \ + nir/nir_lower_locals_to_regs.c \ + nir/nir_lower_idiv.c \ + nir/nir_lower_io.c \ + nir/nir_lower_outputs_to_temporaries.c \ + nir/nir_lower_phis_to_scalar.c \ + nir/nir_lower_samplers.c \ + nir/nir_lower_system_values.c \ + nir/nir_lower_tex.c \ + nir/nir_lower_to_source_mods.c \ + nir/nir_lower_two_sided_color.c \ + nir/nir_lower_vars_to_ssa.c \ + nir/nir_lower_var_copies.c \ + nir/nir_lower_vec_to_movs.c \ + nir/nir_metadata.c \ + nir/nir_move_vec_src_uses_to_dest.c \ + nir/nir_normalize_cubemap_coords.c \ + nir/nir_opt_constant_folding.c \ + nir/nir_opt_copy_propagate.c \ + nir/nir_opt_cse.c \ + nir/nir_opt_dce.c \ + nir/nir_opt_dead_cf.c \ + nir/nir_opt_gcm.c \ + nir/nir_opt_global_to_local.c \ + nir/nir_opt_peephole_select.c \ + nir/nir_opt_remove_phis.c \ + nir/nir_opt_undef.c \ + nir/nir_print.c \ + nir/nir_remove_dead_variables.c \ + nir/nir_search.c \ + nir/nir_search.h \ + nir/nir_split_var_copies.c \ + nir/nir_sweep.c \ + nir/nir_to_ssa.c \ + nir/nir_validate.c \ + nir/nir_vla.h \ + nir/nir_worklist.c \ + nir/nir_worklist.h + +# libglsl + +LIBGLSL_FILES = \ + ast.h \ + ast_array_index.cpp \ + ast_expr.cpp \ + ast_function.cpp \ + ast_to_hir.cpp \ + ast_type.cpp \ + blob.c \ + blob.h \ + builtin_functions.cpp \ + builtin_types.cpp \ + builtin_variables.cpp \ + glsl_parser_extras.cpp \ + glsl_parser_extras.h \ + glsl_symbol_table.cpp \ + glsl_symbol_table.h \ + hir_field_selection.cpp \ + ir_basic_block.cpp \ + ir_basic_block.h \ + ir_builder.cpp \ + ir_builder.h \ + ir_clone.cpp \ + ir_constant_expression.cpp \ + ir.cpp \ + ir.h \ + ir_equals.cpp \ + ir_expression_flattening.cpp \ + ir_expression_flattening.h \ + ir_function_can_inline.cpp \ + ir_function_detect_recursion.cpp \ + ir_function_inlining.h \ + ir_function.cpp \ + ir_hierarchical_visitor.cpp \ + ir_hierarchical_visitor.h \ + ir_hv_accept.cpp \ + ir_import_prototypes.cpp \ + ir_optimization.h \ + ir_print_visitor.cpp \ + ir_print_visitor.h \ + ir_reader.cpp \ + ir_reader.h \ + ir_rvalue_visitor.cpp \ + 
ir_rvalue_visitor.h \ + ir_set_program_inouts.cpp \ + ir_uniform.h \ + ir_validate.cpp \ + ir_variable_refcount.cpp \ + ir_variable_refcount.h \ + ir_visitor.h \ + linker.cpp \ + linker.h \ + link_atomics.cpp \ + link_functions.cpp \ + link_interface_blocks.cpp \ + link_uniforms.cpp \ + link_uniform_initializers.cpp \ + link_uniform_block_active_visitor.cpp \ + link_uniform_block_active_visitor.h \ + link_uniform_blocks.cpp \ + link_varyings.cpp \ + link_varyings.h \ + list.h \ + loop_analysis.cpp \ + loop_analysis.h \ + loop_controls.cpp \ + loop_unroll.cpp \ + lower_buffer_access.cpp \ + lower_buffer_access.h \ + lower_clip_distance.cpp \ + lower_const_arrays_to_uniforms.cpp \ + lower_discard.cpp \ + lower_discard_flow.cpp \ + lower_if_to_cond_assign.cpp \ + lower_instructions.cpp \ + lower_jumps.cpp \ + lower_mat_op_to_vec.cpp \ + lower_noise.cpp \ + lower_offset_array.cpp \ + lower_packed_varyings.cpp \ + lower_named_interface_blocks.cpp \ + lower_packing_builtins.cpp \ + lower_subroutine.cpp \ + lower_tess_level.cpp \ + lower_texture_projection.cpp \ + lower_variable_index_to_cond_assign.cpp \ + lower_vec_index_to_cond_assign.cpp \ + lower_vec_index_to_swizzle.cpp \ + lower_vector.cpp \ + lower_vector_derefs.cpp \ + lower_vector_insert.cpp \ + lower_vertex_id.cpp \ + lower_output_reads.cpp \ + lower_shared_reference.cpp \ + lower_ubo_reference.cpp \ + opt_algebraic.cpp \ + opt_array_splitting.cpp \ + opt_conditional_discard.cpp \ + opt_constant_folding.cpp \ + opt_constant_propagation.cpp \ + opt_constant_variable.cpp \ + opt_copy_propagation.cpp \ + opt_copy_propagation_elements.cpp \ + opt_dead_builtin_variables.cpp \ + opt_dead_builtin_varyings.cpp \ + opt_dead_code.cpp \ + opt_dead_code_local.cpp \ + opt_dead_functions.cpp \ + opt_flatten_nested_if_blocks.cpp \ + opt_flip_matrices.cpp \ + opt_function_inlining.cpp \ + opt_if_simplification.cpp \ + opt_minmax.cpp \ + opt_noop_swizzle.cpp \ + opt_rebalance_tree.cpp \ + opt_redundant_jumps.cpp \ + opt_structure_splitting.cpp \ + opt_swizzle_swizzle.cpp \ + opt_tree_grafting.cpp \ + opt_vectorize.cpp \ + program.h \ + s_expression.cpp \ + s_expression.h + +# glsl to nir pass +GLSL_TO_NIR_FILES = \ + nir/glsl_to_nir.cpp \ + nir/glsl_to_nir.h + +# glsl_compiler + +GLSL_COMPILER_CXX_FILES = \ + standalone_scaffolding.cpp \ + standalone_scaffolding.h \ + main.cpp + +# libglsl generated sources +LIBGLSL_GENERATED_CXX_FILES = \ + glsl_lexer.cpp \ + glsl_parser.cpp diff --git a/src/compiler/glsl/README b/src/compiler/glsl/README new file mode 100644 index 00000000000..bfcf69f903a --- /dev/null +++ b/src/compiler/glsl/README @@ -0,0 +1,228 @@ +Welcome to Mesa's GLSL compiler. A brief overview of how things flow: + +1) lex and yacc-based preprocessor takes the incoming shader string +and produces a new string containing the preprocessed shader. This +takes care of things like #if, #ifdef, #define, and preprocessor macro +invocations. Note that #version, #extension, and some others are +passed straight through. See glcpp/* + +2) lex and yacc-based parser takes the preprocessed string and +generates the AST (abstract syntax tree). Almost no checking is +performed in this stage. See glsl_lexer.ll and glsl_parser.yy. + +3) The AST is converted to "HIR". This is the intermediate +representation of the compiler. Constructors are generated, function +calls are resolved to particular function signatures, and all the +semantic checking is performed. See ast_*.cpp for the conversion, and +ir.h for the IR structures. 
+ +4) The driver (Mesa, or main.cpp for the standalone binary) performs +optimizations. These include copy propagation, dead code elimination, +constant folding, and others. Generally the driver will call +optimizations in a loop, as each may open up opportunities for other +optimizations to do additional work. See most files called ir_*.cpp + +5) Linking is performed. This does checking to ensure that the +outputs of the vertex shader match the inputs of the fragment shader, +and assigns locations to uniforms, attributes, and varyings. See +linker.cpp. + +6) The driver may perform additional optimization at this point, as +for example dead code elimination previously couldn't remove functions +or global variable usage when we didn't know what other code would be +linked in. + +7) The driver performs code generation out of the IR, taking a linked +shader program and producing a compiled program for each stage. See +../mesa/program/ir_to_mesa.cpp for Mesa IR code generation. + +FAQ: + +Q: What is HIR versus IR versus LIR? + +A: The idea behind the naming was that ast_to_hir would produce a +high-level IR ("HIR"), with things like matrix operations, structure +assignments, etc., present. A series of lowering passes would occur +that do things like break matrix multiplication into a series of dot +products/MADs, make structure assignment be a series of assignment of +components, flatten if statements into conditional moves, and such, +producing a low level IR ("LIR"). + +However, it now appears that each driver will have different +requirements from a LIR. A 915-generation chipset wants all functions +inlined, all loops unrolled, all ifs flattened, no variable array +accesses, and matrix multiplication broken down. The Mesa IR backend +for swrast would like matrices and structure assignment broken down, +but it can support function calls and dynamic branching. A 965 vertex +shader IR backend could potentially even handle some matrix operations +without breaking them down, but the 965 fragment shader IR backend +would want to break (almost) all operations down channel-wise +and perform optimization on that. As a result, there's no single +low-level IR that will make everyone happy. So that usage has fallen +out of favor, and each driver will perform a series of lowering passes +to take the HIR down to whatever restrictions it wants to impose +before doing codegen. + +Q: How is the IR structured? + +A: The best way to get started seeing it would be to run the +standalone compiler against a shader: + +./glsl_compiler --dump-lir \ + ~/src/piglit/tests/shaders/glsl-orangebook-ch06-bump.frag + +So for example one of the ir_instructions in main() contains: + +(assign (constant bool (1)) (var_ref litColor) (expression vec3 * (var_ref SurfaceColor) (var_ref __retval) ) ) + +Or more visually: + (assign) + / | \ + (var_ref) (expression *) (constant bool 1) + / / \ +(litColor) (var_ref) (var_ref) + / \ + (SurfaceColor) (__retval) + +which came from: + +litColor = SurfaceColor * max(dot(normDelta, LightDir), 0.0); + +(the max call is not represented in this expression tree, as it was a +function call that got inlined but not brought into this expression +tree) + +Each of those nodes is a subclass of ir_instruction. A particular +ir_instruction instance may only appear once in the whole IR tree with +the exception of ir_variables, which appear once as variable +declarations: + +(declare () vec3 normDelta) + +and multiple times as the targets of variable dereferences: +...
+(assign (constant bool (1)) (var_ref __retval) (expression float dot + (var_ref normDelta) (var_ref LightDir) ) ) +... +(assign (constant bool (1)) (var_ref __retval) (expression vec3 - + (var_ref LightDir) (expression vec3 * (constant float (2.000000)) + (expression vec3 * (expression float dot (var_ref normDelta) (var_ref + LightDir) ) (var_ref normDelta) ) ) ) ) +... + +Each node has a type. Expressions may involve several different types: +(declare (uniform ) mat4 gl_ModelViewMatrix) +(assign (constant bool (1)) (var_ref constructor_tmp) (expression + vec4 * (var_ref gl_ModelViewMatrix) (var_ref gl_Vertex) ) ) + +An expression tree can be arbitrarily deep, and the compiler tries to +keep them structured like that so that things like algebraic +optimizations ((color * 1.0 == color) and ((mat1 * mat2) * vec == mat1 +* (mat2 * vec))) or recognizing operation patterns for code generation +(vec1 * vec2 + vec3 == mad(vec1, vec2, vec3)) are easier. This comes +at the expense of additional trickery in implementing some +optimizations like CSE where one must navigate an expression tree. + +Q: Why no SSA representation? + +A: Converting an IR tree to SSA form makes dead code elimination, +common subexpression elimination, and many other optimizations much +easier. However, in our primarily vector-based language, there are some +major questions as to how it would work. Do we do SSA on the scalar +or vector level? If we do it at the vector level, we're going to end +up with many different versions of the variable when encountering code +like: + +(assign (constant bool (1)) (swiz x (var_ref __retval) ) (var_ref a) ) +(assign (constant bool (1)) (swiz y (var_ref __retval) ) (var_ref b) ) +(assign (constant bool (1)) (swiz z (var_ref __retval) ) (var_ref c) ) + +If every masked update of a component relies on the previous value of +the variable, then we're probably going to be quite limited in our +dead code elimination wins, and recognizing common expressions may +just not happen. On the other hand, if we operate channel-wise, then +we'll be prone to optimizing the operation on one of the channels at +the expense of making its instruction flow different from the other +channels, and a vector-based GPU would end up with worse code than if +we didn't optimize operations on that channel! + +Once again, it appears that our optimization requirements are driven +significantly by the target architecture. For now, targeting the Mesa +IR backend, SSA does not appear to be that important to producing +excellent code, but we do expect to do some SSA-based optimizations +for the 965 fragment shader backend when that is developed. + +Q: How should I expand instructions that take multiple backend instructions? + +Sometimes you'll have to do the expansion in your code generation -- +see, for example, ir_to_mesa.cpp's handling of ir_unop_sqrt. However, +in many cases you'll want to do a pass over the IR to convert +non-native instructions to a series of native instructions. For +example, for the Mesa backend we have ir_div_to_mul_rcp.cpp because +Mesa IR (and many hardware backends) only have a reciprocal +instruction, not a divide. Implementing non-native instructions this +way gives the chance for constant folding to occur, so (a / 2.0) +becomes (a * 0.5) after codegen instead of (a * (1.0 / 2.0)). + +Q: How should I handle my special hardware instructions with respect to IR?
+ +Our current theory is that if multiple targets have an instruction for +some operation, then we should probably be able to represent that in +the IR. Generally this is in the form of an ir_{bin,un}op expression +type. For example, we initially implemented fract() using (a - +floor(a)), but both 945 and 965 have instructions to give that result, +and it would also simplify the implementation of mod(), so +ir_unop_fract was added. The following areas need updating to add a +new expression type: + +ir.h (new enum) +ir.cpp:operator_strs (used for ir_reader) +ir_constant_expression.cpp (you probably want to be able to constant fold) +ir_validate.cpp (check users have the right types) + +You may also need to update the backends if they will see the new expr type: + +../mesa/program/ir_to_mesa.cpp + +You can then use the new expression from builtins (if all backends +would rather see it), or scan the IR and convert to use your new +expression type (see ir_mod_to_floor, for example). + +Q: How is memory management handled in the compiler? + +The hierarchical memory allocator "talloc" developed for the Samba +project is used, so that things like optimization passes don't have to +worry about their garbage collection so much. It has a few nice +features, including low performance overhead and good debugging +support that's trivially available. + +Generally, each stage of the compile creates a talloc context and +allocates its memory out of that or children of it. At the end of the +stage, the pieces still live are stolen to a new context and the old +one freed, or the whole context is kept for use by the next stage. + +For IR transformations, a temporary context is used, then at the end +of all transformations, reparent_ir reparents all live nodes under the +shader's IR list, and the old context full of dead nodes is freed. +When developing a single IR transformation pass, this means that you +want to allocate instruction nodes out of the temporary context, so if +it becomes dead it doesn't live on as the child of a live node. At +the moment, optimization passes aren't passed that temporary context, +so they find it by calling talloc_parent() on a nearby IR node. The +talloc_parent() call is expensive, so many passes will cache the +result of the first talloc_parent(). Cleaning up all the optimization +passes to take a context argument and not call talloc_parent() is left +as an exercise. + +Q: What is the file naming convention in this directory? + +Initially, there really wasn't one. We have since adopted one: + + - Files that implement code lowering passes should be named lower_* + (e.g., lower_noise.cpp). + - Files that implement optimization passes should be named opt_*. + - Files that implement a class that is used throughout the code should + take the name of that class (e.g., ir_hierarchical_visitor.cpp). + - Files that contain code not fitting in one of the previous + categories should have a sensible name (e.g., glsl_parser.yy).
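To make the talloc pattern above concrete, the following minimal, self-contained C sketch mimics one pass iteration as the README describes it: scratch nodes are allocated out of a temporary context, the survivor is stolen onto the long-lived context, and freeing the temporary context releases everything left behind. It uses only core Samba talloc calls (talloc_new, talloc, talloc_strdup, talloc_steal, talloc_parent, talloc_free); the node struct and names are invented for illustration, and the ralloc allocator Mesa actually ships provides equivalent operations.

/* a sketch, not Mesa code -- build with: cc talloc_pass_sketch.c -ltalloc */
#include <stdio.h>
#include <talloc.h>

struct node {
   const char *op;   /* stands in for an ir_instruction */
};

int main(void)
{
   /* Long-lived context, playing the role of the shader's IR list. */
   void *shader_ctx = talloc_new(NULL);

   /* Temporary context for one transformation pass. */
   void *pass_ctx = talloc_new(NULL);

   struct node *keep = talloc(pass_ctx, struct node);
   keep->op = talloc_strdup(keep, "add");        /* child of 'keep' */

   struct node *dead = talloc(pass_ctx, struct node);
   dead->op = talloc_strdup(dead, "unused");

   /* "Reparent" the live node, then free all dead nodes in one call. */
   talloc_steal(shader_ctx, keep);
   talloc_free(pass_ctx);                        /* frees 'dead' and its string */

   /* talloc_parent() recovers the owning context, as the passes above do. */
   printf("survivor: %s (parent ok: %d)\n",
          keep->op, talloc_parent(keep) == shader_ctx);

   talloc_free(shader_ctx);
   return 0;
}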
diff --git a/src/compiler/glsl/SConscript b/src/compiler/glsl/SConscript new file mode 100644 index 00000000000..ef82a9d317a --- /dev/null +++ b/src/compiler/glsl/SConscript @@ -0,0 +1,122 @@ +import common + +Import('*') + +from sys import executable as python_cmd + +env = env.Clone() + +env.MSVC2013Compat() + +env.Prepend(CPPPATH = [ + '#include', + '#src', + '#src/mapi', + '#src/mesa', + '#src/gallium/include', + '#src/gallium/auxiliary', + '#src/glsl', + '#src/glsl/glcpp', +]) + +env.Prepend(LIBS = [mesautil]) + +# Make glcpp-parse.h and glsl_parser.h reachable from the include path. +env.Append(CPPPATH = [Dir('.').abspath, Dir('glcpp').abspath]) + +glcpp_env = env.Clone() +glcpp_env.Append(YACCFLAGS = [ + '-d', + '-p', 'glcpp_parser_' +]) + +glsl_env = env.Clone() +glsl_env.Append(YACCFLAGS = [ + '--defines=%s' % File('glsl_parser.h').abspath, + '-p', '_mesa_glsl_', +]) + +# without this line scons will expect "glsl_parser.hpp" instead of +# "glsl_parser.h", causing glsl_parser.cpp to be regenerated every time +glsl_env['YACCHXXFILESUFFIX'] = '.h' + +glcpp_lexer = glcpp_env.CFile('glcpp/glcpp-lex.c', 'glcpp/glcpp-lex.l') +glcpp_parser = glcpp_env.CFile('glcpp/glcpp-parse.c', 'glcpp/glcpp-parse.y') +glsl_lexer = glsl_env.CXXFile('glsl_lexer.cpp', 'glsl_lexer.ll') +glsl_parser = glsl_env.CXXFile('glsl_parser.cpp', 'glsl_parser.yy') + +# common generated sources +glsl_sources = [ + glcpp_lexer, + glcpp_parser[0], + glsl_lexer, + glsl_parser[0], +] + +# parse Makefile.sources +source_lists = env.ParseSourceList('Makefile.sources') + +# add non-generated sources +for l in ('LIBGLCPP_FILES', 'LIBGLSL_FILES'): + glsl_sources += source_lists[l] + +if env['msvc']: + env.Prepend(CPPPATH = ['#/src/getopt']) + env.PrependUnique(LIBS = [getopt]) + +# Copy these files to avoid generating object files in src/mesa/program +env.Prepend(CPPPATH = ['#src/mesa/main']) +env.Command('imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', '$SOURCE')) +# Copy these files to avoid generating object files in src/mesa/program +env.Prepend(CPPPATH = ['#src/mesa/program']) +env.Command('prog_hash_table.c', '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE')) +env.Command('symbol_table.c', '#src/mesa/program/symbol_table.c', Copy('$TARGET', '$SOURCE')) +env.Command('dummy_errors.c', '#src/mesa/program/dummy_errors.c', Copy('$TARGET', '$SOURCE')) + +compiler_objs = env.StaticObject(source_lists['GLSL_COMPILER_CXX_FILES']) + +mesa_objs = env.StaticObject([ + 'imports.c', + 'prog_hash_table.c', + 'symbol_table.c', + 'dummy_errors.c', +]) + +compiler_objs += mesa_objs + +glsl = env.ConvenienceLibrary( + target = 'glsl', + source = glsl_sources, +) + +# SCons builtin dependency scanner doesn't detect that glsl_lexer.ll depends on +# glsl_parser.h +env.Depends(glsl, glsl_parser) + +Export('glsl') + +# Skip building these programs as they will cause SCons error "Two environments +# with different actions were specified for the same target" +if env['crosscompile'] or env['embedded']: + Return() + +env = env.Clone() + +if env['platform'] == 'windows': + env.PrependUnique(LIBS = [ + 'user32', + ]) + +env.Prepend(LIBS = [compiler, glsl]) + +glsl_compiler = env.Program( + target = 'glsl_compiler', + source = compiler_objs, +) +env.Alias('glsl_compiler', glsl_compiler) + +glcpp = env.Program( + target = 'glcpp/glcpp', + source = ['glcpp/glcpp.c'] + mesa_objs, +) +env.Alias('glcpp', glcpp) diff --git a/src/compiler/glsl/TODO b/src/compiler/glsl/TODO new file mode 100644 index 00000000000..bd077a85678 ---
/dev/null +++ b/src/compiler/glsl/TODO @@ -0,0 +1,12 @@ +- Detect code paths in non-void functions that don't reach a return statement + +- Improve handling of constants and their initializers. Constant initializers + should never generate any code. This is trivial for scalar constants. It is + also trivial for arrays, matrices, and vectors that are accessed with + constant index values. For others it is more complicated. Perhaps these + cases should be silently converted to uniforms? + +- Track source locations throughout the IR. There are currently several + places where we cannot emit line numbers for errors (and currently emit 0:0) + because we've "lost" the line number information. This is particularly + noticeable at link time. diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h new file mode 100644 index 00000000000..03df6c08b2b --- /dev/null +++ b/src/compiler/glsl/ast.h @@ -0,0 +1,1204 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef AST_H +#define AST_H + +#include "list.h" +#include "glsl_parser_extras.h" + +struct _mesa_glsl_parse_state; + +struct YYLTYPE; + +/** + * \defgroup AST Abstract syntax tree node definitions + * + * An abstract syntax tree is generated by the parser. This is a fairly + * direct representation of the grammar derivation for the source program. + * No semantic checking is done during the generation of the AST. Only + * syntactic checking is done. Semantic checking is performed by a later + * stage that converts the AST to a more generic intermediate representation. + * + *@{ + */ +/** + * Base class of all abstract syntax tree nodes + */ +class ast_node { +public: + DECLARE_RALLOC_CXX_OPERATORS(ast_node); + + /** + * Print an AST node in something approximating the original GLSL code + */ + virtual void print(void) const; + + /** + * Convert the AST node to the high-level intermediate representation + */ + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual bool has_sequence_subexpression() const; + + /** + * Retrieve the source location of an AST node + * + * This function is primarily used to get the source position of an AST node + * into a form that can be passed to \c _mesa_glsl_error.
+ * + * \sa _mesa_glsl_error, ast_node::set_location + */ + struct YYLTYPE get_location(void) const + { + struct YYLTYPE locp; + + locp.source = this->location.source; + locp.first_line = this->location.first_line; + locp.first_column = this->location.first_column; + locp.last_line = this->location.last_line; + locp.last_column = this->location.last_column; + + return locp; + } + + /** + * Set the source location of an AST node from a parser location + * + * \sa ast_node::get_location + */ + void set_location(const struct YYLTYPE &locp) + { + this->location.source = locp.source; + this->location.first_line = locp.first_line; + this->location.first_column = locp.first_column; + this->location.last_line = locp.last_line; + this->location.last_column = locp.last_column; + } + + /** + * Set the source location range of an AST node using two location nodes + * + * \sa ast_node::set_location + */ + void set_location_range(const struct YYLTYPE &begin, const struct YYLTYPE &end) + { + this->location.source = begin.source; + this->location.first_line = begin.first_line; + this->location.last_line = end.last_line; + this->location.first_column = begin.first_column; + this->location.last_column = end.last_column; + } + + /** + * Source location of the AST node. + */ + struct { + unsigned source; /**< GLSL source number. */ + unsigned first_line; /**< First line number within the source string. */ + unsigned first_column; /**< First column in the first line. */ + unsigned last_line; /**< Last line number within the source string. */ + unsigned last_column; /**< Last column in the last line. */ + } location; + + exec_node link; + +protected: + /** + * The only constructor is protected so that only derived class objects can + * be created. + */ + ast_node(void); +}; + + +/** + * Operators for AST expression nodes. + */ +enum ast_operators { + ast_assign, + ast_plus, /**< Unary + operator. */ + ast_neg, + ast_add, + ast_sub, + ast_mul, + ast_div, + ast_mod, + ast_lshift, + ast_rshift, + ast_less, + ast_greater, + ast_lequal, + ast_gequal, + ast_equal, + ast_nequal, + ast_bit_and, + ast_bit_xor, + ast_bit_or, + ast_bit_not, + ast_logic_and, + ast_logic_xor, + ast_logic_or, + ast_logic_not, + + ast_mul_assign, + ast_div_assign, + ast_mod_assign, + ast_add_assign, + ast_sub_assign, + ast_ls_assign, + ast_rs_assign, + ast_and_assign, + ast_xor_assign, + ast_or_assign, + + ast_conditional, + + ast_pre_inc, + ast_pre_dec, + ast_post_inc, + ast_post_dec, + ast_field_selection, + ast_array_index, + ast_unsized_array_dim, + + ast_function_call, + + ast_identifier, + ast_int_constant, + ast_uint_constant, + ast_float_constant, + ast_bool_constant, + ast_double_constant, + + ast_sequence, + ast_aggregate +}; + +/** + * Representation of any sort of expression. 
+ */ +class ast_expression : public ast_node { +public: + ast_expression(int oper, ast_expression *, + ast_expression *, ast_expression *); + + ast_expression(const char *identifier) : + oper(ast_identifier) + { + subexpressions[0] = NULL; + subexpressions[1] = NULL; + subexpressions[2] = NULL; + primary_expression.identifier = identifier; + this->non_lvalue_description = NULL; + } + + static const char *operator_string(enum ast_operators op); + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual void hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual bool has_sequence_subexpression() const; + + ir_rvalue *do_hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state, + bool needs_rvalue); + + virtual void print(void) const; + + enum ast_operators oper; + + ast_expression *subexpressions[3]; + + union { + const char *identifier; + int int_constant; + float float_constant; + unsigned uint_constant; + int bool_constant; + double double_constant; + } primary_expression; + + + /** + * List of expressions for an \c ast_sequence or parameters for an + * \c ast_function_call + */ + exec_list expressions; + + /** + * For things that can't be l-values, this describes what it is. + * + * This text is used by the code that generates IR for assignments to + * detect and emit useful messages for assignments to some things that + * can't be l-values. For example, pre- or post-increment expressions. + * + * \note + * This pointer may be \c NULL. + */ + const char *non_lvalue_description; +}; + +class ast_expression_bin : public ast_expression { +public: + ast_expression_bin(int oper, ast_expression *, ast_expression *); + + virtual void print(void) const; +}; + +/** + * Subclass of expressions for function calls + */ +class ast_function_expression : public ast_expression { +public: + ast_function_expression(ast_expression *callee) + : ast_expression(ast_function_call, callee, + NULL, NULL), + cons(false) + { + /* empty */ + } + + ast_function_expression(class ast_type_specifier *type) + : ast_expression(ast_function_call, (ast_expression *) type, + NULL, NULL), + cons(true) + { + /* empty */ + } + + bool is_constructor() const + { + return cons; + } + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual void hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual bool has_sequence_subexpression() const; + +private: + /** + * Is this function call actually a constructor? + */ + bool cons; + ir_rvalue * + handle_method(exec_list *instructions, + struct _mesa_glsl_parse_state *state); +}; + +class ast_subroutine_list : public ast_node +{ +public: + virtual void print(void) const; + exec_list declarations; +}; + +class ast_array_specifier : public ast_node { +public: + ast_array_specifier(const struct YYLTYPE &locp, ast_expression *dim) + { + set_location(locp); + array_dimensions.push_tail(&dim->link); + } + + void add_dimension(ast_expression *dim) + { + array_dimensions.push_tail(&dim->link); + } + + bool is_single_dimension() const + { + return this->array_dimensions.tail_pred->prev != NULL && + this->array_dimensions.tail_pred->prev->is_head_sentinel(); + } + + virtual void print(void) const; + + /* This list contains objects of type ast_node containing the + * array dimensions in outermost-to-innermost order.
+ */ + exec_list array_dimensions; +}; + +class ast_layout_expression : public ast_node { +public: + ast_layout_expression(const struct YYLTYPE &locp, ast_expression *expr) + { + set_location(locp); + layout_const_expressions.push_tail(&expr->link); + } + + bool process_qualifier_constant(struct _mesa_glsl_parse_state *state, + const char *qual_indentifier, + unsigned *value, bool can_be_zero); + + void merge_qualifier(ast_layout_expression *l_expr) + { + layout_const_expressions.append_list(&l_expr->layout_const_expressions); + } + + exec_list layout_const_expressions; +}; + +/** + * C-style aggregate initialization class + * + * Represents C-style initializers of vectors, matrices, arrays, and + * structures. E.g., vec3 pos = {1.0, 0.0, -1.0} is equivalent to + * vec3 pos = vec3(1.0, 0.0, -1.0). + * + * Specified in GLSL 4.20 and GL_ARB_shading_language_420pack. + * + * \sa _mesa_ast_set_aggregate_type + */ +class ast_aggregate_initializer : public ast_expression { +public: + ast_aggregate_initializer() + : ast_expression(ast_aggregate, NULL, NULL, NULL), + constructor_type(NULL) + { + /* empty */ + } + + /** + * glsl_type of the aggregate, which is inferred from the LHS of whatever + * the aggregate is being used to initialize. This can't be inferred at + * parse time (since the parser deals with ast_type_specifiers, not + * glsl_types), so the parser leaves it NULL. However, the ast-to-hir + * conversion code makes sure to fill it in with the appropriate type + * before hir() is called. + */ + const glsl_type *constructor_type; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + virtual void hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state); +}; + +/** + * Number of possible operators for an ast_expression + * + * This is done as a define instead of as an additional value in the enum so + * that the compiler won't generate spurious messages like "warning: + * enumeration value ‘ast_num_operators’ not handled in switch" + */ +#define AST_NUM_OPERATORS (ast_sequence + 1) + + +class ast_compound_statement : public ast_node { +public: + ast_compound_statement(int new_scope, ast_node *statements); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + int new_scope; + exec_list statements; +}; + +class ast_declaration : public ast_node { +public: + ast_declaration(const char *identifier, + ast_array_specifier *array_specifier, + ast_expression *initializer); + virtual void print(void) const; + + const char *identifier; + + ast_array_specifier *array_specifier; + + ast_expression *initializer; +}; + + +enum { + ast_precision_none = 0, /**< Absence of precision qualifier. */ + ast_precision_high, + ast_precision_medium, + ast_precision_low +}; + +struct ast_type_qualifier { + DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier); + + union { + struct { + unsigned invariant:1; + unsigned precise:1; + unsigned constant:1; + unsigned attribute:1; + unsigned varying:1; + unsigned in:1; + unsigned out:1; + unsigned centroid:1; + unsigned sample:1; + unsigned patch:1; + unsigned uniform:1; + unsigned buffer:1; + unsigned shared_storage:1; + unsigned smooth:1; + unsigned flat:1; + unsigned noperspective:1; + + /** \name Layout qualifiers for GL_ARB_fragment_coord_conventions */ + /*@{*/ + unsigned origin_upper_left:1; + unsigned pixel_center_integer:1; + /*@}*/ + + /** + * Flag set if GL_ARB_explicit_attrib_location "location" layout + * qualifier is used. 
+ */ + unsigned explicit_location:1; + /** + * Flag set if GL_ARB_explicit_attrib_location "index" layout + * qualifier is used. + */ + unsigned explicit_index:1; + + /** + * Flag set if GL_ARB_shading_language_420pack "binding" layout + * qualifier is used. + */ + unsigned explicit_binding:1; + + /** + * Flag set if GL_ARB_shader_atomic_counter "offset" layout + * qualifier is used. + */ + unsigned explicit_offset:1; + + /** \name Layout qualifiers for GL_AMD_conservative_depth */ + /** \{ */ + unsigned depth_any:1; + unsigned depth_greater:1; + unsigned depth_less:1; + unsigned depth_unchanged:1; + /** \} */ + + /** \name Layout qualifiers for GL_ARB_uniform_buffer_object */ + /** \{ */ + unsigned std140:1; + unsigned std430:1; + unsigned shared:1; + unsigned packed:1; + unsigned column_major:1; + unsigned row_major:1; + /** \} */ + + /** \name Layout qualifiers for GLSL 1.50 geometry shaders */ + /** \{ */ + unsigned prim_type:1; + unsigned max_vertices:1; + /** \} */ + + /** + * local_size_{x,y,z} flags for compute shaders. Bit 0 represents + * local_size_x, and so on. + */ + unsigned local_size:3; + + /** \name Layout and memory qualifiers for ARB_shader_image_load_store. */ + /** \{ */ + unsigned early_fragment_tests:1; + unsigned explicit_image_format:1; + unsigned coherent:1; + unsigned _volatile:1; + unsigned restrict_flag:1; + unsigned read_only:1; /**< "readonly" qualifier. */ + unsigned write_only:1; /**< "writeonly" qualifier. */ + /** \} */ + + /** \name Layout qualifiers for GL_ARB_gpu_shader5 */ + /** \{ */ + unsigned invocations:1; + unsigned stream:1; /**< Has stream value assigned */ + unsigned explicit_stream:1; /**< stream value assigned explicitly by shader code */ + /** \} */ + + /** \name Layout qualifiers for GL_ARB_tessellation_shader */ + /** \{ */ + /* tess eval input layout */ + /* gs prim_type reused for primitive mode */ + unsigned vertex_spacing:1; + unsigned ordering:1; + unsigned point_mode:1; + /* tess control output layout */ + unsigned vertices:1; + /** \} */ + + /** \name Qualifiers for GL_ARB_shader_subroutine */ + /** \{ */ + unsigned subroutine:1; /**< Is this marked 'subroutine' */ + unsigned subroutine_def:1; /**< Is this marked 'subroutine' with a list of types */ + /** \} */ + } + /** \brief Set of flags, accessed by name. */ + q; + + /** \brief Set of flags, accessed as a bitmask. */ + uint64_t i; + } flags; + + /** Precision of the type (highp/mediump/lowp). */ + unsigned precision:2; + + /** Geometry shader invocations for GL_ARB_gpu_shader5. */ + ast_layout_expression *invocations; + + /** + * Location specified via GL_ARB_explicit_attrib_location layout + * + * \note + * This field is only valid if \c explicit_location is set. + */ + ast_expression *location; + /** + * Index specified via GL_ARB_explicit_attrib_location layout + * + * \note + * This field is only valid if \c explicit_index is set. + */ + ast_expression *index; + + /** Maximum output vertices in GLSL 1.50 geometry shaders. */ + ast_layout_expression *max_vertices; + + /** Stream in GLSL 1.50 geometry shaders. */ + ast_expression *stream; + + /** + * Input or output primitive type in GLSL 1.50 geometry shaders + * and tessellation shaders. + */ + GLenum prim_type; + + /** + * Binding specified via GL_ARB_shading_language_420pack's "binding" keyword. + * + * \note + * This field is only valid if \c explicit_binding is set. + */ + ast_expression *binding; + + /** + * Offset specified via GL_ARB_shader_atomic_counter's "offset" + * keyword.
+ * + * \note + * This field is only valid if \c explicit_offset is set. + */ + ast_expression *offset; + + /** + * Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}" + * layout qualifier. Element i of this array is only valid if + * flags.q.local_size & (1 << i) is set. + */ + ast_layout_expression *local_size[3]; + + /** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */ + GLenum vertex_spacing; + + /** Tessellation evaluation shader: vertex ordering (CW or CCW) */ + GLenum ordering; + + /** Tessellation evaluation shader: point mode */ + bool point_mode; + + /** Tessellation control shader: number of output vertices */ + ast_layout_expression *vertices; + + /** + * Image format specified with an ARB_shader_image_load_store + * layout qualifier. + * + * \note + * This field is only valid if \c explicit_image_format is set. + */ + GLenum image_format; + + /** + * Base type of the data read from or written to this image. Only + * the following enumerants are allowed: GLSL_TYPE_UINT, + * GLSL_TYPE_INT, GLSL_TYPE_FLOAT. + * + * \note + * This field is only valid if \c explicit_image_format is set. + */ + glsl_base_type image_base_type; + + /** Flag to know if this represents a default value for a qualifier */ + bool is_default_qualifier; + + /** + * Return true if and only if an interpolation qualifier is present. + */ + bool has_interpolation() const; + + /** + * Return whether a layout qualifier is present. + */ + bool has_layout() const; + + /** + * Return whether a storage qualifier is present. + */ + bool has_storage() const; + + /** + * Return whether an auxiliary storage qualifier is present. + */ + bool has_auxiliary_storage() const; + + /** + * \brief Return string representation of interpolation qualifier. + * + * If an interpolation qualifier is present, then return that qualifier's + * string representation. Otherwise, return null. For example, if the + * noperspective bit is set, then this returns "noperspective". + * + * If multiple interpolation qualifiers are somehow present, then the + * returned string is undefined but not null. 
+ */ + const char *interpolation_string() const; + + bool merge_qualifier(YYLTYPE *loc, + _mesa_glsl_parse_state *state, + const ast_type_qualifier &q, + bool is_single_layout_merge); + + bool merge_out_qualifier(YYLTYPE *loc, + _mesa_glsl_parse_state *state, + const ast_type_qualifier &q, + ast_node* &node, bool create_node); + + bool merge_in_qualifier(YYLTYPE *loc, + _mesa_glsl_parse_state *state, + const ast_type_qualifier &q, + ast_node* &node, bool create_node); + + ast_subroutine_list *subroutine_list; +}; + +class ast_declarator_list; + +class ast_struct_specifier : public ast_node { +public: + ast_struct_specifier(const char *identifier, + ast_declarator_list *declarator_list); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + const char *name; + ast_type_qualifier *layout; + /* List of ast_declarator_list * */ + exec_list declarations; + bool is_declaration; +}; + + + +class ast_type_specifier : public ast_node { +public: + /** Construct a type specifier from a type name */ + ast_type_specifier(const char *name) + : type_name(name), structure(NULL), array_specifier(NULL), + default_precision(ast_precision_none) + { + /* empty */ + } + + /** Construct a type specifier from a structure definition */ + ast_type_specifier(ast_struct_specifier *s) + : type_name(s->name), structure(s), array_specifier(NULL), + default_precision(ast_precision_none) + { + /* empty */ + } + + const struct glsl_type *glsl_type(const char **name, + struct _mesa_glsl_parse_state *state) + const; + + virtual void print(void) const; + + ir_rvalue *hir(exec_list *, struct _mesa_glsl_parse_state *); + + const char *type_name; + ast_struct_specifier *structure; + + ast_array_specifier *array_specifier; + + /** For precision statements, this is the given precision; otherwise none. */ + unsigned default_precision:2; +}; + + +class ast_fully_specified_type : public ast_node { +public: + virtual void print(void) const; + bool has_qualifiers(_mesa_glsl_parse_state *state) const; + + ast_fully_specified_type() : qualifier(), specifier(NULL) + { + } + + const struct glsl_type *glsl_type(const char **name, + struct _mesa_glsl_parse_state *state) + const; + + ast_type_qualifier qualifier; + ast_type_specifier *specifier; +}; + + +class ast_declarator_list : public ast_node { +public: + ast_declarator_list(ast_fully_specified_type *); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_fully_specified_type *type; + /** List of 'ast_declaration *' */ + exec_list declarations; + + /** + * Flags for redeclarations. In these cases, no type is specified, so + * `type` is allowed to be NULL. In all other cases, this would be an error.
+ */ + int invariant; /** < `invariant` redeclaration */ + int precise; /** < `precise` redeclaration */ +}; + + +class ast_parameter_declarator : public ast_node { +public: + ast_parameter_declarator() : + type(NULL), + identifier(NULL), + array_specifier(NULL), + formal_parameter(false), + is_void(false) + { + /* empty */ + } + + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_fully_specified_type *type; + const char *identifier; + ast_array_specifier *array_specifier; + + static void parameters_to_hir(exec_list *ast_parameters, + bool formal, exec_list *ir_parameters, + struct _mesa_glsl_parse_state *state); + +private: + /** Is this parameter declaration part of a formal parameter list? */ + bool formal_parameter; + + /** + * Is this parameter 'void' type? + * + * This field is set by \c ::hir. + */ + bool is_void; +}; + + +class ast_function : public ast_node { +public: + ast_function(void); + + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_fully_specified_type *return_type; + const char *identifier; + + exec_list parameters; + +private: + /** + * Is this prototype part of the function definition? + * + * Used by ast_function_definition::hir to process the parameters, etc. + * of the function. + * + * \sa ::hir + */ + bool is_definition; + + /** + * Function signature corresponding to this function prototype instance + * + * Used by ast_function_definition::hir to process the parameters, etc. + * of the function. + * + * \sa ::hir + */ + class ir_function_signature *signature; + + friend class ast_function_definition; +}; + + +class ast_expression_statement : public ast_node { +public: + ast_expression_statement(ast_expression *); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_expression *expression; +}; + + +class ast_case_label : public ast_node { +public: + ast_case_label(ast_expression *test_value); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + /** + * A test value of NULL means 'default'. + */ + ast_expression *test_value; +}; + + +class ast_case_label_list : public ast_node { +public: + ast_case_label_list(void); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + /** + * A list of case labels. + */ + exec_list labels; +}; + + +class ast_case_statement : public ast_node { +public: + ast_case_statement(ast_case_label_list *labels); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_case_label_list *labels; + + /** + * A list of statements. + */ + exec_list stmts; +}; + + +class ast_case_statement_list : public ast_node { +public: + ast_case_statement_list(void); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + /** + * A list of cases.
+ */ + exec_list cases; +}; + + +class ast_switch_body : public ast_node { +public: + ast_switch_body(ast_case_statement_list *stmts); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_case_statement_list *stmts; +}; + + +class ast_selection_statement : public ast_node { +public: + ast_selection_statement(ast_expression *condition, + ast_node *then_statement, + ast_node *else_statement); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_expression *condition; + ast_node *then_statement; + ast_node *else_statement; +}; + + +class ast_switch_statement : public ast_node { +public: + ast_switch_statement(ast_expression *test_expression, + ast_node *body); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_expression *test_expression; + ast_node *body; + +protected: + void test_to_hir(exec_list *, struct _mesa_glsl_parse_state *); +}; + +class ast_iteration_statement : public ast_node { +public: + ast_iteration_statement(int mode, ast_node *init, ast_node *condition, + ast_expression *rest_expression, ast_node *body); + + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *, struct _mesa_glsl_parse_state *); + + enum ast_iteration_modes { + ast_for, + ast_while, + ast_do_while + } mode; + + + ast_node *init_statement; + ast_node *condition; + ast_expression *rest_expression; + + ast_node *body; + + /** + * Generate IR from the condition of a loop + * + * This is factored out of ::hir because some loops have the condition + * test at the top (for and while), and others have it at the end (do-while). + */ + void condition_to_hir(exec_list *, struct _mesa_glsl_parse_state *); +}; + + +class ast_jump_statement : public ast_node { +public: + ast_jump_statement(int mode, ast_expression *return_value); + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + enum ast_jump_modes { + ast_continue, + ast_break, + ast_return, + ast_discard + } mode; + + ast_expression *opt_return_value; +}; + + +class ast_function_definition : public ast_node { +public: + ast_function_definition() : prototype(NULL), body(NULL) + { + } + + virtual void print(void) const; + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_function *prototype; + ast_compound_statement *body; +}; + +class ast_interface_block : public ast_node { +public: + ast_interface_block(ast_type_qualifier layout, + const char *instance_name, + ast_array_specifier *array_specifier) + : layout(layout), block_name(NULL), instance_name(instance_name), + array_specifier(array_specifier) + { + } + + virtual ir_rvalue *hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + + ast_type_qualifier layout; + const char *block_name; + + /** + * Declared name of the block instance, if specified. + * + * If the block does not have an instance name, this field will be + * \c NULL. + */ + const char *instance_name; + + /** List of ast_declarator_list * */ + exec_list declarations; + + /** + * Declared array size of the block instance + * + * If the block is not declared as an array or if the block instance array + * is unsized, this field will be \c NULL. 
+ */
+   ast_array_specifier *array_specifier;
+};
+
+
+/**
+ * AST node representing a declaration of the output layout for tessellation
+ * control shaders.
+ */
+class ast_tcs_output_layout : public ast_node
+{
+public:
+   ast_tcs_output_layout(const struct YYLTYPE &locp)
+   {
+      set_location(locp);
+   }
+
+   virtual ir_rvalue *hir(exec_list *instructions,
+                          struct _mesa_glsl_parse_state *state);
+};
+
+
+/**
+ * AST node representing a declaration of the input layout for geometry
+ * shaders.
+ */
+class ast_gs_input_layout : public ast_node
+{
+public:
+   ast_gs_input_layout(const struct YYLTYPE &locp, GLenum prim_type)
+      : prim_type(prim_type)
+   {
+      set_location(locp);
+   }
+
+   virtual ir_rvalue *hir(exec_list *instructions,
+                          struct _mesa_glsl_parse_state *state);
+
+private:
+   const GLenum prim_type;
+};
+
+
+/**
+ * AST node representing a declaration of the input layout for compute
+ * shaders.
+ */
+class ast_cs_input_layout : public ast_node
+{
+public:
+   ast_cs_input_layout(const struct YYLTYPE &locp,
+                       ast_layout_expression *const *local_size)
+   {
+      for (int i = 0; i < 3; i++) {
+         this->local_size[i] = local_size[i];
+      }
+      set_location(locp);
+   }
+
+   virtual ir_rvalue *hir(exec_list *instructions,
+                          struct _mesa_glsl_parse_state *state);
+
+private:
+   ast_layout_expression *local_size[3];
+};
+
+/*@}*/
+
+extern void
+_mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state);
+
+extern ir_rvalue *
+_mesa_ast_field_selection_to_hir(const ast_expression *expr,
+                                 exec_list *instructions,
+                                 struct _mesa_glsl_parse_state *state);
+
+extern ir_rvalue *
+_mesa_ast_array_index_to_hir(void *mem_ctx,
+                             struct _mesa_glsl_parse_state *state,
+                             ir_rvalue *array, ir_rvalue *idx,
+                             YYLTYPE &loc, YYLTYPE &idx_loc);
+
+extern void
+_mesa_ast_set_aggregate_type(const glsl_type *type,
+                             ast_expression *expr);
+
+void
+emit_function(_mesa_glsl_parse_state *state, ir_function *f);
+
+extern void
+check_builtin_array_max_size(const char *name, unsigned size,
+                             YYLTYPE loc, struct _mesa_glsl_parse_state *state);
+
+extern void _mesa_ast_process_interface_block(YYLTYPE *locp,
+                                              _mesa_glsl_parse_state *state,
+                                              ast_interface_block *const block,
+                                              const struct ast_type_qualifier &q);
+
+#endif /* AST_H */
diff --git a/src/compiler/glsl/ast_array_index.cpp b/src/compiler/glsl/ast_array_index.cpp
new file mode 100644
index 00000000000..f5baeb9ea32
--- /dev/null
+++ b/src/compiler/glsl/ast_array_index.cpp
@@ -0,0 +1,333 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ast.h" +#include "compiler/glsl_types.h" +#include "ir.h" + +void +ast_array_specifier::print(void) const +{ + foreach_list_typed (ast_node, array_dimension, link, &this->array_dimensions) { + printf("[ "); + if (((ast_expression*)array_dimension)->oper != ast_unsized_array_dim) + array_dimension->print(); + printf("] "); + } +} + +/** + * If \c ir is a reference to an array for which we are tracking the max array + * element accessed, track that the given element has been accessed. + * Otherwise do nothing. + * + * This function also checks whether the array is a built-in array whose + * maximum size is too small to accommodate the given index, and if so uses + * loc and state to report the error. + */ +static void +update_max_array_access(ir_rvalue *ir, int idx, YYLTYPE *loc, + struct _mesa_glsl_parse_state *state) +{ + if (ir_dereference_variable *deref_var = ir->as_dereference_variable()) { + ir_variable *var = deref_var->var; + if (idx > (int)var->data.max_array_access) { + var->data.max_array_access = idx; + + /* Check whether this access will, as a side effect, implicitly cause + * the size of a built-in array to be too large. + */ + check_builtin_array_max_size(var->name, idx+1, *loc, state); + } + } else if (ir_dereference_record *deref_record = + ir->as_dereference_record()) { + /* There are three possibilities we need to consider: + * + * - Accessing an element of an array that is a member of a named + * interface block (e.g. ifc.foo[i]) + * + * - Accessing an element of an array that is a member of a named + * interface block array (e.g. ifc[j].foo[i]). + * + * - Accessing an element of an array that is a member of a named + * interface block array of arrays (e.g. ifc[j][k].foo[i]). + */ + ir_dereference_variable *deref_var = + deref_record->record->as_dereference_variable(); + if (deref_var == NULL) { + ir_dereference_array *deref_array = + deref_record->record->as_dereference_array(); + ir_dereference_array *deref_array_prev = NULL; + while (deref_array != NULL) { + deref_array_prev = deref_array; + deref_array = deref_array->array->as_dereference_array(); + } + if (deref_array_prev != NULL) + deref_var = deref_array_prev->array->as_dereference_variable(); + } + + if (deref_var != NULL) { + if (deref_var->var->is_interface_instance()) { + unsigned field_index = + deref_record->record->type->field_index(deref_record->field); + assert(field_index < deref_var->var->get_interface_type()->length); + + unsigned *const max_ifc_array_access = + deref_var->var->get_max_ifc_array_access(); + + assert(max_ifc_array_access != NULL); + + if (idx > (int)max_ifc_array_access[field_index]) { + max_ifc_array_access[field_index] = idx; + + /* Check whether this access will, as a side effect, implicitly + * cause the size of a built-in array to be too large. + */ + check_builtin_array_max_size(deref_record->field, idx+1, *loc, + state); + } + } + } + } +} + + +static int +get_implicit_array_size(struct _mesa_glsl_parse_state *state, + ir_rvalue *array) +{ + ir_variable *var = array->variable_referenced(); + + /* Inputs in control shader are implicitly sized + * to the maximum patch size. 
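+    *
+    * For example (an illustrative declaration), an unsized tessellation
+    * control shader input such as "in vec4 vs_color[];" behaves as if it
+    * were declared with size gl_MaxPatchVertices.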
+ */ + if (state->stage == MESA_SHADER_TESS_CTRL && + var->data.mode == ir_var_shader_in) { + return state->Const.MaxPatchVertices; + } + + /* Non-patch inputs in evaluation shader are implicitly sized + * to the maximum patch size. + */ + if (state->stage == MESA_SHADER_TESS_EVAL && + var->data.mode == ir_var_shader_in && + !var->data.patch) { + return state->Const.MaxPatchVertices; + } + + return 0; +} + + +ir_rvalue * +_mesa_ast_array_index_to_hir(void *mem_ctx, + struct _mesa_glsl_parse_state *state, + ir_rvalue *array, ir_rvalue *idx, + YYLTYPE &loc, YYLTYPE &idx_loc) +{ + if (!array->type->is_error() + && !array->type->is_array() + && !array->type->is_matrix() + && !array->type->is_vector()) { + _mesa_glsl_error(& idx_loc, state, + "cannot dereference non-array / non-matrix / " + "non-vector"); + } + + if (!idx->type->is_error()) { + if (!idx->type->is_integer()) { + _mesa_glsl_error(& idx_loc, state, "array index must be integer type"); + } else if (!idx->type->is_scalar()) { + _mesa_glsl_error(& idx_loc, state, "array index must be scalar"); + } + } + + /* If the array index is a constant expression and the array has a + * declared size, ensure that the access is in-bounds. If the array + * index is not a constant expression, ensure that the array has a + * declared size. + */ + ir_constant *const const_index = idx->constant_expression_value(); + if (const_index != NULL && idx->type->is_integer()) { + const int idx = const_index->value.i[0]; + const char *type_name = "error"; + unsigned bound = 0; + + /* From page 24 (page 30 of the PDF) of the GLSL 1.50 spec: + * + * "It is illegal to declare an array with a size, and then + * later (in the same shader) index the same array with an + * integral constant expression greater than or equal to the + * declared size. It is also illegal to index an array with a + * negative constant expression." + */ + if (array->type->is_matrix()) { + if (array->type->row_type()->vector_elements <= idx) { + type_name = "matrix"; + bound = array->type->row_type()->vector_elements; + } + } else if (array->type->is_vector()) { + if (array->type->vector_elements <= idx) { + type_name = "vector"; + bound = array->type->vector_elements; + } + } else { + /* glsl_type::array_size() returns -1 for non-array types. This means + * that we don't need to verify that the type is an array before + * doing the bounds checking. + */ + if ((array->type->array_size() > 0) + && (array->type->array_size() <= idx)) { + type_name = "array"; + bound = array->type->array_size(); + } + } + + if (bound > 0) { + _mesa_glsl_error(& loc, state, "%s index must be < %u", + type_name, bound); + } else if (idx < 0) { + _mesa_glsl_error(& loc, state, "%s index must be >= 0", + type_name); + } + + if (array->type->is_array()) + update_max_array_access(array, idx, &loc, state); + } else if (const_index == NULL && array->type->is_array()) { + if (array->type->is_unsized_array()) { + int implicit_size = get_implicit_array_size(state, array); + if (implicit_size) { + ir_variable *v = array->whole_variable_referenced(); + if (v != NULL) + v->data.max_array_access = implicit_size - 1; + } + else if (state->stage == MESA_SHADER_TESS_CTRL && + array->variable_referenced()->data.mode == ir_var_shader_out && + !array->variable_referenced()->data.patch) { + /* Tessellation control shader output non-patch arrays are + * initially unsized. Despite that, they are allowed to be + * indexed with a non-constant expression (typically + * "gl_InvocationID"). 
The array size will be determined
+                * by the linker.
+                */
+            }
+            else if (array->variable_referenced()->data.mode !=
+                     ir_var_shader_storage) {
+               _mesa_glsl_error(&loc, state, "unsized array index must be constant");
+            }
+         } else if (array->type->without_array()->is_interface()
+                    && (array->variable_referenced()->data.mode == ir_var_uniform ||
+                        array->variable_referenced()->data.mode == ir_var_shader_storage)
+                    && !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
+            /* Page 50 in section 4.3.9 of the OpenGL ES 3.10 spec says:
+             *
+             *    "All indices used to index a uniform or shader storage block
+             *    array must be constant integral expressions."
+             */
+            _mesa_glsl_error(&loc, state, "%s block array index must be constant",
+                             array->variable_referenced()->data.mode
+                             == ir_var_uniform ? "uniform" : "shader storage");
+         } else {
+            /* whole_variable_referenced can return NULL if the array is a
+             * member of a structure.  In this case it is safe to not update
+             * the max_array_access field because it is never used for fields
+             * of structures.
+             */
+            ir_variable *v = array->whole_variable_referenced();
+            if (v != NULL)
+               v->data.max_array_access = array->type->array_size() - 1;
+         }
+
+         /* From page 23 (29 of the PDF) of the GLSL 1.30 spec:
+          *
+          *    "Samplers aggregated into arrays within a shader (using square
+          *    brackets [ ]) can only be indexed with integral constant
+          *    expressions [...]."
+          *
+          * This restriction was added in GLSL 1.30.  Shaders using earlier
+          * versions of the language should not be rejected by the compiler
+          * front-end for using this construct.  This allows useful things such
+          * as using a loop counter as the index to an array of samplers.  If
+          * the loop is unrolled, the code should compile correctly.  Instead,
+          * emit a warning.
+          *
+          * In GLSL 4.00 / ARB_gpu_shader5, this requirement is relaxed again
+          * to allow indexing with dynamically uniform expressions.  Note that
+          * these are not required to be uniforms or expressions based on them,
+          * but merely that the values must not diverge between shader
+          * invocations run together.  If the values *do* diverge, then the
+          * behavior of the operation requiring a dynamically uniform
+          * expression is undefined.
+          */
+         if (array->type->without_array()->is_sampler()) {
+            if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
+               if (state->is_version(130, 300))
+                  _mesa_glsl_error(&loc, state,
+                                   "sampler arrays indexed with non-constant "
+                                   "expressions are forbidden in GLSL %s "
+                                   "and later",
+                                   state->es_shader ? "ES 3.00" : "1.30");
+               else if (state->es_shader)
+                  _mesa_glsl_warning(&loc, state,
+                                     "sampler arrays indexed with non-constant "
+                                     "expressions will be forbidden in GLSL "
+                                     "3.00 and later");
+               else
+                  _mesa_glsl_warning(&loc, state,
+                                     "sampler arrays indexed with non-constant "
+                                     "expressions will be forbidden in GLSL "
+                                     "1.30 and later");
+            }
+         }
+
+         /* From page 27 of the GLSL ES 3.1 specification:
+          *
+          *    "When aggregated into arrays within a shader, images can only be
+          *    indexed with a constant integral expression."
+          *
+          * On the other hand the desktop GL specification extension allows
+          * non-constant indexing of image arrays, but behavior is left undefined
+          * in cases where the indexing expression is not dynamically uniform.
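+          *
+          * For example (a hypothetical ES 3.10 snippet), indexing
+          *
+          *    layout(rgba8) uniform readonly image2D imgs[4];
+          *
+          * as "imgs[i]" with a non-constant "i" must be rejected here.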
+ */ + if (state->es_shader && array->type->without_array()->is_image()) { + _mesa_glsl_error(&loc, state, + "image arrays indexed with non-constant " + "expressions are forbidden in GLSL ES."); + } + } + + /* After performing all of the error checking, generate the IR for the + * expression. + */ + if (array->type->is_array() + || array->type->is_matrix() + || array->type->is_vector()) { + return new(mem_ctx) ir_dereference_array(array, idx); + } else if (array->type->is_error()) { + return array; + } else { + ir_rvalue *result = new(mem_ctx) ir_dereference_array(array, idx); + result->type = glsl_type::error_type; + + return result; + } +} diff --git a/src/compiler/glsl/ast_expr.cpp b/src/compiler/glsl/ast_expr.cpp new file mode 100644 index 00000000000..e624d11cf3b --- /dev/null +++ b/src/compiler/glsl/ast_expr.cpp @@ -0,0 +1,95 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+#include <assert.h>
+#include "ast.h"
+
+const char *
+ast_expression::operator_string(enum ast_operators op)
+{
+   static const char *const operators[] = {
+      "=",
+      "+",
+      "-",
+      "+",
+      "-",
+      "*",
+      "/",
+      "%",
+      "<<",
+      ">>",
+      "<",
+      ">",
+      "<=",
+      ">=",
+      "==",
+      "!=",
+      "&",
+      "^",
+      "|",
+      "~",
+      "&&",
+      "^^",
+      "||",
+      "!",
+
+      "*=",
+      "/=",
+      "%=",
+      "+=",
+      "-=",
+      "<<=",
+      ">>=",
+      "&=",
+      "^=",
+      "|=",
+
+      "?:",
+
+      "++",
+      "--",
+      "++",
+      "--",
+      ".",
+   };
+
+   assert((unsigned int)op < sizeof(operators) / sizeof(operators[0]));
+
+   return operators[op];
+}
+
+
+ast_expression_bin::ast_expression_bin(int oper, ast_expression *ex0,
+                                       ast_expression *ex1) :
+   ast_expression(oper, ex0, ex1, NULL)
+{
+   assert((oper >= ast_plus) && (oper <= ast_logic_not));
+}
+
+
+void
+ast_expression_bin::print(void) const
+{
+   subexpressions[0]->print();
+   printf("%s ", operator_string(oper));
+   subexpressions[1]->print();
+}
diff --git a/src/compiler/glsl/ast_function.cpp b/src/compiler/glsl/ast_function.cpp
new file mode 100644
index 00000000000..0eb456a2b1f
--- /dev/null
+++ b/src/compiler/glsl/ast_function.cpp
@@ -0,0 +1,2098 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "glsl_symbol_table.h"
+#include "ast.h"
+#include "compiler/glsl_types.h"
+#include "ir.h"
+#include "main/core.h" /* for MIN2 */
+#include "main/shaderobj.h"
+
+static ir_rvalue *
+convert_component(ir_rvalue *src, const glsl_type *desired_type);
+
+bool
+apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from,
+                          struct _mesa_glsl_parse_state *state);
+
+static unsigned
+process_parameters(exec_list *instructions, exec_list *actual_parameters,
+                   exec_list *parameters,
+                   struct _mesa_glsl_parse_state *state)
+{
+   unsigned count = 0;
+
+   foreach_list_typed(ast_node, ast, link, parameters) {
+      ir_rvalue *result = ast->hir(instructions, state);
+
+      ir_constant *const constant = result->constant_expression_value();
+      if (constant != NULL)
+         result = constant;
+
+      actual_parameters->push_tail(result);
+      count++;
+   }
+
+   return count;
+}
+
+
+/**
+ * Generate a source prototype for a function signature
+ *
+ * \param return_type Return type of the function.  May be \c NULL.
+ * \param name        Name of the function.
+ * \param parameters  List of \c ir_instruction nodes representing the
+ *                    parameter list for the function. 
This may be either a + * formal (\c ir_variable) or actual (\c ir_rvalue) + * parameter list. Only the type is used. + * + * \return + * A ralloced string representing the prototype of the function. + */ +char * +prototype_string(const glsl_type *return_type, const char *name, + exec_list *parameters) +{ + char *str = NULL; + + if (return_type != NULL) + str = ralloc_asprintf(NULL, "%s ", return_type->name); + + ralloc_asprintf_append(&str, "%s(", name); + + const char *comma = ""; + foreach_in_list(const ir_variable, param, parameters) { + ralloc_asprintf_append(&str, "%s%s", comma, param->type->name); + comma = ", "; + } + + ralloc_strcat(&str, ")"); + return str; +} + +static bool +verify_image_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, + const ir_variable *formal, const ir_variable *actual) +{ + /** + * From the ARB_shader_image_load_store specification: + * + * "The values of image variables qualified with coherent, + * volatile, restrict, readonly, or writeonly may not be passed + * to functions whose formal parameters lack such + * qualifiers. [...] It is legal to have additional qualifiers + * on a formal parameter, but not to have fewer." + */ + if (actual->data.image_coherent && !formal->data.image_coherent) { + _mesa_glsl_error(loc, state, + "function call parameter `%s' drops " + "`coherent' qualifier", formal->name); + return false; + } + + if (actual->data.image_volatile && !formal->data.image_volatile) { + _mesa_glsl_error(loc, state, + "function call parameter `%s' drops " + "`volatile' qualifier", formal->name); + return false; + } + + if (actual->data.image_restrict && !formal->data.image_restrict) { + _mesa_glsl_error(loc, state, + "function call parameter `%s' drops " + "`restrict' qualifier", formal->name); + return false; + } + + if (actual->data.image_read_only && !formal->data.image_read_only) { + _mesa_glsl_error(loc, state, + "function call parameter `%s' drops " + "`readonly' qualifier", formal->name); + return false; + } + + if (actual->data.image_write_only && !formal->data.image_write_only) { + _mesa_glsl_error(loc, state, + "function call parameter `%s' drops " + "`writeonly' qualifier", formal->name); + return false; + } + + return true; +} + +static bool +verify_first_atomic_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, + ir_variable *var) +{ + if (!var || + (!var->is_in_shader_storage_block() && + var->data.mode != ir_var_shader_shared)) { + _mesa_glsl_error(loc, state, "First argument to atomic function " + "must be a buffer or shared variable"); + return false; + } + return true; +} + +static bool +is_atomic_function(const char *func_name) +{ + return !strcmp(func_name, "atomicAdd") || + !strcmp(func_name, "atomicMin") || + !strcmp(func_name, "atomicMax") || + !strcmp(func_name, "atomicAnd") || + !strcmp(func_name, "atomicOr") || + !strcmp(func_name, "atomicXor") || + !strcmp(func_name, "atomicExchange") || + !strcmp(func_name, "atomicCompSwap"); +} + +/** + * Verify that 'out' and 'inout' actual parameters are lvalues. Also, verify + * that 'const_in' formal parameters (an extension in our IR) correspond to + * ir_constant actual parameters. 
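+ *
+ * For example (illustrative GLSL), given "void f(out float x);", a call
+ * "f(a + b)" must be rejected because "a + b" is not an lvalue, while
+ * "f(a)" is accepted and marks "a" as assigned.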
+ */ +static bool +verify_parameter_modes(_mesa_glsl_parse_state *state, + ir_function_signature *sig, + exec_list &actual_ir_parameters, + exec_list &actual_ast_parameters) +{ + exec_node *actual_ir_node = actual_ir_parameters.head; + exec_node *actual_ast_node = actual_ast_parameters.head; + + foreach_in_list(const ir_variable, formal, &sig->parameters) { + /* The lists must be the same length. */ + assert(!actual_ir_node->is_tail_sentinel()); + assert(!actual_ast_node->is_tail_sentinel()); + + const ir_rvalue *const actual = (ir_rvalue *) actual_ir_node; + const ast_expression *const actual_ast = + exec_node_data(ast_expression, actual_ast_node, link); + + /* FIXME: 'loc' is incorrect (as of 2011-01-21). It is always + * FIXME: 0:0(0). + */ + YYLTYPE loc = actual_ast->get_location(); + + /* Verify that 'const_in' parameters are ir_constants. */ + if (formal->data.mode == ir_var_const_in && + actual->ir_type != ir_type_constant) { + _mesa_glsl_error(&loc, state, + "parameter `in %s' must be a constant expression", + formal->name); + return false; + } + + /* Verify that shader_in parameters are shader inputs */ + if (formal->data.must_be_shader_input) { + ir_variable *var = actual->variable_referenced(); + if (var && var->data.mode != ir_var_shader_in) { + _mesa_glsl_error(&loc, state, + "parameter `%s` must be a shader input", + formal->name); + return false; + } + + if (actual->ir_type == ir_type_swizzle) { + _mesa_glsl_error(&loc, state, + "parameter `%s` must not be swizzled", + formal->name); + return false; + } + } + + /* Verify that 'out' and 'inout' actual parameters are lvalues. */ + if (formal->data.mode == ir_var_function_out + || formal->data.mode == ir_var_function_inout) { + const char *mode = NULL; + switch (formal->data.mode) { + case ir_var_function_out: mode = "out"; break; + case ir_var_function_inout: mode = "inout"; break; + default: assert(false); break; + } + + /* This AST-based check catches errors like f(i++). The IR-based + * is_lvalue() is insufficient because the actual parameter at the + * IR-level is just a temporary value, which is an l-value. 
+ */ + if (actual_ast->non_lvalue_description != NULL) { + _mesa_glsl_error(&loc, state, + "function parameter '%s %s' references a %s", + mode, formal->name, + actual_ast->non_lvalue_description); + return false; + } + + ir_variable *var = actual->variable_referenced(); + if (var) + var->data.assigned = true; + + if (var && var->data.read_only) { + _mesa_glsl_error(&loc, state, + "function parameter '%s %s' references the " + "read-only variable '%s'", + mode, formal->name, + actual->variable_referenced()->name); + return false; + } else if (!actual->is_lvalue()) { + _mesa_glsl_error(&loc, state, + "function parameter '%s %s' is not an lvalue", + mode, formal->name); + return false; + } + } + + if (formal->type->is_image() && + actual->variable_referenced()) { + if (!verify_image_parameter(&loc, state, formal, + actual->variable_referenced())) + return false; + } + + actual_ir_node = actual_ir_node->next; + actual_ast_node = actual_ast_node->next; + } + + /* The first parameter of atomic functions must be a buffer variable */ + const char *func_name = sig->function_name(); + bool is_atomic = is_atomic_function(func_name); + if (is_atomic) { + const ir_rvalue *const actual = (ir_rvalue *) actual_ir_parameters.head; + + const ast_expression *const actual_ast = + exec_node_data(ast_expression, actual_ast_parameters.head, link); + YYLTYPE loc = actual_ast->get_location(); + + if (!verify_first_atomic_parameter(&loc, state, + actual->variable_referenced())) { + return false; + } + } + + return true; +} + +static void +fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type, + exec_list *before_instructions, exec_list *after_instructions, + bool parameter_is_inout) +{ + ir_expression *const expr = actual->as_expression(); + + /* If the types match exactly and the parameter is not a vector-extract, + * nothing needs to be done to fix the parameter. + */ + if (formal_type == actual->type + && (expr == NULL || expr->operation != ir_binop_vector_extract)) + return; + + /* To convert an out parameter, we need to create a temporary variable to + * hold the value before conversion, and then perform the conversion after + * the function call returns. + * + * This has the effect of transforming code like this: + * + * void f(out int x); + * float value; + * f(value); + * + * Into IR that's equivalent to this: + * + * void f(out int x); + * float value; + * int out_parameter_conversion; + * f(out_parameter_conversion); + * value = float(out_parameter_conversion); + * + * If the parameter is an ir_expression of ir_binop_vector_extract, + * additional conversion is needed in the post-call re-write. + */ + ir_variable *tmp = + new(mem_ctx) ir_variable(formal_type, "inout_tmp", ir_var_temporary); + + before_instructions->push_tail(tmp); + + /* If the parameter is an inout parameter, copy the value of the actual + * parameter to the new temporary. Note that no type conversion is allowed + * here because inout parameters must match types exactly. + */ + if (parameter_is_inout) { + /* Inout parameters should never require conversion, since that would + * require an implicit conversion to exist both to and from the formal + * parameter type, and there are no bidirectional implicit conversions. 
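+       *
+       * For example, GLSL defines an implicit conversion from int to float
+       * but none from float to int, so an int argument could never have
+       * matched an "inout float" formal parameter in the first place.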
+ */ + assert (actual->type == formal_type); + + ir_dereference_variable *const deref_tmp_1 = + new(mem_ctx) ir_dereference_variable(tmp); + ir_assignment *const assignment = + new(mem_ctx) ir_assignment(deref_tmp_1, actual); + before_instructions->push_tail(assignment); + } + + /* Replace the parameter in the call with a dereference of the new + * temporary. + */ + ir_dereference_variable *const deref_tmp_2 = + new(mem_ctx) ir_dereference_variable(tmp); + actual->replace_with(deref_tmp_2); + + + /* Copy the temporary variable to the actual parameter with optional + * type conversion applied. + */ + ir_rvalue *rhs = new(mem_ctx) ir_dereference_variable(tmp); + if (actual->type != formal_type) + rhs = convert_component(rhs, actual->type); + + ir_rvalue *lhs = actual; + if (expr != NULL && expr->operation == ir_binop_vector_extract) { + lhs = new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL), + expr->operands[1]->clone(mem_ctx, NULL)); + } + + ir_assignment *const assignment_2 = new(mem_ctx) ir_assignment(lhs, rhs); + after_instructions->push_tail(assignment_2); +} + +/** + * Generate a function call. + * + * For non-void functions, this returns a dereference of the temporary variable + * which stores the return value for the call. For void functions, this returns + * NULL. + */ +static ir_rvalue * +generate_call(exec_list *instructions, ir_function_signature *sig, + exec_list *actual_parameters, + ir_variable *sub_var, + ir_rvalue *array_idx, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + exec_list post_call_conversions; + + /* Perform implicit conversion of arguments. For out parameters, we need + * to place them in a temporary variable and do the conversion after the + * call takes place. Since we haven't emitted the call yet, we'll place + * the post-call conversions in a temporary exec_list, and emit them later. + */ + foreach_two_lists(formal_node, &sig->parameters, + actual_node, actual_parameters) { + ir_rvalue *actual = (ir_rvalue *) actual_node; + ir_variable *formal = (ir_variable *) formal_node; + + if (formal->type->is_numeric() || formal->type->is_boolean()) { + switch (formal->data.mode) { + case ir_var_const_in: + case ir_var_function_in: { + ir_rvalue *converted + = convert_component(actual, formal->type); + actual->replace_with(converted); + break; + } + case ir_var_function_out: + case ir_var_function_inout: + fix_parameter(ctx, actual, formal->type, + instructions, &post_call_conversions, + formal->data.mode == ir_var_function_inout); + break; + default: + assert (!"Illegal formal parameter mode"); + break; + } + } + } + + /* Section 4.3.2 (Const) of the GLSL 1.10.59 spec says: + * + * "Initializers for const declarations must be formed from literal + * values, other const variables (not including function call + * paramaters), or expressions of these. + * + * Constructors may be used in such expressions, but function calls may + * not." + * + * Section 4.3.3 (Constant Expressions) of the GLSL 1.20.8 spec says: + * + * "A constant expression is one of + * + * ... + * + * - a built-in function call whose arguments are all constant + * expressions, with the exception of the texture lookup + * functions, the noise functions, and ftransform. The built-in + * functions dFdx, dFdy, and fwidth must return 0 when evaluated + * inside an initializer with an argument that is a constant + * expression." + * + * Section 5.10 (Constant Expressions) of the GLSL ES 1.00.17 spec says: + * + * "A constant expression is one of + * + * ... 
+ * + * - a built-in function call whose arguments are all constant + * expressions, with the exception of the texture lookup + * functions." + * + * Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec says: + * + * "A constant expression is one of + * + * ... + * + * - a built-in function call whose arguments are all constant + * expressions, with the exception of the texture lookup + * functions. The built-in functions dFdx, dFdy, and fwidth must + * return 0 when evaluated inside an initializer with an argument + * that is a constant expression." + * + * If the function call is a constant expression, don't generate any + * instructions; just generate an ir_constant. + */ + if (state->is_version(120, 100)) { + ir_constant *value = sig->constant_expression_value(actual_parameters, NULL); + if (value != NULL) { + return value; + } + } + + ir_dereference_variable *deref = NULL; + if (!sig->return_type->is_void()) { + /* Create a new temporary to hold the return value. */ + char *const name = ir_variable::temporaries_allocate_names + ? ralloc_asprintf(ctx, "%s_retval", sig->function_name()) + : NULL; + + ir_variable *var; + + var = new(ctx) ir_variable(sig->return_type, name, ir_var_temporary); + instructions->push_tail(var); + + ralloc_free(name); + + deref = new(ctx) ir_dereference_variable(var); + } + + ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters, sub_var, array_idx); + instructions->push_tail(call); + + /* Also emit any necessary out-parameter conversions. */ + instructions->append_list(&post_call_conversions); + + return deref ? deref->clone(ctx, NULL) : NULL; +} + +/** + * Given a function name and parameter list, find the matching signature. + */ +static ir_function_signature * +match_function_by_name(const char *name, + exec_list *actual_parameters, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + ir_function *f = state->symbols->get_function(name); + ir_function_signature *local_sig = NULL; + ir_function_signature *sig = NULL; + + /* Is the function hidden by a record type constructor? */ + if (state->symbols->get_type(name)) + goto done; /* no match */ + + /* Is the function hidden by a variable (impossible in 1.10)? */ + if (!state->symbols->separate_function_namespace + && state->symbols->get_variable(name)) + goto done; /* no match */ + + if (f != NULL) { + /* In desktop GL, the presence of a user-defined signature hides any + * built-in signatures, so we must ignore them. In contrast, in ES2 + * user-defined signatures add new overloads, so we must consider them. + */ + bool allow_builtins = state->es_shader || !f->has_user_signature(); + + /* Look for a match in the local shader. If exact, we're done. */ + bool is_exact = false; + sig = local_sig = f->matching_signature(state, actual_parameters, + allow_builtins, &is_exact); + if (is_exact) + goto done; + + if (!allow_builtins) + goto done; + } + + /* Local shader has no exact candidates; check the built-ins. */ + _mesa_glsl_initialize_builtin_functions(); + sig = _mesa_glsl_find_builtin_function(state, name, actual_parameters); + +done: + if (sig != NULL) { + /* If the match is from a linked built-in shader, import the prototype. 
*/ + if (sig != local_sig) { + if (f == NULL) { + f = new(ctx) ir_function(name); + state->symbols->add_global_function(f); + emit_function(state, f); + } + f->add_signature(sig->clone_prototype(f, NULL)); + } + } + return sig; +} + +static ir_function_signature * +match_subroutine_by_name(const char *name, + exec_list *actual_parameters, + struct _mesa_glsl_parse_state *state, + ir_variable **var_r) +{ + void *ctx = state; + ir_function_signature *sig = NULL; + ir_function *f, *found = NULL; + const char *new_name; + ir_variable *var; + bool is_exact = false; + + new_name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), name); + var = state->symbols->get_variable(new_name); + if (!var) + return NULL; + + for (int i = 0; i < state->num_subroutine_types; i++) { + f = state->subroutine_types[i]; + if (strcmp(f->name, var->type->without_array()->name)) + continue; + found = f; + break; + } + + if (!found) + return NULL; + *var_r = var; + sig = found->matching_signature(state, actual_parameters, + false, &is_exact); + return sig; +} + +static ir_rvalue * +generate_array_index(void *mem_ctx, exec_list *instructions, + struct _mesa_glsl_parse_state *state, YYLTYPE loc, + const ast_expression *array, ast_expression *idx, + const char **function_name, exec_list *actual_parameters) +{ + if (array->oper == ast_array_index) { + /* This handles arrays of arrays */ + ir_rvalue *outer_array = generate_array_index(mem_ctx, instructions, + state, loc, + array->subexpressions[0], + array->subexpressions[1], + function_name, actual_parameters); + ir_rvalue *outer_array_idx = idx->hir(instructions, state); + + YYLTYPE index_loc = idx->get_location(); + return _mesa_ast_array_index_to_hir(mem_ctx, state, outer_array, + outer_array_idx, loc, + index_loc); + } else { + ir_variable *sub_var = NULL; + *function_name = array->primary_expression.identifier; + + match_subroutine_by_name(*function_name, actual_parameters, + state, &sub_var); + + ir_rvalue *outer_array_idx = idx->hir(instructions, state); + return new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx); + } +} + +static void +print_function_prototypes(_mesa_glsl_parse_state *state, YYLTYPE *loc, + ir_function *f) +{ + if (f == NULL) + return; + + foreach_in_list(ir_function_signature, sig, &f->signatures) { + if (sig->is_builtin() && !sig->is_builtin_available(state)) + continue; + + char *str = prototype_string(sig->return_type, f->name, &sig->parameters); + _mesa_glsl_error(loc, state, " %s", str); + ralloc_free(str); + } +} + +/** + * Raise a "no matching function" error, listing all possible overloads the + * compiler considered so developers can figure out what went wrong. 
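+ *
+ * For a hypothetical mismatched call "foo(1.0)", the diagnostic takes
+ * roughly the form "no matching function for call to `foo(float)';
+ * candidates are:" followed by one prototype per line, as produced by
+ * print_function_prototypes() above.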
+ */ +static void +no_matching_function_error(const char *name, + YYLTYPE *loc, + exec_list *actual_parameters, + _mesa_glsl_parse_state *state) +{ + gl_shader *sh = _mesa_glsl_get_builtin_function_shader(); + + if (state->symbols->get_function(name) == NULL + && (!state->uses_builtin_functions + || sh->symbols->get_function(name) == NULL)) { + _mesa_glsl_error(loc, state, "no function with name '%s'", name); + } else { + char *str = prototype_string(NULL, name, actual_parameters); + _mesa_glsl_error(loc, state, + "no matching function for call to `%s'; candidates are:", + str); + ralloc_free(str); + + print_function_prototypes(state, loc, state->symbols->get_function(name)); + + if (state->uses_builtin_functions) { + print_function_prototypes(state, loc, sh->symbols->get_function(name)); + } + } +} + +/** + * Perform automatic type conversion of constructor parameters + * + * This implements the rules in the "Conversion and Scalar Constructors" + * section (GLSL 1.10 section 5.4.1), not the "Implicit Conversions" rules. + */ +static ir_rvalue * +convert_component(ir_rvalue *src, const glsl_type *desired_type) +{ + void *ctx = ralloc_parent(src); + const unsigned a = desired_type->base_type; + const unsigned b = src->type->base_type; + ir_expression *result = NULL; + + if (src->type->is_error()) + return src; + + assert(a <= GLSL_TYPE_BOOL); + assert(b <= GLSL_TYPE_BOOL); + + if (a == b) + return src; + + switch (a) { + case GLSL_TYPE_UINT: + switch (b) { + case GLSL_TYPE_INT: + result = new(ctx) ir_expression(ir_unop_i2u, src); + break; + case GLSL_TYPE_FLOAT: + result = new(ctx) ir_expression(ir_unop_f2u, src); + break; + case GLSL_TYPE_BOOL: + result = new(ctx) ir_expression(ir_unop_i2u, + new(ctx) ir_expression(ir_unop_b2i, src)); + break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2u, src); + break; + } + break; + case GLSL_TYPE_INT: + switch (b) { + case GLSL_TYPE_UINT: + result = new(ctx) ir_expression(ir_unop_u2i, src); + break; + case GLSL_TYPE_FLOAT: + result = new(ctx) ir_expression(ir_unop_f2i, src); + break; + case GLSL_TYPE_BOOL: + result = new(ctx) ir_expression(ir_unop_b2i, src); + break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2i, src); + break; + } + break; + case GLSL_TYPE_FLOAT: + switch (b) { + case GLSL_TYPE_UINT: + result = new(ctx) ir_expression(ir_unop_u2f, desired_type, src, NULL); + break; + case GLSL_TYPE_INT: + result = new(ctx) ir_expression(ir_unop_i2f, desired_type, src, NULL); + break; + case GLSL_TYPE_BOOL: + result = new(ctx) ir_expression(ir_unop_b2f, desired_type, src, NULL); + break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2f, desired_type, src, NULL); + break; + } + break; + case GLSL_TYPE_BOOL: + switch (b) { + case GLSL_TYPE_UINT: + result = new(ctx) ir_expression(ir_unop_i2b, + new(ctx) ir_expression(ir_unop_u2i, src)); + break; + case GLSL_TYPE_INT: + result = new(ctx) ir_expression(ir_unop_i2b, desired_type, src, NULL); + break; + case GLSL_TYPE_FLOAT: + result = new(ctx) ir_expression(ir_unop_f2b, desired_type, src, NULL); + break; + case GLSL_TYPE_DOUBLE: + result = new(ctx) ir_expression(ir_unop_d2b, desired_type, src, NULL); + break; + } + break; + case GLSL_TYPE_DOUBLE: + switch (b) { + case GLSL_TYPE_INT: + result = new(ctx) ir_expression(ir_unop_i2d, src); + break; + case GLSL_TYPE_UINT: + result = new(ctx) ir_expression(ir_unop_u2d, src); + break; + case GLSL_TYPE_BOOL: + result = new(ctx) ir_expression(ir_unop_f2d, + new(ctx) ir_expression(ir_unop_b2f, 
src)); + break; + case GLSL_TYPE_FLOAT: + result = new(ctx) ir_expression(ir_unop_f2d, desired_type, src, NULL); + break; + } + } + + assert(result != NULL); + assert(result->type == desired_type); + + /* Try constant folding; it may fold in the conversion we just added. */ + ir_constant *const constant = result->constant_expression_value(); + return (constant != NULL) ? (ir_rvalue *) constant : (ir_rvalue *) result; +} + +/** + * Dereference a specific component from a scalar, vector, or matrix + */ +static ir_rvalue * +dereference_component(ir_rvalue *src, unsigned component) +{ + void *ctx = ralloc_parent(src); + assert(component < src->type->components()); + + /* If the source is a constant, just create a new constant instead of a + * dereference of the existing constant. + */ + ir_constant *constant = src->as_constant(); + if (constant) + return new(ctx) ir_constant(constant, component); + + if (src->type->is_scalar()) { + return src; + } else if (src->type->is_vector()) { + return new(ctx) ir_swizzle(src, component, 0, 0, 0, 1); + } else { + assert(src->type->is_matrix()); + + /* Dereference a row of the matrix, then call this function again to get + * a specific element from that row. + */ + const int c = component / src->type->column_type()->vector_elements; + const int r = component % src->type->column_type()->vector_elements; + ir_constant *const col_index = new(ctx) ir_constant(c); + ir_dereference *const col = new(ctx) ir_dereference_array(src, col_index); + + col->type = src->type->column_type(); + + return dereference_component(col, r); + } + + assert(!"Should not get here."); + return NULL; +} + + +static ir_rvalue * +process_vec_mat_constructor(exec_list *instructions, + const glsl_type *constructor_type, + YYLTYPE *loc, exec_list *parameters, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + + /* The ARB_shading_language_420pack spec says: + * + * "If an initializer is a list of initializers enclosed in curly braces, + * the variable being declared must be a vector, a matrix, an array, or a + * structure. + * + * int i = { 1 }; // illegal, i is not an aggregate" + */ + if (constructor_type->vector_elements <= 1) { + _mesa_glsl_error(loc, state, "aggregates can only initialize vectors, " + "matrices, arrays, and structs"); + return ir_rvalue::error_value(ctx); + } + + exec_list actual_parameters; + const unsigned parameter_count = + process_parameters(instructions, &actual_parameters, parameters, state); + + if (parameter_count == 0 + || (constructor_type->is_vector() && + constructor_type->vector_elements != parameter_count) + || (constructor_type->is_matrix() && + constructor_type->matrix_columns != parameter_count)) { + _mesa_glsl_error(loc, state, "%s constructor must have %u parameters", + constructor_type->is_vector() ? "vector" : "matrix", + constructor_type->vector_elements); + return ir_rvalue::error_value(ctx); + } + + bool all_parameters_are_constant = true; + + /* Type cast each parameter and, if possible, fold constants. */ + foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) { + ir_rvalue *result = ir; + + /* Apply implicit conversions (not the scalar constructor rules!). See + * the spec quote above. 
*/
+      if (constructor_type->base_type != result->type->base_type) {
+         const glsl_type *desired_type =
+            glsl_type::get_instance(constructor_type->base_type,
+                                    ir->type->vector_elements,
+                                    ir->type->matrix_columns);
+         if (result->type->can_implicitly_convert_to(desired_type, state)) {
+            /* Even though convert_component() implements the constructor
+             * conversion rules (not the implicit conversion rules), it's safe
+             * to use it here because we already checked that the implicit
+             * conversion is legal.
+             */
+            result = convert_component(ir, desired_type);
+         }
+      }
+
+      if (constructor_type->is_matrix()) {
+         if (result->type != constructor_type->column_type()) {
+            _mesa_glsl_error(loc, state, "type error in matrix constructor: "
+                             "expected: %s, found %s",
+                             constructor_type->column_type()->name,
+                             result->type->name);
+            return ir_rvalue::error_value(ctx);
+         }
+      } else if (result->type != constructor_type->get_scalar_type()) {
+         _mesa_glsl_error(loc, state, "type error in vector constructor: "
+                          "expected: %s, found %s",
+                          constructor_type->get_scalar_type()->name,
+                          result->type->name);
+         return ir_rvalue::error_value(ctx);
+      }
+
+      /* Attempt to convert the parameter to a constant valued expression.
+       * After doing so, track whether or not all the parameters to the
+       * constructor are trivially constant valued expressions.
+       */
+      ir_rvalue *const constant = result->constant_expression_value();
+
+      if (constant != NULL)
+         result = constant;
+      else
+         all_parameters_are_constant = false;
+
+      ir->replace_with(result);
+   }
+
+   if (all_parameters_are_constant)
+      return new(ctx) ir_constant(constructor_type, &actual_parameters);
+
+   ir_variable *var = new(ctx) ir_variable(constructor_type, "vec_mat_ctor",
+                                           ir_var_temporary);
+   instructions->push_tail(var);
+
+   int i = 0;
+
+   foreach_in_list(ir_rvalue, rhs, &actual_parameters) {
+      ir_instruction *assignment = NULL;
+
+      if (var->type->is_matrix()) {
+         ir_rvalue *lhs = new(ctx) ir_dereference_array(var,
+                                                        new(ctx) ir_constant(i));
+         assignment = new(ctx) ir_assignment(lhs, rhs, NULL);
+      } else {
+         /* use writemask rather than index for vector */
+         assert(var->type->is_vector());
+         assert(i < 4);
+         ir_dereference *lhs = new(ctx) ir_dereference_variable(var);
+         assignment = new(ctx) ir_assignment(lhs, rhs, NULL, (unsigned)(1 << i));
+      }
+
+      instructions->push_tail(assignment);
+
+      i++;
+   }
+
+   return new(ctx) ir_dereference_variable(var);
+}
+
+
+static ir_rvalue *
+process_array_constructor(exec_list *instructions,
+                          const glsl_type *constructor_type,
+                          YYLTYPE *loc, exec_list *parameters,
+                          struct _mesa_glsl_parse_state *state)
+{
+   void *ctx = state;
+   /* Array constructors come in two forms: sized and unsized.  Sized array
+    * constructors look like 'vec4[2](a, b)', where 'a' and 'b' are vec4
+    * variables.  In this case the number of parameters must exactly match the
+    * specified size of the array.
+    *
+    * Unsized array constructors look like 'vec4[](a, b)', where 'a' and 'b'
+    * are vec4 variables.  In this case the size of the array being constructed
+    * is determined by the number of parameters.
+    *
+    * From page 52 (page 58 of the PDF) of the GLSL 1.50 spec:
+    *
+    *    "There must be exactly the same number of arguments as the size of
+    *    the array being constructed.  If no size is present in the
+    *    constructor, then the array is explicitly sized to the number of
+    *    arguments provided.  The arguments are assigned in order, starting at
+    *    element 0, to the elements of the constructed array.  Each argument
+    *    must be the same type as the element type of the array, or be a type
+    *    that can be converted to the element type of the array according to
+    *    Section 4.1.10 "Implicit Conversions.""
+    */
+   exec_list actual_parameters;
+   const unsigned parameter_count =
+      process_parameters(instructions, &actual_parameters, parameters, state);
+   bool is_unsized_array = constructor_type->is_unsized_array();
+
+   if ((parameter_count == 0) ||
+       (!is_unsized_array && (constructor_type->length != parameter_count))) {
+      const unsigned min_param = is_unsized_array
+         ? 1 : constructor_type->length;
+
+      _mesa_glsl_error(loc, state, "array constructor must have %s %u "
+                       "parameter%s",
+                       is_unsized_array ? "at least" : "exactly",
+                       min_param, (min_param <= 1) ? "" : "s");
+      return ir_rvalue::error_value(ctx);
+   }
+
+   if (is_unsized_array) {
+      constructor_type =
+         glsl_type::get_array_instance(constructor_type->fields.array,
+                                       parameter_count);
+      assert(constructor_type != NULL);
+      assert(constructor_type->length == parameter_count);
+   }
+
+   bool all_parameters_are_constant = true;
+   const glsl_type *element_type = constructor_type->fields.array;
+
+   /* Type cast each parameter and, if possible, fold constants. */
+   foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) {
+      ir_rvalue *result = ir;
+
+      const glsl_base_type element_base_type =
+         constructor_type->fields.array->base_type;
+
+      /* Apply implicit conversions (not the scalar constructor rules!). See
+       * the spec quote above. */
+      if (element_base_type != result->type->base_type) {
+         const glsl_type *desired_type =
+            glsl_type::get_instance(element_base_type,
+                                    ir->type->vector_elements,
+                                    ir->type->matrix_columns);
+
+         if (result->type->can_implicitly_convert_to(desired_type, state)) {
+            /* Even though convert_component() implements the constructor
+             * conversion rules (not the implicit conversion rules), it's safe
+             * to use it here because we already checked that the implicit
+             * conversion is legal.
+             */
+            result = convert_component(ir, desired_type);
+         }
+      }
+
+      if (constructor_type->fields.array->is_unsized_array()) {
+         /* As the inner parameters of the constructor are created without
+          * knowledge of each other, we need to check to make sure unsized
+          * parameters of unsized constructors all end up with the same size.
+          *
+          * e.g. we make sure to fail for a constructor like this:
+          * vec4[][] a = vec4[][](vec4[](vec4(0.0), vec4(1.0)),
+          *                       vec4[](vec4(0.0), vec4(1.0), vec4(1.0)),
+          *                       vec4[](vec4(0.0), vec4(1.0)));
+          */
+         if (element_type->is_unsized_array()) {
+            /* This is the first parameter so just get the type */
+            element_type = result->type;
+         } else if (element_type != result->type) {
+            _mesa_glsl_error(loc, state, "type error in array constructor: "
+                             "expected: %s, found %s",
+                             element_type->name,
+                             result->type->name);
+            return ir_rvalue::error_value(ctx);
+         }
+      } else if (result->type != constructor_type->fields.array) {
+         _mesa_glsl_error(loc, state, "type error in array constructor: "
+                          "expected: %s, found %s",
+                          constructor_type->fields.array->name,
+                          result->type->name);
+         return ir_rvalue::error_value(ctx);
+      } else {
+         element_type = result->type;
+      }
+
+      /* Attempt to convert the parameter to a constant valued expression.
+       * After doing so, track whether or not all the parameters to the
+       * constructor are trivially constant valued expressions.
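+       *
+       * For example (illustrative), "float[](1.0, 2.0)" folds to a single
+       * ir_constant below, while "float[](1.0, x)" with a non-constant "x"
+       * falls through to the temporary-variable path instead.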
+       */
+      ir_rvalue *const constant = result->constant_expression_value();
+
+      if (constant != NULL)
+         result = constant;
+      else
+         all_parameters_are_constant = false;
+
+      ir->replace_with(result);
+   }
+
+   if (constructor_type->fields.array->is_unsized_array()) {
+      constructor_type =
+         glsl_type::get_array_instance(element_type,
+                                       parameter_count);
+      assert(constructor_type != NULL);
+      assert(constructor_type->length == parameter_count);
+   }
+
+   if (all_parameters_are_constant)
+      return new(ctx) ir_constant(constructor_type, &actual_parameters);
+
+   ir_variable *var = new(ctx) ir_variable(constructor_type, "array_ctor",
+                                           ir_var_temporary);
+   instructions->push_tail(var);
+
+   int i = 0;
+   foreach_in_list(ir_rvalue, rhs, &actual_parameters) {
+      ir_rvalue *lhs = new(ctx) ir_dereference_array(var,
+                                                     new(ctx) ir_constant(i));
+
+      ir_instruction *assignment = new(ctx) ir_assignment(lhs, rhs, NULL);
+      instructions->push_tail(assignment);
+
+      i++;
+   }
+
+   return new(ctx) ir_dereference_variable(var);
+}
+
+
+/**
+ * Try to convert a record constructor to a constant expression
+ */
+static ir_constant *
+constant_record_constructor(const glsl_type *constructor_type,
+                            exec_list *parameters, void *mem_ctx)
+{
+   foreach_in_list(ir_instruction, node, parameters) {
+      ir_constant *constant = node->as_constant();
+      if (constant == NULL)
+         return NULL;
+      node->replace_with(constant);
+   }
+
+   return new(mem_ctx) ir_constant(constructor_type, parameters);
+}
+
+
+/**
+ * Determine if a list consists of a single scalar r-value
+ */
+bool
+single_scalar_parameter(exec_list *parameters)
+{
+   const ir_rvalue *const p = (ir_rvalue *) parameters->head;
+   assert(((ir_rvalue *)p)->as_rvalue() != NULL);
+
+   return (p->type->is_scalar() && p->next->is_tail_sentinel());
+}
+
+
+/**
+ * Generate inline code for a vector constructor
+ *
+ * The generated constructor code will consist of a temporary variable
+ * declaration of the same type as the constructor.  A sequence of assignments
+ * from constructor parameters to the temporary will follow.
+ *
+ * \return
+ * An \c ir_dereference_variable of the temporary generated in the constructor
+ * body.
+ */
+ir_rvalue *
+emit_inline_vector_constructor(const glsl_type *type,
+                               exec_list *instructions,
+                               exec_list *parameters,
+                               void *ctx)
+{
+   assert(!parameters->is_empty());
+
+   ir_variable *var = new(ctx) ir_variable(type, "vec_ctor", ir_var_temporary);
+   instructions->push_tail(var);
+
+   /* There are three kinds of vector constructors.
+    *
+    *  - Construct a vector from a single scalar by replicating that scalar to
+    *    all components of the vector.
+    *
+    *  - Construct a vector from at least one matrix.  This case should already
+    *    have been taken care of in ast_function_expression::hir by breaking
+    *    down the matrix into a series of column vectors.
+    *
+    *  - Construct a vector from an arbitrary combination of vectors and
+    *    scalars.  The components of the constructor parameters are assigned
+    *    to the vector in order until the vector is full.
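+    *
+    * For example (illustrative), "vec4(1.0)" replicates the scalar into all
+    * four components, while "vec4(v, 0.0, 1.0)" with a vec2 "v" fills the
+    * components in order from the vector and the two scalars.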
+ */ + const unsigned lhs_components = type->components(); + if (single_scalar_parameter(parameters)) { + ir_rvalue *first_param = (ir_rvalue *)parameters->head; + ir_rvalue *rhs = new(ctx) ir_swizzle(first_param, 0, 0, 0, 0, + lhs_components); + ir_dereference_variable *lhs = new(ctx) ir_dereference_variable(var); + const unsigned mask = (1U << lhs_components) - 1; + + assert(rhs->type == lhs->type); + + ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL, mask); + instructions->push_tail(inst); + } else { + unsigned base_component = 0; + unsigned base_lhs_component = 0; + ir_constant_data data; + unsigned constant_mask = 0, constant_components = 0; + + memset(&data, 0, sizeof(data)); + + foreach_in_list(ir_rvalue, param, parameters) { + unsigned rhs_components = param->type->components(); + + /* Do not try to assign more components to the vector than it has! + */ + if ((rhs_components + base_lhs_component) > lhs_components) { + rhs_components = lhs_components - base_lhs_component; + } + + const ir_constant *const c = param->as_constant(); + if (c != NULL) { + for (unsigned i = 0; i < rhs_components; i++) { + switch (c->type->base_type) { + case GLSL_TYPE_UINT: + data.u[i + base_component] = c->get_uint_component(i); + break; + case GLSL_TYPE_INT: + data.i[i + base_component] = c->get_int_component(i); + break; + case GLSL_TYPE_FLOAT: + data.f[i + base_component] = c->get_float_component(i); + break; + case GLSL_TYPE_DOUBLE: + data.d[i + base_component] = c->get_double_component(i); + break; + case GLSL_TYPE_BOOL: + data.b[i + base_component] = c->get_bool_component(i); + break; + default: + assert(!"Should not get here."); + break; + } + } + + /* Mask of fields to be written in the assignment. + */ + constant_mask |= ((1U << rhs_components) - 1) << base_lhs_component; + constant_components += rhs_components; + + base_component += rhs_components; + } + /* Advance the component index by the number of components + * that were just assigned. + */ + base_lhs_component += rhs_components; + } + + if (constant_mask != 0) { + ir_dereference *lhs = new(ctx) ir_dereference_variable(var); + const glsl_type *rhs_type = glsl_type::get_instance(var->type->base_type, + constant_components, + 1); + ir_rvalue *rhs = new(ctx) ir_constant(rhs_type, &data); + + ir_instruction *inst = + new(ctx) ir_assignment(lhs, rhs, NULL, constant_mask); + instructions->push_tail(inst); + } + + base_component = 0; + foreach_in_list(ir_rvalue, param, parameters) { + unsigned rhs_components = param->type->components(); + + /* Do not try to assign more components to the vector than it has! + */ + if ((rhs_components + base_component) > lhs_components) { + rhs_components = lhs_components - base_component; + } + + /* If we do not have any components left to copy, break out of the + * loop. This can happen when initializing a vec4 with a mat3 as the + * mat3 would have been broken into a series of column vectors. + */ + if (rhs_components == 0) { + break; + } + + const ir_constant *const c = param->as_constant(); + if (c == NULL) { + /* Mask of fields to be written in the assignment. + */ + const unsigned write_mask = ((1U << rhs_components) - 1) + << base_component; + + ir_dereference *lhs = new(ctx) ir_dereference_variable(var); + + /* Generate a swizzle so that LHS and RHS sizes match. 
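+             *
+             * For example, if only two destination components remain to be
+             * written, a vec4 parameter is narrowed here to its first two
+             * components (an "xy" swizzle).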
+          */
+         ir_rvalue *rhs =
+            new(ctx) ir_swizzle(param, 0, 1, 2, 3, rhs_components);
+
+         ir_instruction *inst =
+            new(ctx) ir_assignment(lhs, rhs, NULL, write_mask);
+         instructions->push_tail(inst);
+      }
+
+      /* Advance the component index by the number of components that were
+       * just assigned.
+       */
+      base_component += rhs_components;
+   }
+   }
+   return new(ctx) ir_dereference_variable(var);
+}
+
+
+/**
+ * Generate assignment of a portion of a vector to a portion of a matrix column
+ *
+ * \param src_base  First component of the source to be used in assignment
+ * \param column    Column of destination to be assigned
+ * \param row_base  First component of the destination column to be assigned
+ * \param count     Number of components to be assigned
+ *
+ * \note
+ * \c src_base + \c count must be less than or equal to the number of
+ * components in the source vector.
+ */
+ir_instruction *
+assign_to_matrix_column(ir_variable *var, unsigned column, unsigned row_base,
+                        ir_rvalue *src, unsigned src_base, unsigned count,
+                        void *mem_ctx)
+{
+   ir_constant *col_idx = new(mem_ctx) ir_constant(column);
+   ir_dereference *column_ref = new(mem_ctx) ir_dereference_array(var, col_idx);
+
+   assert(column_ref->type->components() >= (row_base + count));
+   assert(src->type->components() >= (src_base + count));
+
+   /* Generate a swizzle that extracts the number of components from the
+    * source that are to be assigned to the column of the matrix.
+    */
+   if (count < src->type->vector_elements) {
+      src = new(mem_ctx) ir_swizzle(src,
+                                    src_base + 0, src_base + 1,
+                                    src_base + 2, src_base + 3,
+                                    count);
+   }
+
+   /* Mask of fields to be written in the assignment. */
+   const unsigned write_mask = ((1U << count) - 1) << row_base;
+
+   return new(mem_ctx) ir_assignment(column_ref, src, NULL, write_mask);
+}
+
+
+/**
+ * Generate inline code for a matrix constructor
+ *
+ * The generated constructor code will consist of a temporary variable
+ * declaration of the same type as the constructor.  A sequence of assignments
+ * from constructor parameters to the temporary will follow.
+ *
+ * \return
+ * An \c ir_dereference_variable of the temporary generated in the constructor
+ * body.
+ */
+ir_rvalue *
+emit_inline_matrix_constructor(const glsl_type *type,
+                               exec_list *instructions,
+                               exec_list *parameters,
+                               void *ctx)
+{
+   assert(!parameters->is_empty());
+
+   ir_variable *var = new(ctx) ir_variable(type, "mat_ctor", ir_var_temporary);
+   instructions->push_tail(var);
+
+   /* There are three kinds of matrix constructors.
+    *
+    * - Construct a matrix from a single scalar by replicating that scalar
+    *   along the diagonal of the matrix and setting all other components to
+    *   zero.
+    *
+    * - Construct a matrix from an arbitrary combination of vectors and
+    *   scalars.  The components of the constructor parameters are assigned
+    *   to the matrix in column-major order until the matrix is full.
+    *
+    * - Construct a matrix from a single matrix.  The source matrix is copied
+    *   to the upper left portion of the constructed matrix, and the remaining
+    *   elements take values from the identity matrix.
+    */
+   ir_rvalue *const first_param = (ir_rvalue *) parameters->head;
+   if (single_scalar_parameter(parameters)) {
+      /* Assign the scalar to the X component of a vec4, and fill the
+       * remaining components with zero.
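+       *
+       * As illustration (example ours): for `mat3 m = mat3(2.0)` the
+       * temporary holds (2.0, 0.0, 0.0, 0.0), and each column below reads
+       * it through a swizzle that lands the 2.0 on the diagonal.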
+       */
+      glsl_base_type param_base_type = first_param->type->base_type;
+      assert(param_base_type == GLSL_TYPE_FLOAT ||
+             param_base_type == GLSL_TYPE_DOUBLE);
+      ir_variable *rhs_var =
+         new(ctx) ir_variable(glsl_type::get_instance(param_base_type, 4, 1),
+                              "mat_ctor_vec",
+                              ir_var_temporary);
+      instructions->push_tail(rhs_var);
+
+      ir_constant_data zero;
+      for (unsigned i = 0; i < 4; i++)
+         if (param_base_type == GLSL_TYPE_FLOAT)
+            zero.f[i] = 0.0;
+         else
+            zero.d[i] = 0.0;
+
+      ir_instruction *inst =
+         new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var),
+                                new(ctx) ir_constant(rhs_var->type, &zero),
+                                NULL);
+      instructions->push_tail(inst);
+
+      ir_dereference *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
+
+      inst = new(ctx) ir_assignment(rhs_ref, first_param, NULL, 0x01);
+      instructions->push_tail(inst);
+
+      /* Assign the temporary vector to each column of the destination matrix
+       * with a swizzle that puts the X component on the diagonal of the
+       * matrix.  In some cases this may mean that the X component does not
+       * get assigned into the column at all (i.e., when the matrix has more
+       * columns than rows).
+       */
+      static const unsigned rhs_swiz[4][4] = {
+         { 0, 1, 1, 1 },
+         { 1, 0, 1, 1 },
+         { 1, 1, 0, 1 },
+         { 1, 1, 1, 0 }
+      };
+
+      const unsigned cols_to_init = MIN2(type->matrix_columns,
+                                         type->vector_elements);
+      for (unsigned i = 0; i < cols_to_init; i++) {
+         ir_constant *const col_idx = new(ctx) ir_constant(i);
+         ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
+
+         ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
+         ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i],
+                                                    type->vector_elements);
+
+         inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
+         instructions->push_tail(inst);
+      }
+
+      for (unsigned i = cols_to_init; i < type->matrix_columns; i++) {
+         ir_constant *const col_idx = new(ctx) ir_constant(i);
+         ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx);
+
+         ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var);
+         ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1,
+                                                    type->vector_elements);
+
+         inst = new(ctx) ir_assignment(col_ref, rhs, NULL);
+         instructions->push_tail(inst);
+      }
+   } else if (first_param->type->is_matrix()) {
+      /* From page 50 (page 56 of the PDF) of the GLSL 1.50 spec:
+       *
+       *     "If a matrix is constructed from a matrix, then each component
+       *     (column i, row j) in the result that has a corresponding
+       *     component (column i, row j) in the argument will be initialized
+       *     from there.  All other components will be initialized to the
+       *     identity matrix.  If a matrix argument is given to a matrix
+       *     constructor, it is an error to have any other arguments."
+       */
+      assert(first_param->next->is_tail_sentinel());
+      ir_rvalue *const src_matrix = first_param;
+
+      /* If the source matrix is smaller, pre-initialize the relevant parts
+       * of the destination matrix to the identity matrix.
+       */
+      if ((src_matrix->type->matrix_columns < var->type->matrix_columns)
+          || (src_matrix->type->vector_elements < var->type->vector_elements)) {
+
+         /* If the source matrix has fewer rows, every column of the
+          * destination must be initialized.  Otherwise only the columns in
+          * the destination that do not exist in the source must be
+          * initialized.
+          */
+         unsigned col =
+            (src_matrix->type->vector_elements < var->type->vector_elements)
+            ? 0 : src_matrix->type->matrix_columns;
+         const glsl_type *const col_type = var->type->column_type();
+         for (/* empty */; col < var->type->matrix_columns; col++) {
+            ir_constant_data ident;
+
+            ident.f[0] = 0.0;
+            ident.f[1] = 0.0;
+            ident.f[2] = 0.0;
+            ident.f[3] = 0.0;
+
+            ident.f[col] = 1.0;
+
+            ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident);
+
+            ir_rvalue *const lhs =
+               new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col));
+
+            ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL);
+            instructions->push_tail(inst);
+         }
+      }
+
+      /* Assign columns from the source matrix to the destination matrix.
+       *
+       * Since the parameter will be used in the RHS of multiple assignments,
+       * generate a temporary and copy the parameter there.
+       */
+      ir_variable *const rhs_var =
+         new(ctx) ir_variable(first_param->type, "mat_ctor_mat",
+                              ir_var_temporary);
+      instructions->push_tail(rhs_var);
+
+      ir_dereference *const rhs_var_ref =
+         new(ctx) ir_dereference_variable(rhs_var);
+      ir_instruction *const inst =
+         new(ctx) ir_assignment(rhs_var_ref, first_param, NULL);
+      instructions->push_tail(inst);
+
+      const unsigned last_row = MIN2(src_matrix->type->vector_elements,
+                                     var->type->vector_elements);
+      const unsigned last_col = MIN2(src_matrix->type->matrix_columns,
+                                     var->type->matrix_columns);
+
+      unsigned swiz[4] = { 0, 0, 0, 0 };
+      for (unsigned i = 1; i < last_row; i++)
+         swiz[i] = i;
+
+      const unsigned write_mask = (1U << last_row) - 1;
+
+      for (unsigned i = 0; i < last_col; i++) {
+         ir_dereference *const lhs =
+            new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i));
+         ir_rvalue *const rhs_col =
+            new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i));
+
+         /* If one matrix has columns that are smaller than the columns of the
+          * other matrix, wrap the column access of the larger with a swizzle
+          * so that the LHS and RHS of the assignment have the same size (and
+          * therefore have the same type).
+          *
+          * It would be perfectly valid to unconditionally generate the
+          * swizzles, but this will typically result in a more compact IR
+          * tree.
+          */
+         ir_rvalue *rhs;
+         if (lhs->type->vector_elements != rhs_col->type->vector_elements) {
+            rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row);
+         } else {
+            rhs = rhs_col;
+         }
+
+         ir_instruction *inst =
+            new(ctx) ir_assignment(lhs, rhs, NULL, write_mask);
+         instructions->push_tail(inst);
+      }
+   } else {
+      const unsigned cols = type->matrix_columns;
+      const unsigned rows = type->vector_elements;
+      unsigned remaining_slots = rows * cols;
+      unsigned col_idx = 0;
+      unsigned row_idx = 0;
+
+      foreach_in_list(ir_rvalue, rhs, parameters) {
+         unsigned rhs_components = rhs->type->components();
+         unsigned rhs_base = 0;
+
+         if (remaining_slots == 0)
+            break;
+
+         /* Since the parameter might be used in the RHS of two assignments,
+          * generate a temporary and copy the parameter there.
+          */
+         ir_variable *rhs_var =
+            new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary);
+         instructions->push_tail(rhs_var);
+
+         ir_dereference *rhs_var_ref =
+            new(ctx) ir_dereference_variable(rhs_var);
+         ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL);
+         instructions->push_tail(inst);
+
+         do {
+            /* Assign the current parameter to as many components of the
+             * matrix as it will fill.
+             *
+             * NOTE: A single vector parameter can span two matrix columns.
+             * A single vec4, for example, can completely fill a mat2.
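+             *
+             * Illustration (ours): for `mat2 m = mat2(v)` with a vec4 `v`,
+             * the first pass below assigns v.xy to column 0, row_idx wraps,
+             * and the second pass assigns v.zw to column 1.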
+             */
+            unsigned count = MIN2(rows - row_idx,
+                                  rhs_components - rhs_base);
+
+            rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var);
+            ir_instruction *inst = assign_to_matrix_column(var, col_idx,
+                                                           row_idx,
+                                                           rhs_var_ref,
+                                                           rhs_base,
+                                                           count, ctx);
+            instructions->push_tail(inst);
+            rhs_base += count;
+            row_idx += count;
+            remaining_slots -= count;
+
+            /* Sometimes there is still data left in the parameter and
+             * components left to be set in the destination, but in another
+             * column.
+             */
+            if (row_idx >= rows) {
+               row_idx = 0;
+               col_idx++;
+            }
+         } while (remaining_slots > 0 && rhs_base < rhs_components);
+      }
+   }
+
+   return new(ctx) ir_dereference_variable(var);
+}
+
+
+ir_rvalue *
+emit_inline_record_constructor(const glsl_type *type,
+                               exec_list *instructions,
+                               exec_list *parameters,
+                               void *mem_ctx)
+{
+   ir_variable *const var =
+      new(mem_ctx) ir_variable(type, "record_ctor", ir_var_temporary);
+   ir_dereference_variable *const d =
+      new(mem_ctx) ir_dereference_variable(var);
+
+   instructions->push_tail(var);
+
+   exec_node *node = parameters->head;
+   for (unsigned i = 0; i < type->length; i++) {
+      assert(!node->is_tail_sentinel());
+
+      ir_dereference *const lhs =
+         new(mem_ctx) ir_dereference_record(d->clone(mem_ctx, NULL),
+                                            type->fields.structure[i].name);
+
+      ir_rvalue *const rhs = ((ir_instruction *) node)->as_rvalue();
+      assert(rhs != NULL);
+
+      ir_instruction *const assign =
+         new(mem_ctx) ir_assignment(lhs, rhs, NULL);
+
+      instructions->push_tail(assign);
+      node = node->next;
+   }
+
+   return d;
+}
+
+
+static ir_rvalue *
+process_record_constructor(exec_list *instructions,
+                           const glsl_type *constructor_type,
+                           YYLTYPE *loc, exec_list *parameters,
+                           struct _mesa_glsl_parse_state *state)
+{
+   void *ctx = state;
+   exec_list actual_parameters;
+
+   process_parameters(instructions, &actual_parameters,
+                      parameters, state);
+
+   exec_node *node = actual_parameters.head;
+   for (unsigned i = 0; i < constructor_type->length; i++) {
+      ir_rvalue *ir = (ir_rvalue *) node;
+
+      if (node->is_tail_sentinel()) {
+         _mesa_glsl_error(loc, state,
+                          "insufficient parameters to constructor for `%s'",
+                          constructor_type->name);
+         return ir_rvalue::error_value(ctx);
+      }
+
+      if (apply_implicit_conversion(constructor_type->fields.structure[i].type,
+                                    ir, state)) {
+         node->replace_with(ir);
+      } else {
+         _mesa_glsl_error(loc, state,
+                          "parameter type mismatch in constructor for `%s.%s' "
+                          "(%s vs %s)",
+                          constructor_type->name,
+                          constructor_type->fields.structure[i].name,
+                          ir->type->name,
+                          constructor_type->fields.structure[i].type->name);
+         return ir_rvalue::error_value(ctx);
+      }
+
+      node = node->next;
+   }
+
+   if (!node->is_tail_sentinel()) {
+      _mesa_glsl_error(loc, state, "too many parameters in constructor "
+                       "for `%s'", constructor_type->name);
+      return ir_rvalue::error_value(ctx);
+   }
+
+   ir_rvalue *const constant =
+      constant_record_constructor(constructor_type, &actual_parameters,
+                                  state);
+
+   return (constant != NULL)
+      ? constant
+      : emit_inline_record_constructor(constructor_type, instructions,
+                                       &actual_parameters, state);
+}
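+
+/* For illustration (example ours, not part of the original patch): given
+ * `struct S { float a; int b; };`, the checks above accept `S(1.0, 2)` and
+ * also `S(1, 2)` (the literal 1 is implicitly converted to float), while
+ * `S(1.0)` and `S(1.0, 2, 3)` are rejected with the errors above.
+ */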
+
+ir_rvalue *
+ast_function_expression::handle_method(exec_list *instructions,
+                                       struct _mesa_glsl_parse_state *state)
+{
+   const ast_expression *field = subexpressions[0];
+   ir_rvalue *op;
+   ir_rvalue *result;
+   void *ctx = state;
+   /* Handle "method calls" in GLSL 1.20 and later - namely, array.length() */
+   YYLTYPE loc = get_location();
+   state->check_version(120, 300, &loc, "methods not supported");
+
+   const char *method;
+   method = field->primary_expression.identifier;
+
+   op = field->subexpressions[0]->hir(instructions, state);
+   if (strcmp(method, "length") == 0) {
+      if (!this->expressions.is_empty()) {
+         _mesa_glsl_error(&loc, state, "length method takes no arguments");
+         goto fail;
+      }
+
+      if (op->type->is_array()) {
+         if (op->type->is_unsized_array()) {
+            if (!state->has_shader_storage_buffer_objects()) {
+               _mesa_glsl_error(&loc, state,
+                                "length called on unsized array only "
+                                "available with "
+                                "ARB_shader_storage_buffer_object");
+            }
+            /* Calculate the length of an unsized array at run time. */
+            result = new(ctx) ir_expression(ir_unop_ssbo_unsized_array_length,
+                                            op);
+         } else {
+            result = new(ctx) ir_constant(op->type->array_size());
+         }
+      } else if (op->type->is_vector()) {
+         if (state->has_420pack()) {
+            /* .length() returns int. */
+            result = new(ctx) ir_constant((int) op->type->vector_elements);
+         } else {
+            _mesa_glsl_error(&loc, state,
+                             "length method on vector only available "
+                             "with ARB_shading_language_420pack");
+            goto fail;
+         }
+      } else if (op->type->is_matrix()) {
+         if (state->has_420pack()) {
+            /* .length() returns int. */
+            result = new(ctx) ir_constant((int) op->type->matrix_columns);
+         } else {
+            _mesa_glsl_error(&loc, state,
+                             "length method on matrix only available "
+                             "with ARB_shading_language_420pack");
+            goto fail;
+         }
+      } else {
+         _mesa_glsl_error(&loc, state, "length called on scalar");
+         goto fail;
+      }
+   } else {
+      _mesa_glsl_error(&loc, state, "unknown method: `%s'", method);
+      goto fail;
+   }
+   return result;
+fail:
+   return ir_rvalue::error_value(ctx);
+}
+
+ir_rvalue *
+ast_function_expression::hir(exec_list *instructions,
+                             struct _mesa_glsl_parse_state *state)
+{
+   void *ctx = state;
+   /* There are three sorts of function calls.
+    *
+    * 1. constructors - The first subexpression is an ast_type_specifier.
+    * 2. methods - Only the .length() method of array types.
+    * 3. functions - Calls to regular old functions.
+    *
+    */
+   if (is_constructor()) {
+      const ast_type_specifier *type =
+         (ast_type_specifier *) subexpressions[0];
+      YYLTYPE loc = type->get_location();
+      const char *name;
+
+      const glsl_type *const constructor_type = type->glsl_type(& name, state);
+
+      /* constructor_type can be NULL if a variable with the same name as the
+       * structure has come into scope.
+       */
+      if (constructor_type == NULL) {
+         _mesa_glsl_error(& loc, state, "unknown type `%s' (structure name "
+                          "may be shadowed by a variable with the same name)",
+                          type->type_name);
+         return ir_rvalue::error_value(ctx);
+      }
+
+
+      /* Constructors for opaque types are illegal.
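+       *
+       * For example (ours): `sampler2D(0)`, or a constructor of a struct
+       * type that contains a sampler member, is rejected here.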
+ */ + if (constructor_type->contains_opaque()) { + _mesa_glsl_error(& loc, state, "cannot construct opaque type `%s'", + constructor_type->name); + return ir_rvalue::error_value(ctx); + } + + if (constructor_type->is_array()) { + if (!state->check_version(120, 300, &loc, + "array constructors forbidden")) { + return ir_rvalue::error_value(ctx); + } + + return process_array_constructor(instructions, constructor_type, + & loc, &this->expressions, state); + } + + + /* There are two kinds of constructor calls. Constructors for arrays and + * structures must have the exact number of arguments with matching types + * in the correct order. These constructors follow essentially the same + * type matching rules as functions. + * + * Constructors for built-in language types, such as mat4 and vec2, are + * free form. The only requirements are that the parameters must provide + * enough values of the correct scalar type and that no arguments are + * given past the last used argument. + * + * When using the C-style initializer syntax from GLSL 4.20, constructors + * must have the exact number of arguments with matching types in the + * correct order. + */ + if (constructor_type->is_record()) { + return process_record_constructor(instructions, constructor_type, + &loc, &this->expressions, + state); + } + + if (!constructor_type->is_numeric() && !constructor_type->is_boolean()) + return ir_rvalue::error_value(ctx); + + /* Total number of components of the type being constructed. */ + const unsigned type_components = constructor_type->components(); + + /* Number of components from parameters that have actually been + * consumed. This is used to perform several kinds of error checking. + */ + unsigned components_used = 0; + + unsigned matrix_parameters = 0; + unsigned nonmatrix_parameters = 0; + exec_list actual_parameters; + + foreach_list_typed(ast_node, ast, link, &this->expressions) { + ir_rvalue *result = ast->hir(instructions, state); + + /* From page 50 (page 56 of the PDF) of the GLSL 1.50 spec: + * + * "It is an error to provide extra arguments beyond this + * last used argument." + */ + if (components_used >= type_components) { + _mesa_glsl_error(& loc, state, "too many parameters to `%s' " + "constructor", + constructor_type->name); + return ir_rvalue::error_value(ctx); + } + + if (!result->type->is_numeric() && !result->type->is_boolean()) { + _mesa_glsl_error(& loc, state, "cannot construct `%s' from a " + "non-numeric data type", + constructor_type->name); + return ir_rvalue::error_value(ctx); + } + + /* Count the number of matrix and nonmatrix parameters. This + * is used below to enforce some of the constructor rules. + */ + if (result->type->is_matrix()) + matrix_parameters++; + else + nonmatrix_parameters++; + + actual_parameters.push_tail(result); + components_used += result->type->components(); + } + + /* From page 28 (page 34 of the PDF) of the GLSL 1.10 spec: + * + * "It is an error to construct matrices from other matrices. This + * is reserved for future use." + */ + if (matrix_parameters > 0 + && constructor_type->is_matrix() + && !state->check_version(120, 100, &loc, + "cannot construct `%s' from a matrix", + constructor_type->name)) { + return ir_rvalue::error_value(ctx); + } + + /* From page 50 (page 56 of the PDF) of the GLSL 1.50 spec: + * + * "If a matrix argument is given to a matrix constructor, it is + * an error to have any other arguments." 
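+       *
+       * Illustration (ours): `mat4(mat2(1.0))` is accepted, but
+       * `mat4(mat2(1.0), 1.0, 1.0)` must be rejected by the check below.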
+       */
+      if ((matrix_parameters > 0)
+          && ((matrix_parameters + nonmatrix_parameters) > 1)
+          && constructor_type->is_matrix()) {
+         _mesa_glsl_error(& loc, state, "for matrix `%s' constructor, "
+                          "matrix must be only parameter",
+                          constructor_type->name);
+         return ir_rvalue::error_value(ctx);
+      }
+
+      /* From page 28 (page 34 of the PDF) of the GLSL 1.10 spec:
+       *
+       *    "In these cases, there must be enough components provided in the
+       *    arguments to provide an initializer for every component in the
+       *    constructed value."
+       */
+      if (components_used < type_components && components_used != 1
+          && matrix_parameters == 0) {
+         _mesa_glsl_error(& loc, state, "too few components to construct "
+                          "`%s'",
+                          constructor_type->name);
+         return ir_rvalue::error_value(ctx);
+      }
+
+      /* Matrices can never be consumed as is by any constructor but matrix
+       * constructors.  If the constructor type is not matrix, always break
+       * the matrix up into a series of column vectors.
+       */
+      if (!constructor_type->is_matrix()) {
+         foreach_in_list_safe(ir_rvalue, matrix, &actual_parameters) {
+            if (!matrix->type->is_matrix())
+               continue;
+
+            /* Create a temporary containing the matrix. */
+            ir_variable *var = new(ctx) ir_variable(matrix->type, "matrix_tmp",
+                                                    ir_var_temporary);
+            instructions->push_tail(var);
+            instructions->push_tail(
+               new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var),
+                                      matrix, NULL));
+            var->constant_value = matrix->constant_expression_value();
+
+            /* Replace the matrix with dereferences of its columns. */
+            for (int i = 0; i < matrix->type->matrix_columns; i++) {
+               matrix->insert_before(
+                  new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i)));
+            }
+            matrix->remove();
+         }
+      }
+
+      bool all_parameters_are_constant = true;
+
+      /* Type cast each parameter and, if possible, fold constants. */
+      foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) {
+         const glsl_type *desired_type =
+            glsl_type::get_instance(constructor_type->base_type,
+                                    ir->type->vector_elements,
+                                    ir->type->matrix_columns);
+         ir_rvalue *result = convert_component(ir, desired_type);
+
+         /* Attempt to convert the parameter to a constant valued expression.
+          * After doing so, track whether or not all the parameters to the
+          * constructor are trivially constant valued expressions.
+          */
+         ir_rvalue *const constant = result->constant_expression_value();
+
+         if (constant != NULL)
+            result = constant;
+         else
+            all_parameters_are_constant = false;
+
+         if (result != ir) {
+            ir->replace_with(result);
+         }
+      }
+
+      /* If all of the parameters are trivially constant, create a
+       * constant representing the complete collection of parameters.
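+       *
+       * For example (ours): both arguments of `vec2(1.0, 2.0)` are
+       * trivially constant, so the constructor collapses to a single
+       * ir_constant rather than a temporary plus assignments.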
+ */ + if (all_parameters_are_constant) { + return new(ctx) ir_constant(constructor_type, &actual_parameters); + } else if (constructor_type->is_scalar()) { + return dereference_component((ir_rvalue *) actual_parameters.head, + 0); + } else if (constructor_type->is_vector()) { + return emit_inline_vector_constructor(constructor_type, + instructions, + &actual_parameters, + ctx); + } else { + assert(constructor_type->is_matrix()); + return emit_inline_matrix_constructor(constructor_type, + instructions, + &actual_parameters, + ctx); + } + } else if (subexpressions[0]->oper == ast_field_selection) { + return handle_method(instructions, state); + } else { + const ast_expression *id = subexpressions[0]; + const char *func_name; + YYLTYPE loc = get_location(); + exec_list actual_parameters; + ir_variable *sub_var = NULL; + ir_rvalue *array_idx = NULL; + + process_parameters(instructions, &actual_parameters, &this->expressions, + state); + + if (id->oper == ast_array_index) { + array_idx = generate_array_index(ctx, instructions, state, loc, + id->subexpressions[0], + id->subexpressions[1], &func_name, + &actual_parameters); + } else { + func_name = id->primary_expression.identifier; + } + + ir_function_signature *sig = + match_function_by_name(func_name, &actual_parameters, state); + + ir_rvalue *value = NULL; + if (sig == NULL) { + sig = match_subroutine_by_name(func_name, &actual_parameters, state, &sub_var); + } + + if (sig == NULL) { + no_matching_function_error(func_name, &loc, &actual_parameters, state); + value = ir_rvalue::error_value(ctx); + } else if (!verify_parameter_modes(state, sig, actual_parameters, this->expressions)) { + /* an error has already been emitted */ + value = ir_rvalue::error_value(ctx); + } else { + value = generate_call(instructions, sig, &actual_parameters, sub_var, array_idx, state); + if (!value) { + ir_variable *const tmp = new(ctx) ir_variable(glsl_type::void_type, + "void_var", + ir_var_temporary); + instructions->push_tail(tmp); + value = new(ctx) ir_dereference_variable(tmp); + } + } + + return value; + } + + unreachable("not reached"); +} + +bool +ast_function_expression::has_sequence_subexpression() const +{ + foreach_list_typed(const ast_node, ast, link, &this->expressions) { + if (ast->has_sequence_subexpression()) + return true; + } + + return false; +} + +ir_rvalue * +ast_aggregate_initializer::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + YYLTYPE loc = this->get_location(); + + if (!this->constructor_type) { + _mesa_glsl_error(&loc, state, "type of C-style initializer unknown"); + return ir_rvalue::error_value(ctx); + } + const glsl_type *const constructor_type = this->constructor_type; + + if (!state->has_420pack()) { + _mesa_glsl_error(&loc, state, "C-style initialization requires the " + "GL_ARB_shading_language_420pack extension"); + return ir_rvalue::error_value(ctx); + } + + if (constructor_type->is_array()) { + return process_array_constructor(instructions, constructor_type, &loc, + &this->expressions, state); + } + + if (constructor_type->is_record()) { + return process_record_constructor(instructions, constructor_type, &loc, + &this->expressions, state); + } + + return process_vec_mat_constructor(instructions, constructor_type, &loc, + &this->expressions, state); +} diff --git a/src/compiler/glsl/ast_to_hir.cpp b/src/compiler/glsl/ast_to_hir.cpp new file mode 100644 index 00000000000..dfd31966eb0 --- /dev/null +++ b/src/compiler/glsl/ast_to_hir.cpp @@ -0,0 +1,7583 @@ +/* + * Copyright © 2010 
Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ast_to_hir.cpp
+ * Convert abstract syntax to high-level intermediate representation (HIR).
+ *
+ * During the conversion to HIR, the majority of the semantic checking is
+ * performed on the program.  This includes:
+ *
+ *    * Symbol table management
+ *    * Type checking
+ *    * Function binding
+ *
+ * The majority of this work could be done during parsing, and the parser could
+ * probably generate HIR directly.  However, this results in frequent changes
+ * to the parser code.  Since we do not assume that every system this compiler
+ * is built on will have Flex and Bison installed, we have to store the code
+ * generated by these tools in our version control system.  In other parts of
+ * the system we've seen problems where a parser was changed but the generated
+ * code was not committed, merge conflicts were created because two developers
+ * had slightly different versions of Bison installed, etc.
+ *
+ * I have also noticed that running Bison-generated parsers in GDB is very
+ * irritating.  When you get a segfault on '$$ = $1->foo', you can't very
+ * well 'print $1' in GDB.
+ *
+ * As a result, my preference is to put as little C code as possible in the
+ * parser (and lexer) sources.
+ */ + +#include "glsl_symbol_table.h" +#include "glsl_parser_extras.h" +#include "ast.h" +#include "compiler/glsl_types.h" +#include "program/hash_table.h" +#include "main/shaderobj.h" +#include "ir.h" +#include "ir_builder.h" + +using namespace ir_builder; + +static void +detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, + exec_list *instructions); +static void +remove_per_vertex_blocks(exec_list *instructions, + _mesa_glsl_parse_state *state, ir_variable_mode mode); + +/** + * Visitor class that finds the first instance of any write-only variable that + * is ever read, if any + */ +class read_from_write_only_variable_visitor : public ir_hierarchical_visitor +{ +public: + read_from_write_only_variable_visitor() : found(NULL) + { + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (this->in_assignee) + return visit_continue; + + ir_variable *var = ir->variable_referenced(); + /* We can have image_write_only set on both images and buffer variables, + * but in the former there is a distinction between reads from + * the variable itself (write_only) and from the memory they point to + * (image_write_only), while in the case of buffer variables there is + * no such distinction, that is why this check here is limited to + * buffer variables alone. + */ + if (!var || var->data.mode != ir_var_shader_storage) + return visit_continue; + + if (var->data.image_write_only) { + found = var; + return visit_stop; + } + + return visit_continue; + } + + ir_variable *get_variable() { + return found; + } + + virtual ir_visitor_status visit_enter(ir_expression *ir) + { + /* .length() doesn't actually read anything */ + if (ir->operation == ir_unop_ssbo_unsized_array_length) + return visit_continue_with_parent; + + return visit_continue; + } + +private: + ir_variable *found; +}; + +void +_mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) +{ + _mesa_glsl_initialize_variables(instructions, state); + + state->symbols->separate_function_namespace = state->language_version == 110; + + state->current_function = NULL; + + state->toplevel_ir = instructions; + + state->gs_input_prim_type_specified = false; + state->tcs_output_vertices_specified = false; + state->cs_input_local_size_specified = false; + + /* Section 4.2 of the GLSL 1.20 specification states: + * "The built-in functions are scoped in a scope outside the global scope + * users declare global variables in. That is, a shader's global scope, + * available for user-defined functions and global variables, is nested + * inside the scope containing the built-in functions." + * + * Since built-in functions like ftransform() access built-in variables, + * it follows that those must be in the outer scope as well. + * + * We push scope here to create this nesting effect...but don't pop. + * This way, a shader's globals are still in the symbol table for use + * by the linker. + */ + state->symbols->push_scope(); + + foreach_list_typed (ast_node, ast, link, & state->translation_unit) + ast->hir(instructions, state); + + detect_recursion_unlinked(state, instructions); + detect_conflicting_assignments(state, instructions); + + state->toplevel_ir = NULL; + + /* Move all of the variable declarations to the front of the IR list, and + * reverse the order. This has the (intended!) side effect that vertex + * shader inputs and fragment shader outputs will appear in the IR in the + * same order that they appeared in the shader code. 
This results in the
+    * locations being assigned in the declared order.  Many (arguably buggy)
+    * applications depend on this behavior, and it matches what nearly all
+    * other drivers do.
+    */
+   foreach_in_list_safe(ir_instruction, node, instructions) {
+      ir_variable *const var = node->as_variable();
+
+      if (var == NULL)
+         continue;
+
+      var->remove();
+      instructions->push_head(var);
+   }
+
+   /* Figure out if gl_FragCoord is actually used in fragment shader */
+   ir_variable *const var = state->symbols->get_variable("gl_FragCoord");
+   if (var != NULL)
+      state->fs_uses_gl_fragcoord = var->data.used;
+
+   /* From section 7.1 (Built-In Language Variables) of the GLSL 4.10 spec:
+    *
+    *    If multiple shaders using members of a built-in block belonging to
+    *    the same interface are linked together in the same program, they
+    *    must all redeclare the built-in block in the same way, as described
+    *    in section 4.3.7 "Interface Blocks" for interface block matching, or
+    *    a link error will result.
+    *
+    * The phrase "using members of a built-in block" implies that if two
+    * shaders are linked together and one of them *does not use* any members
+    * of the built-in block, then that shader does not need to have a matching
+    * redeclaration of the built-in block.
+    *
+    * This appears to be a clarification of the behaviour established for
+    * gl_PerVertex by GLSL 1.50, therefore implement it regardless of GLSL
+    * version.
+    *
+    * The definition of "interface" in section 4.3.7 that applies here is as
+    * follows:
+    *
+    *    The boundary between adjacent programmable pipeline stages: This
+    *    spans all the outputs in all compilation units of the first stage
+    *    and all the inputs in all compilation units of the second stage.
+    *
+    * Therefore this rule applies to both inter- and intra-stage linking.
+    *
+    * The easiest way to implement this is to check whether the shader uses
+    * gl_PerVertex right after ast-to-ir conversion, and if it doesn't, simply
+    * remove all the relevant variable declarations from the IR, so that the
+    * linker won't see them and complain about mismatches.
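+    *
+    * For example (ours): a vertex shader that never writes gl_Position or
+    * any other gl_PerVertex member gets its implicit gl_PerVertex
+    * declarations removed here instead of being matched at link time.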
+    */
+   remove_per_vertex_blocks(instructions, state, ir_var_shader_in);
+   remove_per_vertex_blocks(instructions, state, ir_var_shader_out);
+
+   /* Check that we don't have reads from write-only variables */
+   read_from_write_only_variable_visitor v;
+   v.run(instructions);
+   ir_variable *error_var = v.get_variable();
+   if (error_var) {
+      /* It would be nice to have proper location information, but for that
+       * we would need to check this as we process each kind of AST node
+       */
+      YYLTYPE loc;
+      memset(&loc, 0, sizeof(loc));
+      _mesa_glsl_error(&loc, state, "read from write-only variable `%s'",
+                       error_var->name);
+   }
+}
+
+
+static ir_expression_operation
+get_conversion_operation(const glsl_type *to, const glsl_type *from,
+                         struct _mesa_glsl_parse_state *state)
+{
+   switch (to->base_type) {
+   case GLSL_TYPE_FLOAT:
+      switch (from->base_type) {
+      case GLSL_TYPE_INT: return ir_unop_i2f;
+      case GLSL_TYPE_UINT: return ir_unop_u2f;
+      case GLSL_TYPE_DOUBLE: return ir_unop_d2f;
+      default: return (ir_expression_operation)0;
+      }
+
+   case GLSL_TYPE_UINT:
+      if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable)
+         return (ir_expression_operation)0;
+      switch (from->base_type) {
+      case GLSL_TYPE_INT: return ir_unop_i2u;
+      default: return (ir_expression_operation)0;
+      }
+
+   case GLSL_TYPE_DOUBLE:
+      if (!state->has_double())
+         return (ir_expression_operation)0;
+      switch (from->base_type) {
+      case GLSL_TYPE_INT: return ir_unop_i2d;
+      case GLSL_TYPE_UINT: return ir_unop_u2d;
+      case GLSL_TYPE_FLOAT: return ir_unop_f2d;
+      default: return (ir_expression_operation)0;
+      }
+
+   default: return (ir_expression_operation)0;
+   }
+}
+
+
+/**
+ * If a conversion is available, convert one operand to a different type
+ *
+ * The \c from \c ir_rvalue is converted "in place".
+ *
+ * \param to     Type that the operand is to be converted to
+ * \param from   Operand that is being converted
+ * \param state  GLSL compiler state
+ *
+ * \return
+ * If a conversion is possible (or unnecessary), \c true is returned.
+ * Otherwise \c false is returned.
+ */
+bool
+apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from,
+                          struct _mesa_glsl_parse_state *state)
+{
+   void *ctx = state;
+   if (to->base_type == from->type->base_type)
+      return true;
+
+   /* Prior to GLSL 1.20, there are no implicit conversions */
+   if (!state->is_version(120, 0))
+      return false;
+
+   /* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec:
+    *
+    *    "There are no implicit array or structure conversions.  For
+    *    example, an array of int cannot be implicitly converted to an
+    *    array of float."
+    */
+   if (!to->is_numeric() || !from->type->is_numeric())
+      return false;
+
+   /* We don't actually want the specific type `to`; we want a type with the
+    * same base type as `to`, but the same vector width as `from`.
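+    *
+    * For example (ours): converting an ivec3 operand where `to` is float
+    * must yield vec3, not float, so the lookup below keeps `from`'s
+    * vector width.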
+    */
+   to = glsl_type::get_instance(to->base_type, from->type->vector_elements,
+                                from->type->matrix_columns);
+
+   ir_expression_operation op = get_conversion_operation(to, from->type, state);
+   if (op) {
+      from = new(ctx) ir_expression(op, to, from, NULL);
+      return true;
+   } else {
+      return false;
+   }
+}
+
+
+static const struct glsl_type *
+arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b,
+                       bool multiply,
+                       struct _mesa_glsl_parse_state *state, YYLTYPE *loc)
+{
+   const glsl_type *type_a = value_a->type;
+   const glsl_type *type_b = value_b->type;
+
+   /* From GLSL 1.50 spec, page 56:
+    *
+    *    "The arithmetic binary operators add (+), subtract (-),
+    *    multiply (*), and divide (/) operate on integer and
+    *    floating-point scalars, vectors, and matrices."
+    */
+   if (!type_a->is_numeric() || !type_b->is_numeric()) {
+      _mesa_glsl_error(loc, state,
+                       "operands to arithmetic operators must be numeric");
+      return glsl_type::error_type;
+   }
+
+
+   /* "If one operand is floating-point based and the other is
+    *  not, then the conversions from Section 4.1.10 "Implicit
+    *  Conversions" are applied to the non-floating-point-based operand."
+    */
+   if (!apply_implicit_conversion(type_a, value_b, state)
+       && !apply_implicit_conversion(type_b, value_a, state)) {
+      _mesa_glsl_error(loc, state,
+                       "could not implicitly convert operands to "
+                       "arithmetic operator");
+      return glsl_type::error_type;
+   }
+   type_a = value_a->type;
+   type_b = value_b->type;
+
+   /* "If the operands are integer types, they must both be signed or
+    *  both be unsigned."
+    *
+    * From this rule and the preceding conversion it can be inferred that
+    * both types must be GLSL_TYPE_FLOAT, or GLSL_TYPE_UINT, or GLSL_TYPE_INT.
+    * The is_numeric check above already filtered out the case where either
+    * type is not one of these, so now the base types need only be tested for
+    * equality.
+    */
+   if (type_a->base_type != type_b->base_type) {
+      _mesa_glsl_error(loc, state,
+                       "base type mismatch for arithmetic operator");
+      return glsl_type::error_type;
+   }
+
+   /* "All arithmetic binary operators result in the same fundamental type
+    *  (signed integer, unsigned integer, or floating-point) as the
+    *  operands they operate on, after operand type conversion.  After
+    *  conversion, the following cases are valid
+    *
+    *  * The two operands are scalars. In this case the operation is
+    *    applied, resulting in a scalar."
+    */
+   if (type_a->is_scalar() && type_b->is_scalar())
+      return type_a;
+
+   /* "* One operand is a scalar, and the other is a vector or matrix.
+    *    In this case, the scalar operation is applied independently to each
+    *    component of the vector or matrix, resulting in the same size
+    *    vector or matrix."
+    */
+   if (type_a->is_scalar()) {
+      if (!type_b->is_scalar())
+         return type_b;
+   } else if (type_b->is_scalar()) {
+      return type_a;
+   }
+
+   /* All of the combinations of <scalar, scalar>, <vector, scalar>,
+    * <matrix, scalar>, <scalar, vector>, and <scalar, matrix> have been
+    * handled.
+    */
+   assert(!type_a->is_scalar());
+   assert(!type_b->is_scalar());
+
+   /* "* The two operands are vectors of the same size. In this case, the
+    *    operation is done component-wise resulting in the same size
+    *    vector."
+    */
+   if (type_a->is_vector() && type_b->is_vector()) {
+      if (type_a == type_b) {
+         return type_a;
+      } else {
+         _mesa_glsl_error(loc, state,
+                          "vector size mismatch for arithmetic operator");
+         return glsl_type::error_type;
+      }
+   }
+
+   /* All of the combinations of <scalar, scalar>, <vector, scalar>,
+    * <matrix, scalar>, <scalar, vector>, <scalar, matrix>, and
+    * <vector, vector> have been handled.  At least one of the operands must
+    * be matrix.
Further, since there are no integer matrix types, the base + * type of both operands must be float. + */ + assert(type_a->is_matrix() || type_b->is_matrix()); + assert(type_a->base_type == GLSL_TYPE_FLOAT || + type_a->base_type == GLSL_TYPE_DOUBLE); + assert(type_b->base_type == GLSL_TYPE_FLOAT || + type_b->base_type == GLSL_TYPE_DOUBLE); + + /* "* The operator is add (+), subtract (-), or divide (/), and the + * operands are matrices with the same number of rows and the same + * number of columns. In this case, the operation is done component- + * wise resulting in the same size matrix." + * * The operator is multiply (*), where both operands are matrices or + * one operand is a vector and the other a matrix. A right vector + * operand is treated as a column vector and a left vector operand as a + * row vector. In all these cases, it is required that the number of + * columns of the left operand is equal to the number of rows of the + * right operand. Then, the multiply (*) operation does a linear + * algebraic multiply, yielding an object that has the same number of + * rows as the left operand and the same number of columns as the right + * operand. Section 5.10 "Vector and Matrix Operations" explains in + * more detail how vectors and matrices are operated on." + */ + if (! multiply) { + if (type_a == type_b) + return type_a; + } else { + const glsl_type *type = glsl_type::get_mul_type(type_a, type_b); + + if (type == glsl_type::error_type) { + _mesa_glsl_error(loc, state, + "size mismatch for matrix multiplication"); + } + + return type; + } + + + /* "All other cases are illegal." + */ + _mesa_glsl_error(loc, state, "type mismatch"); + return glsl_type::error_type; +} + + +static const struct glsl_type * +unary_arithmetic_result_type(const struct glsl_type *type, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + /* From GLSL 1.50 spec, page 57: + * + * "The arithmetic unary operators negate (-), post- and pre-increment + * and decrement (-- and ++) operate on integer or floating-point + * values (including vectors and matrices). All unary operators work + * component-wise on their operands. These result with the same type + * they operated on." + */ + if (!type->is_numeric()) { + _mesa_glsl_error(loc, state, + "operands to arithmetic operators must be numeric"); + return glsl_type::error_type; + } + + return type; +} + +/** + * \brief Return the result type of a bit-logic operation. + * + * If the given types to the bit-logic operator are invalid, return + * glsl_type::error_type. + * + * \param value_a LHS of bit-logic op + * \param value_b RHS of bit-logic op + */ +static const struct glsl_type * +bit_logic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + ast_operators op, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + if (!state->check_bitwise_operations_allowed(loc)) { + return glsl_type::error_type; + } + + /* From page 50 (page 56 of PDF) of GLSL 1.30 spec: + * + * "The bitwise operators and (&), exclusive-or (^), and inclusive-or + * (|). The operands must be of type signed or unsigned integers or + * integer vectors." 
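+    *
+    * Illustration (ours): `5 & 3` and `uvec2(1u) | uvec2(2u)` are valid,
+    * while `1.5 & 2` must be rejected by the checks below.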
+ */ + if (!type_a->is_integer()) { + _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + if (!type_b->is_integer()) { + _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* Prior to GLSL 4.0 / GL_ARB_gpu_shader5, implicit conversions didn't + * make sense for bitwise operations, as they don't operate on floats. + * + * GLSL 4.0 added implicit int -> uint conversions, which are relevant + * here. It wasn't clear whether or not we should apply them to bitwise + * operations. However, Khronos has decided that they should in future + * language revisions. Applications also rely on this behavior. We opt + * to apply them in general, but issue a portability warning. + * + * See https://www.khronos.org/bugzilla/show_bug.cgi?id=1405 + */ + if (type_a->base_type != type_b->base_type) { + if (!apply_implicit_conversion(type_a, value_b, state) + && !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "`%s` operator", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } else { + _mesa_glsl_warning(loc, state, + "some implementations may not support implicit " + "int -> uint conversions for `%s' operators; " + "consider casting explicitly for portability", + ast_expression::operator_string(op)); + } + type_a = value_a->type; + type_b = value_b->type; + } + + /* "The fundamental types of the operands (signed or unsigned) must + * match," + */ + if (type_a->base_type != type_b->base_type) { + _mesa_glsl_error(loc, state, "operands of `%s' must have the same " + "base type", ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "The operands cannot be vectors of differing size." */ + if (type_a->is_vector() && + type_b->is_vector() && + type_a->vector_elements != type_b->vector_elements) { + _mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of " + "different sizes", ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "If one operand is a scalar and the other a vector, the scalar is + * applied component-wise to the vector, resulting in the same type as + * the vector. The fundamental types of the operands [...] will be the + * resulting fundamental type." + */ + if (type_a->is_scalar()) + return type_b; + else + return type_a; +} + +static const struct glsl_type * +modulus_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + if (!state->check_version(130, 300, loc, "operator '%%' is reserved")) { + return glsl_type::error_type; + } + + /* Section 5.9 (Expressions) of the GLSL 4.00 specification says: + * + * "The operator modulus (%) operates on signed or unsigned integers or + * integer vectors." + */ + if (!type_a->is_integer()) { + _mesa_glsl_error(loc, state, "LHS of operator %% must be an integer"); + return glsl_type::error_type; + } + if (!type_b->is_integer()) { + _mesa_glsl_error(loc, state, "RHS of operator %% must be an integer"); + return glsl_type::error_type; + } + + /* "If the fundamental types in the operands do not match, then the + * conversions from section 4.1.10 "Implicit Conversions" are applied + * to create matching types." 
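+    *
+    * For example (ours): in `7u % 3`, the int literal 3 is implicitly
+    * converted to uint so that the operand base types match.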
+ * + * Note that GLSL 4.00 (and GL_ARB_gpu_shader5) introduced implicit + * int -> uint conversion rules. Prior to that, there were no implicit + * conversions. So it's harmless to apply them universally - no implicit + * conversions will exist. If the types don't match, we'll receive false, + * and raise an error, satisfying the GLSL 1.50 spec, page 56: + * + * "The operand types must both be signed or unsigned." + */ + if (!apply_implicit_conversion(type_a, value_b, state) && + !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "modulus (%%) operator"); + return glsl_type::error_type; + } + type_a = value_a->type; + type_b = value_b->type; + + /* "The operands cannot be vectors of differing size. If one operand is + * a scalar and the other vector, then the scalar is applied component- + * wise to the vector, resulting in the same type as the vector. If both + * are vectors of the same size, the result is computed component-wise." + */ + if (type_a->is_vector()) { + if (!type_b->is_vector() + || (type_a->vector_elements == type_b->vector_elements)) + return type_a; + } else + return type_b; + + /* "The operator modulus (%) is not defined for any other data types + * (non-integer types)." + */ + _mesa_glsl_error(loc, state, "type mismatch"); + return glsl_type::error_type; +} + + +static const struct glsl_type * +relational_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + const glsl_type *type_a = value_a->type; + const glsl_type *type_b = value_b->type; + + /* From GLSL 1.50 spec, page 56: + * "The relational operators greater than (>), less than (<), greater + * than or equal (>=), and less than or equal (<=) operate only on + * scalar integer and scalar floating-point expressions." + */ + if (!type_a->is_numeric() + || !type_b->is_numeric() + || !type_a->is_scalar() + || !type_b->is_scalar()) { + _mesa_glsl_error(loc, state, + "operands to relational operators must be scalar and " + "numeric"); + return glsl_type::error_type; + } + + /* "Either the operands' types must match, or the conversions from + * Section 4.1.10 "Implicit Conversions" will be applied to the integer + * operand, after which the types must match." + */ + if (!apply_implicit_conversion(type_a, value_b, state) + && !apply_implicit_conversion(type_b, value_a, state)) { + _mesa_glsl_error(loc, state, + "could not implicitly convert operands to " + "relational operator"); + return glsl_type::error_type; + } + type_a = value_a->type; + type_b = value_b->type; + + if (type_a->base_type != type_b->base_type) { + _mesa_glsl_error(loc, state, "base type mismatch"); + return glsl_type::error_type; + } + + /* "The result is scalar Boolean." + */ + return glsl_type::bool_type; +} + +/** + * \brief Return the result type of a bit-shift operation. + * + * If the given types to the bit-shift operator are invalid, return + * glsl_type::error_type. + * + * \param type_a Type of LHS of bit-shift op + * \param type_b Type of RHS of bit-shift op + */ +static const struct glsl_type * +shift_result_type(const struct glsl_type *type_a, + const struct glsl_type *type_b, + ast_operators op, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + if (!state->check_bitwise_operations_allowed(loc)) { + return glsl_type::error_type; + } + + /* From page 50 (page 56 of the PDF) of the GLSL 1.30 spec: + * + * "The shift operators (<<) and (>>). 
For both operators, the operands + * must be signed or unsigned integers or integer vectors. One operand + * can be signed while the other is unsigned." + */ + if (!type_a->is_integer()) { + _mesa_glsl_error(loc, state, "LHS of operator %s must be an integer or " + "integer vector", ast_expression::operator_string(op)); + return glsl_type::error_type; + + } + if (!type_b->is_integer()) { + _mesa_glsl_error(loc, state, "RHS of operator %s must be an integer or " + "integer vector", ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "If the first operand is a scalar, the second operand has to be + * a scalar as well." + */ + if (type_a->is_scalar() && !type_b->is_scalar()) { + _mesa_glsl_error(loc, state, "if the first operand of %s is scalar, the " + "second must be scalar as well", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* If both operands are vectors, check that they have same number of + * elements. + */ + if (type_a->is_vector() && + type_b->is_vector() && + type_a->vector_elements != type_b->vector_elements) { + _mesa_glsl_error(loc, state, "vector operands to operator %s must " + "have same number of elements", + ast_expression::operator_string(op)); + return glsl_type::error_type; + } + + /* "In all cases, the resulting type will be the same type as the left + * operand." + */ + return type_a; +} + +/** + * Returns the innermost array index expression in an rvalue tree. + * This is the largest indexing level -- if an array of blocks, then + * it is the block index rather than an indexing expression for an + * array-typed member of an array of blocks. + */ +static ir_rvalue * +find_innermost_array_index(ir_rvalue *rv) +{ + ir_dereference_array *last = NULL; + while (rv) { + if (rv->as_dereference_array()) { + last = rv->as_dereference_array(); + rv = last->array; + } else if (rv->as_dereference_record()) + rv = rv->as_dereference_record()->record; + else if (rv->as_swizzle()) + rv = rv->as_swizzle()->val; + else + rv = NULL; + } + + if (last) + return last->array_index; + + return NULL; +} + +/** + * Validates that a value can be assigned to a location with a specified type + * + * Validates that \c rhs can be assigned to some location. If the types are + * not an exact match but an automatic conversion is possible, \c rhs will be + * converted. + * + * \return + * \c NULL if \c rhs cannot be assigned to a location with type \c lhs_type. + * Otherwise the actual RHS to be assigned will be returned. This may be + * \c rhs, or it may be \c rhs after some type conversion. + * + * \note + * In addition to being used for assignments, this function is used to + * type-check return values. + */ +static ir_rvalue * +validate_assignment(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_rvalue *lhs, + ir_rvalue *rhs, bool is_initializer) +{ + /* If there is already some error in the RHS, just return it. Anything + * else will lead to an avalanche of error message back to the user. + */ + if (rhs->type->is_error()) + return rhs; + + /* In the Tessellation Control Shader: + * If a per-vertex output variable is used as an l-value, it is an error + * if the expression indicating the vertex number is not the identifier + * `gl_InvocationID`. + */ + if (state->stage == MESA_SHADER_TESS_CTRL) { + ir_variable *var = lhs->variable_referenced(); + if (var->data.mode == ir_var_shader_out && !var->data.patch) { + ir_rvalue *index = find_innermost_array_index(lhs); + ir_variable *index_var = index ? 
index->variable_referenced() : NULL; + if (!index_var || strcmp(index_var->name, "gl_InvocationID") != 0) { + _mesa_glsl_error(&loc, state, + "Tessellation control shader outputs can only " + "be indexed by gl_InvocationID"); + return NULL; + } + } + } + + /* If the types are identical, the assignment can trivially proceed. + */ + if (rhs->type == lhs->type) + return rhs; + + /* If the array element types are the same and the LHS is unsized, + * the assignment is okay for initializers embedded in variable + * declarations. + * + * Note: Whole-array assignments are not permitted in GLSL 1.10, but this + * is handled by ir_dereference::is_lvalue. + */ + const glsl_type *lhs_t = lhs->type; + const glsl_type *rhs_t = rhs->type; + bool unsized_array = false; + while(lhs_t->is_array()) { + if (rhs_t == lhs_t) + break; /* the rest of the inner arrays match so break out early */ + if (!rhs_t->is_array()) { + unsized_array = false; + break; /* number of dimensions mismatch */ + } + if (lhs_t->length == rhs_t->length) { + lhs_t = lhs_t->fields.array; + rhs_t = rhs_t->fields.array; + continue; + } else if (lhs_t->is_unsized_array()) { + unsized_array = true; + } else { + unsized_array = false; + break; /* sized array mismatch */ + } + lhs_t = lhs_t->fields.array; + rhs_t = rhs_t->fields.array; + } + if (unsized_array) { + if (is_initializer) { + return rhs; + } else { + _mesa_glsl_error(&loc, state, + "implicitly sized arrays cannot be assigned"); + return NULL; + } + } + + /* Check for implicit conversion in GLSL 1.20 */ + if (apply_implicit_conversion(lhs->type, rhs, state)) { + if (rhs->type == lhs->type) + return rhs; + } + + _mesa_glsl_error(&loc, state, + "%s of type %s cannot be assigned to " + "variable of type %s", + is_initializer ? "initializer" : "value", + rhs->type->name, lhs->type->name); + + return NULL; +} + +static void +mark_whole_array_access(ir_rvalue *access) +{ + ir_dereference_variable *deref = access->as_dereference_variable(); + + if (deref && deref->var) { + deref->var->data.max_array_access = deref->type->length - 1; + } +} + +static bool +do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, + const char *non_lvalue_description, + ir_rvalue *lhs, ir_rvalue *rhs, + ir_rvalue **out_rvalue, bool needs_rvalue, + bool is_initializer, + YYLTYPE lhs_loc) +{ + void *ctx = state; + bool error_emitted = (lhs->type->is_error() || rhs->type->is_error()); + + ir_variable *lhs_var = lhs->variable_referenced(); + if (lhs_var) + lhs_var->data.assigned = true; + + if (!error_emitted) { + if (non_lvalue_description != NULL) { + _mesa_glsl_error(&lhs_loc, state, + "assignment to %s", + non_lvalue_description); + error_emitted = true; + } else if (lhs_var != NULL && (lhs_var->data.read_only || + (lhs_var->data.mode == ir_var_shader_storage && + lhs_var->data.image_read_only))) { + /* We can have image_read_only set on both images and buffer variables, + * but in the former there is a distinction between assignments to + * the variable itself (read_only) and to the memory they point to + * (image_read_only), while in the case of buffer variables there is + * no such distinction, that is why this check here is limited to + * buffer variables alone. 
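+          *
+          * Illustration (ours): assigning to `f` declared as
+          *
+          *    buffer B { readonly float f; };
+          *
+          * must be rejected here even though `f` is otherwise an lvalue.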
+ */ + _mesa_glsl_error(&lhs_loc, state, + "assignment to read-only variable '%s'", + lhs_var->name); + error_emitted = true; + } else if (lhs->type->is_array() && + !state->check_version(120, 300, &lhs_loc, + "whole array assignment forbidden")) { + /* From page 32 (page 38 of the PDF) of the GLSL 1.10 spec: + * + * "Other binary or unary expressions, non-dereferenced + * arrays, function names, swizzles with repeated fields, + * and constants cannot be l-values." + * + * The restriction on arrays is lifted in GLSL 1.20 and GLSL ES 3.00. + */ + error_emitted = true; + } else if (!lhs->is_lvalue()) { + _mesa_glsl_error(& lhs_loc, state, "non-lvalue in assignment"); + error_emitted = true; + } + } + + ir_rvalue *new_rhs = + validate_assignment(state, lhs_loc, lhs, rhs, is_initializer); + if (new_rhs != NULL) { + rhs = new_rhs; + + /* If the LHS array was not declared with a size, it takes it size from + * the RHS. If the LHS is an l-value and a whole array, it must be a + * dereference of a variable. Any other case would require that the LHS + * is either not an l-value or not a whole array. + */ + if (lhs->type->is_unsized_array()) { + ir_dereference *const d = lhs->as_dereference(); + + assert(d != NULL); + + ir_variable *const var = d->variable_referenced(); + + assert(var != NULL); + + if (var->data.max_array_access >= unsigned(rhs->type->array_size())) { + /* FINISHME: This should actually log the location of the RHS. */ + _mesa_glsl_error(& lhs_loc, state, "array size must be > %u due to " + "previous access", + var->data.max_array_access); + } + + var->type = glsl_type::get_array_instance(lhs->type->fields.array, + rhs->type->array_size()); + d->type = var->type; + } + if (lhs->type->is_array()) { + mark_whole_array_access(rhs); + mark_whole_array_access(lhs); + } + } + + /* Most callers of do_assignment (assign, add_assign, pre_inc/dec, + * but not post_inc) need the converted assigned value as an rvalue + * to handle things like: + * + * i = j += 1; + */ + if (needs_rvalue) { + ir_variable *var = new(ctx) ir_variable(rhs->type, "assignment_tmp", + ir_var_temporary); + instructions->push_tail(var); + instructions->push_tail(assign(var, rhs)); + + if (!error_emitted) { + ir_dereference_variable *deref_var = new(ctx) ir_dereference_variable(var); + instructions->push_tail(new(ctx) ir_assignment(lhs, deref_var)); + } + ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var); + + *out_rvalue = rvalue; + } else { + if (!error_emitted) + instructions->push_tail(new(ctx) ir_assignment(lhs, rhs)); + *out_rvalue = NULL; + } + + return error_emitted; +} + +static ir_rvalue * +get_lvalue_copy(exec_list *instructions, ir_rvalue *lvalue) +{ + void *ctx = ralloc_parent(lvalue); + ir_variable *var; + + var = new(ctx) ir_variable(lvalue->type, "_post_incdec_tmp", + ir_var_temporary); + instructions->push_tail(var); + + instructions->push_tail(new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var), + lvalue)); + + return new(ctx) ir_dereference_variable(var); +} + + +ir_rvalue * +ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) +{ + (void) instructions; + (void) state; + + return NULL; +} + +bool +ast_node::has_sequence_subexpression() const +{ + return false; +} + +void +ast_function_expression::hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + (void)hir(instructions, state); +} + +void +ast_aggregate_initializer::hir_no_rvalue(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + (void)hir(instructions, state); 
+}
+
+static ir_rvalue *
+do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1)
+{
+   int join_op;
+   ir_rvalue *cmp = NULL;
+
+   if (operation == ir_binop_all_equal)
+      join_op = ir_binop_logic_and;
+   else
+      join_op = ir_binop_logic_or;
+
+   switch (op0->type->base_type) {
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_BOOL:
+   case GLSL_TYPE_DOUBLE:
+      return new(mem_ctx) ir_expression(operation, op0, op1);
+
+   case GLSL_TYPE_ARRAY: {
+      for (unsigned int i = 0; i < op0->type->length; i++) {
+         ir_rvalue *e0, *e1, *result;
+
+         e0 = new(mem_ctx) ir_dereference_array(op0->clone(mem_ctx, NULL),
+                                                new(mem_ctx) ir_constant(i));
+         e1 = new(mem_ctx) ir_dereference_array(op1->clone(mem_ctx, NULL),
+                                                new(mem_ctx) ir_constant(i));
+         result = do_comparison(mem_ctx, operation, e0, e1);
+
+         if (cmp) {
+            cmp = new(mem_ctx) ir_expression(join_op, cmp, result);
+         } else {
+            cmp = result;
+         }
+      }
+
+      mark_whole_array_access(op0);
+      mark_whole_array_access(op1);
+      break;
+   }
+
+   case GLSL_TYPE_STRUCT: {
+      for (unsigned int i = 0; i < op0->type->length; i++) {
+         ir_rvalue *e0, *e1, *result;
+         const char *field_name = op0->type->fields.structure[i].name;
+
+         e0 = new(mem_ctx) ir_dereference_record(op0->clone(mem_ctx, NULL),
+                                                 field_name);
+         e1 = new(mem_ctx) ir_dereference_record(op1->clone(mem_ctx, NULL),
+                                                 field_name);
+         result = do_comparison(mem_ctx, operation, e0, e1);
+
+         if (cmp) {
+            cmp = new(mem_ctx) ir_expression(join_op, cmp, result);
+         } else {
+            cmp = result;
+         }
+      }
+      break;
+   }
+
+   case GLSL_TYPE_ERROR:
+   case GLSL_TYPE_VOID:
+   case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_INTERFACE:
+   case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_SUBROUTINE:
+      /* I assume a comparison of a struct containing a sampler just
+       * ignores the sampler present in the type.
+       */
+      break;
+   }
+
+   if (cmp == NULL)
+      cmp = new(mem_ctx) ir_constant(true);
+
+   return cmp;
+}
+
+/* For logical operations, we want to ensure that the operands are
+ * scalar booleans.  If an operand is not, emit an error and return a
+ * constant boolean to avoid triggering cascading error messages.
+ */
+ir_rvalue *
+get_scalar_boolean_operand(exec_list *instructions,
+                           struct _mesa_glsl_parse_state *state,
+                           ast_expression *parent_expr,
+                           int operand,
+                           const char *operand_name,
+                           bool *error_emitted)
+{
+   ast_expression *expr = parent_expr->subexpressions[operand];
+   void *ctx = state;
+   ir_rvalue *val = expr->hir(instructions, state);
+
+   if (val->type->is_boolean() && val->type->is_scalar())
+      return val;
+
+   if (!*error_emitted) {
+      YYLTYPE loc = expr->get_location();
+      _mesa_glsl_error(&loc, state, "%s of `%s' must be scalar boolean",
+                       operand_name,
+                       parent_expr->operator_string(parent_expr->oper));
+      *error_emitted = true;
+   }
+
+   return new(ctx) ir_constant(true);
+}
+
+/**
+ * If name refers to a builtin array whose maximum allowed size is less than
+ * size, report an error.  Otherwise do nothing.
+ */
+void
+check_builtin_array_max_size(const char *name, unsigned size,
+                             YYLTYPE loc, struct _mesa_glsl_parse_state *state)
+{
+   if ((strcmp("gl_TexCoord", name) == 0)
+       && (size > state->Const.MaxTextureCoords)) {
+      /* From page 54 (page 60 of the PDF) of the GLSL 1.20 spec:
+       *
+       *    "The size [of gl_TexCoord] can be at most
+       *    gl_MaxTextureCoords."
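+       *
+       * For illustration (an added, hypothetical example): with an
+       * implementation limit of 8 texture coordinates, a redeclaration
+       * such as
+       *
+       *    varying vec4 gl_TexCoord[16];
+       *
+       * must be rejected here.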
+       */
+      _mesa_glsl_error(&loc, state, "`gl_TexCoord' array size cannot "
+                       "be larger than gl_MaxTextureCoords (%u)",
+                       state->Const.MaxTextureCoords);
+   } else if (strcmp("gl_ClipDistance", name) == 0
+              && size > state->Const.MaxClipPlanes) {
+      /* From section 7.1 (Vertex Shader Special Variables) of the
+       * GLSL 1.30 spec:
+       *
+       *   "The gl_ClipDistance array is predeclared as unsized and
+       *   must be sized by the shader either redeclaring it with a
+       *   size or indexing it only with integral constant
+       *   expressions. ... The size can be at most
+       *   gl_MaxClipDistances."
+       */
+      _mesa_glsl_error(&loc, state, "`gl_ClipDistance' array size cannot "
+                       "be larger than gl_MaxClipDistances (%u)",
+                       state->Const.MaxClipPlanes);
+   }
+}
+
+/**
+ * Create the constant 1, of a type which is appropriate for incrementing and
+ * decrementing values of the given GLSL type.  For example, if type is vec4,
+ * this creates a constant value of 1.0 having type float.
+ *
+ * If the given type is invalid for increment and decrement operators, return
+ * a floating point 1--the error will be detected later.
+ */
+static ir_rvalue *
+constant_one_for_inc_dec(void *ctx, const glsl_type *type)
+{
+   switch (type->base_type) {
+   case GLSL_TYPE_UINT:
+      return new(ctx) ir_constant((unsigned) 1);
+   case GLSL_TYPE_INT:
+      return new(ctx) ir_constant(1);
+   default:
+   case GLSL_TYPE_FLOAT:
+      return new(ctx) ir_constant(1.0f);
+   }
+}
+
+ir_rvalue *
+ast_expression::hir(exec_list *instructions,
+                    struct _mesa_glsl_parse_state *state)
+{
+   return do_hir(instructions, state, true);
+}
+
+void
+ast_expression::hir_no_rvalue(exec_list *instructions,
+                              struct _mesa_glsl_parse_state *state)
+{
+   do_hir(instructions, state, false);
+}
+
+ir_rvalue *
+ast_expression::do_hir(exec_list *instructions,
+                       struct _mesa_glsl_parse_state *state,
+                       bool needs_rvalue)
+{
+   void *ctx = state;
+   static const int operations[AST_NUM_OPERATORS] = {
+      -1,               /* ast_assign doesn't convert to ir_expression. */
+      -1,               /* ast_plus doesn't convert to ir_expression. */
+      ir_unop_neg,
+      ir_binop_add,
+      ir_binop_sub,
+      ir_binop_mul,
+      ir_binop_div,
+      ir_binop_mod,
+      ir_binop_lshift,
+      ir_binop_rshift,
+      ir_binop_less,
+      ir_binop_greater,
+      ir_binop_lequal,
+      ir_binop_gequal,
+      ir_binop_all_equal,
+      ir_binop_any_nequal,
+      ir_binop_bit_and,
+      ir_binop_bit_xor,
+      ir_binop_bit_or,
+      ir_unop_bit_not,
+      ir_binop_logic_and,
+      ir_binop_logic_xor,
+      ir_binop_logic_or,
+      ir_unop_logic_not,
+
+      /* Note: The following block of expression types actually convert
+       * to multiple IR instructions.
+       */
+      ir_binop_mul,     /* ast_mul_assign */
+      ir_binop_div,     /* ast_div_assign */
+      ir_binop_mod,     /* ast_mod_assign */
+      ir_binop_add,     /* ast_add_assign */
+      ir_binop_sub,     /* ast_sub_assign */
+      ir_binop_lshift,  /* ast_ls_assign */
+      ir_binop_rshift,  /* ast_rs_assign */
+      ir_binop_bit_and, /* ast_and_assign */
+      ir_binop_bit_xor, /* ast_xor_assign */
+      ir_binop_bit_or,  /* ast_or_assign */
+
+      -1,               /* ast_conditional doesn't convert to ir_expression. */
+      ir_binop_add,     /* ast_pre_inc. */
+      ir_binop_sub,     /* ast_pre_dec. */
+      ir_binop_add,     /* ast_post_inc. */
+      ir_binop_sub,     /* ast_post_dec. */
+      -1,               /* ast_field_selection doesn't conv to ir_expression. */
+      -1,               /* ast_array_index doesn't convert to ir_expression. */
+      -1,               /* ast_function_call doesn't conv to ir_expression. */
+      -1,               /* ast_identifier doesn't convert to ir_expression. */
+      -1,               /* ast_int_constant doesn't convert to ir_expression. */
+      -1,               /* ast_uint_constant doesn't conv to ir_expression.
*/ + -1, /* ast_float_constant doesn't conv to ir_expression. */ + -1, /* ast_bool_constant doesn't conv to ir_expression. */ + -1, /* ast_sequence doesn't convert to ir_expression. */ + }; + ir_rvalue *result = NULL; + ir_rvalue *op[3]; + const struct glsl_type *type; /* a temporary variable for switch cases */ + bool error_emitted = false; + YYLTYPE loc; + + loc = this->get_location(); + + switch (this->oper) { + case ast_aggregate: + assert(!"ast_aggregate: Should never get here."); + break; + + case ast_assign: { + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0], op[1], &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_plus: + op[0] = this->subexpressions[0]->hir(instructions, state); + + type = unary_arithmetic_result_type(op[0]->type, state, & loc); + + error_emitted = type->is_error(); + + result = op[0]; + break; + + case ast_neg: + op[0] = this->subexpressions[0]->hir(instructions, state); + + type = unary_arithmetic_result_type(op[0]->type, state, & loc); + + error_emitted = type->is_error(); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], NULL); + break; + + case ast_add: + case ast_sub: + case ast_mul: + case ast_div: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = arithmetic_result_type(op[0], op[1], + (this->oper == ast_mul), + state, & loc); + error_emitted = type->is_error(); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + break; + + case ast_mod: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = modulus_result_type(op[0], op[1], state, &loc); + + assert(operations[this->oper] == ir_binop_mod); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = type->is_error(); + break; + + case ast_lshift: + case ast_rshift: + if (!state->check_bitwise_operations_allowed(&loc)) { + error_emitted = true; + } + + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + type = shift_result_type(op[0]->type, op[1]->type, this->oper, state, + &loc); + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); + break; + + case ast_less: + case ast_greater: + case ast_lequal: + case ast_gequal: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + type = relational_result_type(op[0], op[1], state, & loc); + + /* The relational operators must either generate an error or result + * in a scalar boolean. See page 57 of the GLSL 1.50 spec. 
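+       *
+       * Illustrative GLSL (an added sketch): the relational operators
+       * apply only to scalars; vectors use the builtin functions:
+       *
+       *    float a, b;   bool  s = a < b;           // scalar result
+       *    vec3  u, v;   bvec3 m = lessThan(u, v);  // "u < v" is an error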
+ */ + assert(type->is_error() + || ((type->base_type == GLSL_TYPE_BOOL) + && type->is_scalar())); + + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = type->is_error(); + break; + + case ast_nequal: + case ast_equal: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + + /* From page 58 (page 64 of the PDF) of the GLSL 1.50 spec: + * + * "The equality operators equal (==), and not equal (!=) + * operate on all types. They result in a scalar Boolean. If + * the operand types do not match, then there must be a + * conversion from Section 4.1.10 "Implicit Conversions" + * applied to one operand that can make them match, in which + * case this conversion is done." + */ + + if (op[0]->type == glsl_type::void_type || op[1]->type == glsl_type::void_type) { + _mesa_glsl_error(& loc, state, "`%s': wrong operand types: " + "no operation `%1$s' exists that takes a left-hand " + "operand of type 'void' or a right operand of type " + "'void'", (this->oper == ast_equal) ? "==" : "!="); + error_emitted = true; + } else if ((!apply_implicit_conversion(op[0]->type, op[1], state) + && !apply_implicit_conversion(op[1]->type, op[0], state)) + || (op[0]->type != op[1]->type)) { + _mesa_glsl_error(& loc, state, "operands of `%s' must have the same " + "type", (this->oper == ast_equal) ? "==" : "!="); + error_emitted = true; + } else if ((op[0]->type->is_array() || op[1]->type->is_array()) && + !state->check_version(120, 300, &loc, + "array comparisons forbidden")) { + error_emitted = true; + } else if ((op[0]->type->contains_opaque() || + op[1]->type->contains_opaque())) { + _mesa_glsl_error(&loc, state, "opaque type comparisons forbidden"); + error_emitted = true; + } + + if (error_emitted) { + result = new(ctx) ir_constant(false); + } else { + result = do_comparison(ctx, operations[this->oper], op[0], op[1]); + assert(result->type == glsl_type::bool_type); + } + break; + + case ast_bit_and: + case ast_bit_xor: + case ast_bit_or: + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = this->subexpressions[1]->hir(instructions, state); + type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc); + result = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); + break; + + case ast_bit_not: + op[0] = this->subexpressions[0]->hir(instructions, state); + + if (!state->check_bitwise_operations_allowed(&loc)) { + error_emitted = true; + } + + if (!op[0]->type->is_integer()) { + _mesa_glsl_error(&loc, state, "operand of `~' must be an integer"); + error_emitted = true; + } + + type = error_emitted ? 
glsl_type::error_type : op[0]->type; + result = new(ctx) ir_expression(ir_unop_bit_not, type, op[0], NULL); + break; + + case ast_logic_and: { + exec_list rhs_instructions; + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, + "LHS", &error_emitted); + op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1, + "RHS", &error_emitted); + + if (rhs_instructions.is_empty()) { + result = new(ctx) ir_expression(ir_binop_logic_and, op[0], op[1]); + type = result->type; + } else { + ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type, + "and_tmp", + ir_var_temporary); + instructions->push_tail(tmp); + + ir_if *const stmt = new(ctx) ir_if(op[0]); + instructions->push_tail(stmt); + + stmt->then_instructions.append_list(&rhs_instructions); + ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const then_assign = + new(ctx) ir_assignment(then_deref, op[1]); + stmt->then_instructions.push_tail(then_assign); + + ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const else_assign = + new(ctx) ir_assignment(else_deref, new(ctx) ir_constant(false)); + stmt->else_instructions.push_tail(else_assign); + + result = new(ctx) ir_dereference_variable(tmp); + type = tmp->type; + } + break; + } + + case ast_logic_or: { + exec_list rhs_instructions; + op[0] = get_scalar_boolean_operand(instructions, state, this, 0, + "LHS", &error_emitted); + op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1, + "RHS", &error_emitted); + + if (rhs_instructions.is_empty()) { + result = new(ctx) ir_expression(ir_binop_logic_or, op[0], op[1]); + type = result->type; + } else { + ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type, + "or_tmp", + ir_var_temporary); + instructions->push_tail(tmp); + + ir_if *const stmt = new(ctx) ir_if(op[0]); + instructions->push_tail(stmt); + + ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const then_assign = + new(ctx) ir_assignment(then_deref, new(ctx) ir_constant(true)); + stmt->then_instructions.push_tail(then_assign); + + stmt->else_instructions.append_list(&rhs_instructions); + ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp); + ir_assignment *const else_assign = + new(ctx) ir_assignment(else_deref, op[1]); + stmt->else_instructions.push_tail(else_assign); + + result = new(ctx) ir_dereference_variable(tmp); + type = tmp->type; + } + break; + } + + case ast_logic_xor: + /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec: + * + * "The logical binary operators and (&&), or ( | | ), and + * exclusive or (^^). They operate only on two Boolean + * expressions and result in a Boolean expression." 
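+       *
+       * An added sketch of the only well-formed shape:
+       *
+       *    bool a, b;
+       *    bool c = a ^^ b;   // ok: two scalar boolean operands
+       *    vec2 d, e;
+       *    bool f = d ^^ e;   // error: operands are not scalar booleans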
+       */
+      op[0] = get_scalar_boolean_operand(instructions, state, this, 0, "LHS",
+                                         &error_emitted);
+      op[1] = get_scalar_boolean_operand(instructions, state, this, 1, "RHS",
+                                         &error_emitted);
+
+      result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type,
+                                      op[0], op[1]);
+      break;
+
+   case ast_logic_not:
+      op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
+                                         "operand", &error_emitted);
+
+      result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type,
+                                      op[0], NULL);
+      break;
+
+   case ast_mul_assign:
+   case ast_div_assign:
+   case ast_add_assign:
+   case ast_sub_assign: {
+      op[0] = this->subexpressions[0]->hir(instructions, state);
+      op[1] = this->subexpressions[1]->hir(instructions, state);
+
+      type = arithmetic_result_type(op[0], op[1],
+                                    (this->oper == ast_mul_assign),
+                                    state, & loc);
+
+      ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
+                                                   op[0], op[1]);
+
+      error_emitted =
+         do_assignment(instructions, state,
+                       this->subexpressions[0]->non_lvalue_description,
+                       op[0]->clone(ctx, NULL), temp_rhs,
+                       &result, needs_rvalue, false,
+                       this->subexpressions[0]->get_location());
+
+      /* GLSL 1.10 does not allow array assignment.  However, we don't have to
+       * explicitly test for this because none of the binary expression
+       * operators allow array operands either.
+       */
+
+      break;
+   }
+
+   case ast_mod_assign: {
+      op[0] = this->subexpressions[0]->hir(instructions, state);
+      op[1] = this->subexpressions[1]->hir(instructions, state);
+
+      type = modulus_result_type(op[0], op[1], state, &loc);
+
+      assert(operations[this->oper] == ir_binop_mod);
+
+      ir_rvalue *temp_rhs;
+      temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
+                                        op[0], op[1]);
+
+      error_emitted =
+         do_assignment(instructions, state,
+                       this->subexpressions[0]->non_lvalue_description,
+                       op[0]->clone(ctx, NULL), temp_rhs,
+                       &result, needs_rvalue, false,
+                       this->subexpressions[0]->get_location());
+      break;
+   }
+
+   case ast_ls_assign:
+   case ast_rs_assign: {
+      op[0] = this->subexpressions[0]->hir(instructions, state);
+      op[1] = this->subexpressions[1]->hir(instructions, state);
+      type = shift_result_type(op[0]->type, op[1]->type, this->oper, state,
+                               &loc);
+      ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper],
+                                                   type, op[0], op[1]);
+      error_emitted =
+         do_assignment(instructions, state,
+                       this->subexpressions[0]->non_lvalue_description,
+                       op[0]->clone(ctx, NULL), temp_rhs,
+                       &result, needs_rvalue, false,
+                       this->subexpressions[0]->get_location());
+      break;
+   }
+
+   case ast_and_assign:
+   case ast_xor_assign:
+   case ast_or_assign: {
+      op[0] = this->subexpressions[0]->hir(instructions, state);
+      op[1] = this->subexpressions[1]->hir(instructions, state);
+      type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc);
+      ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper],
+                                                   type, op[0], op[1]);
+      error_emitted =
+         do_assignment(instructions, state,
+                       this->subexpressions[0]->non_lvalue_description,
+                       op[0]->clone(ctx, NULL), temp_rhs,
+                       &result, needs_rvalue, false,
+                       this->subexpressions[0]->get_location());
+      break;
+   }
+
+   case ast_conditional: {
+      /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec:
+       *
+       *    "The ternary selection operator (?:). It operates on three
+       *    expressions (exp1 ? exp2 : exp3). This operator evaluates the
+       *    first expression, which must result in a scalar Boolean."
+       */
+      op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
+                                         "condition", &error_emitted);
+
+      /* The ?:
operator is implemented by generating an anonymous temporary + * followed by an if-statement. The last instruction in each branch of + * the if-statement assigns a value to the anonymous temporary. This + * temporary is the r-value of the expression. + */ + exec_list then_instructions; + exec_list else_instructions; + + op[1] = this->subexpressions[1]->hir(&then_instructions, state); + op[2] = this->subexpressions[2]->hir(&else_instructions, state); + + /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec: + * + * "The second and third expressions can be any type, as + * long their types match, or there is a conversion in + * Section 4.1.10 "Implicit Conversions" that can be applied + * to one of the expressions to make their types match. This + * resulting matching type is the type of the entire + * expression." + */ + if ((!apply_implicit_conversion(op[1]->type, op[2], state) + && !apply_implicit_conversion(op[2]->type, op[1], state)) + || (op[1]->type != op[2]->type)) { + YYLTYPE loc = this->subexpressions[1]->get_location(); + + _mesa_glsl_error(& loc, state, "second and third operands of ?: " + "operator must have matching types"); + error_emitted = true; + type = glsl_type::error_type; + } else { + type = op[1]->type; + } + + /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec: + * + * "The second and third expressions must be the same type, but can + * be of any type other than an array." + */ + if (type->is_array() && + !state->check_version(120, 300, &loc, + "second and third operands of ?: operator " + "cannot be arrays")) { + error_emitted = true; + } + + /* From section 4.1.7 of the GLSL 4.50 spec (Opaque Types): + * + * "Except for array indexing, structure member selection, and + * parentheses, opaque variables are not allowed to be operands in + * expressions; such use results in a compile-time error." + */ + if (type->contains_opaque()) { + _mesa_glsl_error(&loc, state, "opaque variables cannot be operands " + "of the ?: operator"); + error_emitted = true; + } + + ir_constant *cond_val = op[0]->constant_expression_value(); + + if (then_instructions.is_empty() + && else_instructions.is_empty() + && cond_val != NULL) { + result = cond_val->value.b[0] ? op[1] : op[2]; + } else { + /* The copy to conditional_tmp reads the whole array. */ + if (type->is_array()) { + mark_whole_array_access(op[1]); + mark_whole_array_access(op[2]); + } + + ir_variable *const tmp = + new(ctx) ir_variable(type, "conditional_tmp", ir_var_temporary); + instructions->push_tail(tmp); + + ir_if *const stmt = new(ctx) ir_if(op[0]); + instructions->push_tail(stmt); + + then_instructions.move_nodes_to(& stmt->then_instructions); + ir_dereference *const then_deref = + new(ctx) ir_dereference_variable(tmp); + ir_assignment *const then_assign = + new(ctx) ir_assignment(then_deref, op[1]); + stmt->then_instructions.push_tail(then_assign); + + else_instructions.move_nodes_to(& stmt->else_instructions); + ir_dereference *const else_deref = + new(ctx) ir_dereference_variable(tmp); + ir_assignment *const else_assign = + new(ctx) ir_assignment(else_deref, op[2]); + stmt->else_instructions.push_tail(else_assign); + + result = new(ctx) ir_dereference_variable(tmp); + } + break; + } + + case ast_pre_inc: + case ast_pre_dec: { + this->non_lvalue_description = (this->oper == ast_pre_inc) + ? 
"pre-increment operation" : "pre-decrement operation"; + + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = constant_one_for_inc_dec(ctx, op[0]->type); + + type = arithmetic_result_type(op[0], op[1], false, state, & loc); + + ir_rvalue *temp_rhs; + temp_rhs = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &result, needs_rvalue, false, + this->subexpressions[0]->get_location()); + break; + } + + case ast_post_inc: + case ast_post_dec: { + this->non_lvalue_description = (this->oper == ast_post_inc) + ? "post-increment operation" : "post-decrement operation"; + op[0] = this->subexpressions[0]->hir(instructions, state); + op[1] = constant_one_for_inc_dec(ctx, op[0]->type); + + error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); + + type = arithmetic_result_type(op[0], op[1], false, state, & loc); + + ir_rvalue *temp_rhs; + temp_rhs = new(ctx) ir_expression(operations[this->oper], type, + op[0], op[1]); + + /* Get a temporary of a copy of the lvalue before it's modified. + * This may get thrown away later. + */ + result = get_lvalue_copy(instructions, op[0]->clone(ctx, NULL)); + + ir_rvalue *junk_rvalue; + error_emitted = + do_assignment(instructions, state, + this->subexpressions[0]->non_lvalue_description, + op[0]->clone(ctx, NULL), temp_rhs, + &junk_rvalue, false, false, + this->subexpressions[0]->get_location()); + + break; + } + + case ast_field_selection: + result = _mesa_ast_field_selection_to_hir(this, instructions, state); + break; + + case ast_array_index: { + YYLTYPE index_loc = subexpressions[1]->get_location(); + + op[0] = subexpressions[0]->hir(instructions, state); + op[1] = subexpressions[1]->hir(instructions, state); + + result = _mesa_ast_array_index_to_hir(ctx, state, op[0], op[1], + loc, index_loc); + + if (result->type->is_error()) + error_emitted = true; + + break; + } + + case ast_unsized_array_dim: + assert(!"ast_unsized_array_dim: Should never get here."); + break; + + case ast_function_call: + /* Should *NEVER* get here. ast_function_call should always be handled + * by ast_function_expression::hir. + */ + assert(0); + break; + + case ast_identifier: { + /* ast_identifier can appear several places in a full abstract syntax + * tree. This particular use must be at location specified in the grammar + * as 'variable_identifier'. 
+ */ + ir_variable *var = + state->symbols->get_variable(this->primary_expression.identifier); + + if (var != NULL) { + var->data.used = true; + result = new(ctx) ir_dereference_variable(var); + } else { + _mesa_glsl_error(& loc, state, "`%s' undeclared", + this->primary_expression.identifier); + + result = ir_rvalue::error_value(ctx); + error_emitted = true; + } + break; + } + + case ast_int_constant: + result = new(ctx) ir_constant(this->primary_expression.int_constant); + break; + + case ast_uint_constant: + result = new(ctx) ir_constant(this->primary_expression.uint_constant); + break; + + case ast_float_constant: + result = new(ctx) ir_constant(this->primary_expression.float_constant); + break; + + case ast_bool_constant: + result = new(ctx) ir_constant(bool(this->primary_expression.bool_constant)); + break; + + case ast_double_constant: + result = new(ctx) ir_constant(this->primary_expression.double_constant); + break; + + case ast_sequence: { + /* It should not be possible to generate a sequence in the AST without + * any expressions in it. + */ + assert(!this->expressions.is_empty()); + + /* The r-value of a sequence is the last expression in the sequence. If + * the other expressions in the sequence do not have side-effects (and + * therefore add instructions to the instruction list), they get dropped + * on the floor. + */ + exec_node *previous_tail_pred = NULL; + YYLTYPE previous_operand_loc = loc; + + foreach_list_typed (ast_node, ast, link, &this->expressions) { + /* If one of the operands of comma operator does not generate any + * code, we want to emit a warning. At each pass through the loop + * previous_tail_pred will point to the last instruction in the + * stream *before* processing the previous operand. Naturally, + * instructions->tail_pred will point to the last instruction in the + * stream *after* processing the previous operand. If the two + * pointers match, then the previous operand had no effect. + * + * The warning behavior here differs slightly from GCC. GCC will + * only emit a warning if none of the left-hand operands have an + * effect. However, it will emit a warning for each. I believe that + * there are some cases in C (especially with GCC extensions) where + * it is useful to have an intermediate step in a sequence have no + * effect, but I don't think these cases exist in GLSL. Either way, + * it would be a giant hassle to replicate that behavior. + */ + if (previous_tail_pred == instructions->tail_pred) { + _mesa_glsl_warning(&previous_operand_loc, state, + "left-hand operand of comma expression has " + "no effect"); + } + + /* tail_pred is directly accessed instead of using the get_tail() + * method for performance reasons. get_tail() has extra code to + * return NULL when the list is empty. We don't care about that + * here, so using tail_pred directly is fine. + */ + previous_tail_pred = instructions->tail_pred; + previous_operand_loc = ast->get_location(); + + result = ast->hir(instructions, state); + } + + /* Any errors should have already been emitted in the loop above. + */ + error_emitted = true; + break; + } + } + type = NULL; /* use result->type, not type. 
*/
+   assert(result != NULL || !needs_rvalue);
+
+   if (result && result->type->is_error() && !error_emitted)
+      _mesa_glsl_error(& loc, state, "type mismatch");
+
+   return result;
+}
+
+bool
+ast_expression::has_sequence_subexpression() const
+{
+   switch (this->oper) {
+   case ast_plus:
+   case ast_neg:
+   case ast_bit_not:
+   case ast_logic_not:
+   case ast_pre_inc:
+   case ast_pre_dec:
+   case ast_post_inc:
+   case ast_post_dec:
+      return this->subexpressions[0]->has_sequence_subexpression();
+
+   case ast_assign:
+   case ast_add:
+   case ast_sub:
+   case ast_mul:
+   case ast_div:
+   case ast_mod:
+   case ast_lshift:
+   case ast_rshift:
+   case ast_less:
+   case ast_greater:
+   case ast_lequal:
+   case ast_gequal:
+   case ast_nequal:
+   case ast_equal:
+   case ast_bit_and:
+   case ast_bit_xor:
+   case ast_bit_or:
+   case ast_logic_and:
+   case ast_logic_or:
+   case ast_logic_xor:
+   case ast_array_index:
+   case ast_mul_assign:
+   case ast_div_assign:
+   case ast_add_assign:
+   case ast_sub_assign:
+   case ast_mod_assign:
+   case ast_ls_assign:
+   case ast_rs_assign:
+   case ast_and_assign:
+   case ast_xor_assign:
+   case ast_or_assign:
+      return this->subexpressions[0]->has_sequence_subexpression() ||
+             this->subexpressions[1]->has_sequence_subexpression();
+
+   case ast_conditional:
+      return this->subexpressions[0]->has_sequence_subexpression() ||
+             this->subexpressions[1]->has_sequence_subexpression() ||
+             this->subexpressions[2]->has_sequence_subexpression();
+
+   case ast_sequence:
+      return true;
+
+   case ast_field_selection:
+   case ast_identifier:
+   case ast_int_constant:
+   case ast_uint_constant:
+   case ast_float_constant:
+   case ast_bool_constant:
+   case ast_double_constant:
+      return false;
+
+   case ast_aggregate:
+      unreachable("ast_aggregate: Should never get here.");
+
+   case ast_function_call:
+      unreachable("should be handled by ast_function_expression::hir");
+
+   case ast_unsized_array_dim:
+      unreachable("ast_unsized_array_dim: Should never get here.");
+   }
+
+   return false;
+}
+
+ir_rvalue *
+ast_expression_statement::hir(exec_list *instructions,
+                              struct _mesa_glsl_parse_state *state)
+{
+   /* It is possible to have expression statements that don't have an
+    * expression.  This is the solitary semicolon:
+    *
+    * for (i = 0; i < 5; i++)
+    *     ;
+    *
+    * In this case the expression will be NULL.  Test for NULL and don't do
+    * anything in that case.
+    */
+   if (expression != NULL)
+      expression->hir_no_rvalue(instructions, state);
+
+   /* Statements do not have r-values.
+    */
+   return NULL;
+}
+
+
+ir_rvalue *
+ast_compound_statement::hir(exec_list *instructions,
+                            struct _mesa_glsl_parse_state *state)
+{
+   if (new_scope)
+      state->symbols->push_scope();
+
+   foreach_list_typed (ast_node, ast, link, &this->statements)
+      ast->hir(instructions, state);
+
+   if (new_scope)
+      state->symbols->pop_scope();
+
+   /* Compound statements do not have r-values.
+    */
+   return NULL;
+}
+
+/**
+ * Evaluate the given exec_node (which should be an ast_node representing
+ * a single array dimension) and return its integer value.
+ */
+static unsigned
+process_array_size(exec_node *node,
+                   struct _mesa_glsl_parse_state *state)
+{
+   exec_list dummy_instructions;
+
+   ast_node *array_size = exec_node_data(ast_node, node, link);
+
+   /**
+    * Dimensions other than the outermost dimension can be unsized if they
+    * are immediately sized by a constructor or initializer.
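+    *
+    * For example (an added GLSL sketch, assuming ARB_arrays_of_arrays):
+    *
+    *    int a[][2] = int[][2](int[2](1, 2));   // ok: inner dimension sized
+    *    int b[2][];                            // error: inner dim unsized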
+ */ + if (((ast_expression*)array_size)->oper == ast_unsized_array_dim) + return 0; + + ir_rvalue *const ir = array_size->hir(& dummy_instructions, state); + YYLTYPE loc = array_size->get_location(); + + if (ir == NULL) { + _mesa_glsl_error(& loc, state, + "array size could not be resolved"); + return 0; + } + + if (!ir->type->is_integer()) { + _mesa_glsl_error(& loc, state, + "array size must be integer type"); + return 0; + } + + if (!ir->type->is_scalar()) { + _mesa_glsl_error(& loc, state, + "array size must be scalar type"); + return 0; + } + + ir_constant *const size = ir->constant_expression_value(); + if (size == NULL || array_size->has_sequence_subexpression()) { + _mesa_glsl_error(& loc, state, "array size must be a " + "constant valued expression"); + return 0; + } + + if (size->value.i[0] <= 0) { + _mesa_glsl_error(& loc, state, "array size must be > 0"); + return 0; + } + + assert(size->type == ir->type); + + /* If the array size is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the array size isn't const after all, or + * we are emitting unnecessary instructions. + */ + assert(dummy_instructions.is_empty()); + + return size->value.u[0]; +} + +static const glsl_type * +process_array_type(YYLTYPE *loc, const glsl_type *base, + ast_array_specifier *array_specifier, + struct _mesa_glsl_parse_state *state) +{ + const glsl_type *array_type = base; + + if (array_specifier != NULL) { + if (base->is_array()) { + + /* From page 19 (page 25) of the GLSL 1.20 spec: + * + * "Only one-dimensional arrays may be declared." + */ + if (!state->check_arrays_of_arrays_allowed(loc)) { + return glsl_type::error_type; + } + } + + for (exec_node *node = array_specifier->array_dimensions.tail_pred; + !node->is_head_sentinel(); node = node->prev) { + unsigned array_size = process_array_size(node, state); + array_type = glsl_type::get_array_instance(array_type, array_size); + } + } + + return array_type; +} + +static bool +precision_qualifier_allowed(const glsl_type *type) +{ + /* Precision qualifiers apply to floating point, integer and opaque + * types. + * + * Section 4.5.2 (Precision Qualifiers) of the GLSL 1.30 spec says: + * "Any floating point or any integer declaration can have the type + * preceded by one of these precision qualifiers [...] Literal + * constants do not have precision qualifiers. Neither do Boolean + * variables. + * + * Section 4.5 (Precision and Precision Qualifiers) of the GLSL 1.30 + * spec also says: + * + * "Precision qualifiers are added for code portability with OpenGL + * ES, not for functionality. They have the same syntax as in OpenGL + * ES." + * + * Section 8 (Built-In Functions) of the GLSL ES 1.00 spec says: + * + * "uniform lowp sampler2D sampler; + * highp vec2 coord; + * ... + * lowp vec4 col = texture2D (sampler, coord); + * // texture2D returns lowp" + * + * From this, we infer that GLSL 1.30 (and later) should allow precision + * qualifiers on sampler types just like float and integer types. 
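+    *
+    * Illustrative GLSL ES declarations (an added sketch, not spec text):
+    *
+    *    precision mediump float;      // default for float-based types
+    *    uniform lowp sampler2D tex;   // allowed: opaque type
+    *    lowp bool b;                  // rejected: booleans take none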
+ */ + return (type->is_float() + || type->is_integer() + || type->contains_opaque()) + && !type->without_array()->is_record(); +} + +const glsl_type * +ast_type_specifier::glsl_type(const char **name, + struct _mesa_glsl_parse_state *state) const +{ + const struct glsl_type *type; + + type = state->symbols->get_type(this->type_name); + *name = this->type_name; + + YYLTYPE loc = this->get_location(); + type = process_array_type(&loc, type, this->array_specifier, state); + + return type; +} + +/** + * From the OpenGL ES 3.0 spec, 4.5.4 Default Precision Qualifiers: + * + * "The precision statement + * + * precision precision-qualifier type; + * + * can be used to establish a default precision qualifier. The type field can + * be either int or float or any of the sampler types, (...) If type is float, + * the directive applies to non-precision-qualified floating point type + * (scalar, vector, and matrix) declarations. If type is int, the directive + * applies to all non-precision-qualified integer type (scalar, vector, signed, + * and unsigned) declarations." + * + * We use the symbol table to keep the values of the default precisions for + * each 'type' in each scope and we use the 'type' string from the precision + * statement as key in the symbol table. When we want to retrieve the default + * precision associated with a given glsl_type we need to know the type string + * associated with it. This is what this function returns. + */ +static const char * +get_type_name_for_precision_qualifier(const glsl_type *type) +{ + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + return "float"; + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + return "int"; + case GLSL_TYPE_ATOMIC_UINT: + return "atomic_uint"; + case GLSL_TYPE_IMAGE: + /* fallthrough */ + case GLSL_TYPE_SAMPLER: { + const unsigned type_idx = + type->sampler_array + 2 * type->sampler_shadow; + const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 
0 : 4; + assert(type_idx < 4); + switch (type->sampler_type) { + case GLSL_TYPE_FLOAT: + switch (type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "sampler1D", "sampler1DArray", + "sampler1DShadow", "sampler1DArrayShadow" + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_2D: { + static const char *const names[8] = { + "sampler2D", "sampler2DArray", + "sampler2DShadow", "sampler2DArrayShadow", + "image2D", "image2DArray", NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_3D: { + static const char *const names[8] = { + "sampler3D", NULL, NULL, NULL, + "image3D", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_CUBE: { + static const char *const names[8] = { + "samplerCube", "samplerCubeArray", + "samplerCubeShadow", "samplerCubeArrayShadow", + "imageCube", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_MS: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "sampler2DMS", "sampler2DMSArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_RECT: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "samplerRect", NULL, "samplerRectShadow", NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_BUF: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "samplerBuffer", NULL, NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_EXTERNAL: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "samplerExternalOES", NULL, NULL, NULL + }; + return names[type_idx]; + } + default: + unreachable("Unsupported sampler/image dimensionality"); + } /* sampler/image float dimensionality */ + break; + case GLSL_TYPE_INT: + switch (type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_1D: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isampler1D", "isampler1DArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_2D: { + static const char *const names[8] = { + "isampler2D", "isampler2DArray", NULL, NULL, + "iimage2D", "iimage2DArray", NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_3D: { + static const char *const names[8] = { + "isampler3D", NULL, NULL, NULL, + "iimage3D", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_CUBE: { + static const char *const names[8] = { + "isamplerCube", "isamplerCubeArray", NULL, NULL, + "iimageCube", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_MS: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isampler2DMS", "isampler2DMSArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_RECT: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isamplerRect", NULL, "isamplerRectShadow", NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_BUF: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "isamplerBuffer", NULL, NULL, NULL + }; + return names[type_idx]; + } + default: + unreachable("Unsupported isampler/iimage dimensionality"); + } /* sampler/image int dimensionality */ + break; + case GLSL_TYPE_UINT: + switch (type->sampler_dimensionality) { 
+ case GLSL_SAMPLER_DIM_1D: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usampler1D", "usampler1DArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_2D: { + static const char *const names[8] = { + "usampler2D", "usampler2DArray", NULL, NULL, + "uimage2D", "uimage2DArray", NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_3D: { + static const char *const names[8] = { + "usampler3D", NULL, NULL, NULL, + "uimage3D", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_CUBE: { + static const char *const names[8] = { + "usamplerCube", "usamplerCubeArray", NULL, NULL, + "uimageCube", NULL, NULL, NULL + }; + return names[offset + type_idx]; + } + case GLSL_SAMPLER_DIM_MS: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usampler2DMS", "usampler2DMSArray", NULL, NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_RECT: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usamplerRect", NULL, "usamplerRectShadow", NULL + }; + return names[type_idx]; + } + case GLSL_SAMPLER_DIM_BUF: { + assert(type->base_type == GLSL_TYPE_SAMPLER); + static const char *const names[4] = { + "usamplerBuffer", NULL, NULL, NULL + }; + return names[type_idx]; + } + default: + unreachable("Unsupported usampler/uimage dimensionality"); + } /* sampler/image uint dimensionality */ + break; + default: + unreachable("Unsupported sampler/image type"); + } /* sampler/image type */ + break; + } /* GLSL_TYPE_SAMPLER/GLSL_TYPE_IMAGE */ + break; + default: + unreachable("Unsupported type"); + } /* base type */ +} + +static unsigned +select_gles_precision(unsigned qual_precision, + const glsl_type *type, + struct _mesa_glsl_parse_state *state, YYLTYPE *loc) +{ + /* Precision qualifiers do not have any meaning in Desktop GLSL. + * In GLES we take the precision from the type qualifier if present, + * otherwise, if the type of the variable allows precision qualifiers at + * all, we look for the default precision qualifier for that type in the + * current scope. + */ + assert(state->es_shader); + + unsigned precision = GLSL_PRECISION_NONE; + if (qual_precision) { + precision = qual_precision; + } else if (precision_qualifier_allowed(type)) { + const char *type_name = + get_type_name_for_precision_qualifier(type->without_array()); + assert(type_name != NULL); + + precision = + state->symbols->get_default_precision_qualifier(type_name); + if (precision == ast_precision_none) { + _mesa_glsl_error(loc, state, + "No precision specified in this scope for type `%s'", + type->name); + } + } + return precision; +} + +const glsl_type * +ast_fully_specified_type::glsl_type(const char **name, + struct _mesa_glsl_parse_state *state) const +{ + return this->specifier->glsl_type(name, state); +} + +/** + * Determine whether a toplevel variable declaration declares a varying. This + * function operates by examining the variable's mode and the shader target, + * so it correctly identifies linkage variables regardless of whether they are + * declared using the deprecated "varying" syntax or the new "in/out" syntax. + * + * Passing a non-toplevel variable declaration (e.g. a function parameter) to + * this function will produce undefined results. 
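+ *
+ * For instance (an added note): a vertex shader "out vec4 color" and the
+ * matching fragment shader "in vec4 color" are both varyings here, exactly
+ * as if they had been written with the deprecated "varying" keyword.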
+ */
+static bool
+is_varying_var(ir_variable *var, gl_shader_stage target)
+{
+   switch (target) {
+   case MESA_SHADER_VERTEX:
+      return var->data.mode == ir_var_shader_out;
+   case MESA_SHADER_FRAGMENT:
+      return var->data.mode == ir_var_shader_in;
+   default:
+      return var->data.mode == ir_var_shader_out || var->data.mode == ir_var_shader_in;
+   }
+}
+
+
+/**
+ * Matrix layout qualifiers are only allowed on certain types
+ */
+static void
+validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
+                                YYLTYPE *loc,
+                                const glsl_type *type,
+                                ir_variable *var)
+{
+   if (var && !var->is_in_buffer_block()) {
+      /* Layout qualifiers may only apply to interface blocks and fields in
+       * them.
+       */
+      _mesa_glsl_error(loc, state,
+                       "uniform block layout qualifiers row_major and "
+                       "column_major may not be applied to variables "
+                       "outside of uniform blocks");
+   } else if (!type->without_array()->is_matrix()) {
+      /* The OpenGL ES 3.0 conformance tests did not originally allow
+       * matrix layout qualifiers on non-matrices.  However, the OpenGL
+       * 4.4 and OpenGL ES 3.0 (revision TBD) specifications were
+       * amended to specifically allow these layouts on all types.  Emit
+       * a warning so that people know their code may not be portable.
+       */
+      _mesa_glsl_warning(loc, state,
+                         "uniform block layout qualifiers row_major and "
+                         "column_major applied to non-matrix types may "
+                         "be rejected by older compilers");
+   }
+}
+
+static bool
+process_qualifier_constant(struct _mesa_glsl_parse_state *state,
+                           YYLTYPE *loc,
+                           const char *qual_identifier,
+                           ast_expression *const_expression,
+                           unsigned *value)
+{
+   exec_list dummy_instructions;
+
+   if (const_expression == NULL) {
+      *value = 0;
+      return true;
+   }
+
+   ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
+
+   ir_constant *const const_int = ir->constant_expression_value();
+   if (const_int == NULL || !const_int->type->is_integer()) {
+      _mesa_glsl_error(loc, state, "%s must be an integral constant "
+                       "expression", qual_identifier);
+      return false;
+   }
+
+   if (const_int->value.i[0] < 0) {
+      _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
+                       qual_identifier, const_int->value.u[0]);
+      return false;
+   }
+
+   /* If the location is const (and we've verified that
+    * it is) then no instructions should have been emitted
+    * when we converted it to HIR.  If they were emitted,
+    * then either the location isn't const after all, or
+    * we are emitting unnecessary instructions.
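+    *
+    * An added sketch of the qualifier values this validates:
+    *
+    *    layout(location = 2)  in vec4 p;   // ok: integral and non-negative
+    *    layout(location = -1) in vec4 q;   // rejected: negative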
+ */ + assert(dummy_instructions.is_empty()); + + *value = const_int->value.u[0]; + return true; +} + +static bool +validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state, + unsigned stream) +{ + if (stream >= state->ctx->Const.MaxVertexStreams) { + _mesa_glsl_error(loc, state, + "invalid stream specified %d is larger than " + "MAX_VERTEX_STREAMS - 1 (%d).", + stream, state->ctx->Const.MaxVertexStreams - 1); + return false; + } + + return true; +} + +static void +apply_explicit_binding(struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + ir_variable *var, + const glsl_type *type, + const ast_type_qualifier *qual) +{ + if (!qual->flags.q.uniform && !qual->flags.q.buffer) { + _mesa_glsl_error(loc, state, + "the \"binding\" qualifier only applies to uniforms and " + "shader storage buffer objects"); + return; + } + + unsigned qual_binding; + if (!process_qualifier_constant(state, loc, "binding", qual->binding, + &qual_binding)) { + return; + } + + const struct gl_context *const ctx = state->ctx; + unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1; + unsigned max_index = qual_binding + elements - 1; + const glsl_type *base_type = type->without_array(); + + if (base_type->is_interface()) { + /* UBOs. From page 60 of the GLSL 4.20 specification: + * "If the binding point for any uniform block instance is less than zero, + * or greater than or equal to the implementation-dependent maximum + * number of uniform buffer bindings, a compilation error will occur. + * When the binding identifier is used with a uniform block instanced as + * an array of size N, all elements of the array from binding through + * binding + N – 1 must be within this range." + * + * The implementation-dependent maximum is GL_MAX_UNIFORM_BUFFER_BINDINGS. + */ + if (qual->flags.q.uniform && + max_index >= ctx->Const.MaxUniformBufferBindings) { + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds " + "the maximum number of UBO binding points (%d)", + qual_binding, elements, + ctx->Const.MaxUniformBufferBindings); + return; + } + + /* SSBOs. From page 67 of the GLSL 4.30 specification: + * "If the binding point for any uniform or shader storage block instance + * is less than zero, or greater than or equal to the + * implementation-dependent maximum number of uniform buffer bindings, a + * compile-time error will occur. When the binding identifier is used + * with a uniform or shader storage block instanced as an array of size + * N, all elements of the array from binding through binding + N – 1 must + * be within this range." + */ + if (qual->flags.q.buffer && + max_index >= ctx->Const.MaxShaderStorageBufferBindings) { + _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds " + "the maximum number of SSBO binding points (%d)", + qual_binding, elements, + ctx->Const.MaxShaderStorageBufferBindings); + return; + } + } else if (base_type->is_sampler()) { + /* Samplers. From page 63 of the GLSL 4.20 specification: + * "If the binding is less than zero, or greater than or equal to the + * implementation-dependent maximum supported number of units, a + * compilation error will occur. When the binding identifier is used + * with an array of size N, all elements of the array from binding + * through binding + N - 1 must be within this range." 
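+       *
+       * E.g. (an added sketch, assuming a 16-unit limit):
+       *
+       *    layout(binding = 14) uniform sampler2D s[4];
+       *    // error: needs bindings 14..17, past the last unit (15)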
+ */ + unsigned limit = ctx->Const.MaxCombinedTextureImageUnits; + + if (max_index >= limit) { + _mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers " + "exceeds the maximum number of texture image units " + "(%u)", qual_binding, elements, limit); + + return; + } + } else if (base_type->contains_atomic()) { + assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS); + if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) { + _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the " + " maximum number of atomic counter buffer bindings" + "(%u)", qual_binding, + ctx->Const.MaxAtomicBufferBindings); + + return; + } + } else if ((state->is_version(420, 310) || + state->ARB_shading_language_420pack_enable) && + base_type->is_image()) { + assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); + if (max_index >= ctx->Const.MaxImageUnits) { + _mesa_glsl_error(loc, state, "Image binding %d exceeds the " + " maximum number of image units (%d)", max_index, + ctx->Const.MaxImageUnits); + return; + } + + } else { + _mesa_glsl_error(loc, state, + "the \"binding\" qualifier only applies to uniform " + "blocks, opaque variables, or arrays thereof"); + return; + } + + var->data.explicit_binding = true; + var->data.binding = qual_binding; + + return; +} + + +static glsl_interp_qualifier +interpret_interpolation_qualifier(const struct ast_type_qualifier *qual, + ir_variable_mode mode, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) +{ + glsl_interp_qualifier interpolation; + if (qual->flags.q.flat) + interpolation = INTERP_QUALIFIER_FLAT; + else if (qual->flags.q.noperspective) + interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + else if (qual->flags.q.smooth) + interpolation = INTERP_QUALIFIER_SMOOTH; + else + interpolation = INTERP_QUALIFIER_NONE; + + if (interpolation != INTERP_QUALIFIER_NONE) { + if (mode != ir_var_shader_in && mode != ir_var_shader_out) { + _mesa_glsl_error(loc, state, + "interpolation qualifier `%s' can only be applied to " + "shader inputs or outputs.", + interpolation_string(interpolation)); + + } + + if ((state->stage == MESA_SHADER_VERTEX && mode == ir_var_shader_in) || + (state->stage == MESA_SHADER_FRAGMENT && mode == ir_var_shader_out)) { + _mesa_glsl_error(loc, state, + "interpolation qualifier `%s' cannot be applied to " + "vertex shader inputs or fragment shader outputs", + interpolation_string(interpolation)); + } + } + + return interpolation; +} + + +static void +apply_explicit_location(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) +{ + bool fail = false; + + unsigned qual_location; + if (!process_qualifier_constant(state, loc, "location", qual->location, + &qual_location)) { + return; + } + + /* Checks for GL_ARB_explicit_uniform_location. 
*/
+   if (qual->flags.q.uniform) {
+      if (!state->check_explicit_uniform_location_allowed(loc, var))
+         return;
+
+      const struct gl_context *const ctx = state->ctx;
+      unsigned max_loc = qual_location + var->type->uniform_locations() - 1;
+
+      if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
+         _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
+                          ">= MAX_UNIFORM_LOCATIONS (%u)", var->name,
+                          ctx->Const.MaxUserAssignableUniformLocations);
+         return;
+      }
+
+      var->data.explicit_location = true;
+      var->data.location = qual_location;
+      return;
+   }
+
+   /* Between GL_ARB_explicit_attrib_location and
+    * GL_ARB_separate_shader_objects, the inputs and outputs of any shader
+    * stage can be assigned explicit locations.  The checking here associates
+    * the correct extension with the correct stage's input / output:
+    *
+    *                     input            output
+    *                     -----            ------
+    * vertex              explicit_loc     sso
+    * tess control        sso              sso
+    * tess eval           sso              sso
+    * geometry            sso              sso
+    * fragment            sso              explicit_loc
+    */
+   switch (state->stage) {
+   case MESA_SHADER_VERTEX:
+      if (var->data.mode == ir_var_shader_in) {
+         if (!state->check_explicit_attrib_location_allowed(loc, var))
+            return;
+
+         break;
+      }
+
+      if (var->data.mode == ir_var_shader_out) {
+         if (!state->check_separate_shader_objects_allowed(loc, var))
+            return;
+
+         break;
+      }
+
+      fail = true;
+      break;
+
+   case MESA_SHADER_TESS_CTRL:
+   case MESA_SHADER_TESS_EVAL:
+   case MESA_SHADER_GEOMETRY:
+      if (var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) {
+         if (!state->check_separate_shader_objects_allowed(loc, var))
+            return;
+
+         break;
+      }
+
+      fail = true;
+      break;
+
+   case MESA_SHADER_FRAGMENT:
+      if (var->data.mode == ir_var_shader_in) {
+         if (!state->check_separate_shader_objects_allowed(loc, var))
+            return;
+
+         break;
+      }
+
+      if (var->data.mode == ir_var_shader_out) {
+         if (!state->check_explicit_attrib_location_allowed(loc, var))
+            return;
+
+         break;
+      }
+
+      fail = true;
+      break;
+
+   case MESA_SHADER_COMPUTE:
+      _mesa_glsl_error(loc, state,
+                       "compute shader variables cannot be given "
+                       "explicit locations");
+      return;
+   }
+
+   if (fail) {
+      _mesa_glsl_error(loc, state,
+                       "%s cannot be given an explicit location in %s shader",
+                       mode_string(var),
+                       _mesa_shader_stage_to_string(state->stage));
+   } else {
+      var->data.explicit_location = true;
+
+      switch (state->stage) {
+      case MESA_SHADER_VERTEX:
+         var->data.location = (var->data.mode == ir_var_shader_in)
+            ? (qual_location + VERT_ATTRIB_GENERIC0)
+            : (qual_location + VARYING_SLOT_VAR0);
+         break;
+
+      case MESA_SHADER_TESS_CTRL:
+      case MESA_SHADER_TESS_EVAL:
+      case MESA_SHADER_GEOMETRY:
+         if (var->data.patch)
+            var->data.location = qual_location + VARYING_SLOT_PATCH0;
+         else
+            var->data.location = qual_location + VARYING_SLOT_VAR0;
+         break;
+
+      case MESA_SHADER_FRAGMENT:
+         var->data.location = (var->data.mode == ir_var_shader_out)
+            ?
(qual_location + FRAG_RESULT_DATA0) + : (qual_location + VARYING_SLOT_VAR0); + break; + case MESA_SHADER_COMPUTE: + assert(!"Unexpected shader type"); + break; + } + + /* Check if index was set for the uniform instead of the function */ + if (qual->flags.q.explicit_index && qual->flags.q.subroutine) { + _mesa_glsl_error(loc, state, "an index qualifier can only be " + "used with subroutine functions"); + return; + } + + unsigned qual_index; + if (qual->flags.q.explicit_index && + process_qualifier_constant(state, loc, "index", qual->index, + &qual_index)) { + /* From the GLSL 4.30 specification, section 4.4.2 (Output + * Layout Qualifiers): + * + * "It is also a compile-time error if a fragment shader + * sets a layout index to less than 0 or greater than 1." + * + * Older specifications don't mandate a behavior; we take + * this as a clarification and always generate the error. + */ + if (qual_index > 1) { + _mesa_glsl_error(loc, state, + "explicit index may only be 0 or 1"); + } else { + var->data.explicit_index = true; + var->data.index = qual_index; + } + } + } +} + +static void +apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc) +{ + const glsl_type *base_type = var->type->without_array(); + + if (base_type->is_image()) { + if (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_function_in) { + _mesa_glsl_error(loc, state, "image variables may only be declared as " + "function parameters or uniform-qualified " + "global variables"); + } + + var->data.image_read_only |= qual->flags.q.read_only; + var->data.image_write_only |= qual->flags.q.write_only; + var->data.image_coherent |= qual->flags.q.coherent; + var->data.image_volatile |= qual->flags.q._volatile; + var->data.image_restrict |= qual->flags.q.restrict_flag; + var->data.read_only = true; + + if (qual->flags.q.explicit_image_format) { + if (var->data.mode == ir_var_function_in) { + _mesa_glsl_error(loc, state, "format qualifiers cannot be " + "used on image function parameters"); + } + + if (qual->image_base_type != base_type->sampler_type) { + _mesa_glsl_error(loc, state, "format qualifier doesn't match the " + "base data type of the image"); + } + + var->data.image_format = qual->image_format; + } else { + if (var->data.mode == ir_var_uniform) { + if (state->es_shader) { + _mesa_glsl_error(loc, state, "all image uniforms " + "must have a format layout qualifier"); + + } else if (!qual->flags.q.write_only) { + _mesa_glsl_error(loc, state, "image uniforms not qualified with " + "`writeonly' must have a format layout " + "qualifier"); + } + } + + var->data.image_format = GL_NONE; + } + + /* From page 70 of the GLSL ES 3.1 specification: + * + * "Except for image variables qualified with the format qualifiers + * r32f, r32i, and r32ui, image variables must specify either memory + * qualifier readonly or the memory qualifier writeonly." 
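+       *
+       * An illustrative example (ours, not spec text):
+       *
+       *    layout(r32f)  uniform highp image2D a;            // ok, exempt
+       *    layout(rgba8) writeonly uniform highp image2D b;  // ok
+       *    layout(rgba8) uniform highp image2D c;            // error in ES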
+       */
+      if (state->es_shader &&
+          var->data.image_format != GL_R32F &&
+          var->data.image_format != GL_R32I &&
+          var->data.image_format != GL_R32UI &&
+          !var->data.image_read_only &&
+          !var->data.image_write_only) {
+         _mesa_glsl_error(loc, state, "image variables of format other than "
+                          "r32f, r32i or r32ui must be qualified `readonly' or "
+                          "`writeonly'");
+      }
+
+   } else if (qual->flags.q.read_only ||
+              qual->flags.q.write_only ||
+              qual->flags.q.coherent ||
+              qual->flags.q._volatile ||
+              qual->flags.q.restrict_flag ||
+              qual->flags.q.explicit_image_format) {
+      _mesa_glsl_error(loc, state, "memory qualifiers may only be applied to "
+                       "images");
+   }
+}
+
+static inline const char*
+get_layout_qualifier_string(bool origin_upper_left, bool pixel_center_integer)
+{
+   if (origin_upper_left && pixel_center_integer)
+      return "origin_upper_left, pixel_center_integer";
+   else if (origin_upper_left)
+      return "origin_upper_left";
+   else if (pixel_center_integer)
+      return "pixel_center_integer";
+   else
+      return " ";
+}
+
+static inline bool
+is_conflicting_fragcoord_redeclaration(struct _mesa_glsl_parse_state *state,
+                                       const struct ast_type_qualifier *qual)
+{
+   /* If gl_FragCoord was previously declared, and the qualifiers were
+    * different in any way, return true.
+    */
+   if (state->fs_redeclares_gl_fragcoord) {
+      return (state->fs_pixel_center_integer != qual->flags.q.pixel_center_integer
+              || state->fs_origin_upper_left != qual->flags.q.origin_upper_left);
+   }
+
+   return false;
+}
+
+static inline void
+validate_array_dimensions(const glsl_type *t,
+                          struct _mesa_glsl_parse_state *state,
+                          YYLTYPE *loc)
+{
+   if (t->is_array()) {
+      t = t->fields.array;
+      while (t->is_array()) {
+         if (t->is_unsized_array()) {
+            _mesa_glsl_error(loc, state,
+                             "only the outermost array dimension can "
+                             "be unsized");
+            break;
+         }
+         t = t->fields.array;
+      }
+   }
+}
+
+static void
+apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual,
+                                   ir_variable *var,
+                                   struct _mesa_glsl_parse_state *state,
+                                   YYLTYPE *loc)
+{
+   if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) {
+
+      /* Section 4.3.8.1, page 39 of the GLSL 1.50 spec says:
+       *
+       *    "Within any shader, the first redeclarations of gl_FragCoord
+       *     must appear before any use of gl_FragCoord."
+       *
+       * Generate a compiler error if the above condition is not met by the
+       * fragment shader.
+       */
+      ir_variable *earlier = state->symbols->get_variable("gl_FragCoord");
+      if (earlier != NULL &&
+          earlier->data.used &&
+          !state->fs_redeclares_gl_fragcoord) {
+         _mesa_glsl_error(loc, state,
+                          "gl_FragCoord used before its first redeclaration "
+                          "in fragment shader");
+      }
+
+      /* Make sure all gl_FragCoord redeclarations specify the same layout
+       * qualifiers.
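+       *
+       * For instance (an illustrative case, not spec text), a shader
+       * containing both
+       *
+       *    layout(origin_upper_left) in vec4 gl_FragCoord;
+       *    layout(pixel_center_integer) in vec4 gl_FragCoord;
+       *
+       * must be rejected, because the two redeclarations disagree.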
+       */
+      if (is_conflicting_fragcoord_redeclaration(state, qual)) {
+         const char *const qual_string =
+            get_layout_qualifier_string(qual->flags.q.origin_upper_left,
+                                        qual->flags.q.pixel_center_integer);
+
+         const char *const state_string =
+            get_layout_qualifier_string(state->fs_origin_upper_left,
+                                        state->fs_pixel_center_integer);
+
+         _mesa_glsl_error(loc, state,
+                          "gl_FragCoord redeclared with different layout "
+                          "qualifiers (%s) and (%s)",
+                          state_string,
+                          qual_string);
+      }
+      state->fs_origin_upper_left = qual->flags.q.origin_upper_left;
+      state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer;
+      state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers =
+         !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer;
+      state->fs_redeclares_gl_fragcoord =
+         state->fs_origin_upper_left ||
+         state->fs_pixel_center_integer ||
+         state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
+   }
+
+   var->data.pixel_center_integer = qual->flags.q.pixel_center_integer;
+   var->data.origin_upper_left = qual->flags.q.origin_upper_left;
+   if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer)
+       && (strcmp(var->name, "gl_FragCoord") != 0)) {
+      const char *const qual_string = (qual->flags.q.origin_upper_left)
+         ? "origin_upper_left" : "pixel_center_integer";
+
+      _mesa_glsl_error(loc, state,
+                       "layout qualifier `%s' can only be applied to "
+                       "fragment shader input `gl_FragCoord'",
+                       qual_string);
+   }
+
+   if (qual->flags.q.explicit_location) {
+      apply_explicit_location(qual, var, state, loc);
+   } else if (qual->flags.q.explicit_index) {
+      if (!qual->flags.q.subroutine_def)
+         _mesa_glsl_error(loc, state,
+                          "explicit index requires explicit location");
+   }
+
+   if (qual->flags.q.explicit_binding) {
+      apply_explicit_binding(state, loc, var, var->type, qual);
+   }
+
+   if (state->stage == MESA_SHADER_GEOMETRY &&
+       qual->flags.q.out && qual->flags.q.stream) {
+      unsigned qual_stream;
+      if (process_qualifier_constant(state, loc, "stream", qual->stream,
+                                     &qual_stream) &&
+          validate_stream_qualifier(loc, state, qual_stream)) {
+         var->data.stream = qual_stream;
+      }
+   }
+
+   if (var->type->contains_atomic()) {
+      if (var->data.mode == ir_var_uniform) {
+         if (var->data.explicit_binding) {
+            unsigned *offset =
+               &state->atomic_counter_offsets[var->data.binding];
+
+            if (*offset % ATOMIC_COUNTER_SIZE)
+               _mesa_glsl_error(loc, state,
+                                "misaligned atomic counter offset");
+
+            var->data.offset = *offset;
+            *offset += var->type->atomic_size();
+
+         } else {
+            _mesa_glsl_error(loc, state,
+                             "atomic counters require explicit binding point");
+         }
+      } else if (var->data.mode != ir_var_function_in) {
+         _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
+                          "function parameters or uniform-qualified "
+                          "global variables");
+      }
+   }
+
+   /* Check whether the 'layout' keyword was used with parameters that allow
+    * relaxed checking.  Many implementations of
+    * GL_ARB_fragment_coord_conventions and some implementations (only Mesa?)
+    * of GL_ARB_explicit_attrib_location allowed the layout qualifier to be
+    * used with 'varying' and 'attribute'.  These extensions and all
+    * following extensions that add the 'layout' keyword have been modified
+    * to require the use of 'in' or 'out'.
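+    *
+    * An illustrative case (ours): under GLSL 1.20 with
+    * GL_ARB_fragment_coord_conventions enabled,
+    *
+    *    layout(origin_upper_left) varying vec4 gl_FragCoord;
+    *
+    * only draws the warning below, while the same 'layout'-with-'varying'
+    * combination is an error when relaxed checking does not apply.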
+ * + * The following extension do not allow the deprecated keywords: + * + * GL_AMD_conservative_depth + * GL_ARB_conservative_depth + * GL_ARB_gpu_shader5 + * GL_ARB_separate_shader_objects + * GL_ARB_tessellation_shader + * GL_ARB_transform_feedback3 + * GL_ARB_uniform_buffer_object + * + * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5 + * allow layout with the deprecated keywords. + */ + const bool relaxed_layout_qualifier_checking = + state->ARB_fragment_coord_conventions_enable; + + const bool uses_deprecated_qualifier = qual->flags.q.attribute + || qual->flags.q.varying; + if (qual->has_layout() && uses_deprecated_qualifier) { + if (relaxed_layout_qualifier_checking) { + _mesa_glsl_warning(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } else { + _mesa_glsl_error(loc, state, + "`layout' qualifier may not be used with " + "`attribute' or `varying'"); + } + } + + /* Layout qualifiers for gl_FragDepth, which are enabled by extension + * AMD_conservative_depth. + */ + int depth_layout_count = qual->flags.q.depth_any + + qual->flags.q.depth_greater + + qual->flags.q.depth_less + + qual->flags.q.depth_unchanged; + if (depth_layout_count > 0 + && !state->AMD_conservative_depth_enable + && !state->ARB_conservative_depth_enable) { + _mesa_glsl_error(loc, state, + "extension GL_AMD_conservative_depth or " + "GL_ARB_conservative_depth must be enabled " + "to use depth layout qualifiers"); + } else if (depth_layout_count > 0 + && strcmp(var->name, "gl_FragDepth") != 0) { + _mesa_glsl_error(loc, state, + "depth layout qualifiers can be applied only to " + "gl_FragDepth"); + } else if (depth_layout_count > 1 + && strcmp(var->name, "gl_FragDepth") == 0) { + _mesa_glsl_error(loc, state, + "at most one depth layout qualifier can be applied to " + "gl_FragDepth"); + } + if (qual->flags.q.depth_any) + var->data.depth_layout = ir_depth_layout_any; + else if (qual->flags.q.depth_greater) + var->data.depth_layout = ir_depth_layout_greater; + else if (qual->flags.q.depth_less) + var->data.depth_layout = ir_depth_layout_less; + else if (qual->flags.q.depth_unchanged) + var->data.depth_layout = ir_depth_layout_unchanged; + else + var->data.depth_layout = ir_depth_layout_none; + + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(loc, state, + "uniform and shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform or shader storage blocks, not " + "members"); + } + + if (qual->flags.q.row_major || qual->flags.q.column_major) { + validate_matrix_layout_for_type(state, loc, var->type, var); + } + + /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader + * Inputs): + * + * "Fragment shaders also allow the following layout qualifier on in only + * (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (qual->flags.q.early_fragment_tests) { + _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only " + "valid in fragment shader input layout declaration."); + } +} + +static void +apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual, + ir_variable *var, + struct _mesa_glsl_parse_state *state, + YYLTYPE *loc, + bool is_parameter) +{ + STATIC_ASSERT(sizeof(qual->flags.q) <= sizeof(qual->flags.i)); + + if (qual->flags.q.invariant) { + if (var->data.used) { + _mesa_glsl_error(loc, state, + "variable `%s' may not be 
redeclared " + "`invariant' after being used", + var->name); + } else { + var->data.invariant = 1; + } + } + + if (qual->flags.q.precise) { + if (var->data.used) { + _mesa_glsl_error(loc, state, + "variable `%s' may not be redeclared " + "`precise' after being used", + var->name); + } else { + var->data.precise = 1; + } + } + + if (qual->flags.q.subroutine && !qual->flags.q.uniform) { + _mesa_glsl_error(loc, state, + "`subroutine' may only be applied to uniforms, " + "subroutine type declarations, or function definitions"); + } + + if (qual->flags.q.constant || qual->flags.q.attribute + || qual->flags.q.uniform + || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT))) + var->data.read_only = 1; + + if (qual->flags.q.centroid) + var->data.centroid = 1; + + if (qual->flags.q.sample) + var->data.sample = 1; + + /* Precision qualifiers do not hold any meaning in Desktop GLSL */ + if (state->es_shader) { + var->data.precision = + select_gles_precision(qual->precision, var->type, state, loc); + } + + if (qual->flags.q.patch) + var->data.patch = 1; + + if (qual->flags.q.attribute && state->stage != MESA_SHADER_VERTEX) { + var->type = glsl_type::error_type; + _mesa_glsl_error(loc, state, + "`attribute' variables may not be declared in the " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } + + /* Disallow layout qualifiers which may only appear on layout declarations. */ + if (qual->flags.q.prim_type) { + _mesa_glsl_error(loc, state, + "Primitive type may only be specified on GS input or output " + "layout declaration, not on variables."); + } + + /* Section 6.1.1 (Function Calling Conventions) of the GLSL 1.10 spec says: + * + * "However, the const qualifier cannot be used with out or inout." + * + * The same section of the GLSL 4.40 spec further clarifies this saying: + * + * "The const qualifier cannot be used with out or inout, or a + * compile-time error results." + */ + if (is_parameter && qual->flags.q.constant && qual->flags.q.out) { + _mesa_glsl_error(loc, state, + "`const' may not be applied to `out' or `inout' " + "function parameters"); + } + + /* If there is no qualifier that changes the mode of the variable, leave + * the setting alone. + */ + assert(var->data.mode != ir_var_temporary); + if (qual->flags.q.in && qual->flags.q.out) + var->data.mode = ir_var_function_inout; + else if (qual->flags.q.in) + var->data.mode = is_parameter ? ir_var_function_in : ir_var_shader_in; + else if (qual->flags.q.attribute + || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT))) + var->data.mode = ir_var_shader_in; + else if (qual->flags.q.out) + var->data.mode = is_parameter ? ir_var_function_out : ir_var_shader_out; + else if (qual->flags.q.varying && (state->stage == MESA_SHADER_VERTEX)) + var->data.mode = ir_var_shader_out; + else if (qual->flags.q.uniform) + var->data.mode = ir_var_uniform; + else if (qual->flags.q.buffer) + var->data.mode = ir_var_shader_storage; + else if (qual->flags.q.shared_storage) + var->data.mode = ir_var_shader_shared; + + if (!is_parameter && is_varying_var(var, state->stage)) { + /* User-defined ins/outs are not permitted in compute shaders. */ + if (state->stage == MESA_SHADER_COMPUTE) { + _mesa_glsl_error(loc, state, + "user-defined input and output variables are not " + "permitted in compute shaders"); + } + + /* This variable is being used to link data between shader stages (in + * pre-glsl-1.30 parlance, it's a "varying"). Check that it has a type + * that is allowed for such purposes. 
+ * + * From page 25 (page 31 of the PDF) of the GLSL 1.10 spec: + * + * "The varying qualifier can be used only with the data types + * float, vec2, vec3, vec4, mat2, mat3, and mat4, or arrays of + * these." + * + * This was relaxed in GLSL version 1.30 and GLSL ES version 3.00. From + * page 31 (page 37 of the PDF) of the GLSL 1.30 spec: + * + * "Fragment inputs can only be signed and unsigned integers and + * integer vectors, float, floating-point vectors, matrices, or + * arrays of these. Structures cannot be input. + * + * Similar text exists in the section on vertex shader outputs. + * + * Similar text exists in the GLSL ES 3.00 spec, except that the GLSL ES + * 3.00 spec allows structs as well. Varying structs are also allowed + * in GLSL 1.50. + */ + switch (var->type->get_scalar_type()->base_type) { + case GLSL_TYPE_FLOAT: + /* Ok in all GLSL versions */ + break; + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + if (state->is_version(130, 300)) + break; + _mesa_glsl_error(loc, state, + "varying variables must be of base type float in %s", + state->get_version_string()); + break; + case GLSL_TYPE_STRUCT: + if (state->is_version(150, 300)) + break; + _mesa_glsl_error(loc, state, + "varying variables may not be of type struct"); + break; + case GLSL_TYPE_DOUBLE: + break; + default: + _mesa_glsl_error(loc, state, "illegal type for a varying variable"); + break; + } + } + + if (state->all_invariant && (state->current_function == NULL)) { + switch (state->stage) { + case MESA_SHADER_VERTEX: + if (var->data.mode == ir_var_shader_out) + var->data.invariant = true; + break; + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + if ((var->data.mode == ir_var_shader_in) + || (var->data.mode == ir_var_shader_out)) + var->data.invariant = true; + break; + case MESA_SHADER_FRAGMENT: + if (var->data.mode == ir_var_shader_in) + var->data.invariant = true; + break; + case MESA_SHADER_COMPUTE: + /* Invariance isn't meaningful in compute shaders. */ + break; + } + } + + var->data.interpolation = + interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode, + state, loc); + + /* Does the declaration use the deprecated 'attribute' or 'varying' + * keywords? + */ + const bool uses_deprecated_qualifier = qual->flags.q.attribute + || qual->flags.q.varying; + + + /* Validate auxiliary storage qualifiers */ + + /* From section 4.3.4 of the GLSL 1.30 spec: + * "It is an error to use centroid in in a vertex shader." + * + * From section 4.3.4 of the GLSL ES 3.00 spec: + * "It is an error to use centroid in or interpolation qualifiers in + * a vertex shader input." + */ + + /* Section 4.3.6 of the GLSL 1.30 specification states: + * "It is an error to use centroid out in a fragment shader." + * + * The GL_ARB_shading_language_420pack extension specification states: + * "It is an error to use auxiliary storage qualifiers or interpolation + * qualifiers on an output in a fragment shader." 
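+    *
+    * As an illustration (ours, not spec text): `centroid in vec2 uv;' is
+    * valid in a fragment shader, whereas `centroid out vec4 color;' in
+    * that same fragment shader must be rejected.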
+ */ + if (qual->flags.q.sample && (!is_varying_var(var, state->stage) || uses_deprecated_qualifier)) { + _mesa_glsl_error(loc, state, + "sample qualifier may only be used on `in` or `out` " + "variables between shader stages"); + } + if (qual->flags.q.centroid && !is_varying_var(var, state->stage)) { + _mesa_glsl_error(loc, state, + "centroid qualifier may only be used with `in', " + "`out' or `varying' variables between shader stages"); + } + + if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) { + _mesa_glsl_error(loc, state, + "the shared storage qualifiers can only be used with " + "compute shaders"); + } + + apply_image_qualifier_to_variable(qual, var, state, loc); +} + +/** + * Get the variable that is being redeclared by this declaration + * + * Semantic checks to verify the validity of the redeclaration are also + * performed. If semantic checks fail, compilation error will be emitted via + * \c _mesa_glsl_error, but a non-\c NULL pointer will still be returned. + * + * \returns + * A pointer to an existing variable in the current scope if the declaration + * is a redeclaration, \c NULL otherwise. + */ +static ir_variable * +get_variable_being_redeclared(ir_variable *var, YYLTYPE loc, + struct _mesa_glsl_parse_state *state, + bool allow_all_redeclarations) +{ + /* Check if this declaration is actually a re-declaration, either to + * resize an array or add qualifiers to an existing variable. + * + * This is allowed for variables in the current scope, or when at + * global scope (for built-ins in the implicit outer scope). + */ + ir_variable *earlier = state->symbols->get_variable(var->name); + if (earlier == NULL || + (state->current_function != NULL && + !state->symbols->name_declared_this_scope(var->name))) { + return NULL; + } + + + /* From page 24 (page 30 of the PDF) of the GLSL 1.50 spec, + * + * "It is legal to declare an array without a size and then + * later re-declare the same name as an array of the same + * type and specify a size." + */ + if (earlier->type->is_unsized_array() && var->type->is_array() + && (var->type->fields.array == earlier->type->fields.array)) { + /* FINISHME: This doesn't match the qualifiers on the two + * FINISHME: declarations. It's not 100% clear whether this is + * FINISHME: required or not. + */ + + const unsigned size = unsigned(var->type->array_size()); + check_builtin_array_max_size(var->name, size, loc, state); + if ((size > 0) && (size <= earlier->data.max_array_access)) { + _mesa_glsl_error(& loc, state, "array size must be > %u due to " + "previous access", + earlier->data.max_array_access); + } + + earlier->type = var->type; + delete var; + var = NULL; + } else if ((state->ARB_fragment_coord_conventions_enable || + state->is_version(150, 0)) + && strcmp(var->name, "gl_FragCoord") == 0 + && earlier->type == var->type + && var->data.mode == ir_var_shader_in) { + /* Allow redeclaration of gl_FragCoord for ARB_fcc layout + * qualifiers. 
+       */
+      earlier->data.origin_upper_left = var->data.origin_upper_left;
+      earlier->data.pixel_center_integer = var->data.pixel_center_integer;
+
+      /* According to section 4.3.7 of the GLSL 1.30 spec, the following
+       * built-in variables can be redeclared with an interpolation
+       * qualifier:
+       *    * gl_FrontColor
+       *    * gl_BackColor
+       *    * gl_FrontSecondaryColor
+       *    * gl_BackSecondaryColor
+       *    * gl_Color
+       *    * gl_SecondaryColor
+       */
+   } else if (state->is_version(130, 0)
+              && (strcmp(var->name, "gl_FrontColor") == 0
+                  || strcmp(var->name, "gl_BackColor") == 0
+                  || strcmp(var->name, "gl_FrontSecondaryColor") == 0
+                  || strcmp(var->name, "gl_BackSecondaryColor") == 0
+                  || strcmp(var->name, "gl_Color") == 0
+                  || strcmp(var->name, "gl_SecondaryColor") == 0)
+              && earlier->type == var->type
+              && earlier->data.mode == var->data.mode) {
+      earlier->data.interpolation = var->data.interpolation;
+
+      /* Layout qualifiers for gl_FragDepth. */
+   } else if ((state->AMD_conservative_depth_enable ||
+               state->ARB_conservative_depth_enable)
+              && strcmp(var->name, "gl_FragDepth") == 0
+              && earlier->type == var->type
+              && earlier->data.mode == var->data.mode) {
+
+      /* From the AMD_conservative_depth spec:
+       *
+       *    "Within any shader, the first redeclarations of gl_FragDepth
+       *     must appear before any use of gl_FragDepth."
+       */
+      if (earlier->data.used) {
+         _mesa_glsl_error(&loc, state,
+                          "the first redeclaration of gl_FragDepth "
+                          "must appear before any use of gl_FragDepth");
+      }
+
+      /* Prevent inconsistent redeclaration of depth layout qualifier. */
+      if (earlier->data.depth_layout != ir_depth_layout_none
+          && earlier->data.depth_layout != var->data.depth_layout) {
+         _mesa_glsl_error(&loc, state,
+                          "gl_FragDepth: depth layout is declared here "
+                          "as '%s', but it was previously declared as "
+                          "'%s'",
+                          depth_layout_string(var->data.depth_layout),
+                          depth_layout_string(earlier->data.depth_layout));
+      }
+
+      earlier->data.depth_layout = var->data.depth_layout;
+
+   } else if (allow_all_redeclarations) {
+      if (earlier->data.mode != var->data.mode) {
+         _mesa_glsl_error(&loc, state,
+                          "redeclaration of `%s' with incorrect qualifiers",
+                          var->name);
+      } else if (earlier->type != var->type) {
+         _mesa_glsl_error(&loc, state,
+                          "redeclaration of `%s' has incorrect type",
+                          var->name);
+      }
+   } else {
+      _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name);
+   }
+
+   return earlier;
+}
+
+/**
+ * Generate the IR for an initializer in a variable declaration
+ */
+ir_rvalue *
+process_initializer(ir_variable *var, ast_declaration *decl,
+                    ast_fully_specified_type *type,
+                    exec_list *initializer_instructions,
+                    struct _mesa_glsl_parse_state *state)
+{
+   ir_rvalue *result = NULL;
+
+   YYLTYPE initializer_loc = decl->initializer->get_location();
+
+   /* From page 24 (page 30 of the PDF) of the GLSL 1.10 spec:
+    *
+    *    "All uniform variables are read-only and are initialized either
+    *     directly by an application via API commands, or indirectly by
+    *     OpenGL."
+    */
+   if (var->data.mode == ir_var_uniform) {
+      state->check_version(120, 0, &initializer_loc,
+                           "cannot initialize uniform %s",
+                           var->name);
+   }
+
+   /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+    *
+    *    "Buffer variables cannot have initializers."
+    */
+   if (var->data.mode == ir_var_shader_storage) {
+      _mesa_glsl_error(&initializer_loc, state,
+                       "cannot initialize buffer variable %s",
+                       var->name);
+   }
+
+   /* From section 4.1.7 of the GLSL 4.40 spec:
+    *
+    *    "Opaque variables [...]
are initialized only through the + * OpenGL API; they cannot be declared with an initializer in a + * shader." + */ + if (var->type->contains_opaque()) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize opaque variable %s", + var->name); + } + + if ((var->data.mode == ir_var_shader_in) && (state->current_function == NULL)) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize %s shader input / %s %s", + _mesa_shader_stage_to_string(state->stage), + (state->stage == MESA_SHADER_VERTEX) + ? "attribute" : "varying", + var->name); + } + + if (var->data.mode == ir_var_shader_out && state->current_function == NULL) { + _mesa_glsl_error(&initializer_loc, state, + "cannot initialize %s shader output %s", + _mesa_shader_stage_to_string(state->stage), + var->name); + } + + /* If the initializer is an ast_aggregate_initializer, recursively store + * type information from the LHS into it, so that its hir() function can do + * type checking. + */ + if (decl->initializer->oper == ast_aggregate) + _mesa_ast_set_aggregate_type(var->type, decl->initializer); + + ir_dereference *const lhs = new(state) ir_dereference_variable(var); + ir_rvalue *rhs = decl->initializer->hir(initializer_instructions, state); + + /* Calculate the constant value if this is a const or uniform + * declaration. + * + * Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says: + * + * "Declarations of globals without a storage qualifier, or with + * just the const qualifier, may include initializers, in which case + * they will be initialized before the first line of main() is + * executed. Such initializers must be a constant expression." + * + * The same section of the GLSL ES 3.00.4 spec has similar language. + */ + if (type->qualifier.flags.q.constant + || type->qualifier.flags.q.uniform + || (state->es_shader && state->current_function == NULL)) { + ir_rvalue *new_rhs = validate_assignment(state, initializer_loc, + lhs, rhs, true); + if (new_rhs != NULL) { + rhs = new_rhs; + + /* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec + * says: + * + * "A constant expression is one of + * + * ... + * + * - an expression formed by an operator on operands that are + * all constant expressions, including getting an element of + * a constant array, or a field of a constant structure, or + * components of a constant vector. However, the sequence + * operator ( , ) and the assignment operators ( =, +=, ...) + * are not included in the operators that can create a + * constant expression." + * + * Section 12.43 (Sequence operator and constant expressions) says: + * + * "Should the following construct be allowed? + * + * float a[2,3]; + * + * The expression within the brackets uses the sequence operator + * (',') and returns the integer 3 so the construct is declaring + * a single-dimensional array of size 3. In some languages, the + * construct declares a two-dimensional array. It would be + * preferable to make this construct illegal to avoid confusion. + * + * One possibility is to change the definition of the sequence + * operator so that it does not return a constant-expression and + * hence cannot be used to declare an array size. + * + * RESOLUTION: The result of a sequence operator is not a + * constant-expression." + * + * Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec + * contains language almost identical to the section 4.3.3 in the + * GLSL ES 3.00.4 spec. This is a new limitation for these GLSL + * versions. 
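+          *
+          * For example (illustrative): under those versions
+          *
+          *    const int n = (1, 2);
+          *
+          * is rejected below even though `(1, 2)' folds to a constant,
+          * because the sequence operator does not form a constant
+          * expression.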
+          */
+         ir_constant *constant_value = rhs->constant_expression_value();
+         if (!constant_value ||
+             (state->is_version(430, 300) &&
+              decl->initializer->has_sequence_subexpression())) {
+            const char *const variable_mode =
+               (type->qualifier.flags.q.constant)
+               ? "const"
+               : ((type->qualifier.flags.q.uniform) ? "uniform" : "global");
+
+            /* If ARB_shading_language_420pack is enabled, initializers of
+             * const-qualified local variables do not have to be constant
+             * expressions.  Const-qualified global variables must still be
+             * initialized with constant expressions.
+             */
+            if (!state->has_420pack()
+                || state->current_function == NULL) {
+               _mesa_glsl_error(& initializer_loc, state,
+                                "initializer of %s variable `%s' must be a "
+                                "constant expression",
+                                variable_mode,
+                                decl->identifier);
+               if (var->type->is_numeric()) {
+                  /* Reduce cascading errors. */
+                  var->constant_value = type->qualifier.flags.q.constant
+                     ? ir_constant::zero(state, var->type) : NULL;
+               }
+            }
+         } else {
+            rhs = constant_value;
+            var->constant_value = type->qualifier.flags.q.constant
+               ? constant_value : NULL;
+         }
+      } else {
+         if (var->type->is_numeric()) {
+            /* Reduce cascading errors. */
+            var->constant_value = type->qualifier.flags.q.constant
+               ? ir_constant::zero(state, var->type) : NULL;
+         }
+      }
+   }
+
+   if (rhs && !rhs->type->is_error()) {
+      bool temp = var->data.read_only;
+      if (type->qualifier.flags.q.constant)
+         var->data.read_only = false;
+
+      /* Never emit code to initialize a uniform.
+       */
+      const glsl_type *initializer_type;
+      if (!type->qualifier.flags.q.uniform) {
+         do_assignment(initializer_instructions, state,
+                       NULL,
+                       lhs, rhs,
+                       &result, true,
+                       true,
+                       type->get_location());
+         initializer_type = result->type;
+      } else
+         initializer_type = rhs->type;
+
+      var->constant_initializer = rhs->constant_expression_value();
+      var->data.has_initializer = true;
+
+      /* If the declared variable is an unsized array, it must inherit
+       * its full type from the initializer.  A declaration such as
+       *
+       *    uniform float a[] = float[](1.0, 2.0, 3.0, 3.0);
+       *
+       * becomes
+       *
+       *    uniform float a[4] = float[](1.0, 2.0, 3.0, 3.0);
+       *
+       * The assignment generated in the if-statement (below) will also
+       * automatically handle this case for non-uniforms.
+       *
+       * If the declared variable is not an array, the types must
+       * already match exactly.  As a result, the type assignment
+       * here can be done unconditionally.  For non-uniforms the call
+       * to do_assignment can change the type of the initializer (via
+       * the implicit conversion rules).  For uniforms the initializer
+       * must be a constant expression, and the type of that expression
+       * was validated above.
+       */
+      var->type = initializer_type;
+
+      var->data.read_only = temp;
+   }
+
+   return result;
+}
+
+static void
+validate_layout_qualifier_vertex_count(struct _mesa_glsl_parse_state *state,
+                                       YYLTYPE loc, ir_variable *var,
+                                       unsigned num_vertices,
+                                       unsigned *size,
+                                       const char *var_category)
+{
+   if (var->type->is_unsized_array()) {
+      /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec says:
+       *
+       *    All geometry shader input unsized array declarations will be
+       *    sized by an earlier input layout qualifier, when present, as per
+       *    the following table.
+       *
+       * Followed by a table mapping each allowed input layout qualifier to
+       * the corresponding input length.
+       *
+       * Similarly for tessellation control shader outputs.
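+       *
+       * For example (illustrative): after `layout(triangles) in;' an
+       * unsized declaration `in vec4 Color[];' is sized as if it had been
+       * written `in vec4 Color[3];'.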
+ */ + if (num_vertices != 0) + var->type = glsl_type::get_array_instance(var->type->fields.array, + num_vertices); + } else { + /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec + * includes the following examples of compile-time errors: + * + * // code sequence within one shader... + * in vec4 Color1[]; // size unknown + * ...Color1.length()...// illegal, length() unknown + * in vec4 Color2[2]; // size is 2 + * ...Color1.length()...// illegal, Color1 still has no size + * in vec4 Color3[3]; // illegal, input sizes are inconsistent + * layout(lines) in; // legal, input size is 2, matching + * in vec4 Color4[3]; // illegal, contradicts layout + * ... + * + * To detect the case illustrated by Color3, we verify that the size of + * an explicitly-sized array matches the size of any previously declared + * explicitly-sized array. To detect the case illustrated by Color4, we + * verify that the size of an explicitly-sized array is consistent with + * any previously declared input layout. + */ + if (num_vertices != 0 && var->type->length != num_vertices) { + _mesa_glsl_error(&loc, state, + "%s size contradicts previously declared layout " + "(size is %u, but layout requires a size of %u)", + var_category, var->type->length, num_vertices); + } else if (*size != 0 && var->type->length != *size) { + _mesa_glsl_error(&loc, state, + "%s sizes are inconsistent (size is %u, but a " + "previous declaration has size %u)", + var_category, var->type->length, *size); + } else { + *size = var->type->length; + } + } +} + +static void +handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var) +{ + unsigned num_vertices = 0; + + if (state->tcs_output_vertices_specified) { + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", + &num_vertices, false)) { + return; + } + + if (num_vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", num_vertices); + return; + } + } + + if (!var->type->is_array() && !var->data.patch) { + _mesa_glsl_error(&loc, state, + "tessellation control shader outputs must be arrays"); + + /* To avoid cascading failures, short circuit the checks below. */ + return; + } + + if (var->data.patch) + return; + + validate_layout_qualifier_vertex_count(state, loc, var, num_vertices, + &state->tcs_output_size, + "tessellation control shader output"); +} + +/** + * Do additional processing necessary for tessellation control/evaluation shader + * input declarations. This covers both interface block arrays and bare input + * variables. + */ +static void +handle_tess_shader_input_decl(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var) +{ + if (!var->type->is_array() && !var->data.patch) { + _mesa_glsl_error(&loc, state, + "per-vertex tessellation shader inputs must be arrays"); + /* Avoid cascading failures. */ + return; + } + + if (var->data.patch) + return; + + /* Unsized arrays are implicitly sized to gl_MaxPatchVertices. */ + if (var->type->is_unsized_array()) { + var->type = glsl_type::get_array_instance(var->type->fields.array, + state->Const.MaxPatchVertices); + } +} + + +/** + * Do additional processing necessary for geometry shader input declarations + * (this covers both interface blocks arrays and bare input variables). 
+ */ +static void +handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state, + YYLTYPE loc, ir_variable *var) +{ + unsigned num_vertices = 0; + + if (state->gs_input_prim_type_specified) { + num_vertices = vertices_per_prim(state->in_qualifier->prim_type); + } + + /* Geometry shader input variables must be arrays. Caller should have + * reported an error for this. + */ + if (!var->type->is_array()) { + assert(state->error); + + /* To avoid cascading failures, short circuit the checks below. */ + return; + } + + validate_layout_qualifier_vertex_count(state, loc, var, num_vertices, + &state->gs_input_size, + "geometry shader input"); +} + +void +validate_identifier(const char *identifier, YYLTYPE loc, + struct _mesa_glsl_parse_state *state) +{ + /* From page 15 (page 21 of the PDF) of the GLSL 1.10 spec, + * + * "Identifiers starting with "gl_" are reserved for use by + * OpenGL, and may not be declared in a shader as either a + * variable or a function." + */ + if (is_gl_identifier(identifier)) { + _mesa_glsl_error(&loc, state, + "identifier `%s' uses reserved `gl_' prefix", + identifier); + } else if (strstr(identifier, "__")) { + /* From page 14 (page 20 of the PDF) of the GLSL 1.10 + * spec: + * + * "In addition, all identifiers containing two + * consecutive underscores (__) are reserved as + * possible future keywords." + * + * The intention is that names containing __ are reserved for internal + * use by the implementation, and names prefixed with GL_ are reserved + * for use by Khronos. Names simply containing __ are dangerous to use, + * but should be allowed. + * + * A future version of the GLSL specification will clarify this. + */ + _mesa_glsl_warning(&loc, state, + "identifier `%s' uses reserved `__' string", + identifier); + } +} + +ir_rvalue * +ast_declarator_list::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + const struct glsl_type *decl_type; + const char *type_name = NULL; + ir_rvalue *result = NULL; + YYLTYPE loc = this->get_location(); + + /* From page 46 (page 52 of the PDF) of the GLSL 1.50 spec: + * + * "To ensure that a particular output variable is invariant, it is + * necessary to use the invariant qualifier. It can either be used to + * qualify a previously declared variable as being invariant + * + * invariant gl_Position; // make existing gl_Position be invariant" + * + * In these cases the parser will set the 'invariant' flag in the declarator + * list, and the type will be NULL. + */ + if (this->invariant) { + assert(this->type == NULL); + + if (state->current_function != NULL) { + _mesa_glsl_error(& loc, state, + "all uses of `invariant' keyword must be at global " + "scope"); + } + + foreach_list_typed (ast_declaration, decl, link, &this->declarations) { + assert(decl->array_specifier == NULL); + assert(decl->initializer == NULL); + + ir_variable *const earlier = + state->symbols->get_variable(decl->identifier); + if (earlier == NULL) { + _mesa_glsl_error(& loc, state, + "undeclared variable `%s' cannot be marked " + "invariant", decl->identifier); + } else if (!is_varying_var(earlier, state->stage)) { + _mesa_glsl_error(&loc, state, + "`%s' cannot be marked invariant; interfaces between " + "shader stages only.", decl->identifier); + } else if (earlier->data.used) { + _mesa_glsl_error(& loc, state, + "variable `%s' may not be redeclared " + "`invariant' after being used", + earlier->name); + } else { + earlier->data.invariant = true; + } + } + + /* Invariant redeclarations do not have r-values. 
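+    * For example (illustrative), `invariant gl_Position;' only qualifies
+    * an existing variable, so there is no value to return.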
+       */
+      return NULL;
+   }
+
+   if (this->precise) {
+      assert(this->type == NULL);
+
+      foreach_list_typed (ast_declaration, decl, link, &this->declarations) {
+         assert(decl->array_specifier == NULL);
+         assert(decl->initializer == NULL);
+
+         ir_variable *const earlier =
+            state->symbols->get_variable(decl->identifier);
+         if (earlier == NULL) {
+            _mesa_glsl_error(& loc, state,
+                             "undeclared variable `%s' cannot be marked "
+                             "precise", decl->identifier);
+         } else if (state->current_function != NULL &&
+                    !state->symbols->name_declared_this_scope(decl->identifier)) {
+            /* Note: we have to check if we're in a function, since
+             * builtins are treated as having come from another scope.
+             */
+            _mesa_glsl_error(& loc, state,
+                             "variable `%s' from an outer scope may not be "
+                             "redeclared `precise' in this scope",
+                             earlier->name);
+         } else if (earlier->data.used) {
+            _mesa_glsl_error(& loc, state,
+                             "variable `%s' may not be redeclared "
+                             "`precise' after being used",
+                             earlier->name);
+         } else {
+            earlier->data.precise = true;
+         }
+      }
+
+      /* Precise redeclarations do not have r-values either. */
+      return NULL;
+   }
+
+   assert(this->type != NULL);
+   assert(!this->invariant);
+   assert(!this->precise);
+
+   /* The type specifier may contain a structure definition.  Process that
+    * before any of the variable declarations.
+    */
+   (void) this->type->specifier->hir(instructions, state);
+
+   decl_type = this->type->glsl_type(& type_name, state);
+
+   /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
+    *
+    *    "Buffer variables may only be declared inside interface blocks
+    *     (section 4.3.9 “Interface Blocks”), which are then referred to as
+    *     shader storage blocks. It is a compile-time error to declare buffer
+    *     variables at global scope (outside a block)."
+    */
+   if (type->qualifier.flags.q.buffer && !decl_type->is_interface()) {
+      _mesa_glsl_error(&loc, state,
+                       "buffer variables cannot be declared outside "
+                       "interface blocks");
+   }
+
+   /* An offset-qualified atomic counter declaration sets the default
+    * offset for the next declaration within the same atomic counter
+    * buffer.
+    */
+   if (decl_type && decl_type->contains_atomic()) {
+      if (type->qualifier.flags.q.explicit_binding &&
+          type->qualifier.flags.q.explicit_offset) {
+         unsigned qual_binding;
+         unsigned qual_offset;
+         if (process_qualifier_constant(state, &loc, "binding",
+                                        type->qualifier.binding,
+                                        &qual_binding)
+             && process_qualifier_constant(state, &loc, "offset",
+                                           type->qualifier.offset,
+                                           &qual_offset)) {
+            state->atomic_counter_offsets[qual_binding] = qual_offset;
+         }
+      }
+   }
+
+   if (this->declarations.is_empty()) {
+      /* If there is no structure involved in the program text, there are
+       * three possible scenarios:
+       *
+       * - The program text contained something like 'vec4;'.  This is an
+       *   empty declaration.  It is valid but weird.  Emit a warning.
+       *
+       * - The program text contained something like 'S;' and 'S' is not
+       *   the name of a known structure type.  This is both invalid and
+       *   weird.  Emit an error.
+       *
+       * - The program text contained something like 'mediump float;'
+       *   when the programmer probably meant 'precision mediump float;'.
+       *   Emit a warning with a description of what they probably meant
+       *   to do.
+       *
+       * Note that if decl_type is NULL and there is a structure involved,
+       * there must have been some sort of error with the structure.  In
+       * this case we assume that an error was already generated on this
+       * line of code for the structure.  There is no need to generate an
+       * additional, confusing error.
+ */ + assert(this->type->specifier->structure == NULL || decl_type != NULL + || state->error); + + if (decl_type == NULL) { + _mesa_glsl_error(&loc, state, + "invalid type `%s' in empty declaration", + type_name); + } else if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) { + /* Empty atomic counter declarations are allowed and useful + * to set the default offset qualifier. + */ + return NULL; + } else if (this->type->qualifier.precision != ast_precision_none) { + if (this->type->specifier->structure != NULL) { + _mesa_glsl_error(&loc, state, + "precision qualifiers can't be applied " + "to structures"); + } else { + static const char *const precision_names[] = { + "highp", + "highp", + "mediump", + "lowp" + }; + + _mesa_glsl_warning(&loc, state, + "empty declaration with precision qualifier, " + "to set the default precision, use " + "`precision %s %s;'", + precision_names[this->type->qualifier.precision], + type_name); + } + } else if (this->type->specifier->structure == NULL) { + _mesa_glsl_warning(&loc, state, "empty declaration"); + } + } + + foreach_list_typed (ast_declaration, decl, link, &this->declarations) { + const struct glsl_type *var_type; + ir_variable *var; + const char *identifier = decl->identifier; + /* FINISHME: Emit a warning if a variable declaration shadows a + * FINISHME: declaration at a higher scope. + */ + + if ((decl_type == NULL) || decl_type->is_void()) { + if (type_name != NULL) { + _mesa_glsl_error(& loc, state, + "invalid type `%s' in declaration of `%s'", + type_name, decl->identifier); + } else { + _mesa_glsl_error(& loc, state, + "invalid type in declaration of `%s'", + decl->identifier); + } + continue; + } + + if (this->type->qualifier.flags.q.subroutine) { + const glsl_type *t; + const char *name; + + t = state->symbols->get_type(this->type->specifier->type_name); + if (!t) + _mesa_glsl_error(& loc, state, + "invalid type in declaration of `%s'", + decl->identifier); + name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), decl->identifier); + + identifier = name; + + } + var_type = process_array_type(&loc, decl_type, decl->array_specifier, + state); + + var = new(ctx) ir_variable(var_type, identifier, ir_var_auto); + + /* The 'varying in' and 'varying out' qualifiers can only be used with + * ARB_geometry_shader4 and EXT_geometry_shader4, which we don't support + * yet. + */ + if (this->type->qualifier.flags.q.varying) { + if (this->type->qualifier.flags.q.in) { + _mesa_glsl_error(& loc, state, + "`varying in' qualifier in declaration of " + "`%s' only valid for geometry shaders using " + "ARB_geometry_shader4 or EXT_geometry_shader4", + decl->identifier); + } else if (this->type->qualifier.flags.q.out) { + _mesa_glsl_error(& loc, state, + "`varying out' qualifier in declaration of " + "`%s' only valid for geometry shaders using " + "ARB_geometry_shader4 or EXT_geometry_shader4", + decl->identifier); + } + } + + /* From page 22 (page 28 of the PDF) of the GLSL 1.10 specification; + * + * "Global variables can only use the qualifiers const, + * attribute, uniform, or varying. Only one may be + * specified. + * + * Local variables can only use the qualifier const." + * + * This is relaxed in GLSL 1.30 and GLSL ES 3.00. It is also relaxed by + * any extension that adds the 'layout' keyword. 
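+    *
+    * For example (illustrative): a global `out vec4 color;' is rejected
+    * below in GLSL 1.10, but is accepted in GLSL 1.30 or when one of the
+    * extensions above is enabled.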
+ */ + if (!state->is_version(130, 300) + && !state->has_explicit_attrib_location() + && !state->has_separate_shader_objects() + && !state->ARB_fragment_coord_conventions_enable) { + if (this->type->qualifier.flags.q.out) { + _mesa_glsl_error(& loc, state, + "`out' qualifier in declaration of `%s' " + "only valid for function parameters in %s", + decl->identifier, state->get_version_string()); + } + if (this->type->qualifier.flags.q.in) { + _mesa_glsl_error(& loc, state, + "`in' qualifier in declaration of `%s' " + "only valid for function parameters in %s", + decl->identifier, state->get_version_string()); + } + /* FINISHME: Test for other invalid qualifiers. */ + } + + apply_type_qualifier_to_variable(& this->type->qualifier, var, state, + & loc, false); + apply_layout_qualifier_to_variable(&this->type->qualifier, var, state, + &loc); + + if (this->type->qualifier.flags.q.invariant) { + if (!is_varying_var(var, state->stage)) { + _mesa_glsl_error(&loc, state, + "`%s' cannot be marked invariant; interfaces between " + "shader stages only", var->name); + } + } + + if (state->current_function != NULL) { + const char *mode = NULL; + const char *extra = ""; + + /* There is no need to check for 'inout' here because the parser will + * only allow that in function parameter lists. + */ + if (this->type->qualifier.flags.q.attribute) { + mode = "attribute"; + } else if (this->type->qualifier.flags.q.subroutine) { + mode = "subroutine uniform"; + } else if (this->type->qualifier.flags.q.uniform) { + mode = "uniform"; + } else if (this->type->qualifier.flags.q.varying) { + mode = "varying"; + } else if (this->type->qualifier.flags.q.in) { + mode = "in"; + extra = " or in function parameter list"; + } else if (this->type->qualifier.flags.q.out) { + mode = "out"; + extra = " or in function parameter list"; + } + + if (mode) { + _mesa_glsl_error(& loc, state, + "%s variable `%s' must be declared at " + "global scope%s", + mode, var->name, extra); + } + } else if (var->data.mode == ir_var_shader_in) { + var->data.read_only = true; + + if (state->stage == MESA_SHADER_VERTEX) { + bool error_emitted = false; + + /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec: + * + * "Vertex shader inputs can only be float, floating-point + * vectors, matrices, signed and unsigned integers and integer + * vectors. Vertex shader inputs can also form arrays of these + * types, but not structures." + * + * From page 31 (page 27 of the PDF) of the GLSL 1.30 spec: + * + * "Vertex shader inputs can only be float, floating-point + * vectors, matrices, signed and unsigned integers and integer + * vectors. They cannot be arrays or structures." + * + * From page 23 (page 29 of the PDF) of the GLSL 1.20 spec: + * + * "The attribute qualifier can be used only with float, + * floating-point vectors, and matrices. Attribute variables + * cannot be declared as arrays or structures." + * + * From page 33 (page 39 of the PDF) of the GLSL ES 3.00 spec: + * + * "Vertex shader inputs can only be float, floating-point + * vectors, matrices, signed and unsigned integers and integer + * vectors. Vertex shader inputs cannot be arrays or + * structures." 
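+          *
+          * For example (illustrative): `in mat4 mvp;' is an acceptable
+          * vertex shader input in every version, while `in bool b;' or a
+          * structure input is rejected below.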
+          */
+         const glsl_type *check_type = var->type->without_array();
+
+         switch (check_type->base_type) {
+         case GLSL_TYPE_FLOAT:
+            break;
+         case GLSL_TYPE_UINT:
+         case GLSL_TYPE_INT:
+            if (state->is_version(120, 300))
+               break;
+         case GLSL_TYPE_DOUBLE:
+            if (check_type->base_type == GLSL_TYPE_DOUBLE &&
+                (state->is_version(410, 0) ||
+                 state->ARB_vertex_attrib_64bit_enable))
+               break;
+            /* FALLTHROUGH */
+         default:
+            _mesa_glsl_error(& loc, state,
+                             "vertex shader input / attribute cannot have "
+                             "type %s`%s'",
+                             var->type->is_array() ? "array of " : "",
+                             check_type->name);
+            error_emitted = true;
+         }
+
+         if (!error_emitted && var->type->is_array() &&
+             !state->check_version(150, 0, &loc,
+                                   "vertex shader input / attribute "
+                                   "cannot have array type")) {
+            error_emitted = true;
+         }
+      } else if (state->stage == MESA_SHADER_GEOMETRY) {
+         /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec:
+          *
+          *    Geometry shader input variables get the per-vertex values
+          *    written out by vertex shader output variables of the same
+          *    names. Since a geometry shader operates on a set of
+          *    vertices, each input varying variable (or input block, see
+          *    interface blocks below) needs to be declared as an array.
+          */
+         if (!var->type->is_array()) {
+            _mesa_glsl_error(&loc, state,
+                             "geometry shader inputs must be arrays");
+         }
+
+         handle_geometry_shader_input_decl(state, loc, var);
+      } else if (state->stage == MESA_SHADER_FRAGMENT) {
+         /* From section 4.3.4 (Input Variables) of the GLSL ES 3.10 spec:
+          *
+          *    It is a compile-time error to declare a fragment shader
+          *    input with, or that contains, any of the following types:
+          *
+          *    * A boolean type
+          *    * An opaque type
+          *    * An array of arrays
+          *    * An array of structures
+          *    * A structure containing an array
+          *    * A structure containing a structure
+          */
+         if (state->es_shader) {
+            const glsl_type *check_type = var->type->without_array();
+            if (check_type->is_boolean() ||
+                check_type->contains_opaque()) {
+               _mesa_glsl_error(&loc, state,
+                                "fragment shader input cannot have type %s",
+                                check_type->name);
+            }
+            if (var->type->is_array() &&
+                var->type->fields.array->is_array()) {
+               _mesa_glsl_error(&loc, state,
+                                "%s shader input "
+                                "cannot have an array of arrays",
+                                _mesa_shader_stage_to_string(state->stage));
+            }
+            if (var->type->is_array() &&
+                var->type->fields.array->is_record()) {
+               _mesa_glsl_error(&loc, state,
+                                "fragment shader input "
+                                "cannot have an array of structs");
+            }
+            if (var->type->is_record()) {
+               for (unsigned i = 0; i < var->type->length; i++) {
+                  if (var->type->fields.structure[i].type->is_array() ||
+                      var->type->fields.structure[i].type->is_record())
+                     _mesa_glsl_error(&loc, state,
+                                      "fragment shader input cannot have "
+                                      "a struct that contains an "
+                                      "array or struct");
+               }
+            }
+         }
+      } else if (state->stage == MESA_SHADER_TESS_CTRL ||
+                 state->stage == MESA_SHADER_TESS_EVAL) {
+         handle_tess_shader_input_decl(state, loc, var);
+      }
+   } else if (var->data.mode == ir_var_shader_out) {
+      const glsl_type *check_type = var->type->without_array();
+
+      /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
+       *
+       *    It is a compile-time error to declare a vertex, tessellation
+       *    evaluation, tessellation control, or geometry shader output
+       *    that contains any of the following:
+       *
+       *    * A Boolean type (bool, bvec2 ...)
+ * * An opaque type + */ + if (check_type->is_boolean() || check_type->contains_opaque()) + _mesa_glsl_error(&loc, state, + "%s shader output cannot have type %s", + _mesa_shader_stage_to_string(state->stage), + check_type->name); + + /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec: + * + * It is a compile-time error to declare a fragment shader output + * that contains any of the following: + * + * * A Boolean type (bool, bvec2 ...) + * * A double-precision scalar or vector (double, dvec2 ...) + * * An opaque type + * * Any matrix type + * * A structure + */ + if (state->stage == MESA_SHADER_FRAGMENT) { + if (check_type->is_record() || check_type->is_matrix()) + _mesa_glsl_error(&loc, state, + "fragment shader output " + "cannot have struct or matrix type"); + switch (check_type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + break; + default: + _mesa_glsl_error(&loc, state, + "fragment shader output cannot have " + "type %s", check_type->name); + } + } + + /* From section 4.3.6 (Output Variables) of the GLSL ES 3.10 spec: + * + * It is a compile-time error to declare a vertex shader output + * with, or that contains, any of the following types: + * + * * A boolean type + * * An opaque type + * * An array of arrays + * * An array of structures + * * A structure containing an array + * * A structure containing a structure + * + * It is a compile-time error to declare a fragment shader output + * with, or that contains, any of the following types: + * + * * A boolean type + * * An opaque type + * * A matrix + * * A structure + * * An array of array + */ + if (state->es_shader) { + if (var->type->is_array() && + var->type->fields.array->is_array()) { + _mesa_glsl_error(&loc, state, + "%s shader output " + "cannot have an array of arrays", + _mesa_shader_stage_to_string(state->stage)); + } + if (state->stage == MESA_SHADER_VERTEX) { + if (var->type->is_array() && + var->type->fields.array->is_record()) { + _mesa_glsl_error(&loc, state, + "vertex shader output " + "cannot have an array of structs"); + } + if (var->type->is_record()) { + for (unsigned i = 0; i < var->type->length; i++) { + if (var->type->fields.structure[i].type->is_array() || + var->type->fields.structure[i].type->is_record()) + _mesa_glsl_error(&loc, state, + "vertex shader output cannot have a " + "struct that contains an " + "array or struct"); + } + } + } + } + + if (state->stage == MESA_SHADER_TESS_CTRL) { + handle_tess_ctrl_shader_output_decl(state, loc, var); + } + } else if (var->type->contains_subroutine()) { + /* declare subroutine uniforms as hidden */ + var->data.how_declared = ir_var_hidden; + } + + /* Integer fragment inputs must be qualified with 'flat'. In GLSL ES, + * so must integer vertex outputs. + * + * From section 4.3.4 ("Inputs") of the GLSL 1.50 spec: + * "Fragment shader inputs that are signed or unsigned integers or + * integer vectors must be qualified with the interpolation qualifier + * flat." + * + * From section 4.3.4 ("Input Variables") of the GLSL 3.00 ES spec: + * "Fragment shader inputs that are, or contain, signed or unsigned + * integers or integer vectors must be qualified with the + * interpolation qualifier flat." + * + * From section 4.3.6 ("Output Variables") of the GLSL 3.00 ES spec: + * "Vertex shader outputs that are, or contain, signed or unsigned + * integers or integer vectors must be qualified with the + * interpolation qualifier flat." 
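+    *
+    * For example (illustrative): a GLSL 1.50 fragment shader input
+    * declared `in ivec2 index;' is rejected below; it must be written
+    * `flat in ivec2 index;'.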
+    *
+    * Note that prior to GLSL 1.50, this requirement applied to vertex
+    * outputs rather than fragment inputs.  That creates problems in the
+    * presence of geometry shaders, so we adopt the GLSL 1.50 rule for all
+    * desktop GL shaders.  For GLSL ES shaders, we follow the spec and
+    * apply the restriction to both vertex outputs and fragment inputs.
+    *
+    * Note also that the desktop GLSL specs are missing the text "or
+    * contain"; this is presumably an oversight, since there is no
+    * reasonable way to interpolate a fragment shader input that contains
+    * an integer.
+    */
+   if (state->is_version(130, 300) &&
+       var->type->contains_integer() &&
+       var->data.interpolation != INTERP_QUALIFIER_FLAT &&
+       ((state->stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_in)
+        || (state->stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_out
+            && state->es_shader))) {
+      const char *var_type = (state->stage == MESA_SHADER_VERTEX) ?
+         "vertex output" : "fragment input";
+      _mesa_glsl_error(&loc, state, "if a %s is (or contains) "
+                       "an integer, then it must be qualified with 'flat'",
+                       var_type);
+   }
+
+   /* Double fragment inputs must be qualified with 'flat'. */
+   if (var->type->contains_double() &&
+       var->data.interpolation != INTERP_QUALIFIER_FLAT &&
+       state->stage == MESA_SHADER_FRAGMENT &&
+       var->data.mode == ir_var_shader_in) {
+      _mesa_glsl_error(&loc, state, "if a fragment input is (or contains) "
+                       "a double, then it must be qualified with 'flat'");
+   }
+
+   /* Interpolation qualifiers cannot be applied to 'centroid' and
+    * 'centroid varying'.
+    *
+    * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec:
+    *
+    *    "interpolation qualifiers may only precede the qualifiers in,
+    *     centroid in, out, or centroid out in a declaration. They do not
+    *     apply to the deprecated storage qualifiers varying or centroid
+    *     varying."
+    *
+    * These deprecated storage qualifiers do not exist in GLSL ES 3.00.
+    */
+   if (state->is_version(130, 0)
+       && this->type->qualifier.has_interpolation()
+       && this->type->qualifier.flags.q.varying) {
+
+      const char *i = this->type->qualifier.interpolation_string();
+      assert(i != NULL);
+      const char *s;
+      if (this->type->qualifier.flags.q.centroid)
+         s = "centroid varying";
+      else
+         s = "varying";
+
+      _mesa_glsl_error(&loc, state,
+                       "qualifier '%s' cannot be applied to the "
+                       "deprecated storage qualifier '%s'", i, s);
+   }
+
+   /* Interpolation qualifiers can only apply to vertex shader outputs and
+    * fragment shader inputs.
+    *
+    * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec:
+    *
+    *    "Outputs from a vertex shader (out) and inputs to a fragment
+    *     shader (in) can be further qualified with one or more of these
+    *     interpolation qualifiers"
+    *
+    * From page 31 (page 37 of the PDF) of the GLSL ES 3.00 spec:
+    *
+    *    "These interpolation qualifiers may only precede the qualifiers
+    *     in, centroid in, out, or centroid out in a declaration. They do
+    *     not apply to inputs into a vertex shader or outputs from a
+    *     fragment shader."
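+    *
+    * For example (illustrative): `smooth in vec4 pos;' in a vertex shader
+    * and `flat out vec4 color;' in a fragment shader are both rejected
+    * below.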
+ */
+ if (state->is_version(130, 300)
+ && this->type->qualifier.has_interpolation()) {
+
+ const char *i = this->type->qualifier.interpolation_string();
+ assert(i != NULL);
+
+ switch (state->stage) {
+ case MESA_SHADER_VERTEX:
+ if (this->type->qualifier.flags.q.in) {
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to vertex "
+ "shader inputs", i);
+ }
+ break;
+ case MESA_SHADER_FRAGMENT:
+ if (this->type->qualifier.flags.q.out) {
+ _mesa_glsl_error(&loc, state,
+ "qualifier '%s' cannot be applied to fragment "
+ "shader outputs", i);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+
+ /* From section 4.3.4 of the GLSL 4.00 spec:
+ * "Input variables may not be declared using the patch in qualifier
+ * in tessellation control or geometry shaders."
+ *
+ * From section 4.3.6 of the GLSL 4.00 spec:
+ * "It is an error to use patch out in a vertex, tessellation
+ * evaluation, or geometry shader."
+ *
+ * This doesn't explicitly forbid using them in a fragment shader, but
+ * that's probably just an oversight.
+ */
+ if (state->stage != MESA_SHADER_TESS_EVAL
+ && this->type->qualifier.flags.q.patch
+ && this->type->qualifier.flags.q.in) {
+
+ _mesa_glsl_error(&loc, state, "'patch in' can only be used in a "
+ "tessellation evaluation shader");
+ }
+
+ if (state->stage != MESA_SHADER_TESS_CTRL
+ && this->type->qualifier.flags.q.patch
+ && this->type->qualifier.flags.q.out) {
+
+ _mesa_glsl_error(&loc, state, "'patch out' can only be used in a "
+ "tessellation control shader");
+ }
+
+ /* Precision qualifiers exist only in GLSL ES and in desktop GLSL 1.30
+ * and later.
+ */
+ if (this->type->qualifier.precision != ast_precision_none) {
+ state->check_precision_qualifiers_allowed(&loc);
+ }
+
+
+ /* If a precision qualifier is allowed on a type, it is allowed on
+ * an array of that type.
+ */
+ if (!(this->type->qualifier.precision == ast_precision_none
+ || precision_qualifier_allowed(var->type->without_array()))) {
+
+ _mesa_glsl_error(&loc, state,
+ "precision qualifiers apply only to floating point"
+ ", integer and opaque types");
+ }
+
+ /* From section 4.1.7 of the GLSL 4.40 spec:
+ *
+ * "[Opaque types] can only be declared as function
+ * parameters or uniform-qualified variables."
+ */
+ if (var_type->contains_opaque() &&
+ !this->type->qualifier.flags.q.uniform) {
+ _mesa_glsl_error(&loc, state,
+ "opaque variables must be declared uniform");
+ }
+
+ /* Process the initializer and add its instructions to a temporary
+ * list. This list will be added to the instruction stream (below) after
+ * the declaration is added. This is done because in some cases (such as
+ * redeclarations) the declaration may not actually be added to the
+ * instruction stream.
+ */
+ exec_list initializer_instructions;
+
+ /* Examine var name here since var may get deleted in the next call */
+ bool var_is_gl_id = is_gl_identifier(var->name);
+
+ ir_variable *earlier =
+ get_variable_being_redeclared(var, decl->get_location(), state,
+ false /* allow_all_redeclarations */);
+ if (earlier != NULL) {
+ if (var_is_gl_id &&
+ earlier->data.how_declared == ir_var_declared_in_block) {
+ _mesa_glsl_error(&loc, state,
+ "`%s' has already been redeclared using "
+ "gl_PerVertex", earlier->name);
+ }
+ earlier->data.how_declared = ir_var_declared_normally;
+ }
+
+ if (decl->initializer != NULL) {
+ result = process_initializer((earlier == NULL) ?
var : earlier,
+ decl, this->type,
+ &initializer_instructions, state);
+ } else {
+ validate_array_dimensions(var_type, state, &loc);
+ }
+
+ /* From page 23 (page 29 of the PDF) of the GLSL 1.10 spec:
+ *
+ * "It is an error to write to a const variable outside of
+ * its declaration, so they must be initialized when
+ * declared."
+ */
+ if (this->type->qualifier.flags.q.constant && decl->initializer == NULL) {
+ _mesa_glsl_error(& loc, state,
+ "const declaration of `%s' must be initialized",
+ decl->identifier);
+ }
+
+ if (state->es_shader) {
+ const glsl_type *const t = (earlier == NULL)
+ ? var->type : earlier->type;
+
+ if (t->is_unsized_array())
+ /* Section 10.17 of the GLSL ES 1.00 specification states that
+ * unsized array declarations have been removed from the language.
+ * Arrays that are sized using an initializer are still explicitly
+ * sized. However, GLSL ES 1.00 does not allow array
+ * initializers. That is only allowed in GLSL ES 3.00.
+ *
+ * Section 4.1.9 (Arrays) of the GLSL ES 3.00 spec says:
+ *
+ * "An array type can also be formed without specifying a size
+ * if the definition includes an initializer:
+ *
+ * float x[] = float[2] (1.0, 2.0); // declares an array of size 2
+ * float y[] = float[] (1.0, 2.0, 3.0); // declares an array of size 3
+ *
+ * float a[5];
+ * float b[] = a;"
+ */
+ _mesa_glsl_error(& loc, state,
+ "unsized array declarations are not allowed in "
+ "GLSL ES");
+ }
+
+ /* If the declaration is not a redeclaration, there are a few additional
+ * semantic checks that must be applied. In addition, the variable that
+ * was created for the declaration should be added to the IR stream.
+ */
+ if (earlier == NULL) {
+ validate_identifier(decl->identifier, loc, state);
+
+ /* Add the variable to the symbol table. Note that the initializer's
+ * IR was already processed earlier (though it hasn't been emitted
+ * yet), without the variable in scope.
+ *
+ * This differs from most C-like languages, but it follows the GLSL
+ * specification. From page 28 (page 34 of the PDF) of the GLSL 1.50
+ * spec:
+ *
+ * "Within a declaration, the scope of a name starts immediately
+ * after the initializer if present or immediately after the name
+ * being declared if not."
+ */
+ if (!state->symbols->add_variable(var)) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(&loc, state, "name `%s' already taken in the "
+ "current scope", decl->identifier);
+ continue;
+ }
+
+ /* Push the variable declaration to the top. It means that all the
+ * variable declarations will appear in a funny last-to-first order,
+ * but otherwise we run into trouble if a function is prototyped, a
+ * global var is declared, then the function is defined with usage of
+ * the global var. See glslparsertest's CorrectModule.frag.
+ */
+ instructions->push_head(var);
+ }
+
+ instructions->append_list(&initializer_instructions);
+ }
+
+
+ /* Generally, variable declarations do not have r-values. However,
+ * one is used for the declaration in
+ *
+ * while (bool b = some_condition()) {
+ * ...
+ * }
+ *
+ * so we return the rvalue from the last seen declaration here.
+ */ + return result; +} + + +ir_rvalue * +ast_parameter_declarator::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + const struct glsl_type *type; + const char *name = NULL; + YYLTYPE loc = this->get_location(); + + type = this->type->glsl_type(& name, state); + + if (type == NULL) { + if (name != NULL) { + _mesa_glsl_error(& loc, state, + "invalid type `%s' in declaration of `%s'", + name, this->identifier); + } else { + _mesa_glsl_error(& loc, state, + "invalid type in declaration of `%s'", + this->identifier); + } + + type = glsl_type::error_type; + } + + /* From page 62 (page 68 of the PDF) of the GLSL 1.50 spec: + * + * "Functions that accept no input arguments need not use void in the + * argument list because prototypes (or definitions) are required and + * therefore there is no ambiguity when an empty argument list "( )" is + * declared. The idiom "(void)" as a parameter list is provided for + * convenience." + * + * Placing this check here prevents a void parameter being set up + * for a function, which avoids tripping up checks for main taking + * parameters and lookups of an unnamed symbol. + */ + if (type->is_void()) { + if (this->identifier != NULL) + _mesa_glsl_error(& loc, state, + "named parameter cannot have type `void'"); + + is_void = true; + return NULL; + } + + if (formal_parameter && (this->identifier == NULL)) { + _mesa_glsl_error(& loc, state, "formal parameter lacks a name"); + return NULL; + } + + /* This only handles "vec4 foo[..]". The earlier specifier->glsl_type(...) + * call already handled the "vec4[..] foo" case. + */ + type = process_array_type(&loc, type, this->array_specifier, state); + + if (!type->is_error() && type->is_unsized_array()) { + _mesa_glsl_error(&loc, state, "arrays passed as parameters must have " + "a declared size"); + type = glsl_type::error_type; + } + + is_void = false; + ir_variable *var = new(ctx) + ir_variable(type, this->identifier, ir_var_function_in); + + /* Apply any specified qualifiers to the parameter declaration. Note that + * for function parameters the default mode is 'in'. + */ + apply_type_qualifier_to_variable(& this->type->qualifier, var, state, & loc, + true); + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "Opaque variables cannot be treated as l-values; hence cannot + * be used as out or inout function parameters, nor can they be + * assigned into." + */ + if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out) + && type->contains_opaque()) { + _mesa_glsl_error(&loc, state, "out and inout parameters cannot " + "contain opaque variables"); + type = glsl_type::error_type; + } + + /* From page 39 (page 45 of the PDF) of the GLSL 1.10 spec: + * + * "When calling a function, expressions that do not evaluate to + * l-values cannot be passed to parameters declared as out or inout." + * + * From page 32 (page 38 of the PDF) of the GLSL 1.10 spec: + * + * "Other binary or unary expressions, non-dereferenced arrays, + * function names, swizzles with repeated fields, and constants + * cannot be l-values." + * + * So for GLSL 1.10, passing an array as an out or inout parameter is not + * allowed. This restriction is removed in GLSL 1.20, and in GLSL ES. 
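+ *
+ * For example (hypothetical declaration, illustrative only):
+ *
+ * void f(out float a[4]); // error in GLSL 1.10, OK in 1.20 and ES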
+ */ + if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out) + && type->is_array() + && !state->check_version(120, 100, &loc, + "arrays cannot be out or inout parameters")) { + type = glsl_type::error_type; + } + + instructions->push_tail(var); + + /* Parameter declarations do not have r-values. + */ + return NULL; +} + + +void +ast_parameter_declarator::parameters_to_hir(exec_list *ast_parameters, + bool formal, + exec_list *ir_parameters, + _mesa_glsl_parse_state *state) +{ + ast_parameter_declarator *void_param = NULL; + unsigned count = 0; + + foreach_list_typed (ast_parameter_declarator, param, link, ast_parameters) { + param->formal_parameter = formal; + param->hir(ir_parameters, state); + + if (param->is_void) + void_param = param; + + count++; + } + + if ((void_param != NULL) && (count > 1)) { + YYLTYPE loc = void_param->get_location(); + + _mesa_glsl_error(& loc, state, + "`void' parameter must be only parameter"); + } +} + + +void +emit_function(_mesa_glsl_parse_state *state, ir_function *f) +{ + /* IR invariants disallow function declarations or definitions + * nested within other function definitions. But there is no + * requirement about the relative order of function declarations + * and definitions with respect to one another. So simply insert + * the new ir_function block at the end of the toplevel instruction + * list. + */ + state->toplevel_ir->push_tail(f); +} + + +ir_rvalue * +ast_function::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + ir_function *f = NULL; + ir_function_signature *sig = NULL; + exec_list hir_parameters; + YYLTYPE loc = this->get_location(); + + const char *const name = identifier; + + /* New functions are always added to the top-level IR instruction stream, + * so this instruction list pointer is ignored. See also emit_function + * (called below). + */ + (void) instructions; + + /* From page 21 (page 27 of the PDF) of the GLSL 1.20 spec, + * + * "Function declarations (prototypes) cannot occur inside of functions; + * they must be at global scope, or for the built-in functions, outside + * the global scope." + * + * From page 27 (page 33 of the PDF) of the GLSL ES 1.00.16 spec, + * + * "User defined functions may only be defined within the global scope." + * + * Note that this language does not appear in GLSL 1.10. + */ + if ((state->current_function != NULL) && + state->is_version(120, 100)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "declaration of function `%s' not allowed within " + "function body", name); + } + + validate_identifier(name, this->get_location(), state); + + /* Convert the list of function parameters to HIR now so that they can be + * used below to compare this function's signature with previously seen + * signatures for functions with the same name. + */ + ast_parameter_declarator::parameters_to_hir(& this->parameters, + is_definition, + & hir_parameters, state); + + const char *return_type_name; + const glsl_type *return_type = + this->return_type->glsl_type(& return_type_name, state); + + if (!return_type) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "function `%s' has undeclared return type `%s'", + name, return_type_name); + return_type = glsl_type::error_type; + } + + /* ARB_shader_subroutine states: + * "Subroutine declarations cannot be prototyped. It is an error to prepend + * subroutine(...) to a function declaration." 
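+ *
+ * Illustrative (hypothetical) example:
+ *
+ * subroutine void st();
+ * subroutine (st) void impl(); // error: prototyped declaration
+ * subroutine (st) void impl() { } // OK: definition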
+ */ + if (this->return_type->qualifier.flags.q.subroutine_def && !is_definition) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "function declaration `%s' cannot have subroutine prepended", + name); + } + + /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec: + * "No qualifier is allowed on the return type of a function." + */ + if (this->return_type->has_qualifiers(state)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "function `%s' return type has qualifiers", name); + } + + /* Section 6.1 (Function Definitions) of the GLSL 1.20 spec says: + * + * "Arrays are allowed as arguments and as the return type. In both + * cases, the array must be explicitly sized." + */ + if (return_type->is_unsized_array()) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "function `%s' return type array must be explicitly " + "sized", name); + } + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "[Opaque types] can only be declared as function parameters + * or uniform-qualified variables." + */ + if (return_type->contains_opaque()) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, + "function `%s' return type can't contain an opaque type", + name); + } + + /* Create an ir_function if one doesn't already exist. */ + f = state->symbols->get_function(name); + if (f == NULL) { + f = new(ctx) ir_function(name); + if (!this->return_type->qualifier.flags.q.subroutine) { + if (!state->symbols->add_function(f)) { + /* This function name shadows a non-function use of the same name. */ + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, "function name `%s' conflicts with " + "non-function", name); + return NULL; + } + } + emit_function(state, f); + } + + /* From GLSL ES 3.0 spec, chapter 6.1 "Function Definitions", page 71: + * + * "A shader cannot redefine or overload built-in functions." + * + * While in GLSL ES 1.0 specification, chapter 8 "Built-in Functions": + * + * "User code can overload the built-in functions but cannot redefine + * them." + */ + if (state->es_shader && state->language_version >= 300) { + /* Local shader has no exact candidates; check the built-ins. */ + _mesa_glsl_initialize_builtin_functions(); + if (_mesa_glsl_find_builtin_function_by_name(name)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "A shader cannot redefine or overload built-in " + "function `%s' in GLSL ES 3.00", name); + return NULL; + } + } + + /* Verify that this function's signature either doesn't match a previously + * seen signature for a function with the same name, or, if a match is found, + * that the previously seen signature does not have an associated definition. 
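+ *
+ * A hypothetical example of what this allows and rejects:
+ *
+ * void f(int x); // prototype
+ * void f(int x) { } // OK: matches, not yet defined
+ * void f(float x) { } // OK: a distinct overload
+ * void f(int x) { } // error: redefinition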
+ */
+ if (state->es_shader || f->has_user_signature()) {
+ sig = f->exact_matching_signature(state, &hir_parameters);
+ if (sig != NULL) {
+ const char *badvar = sig->qualifiers_match(&hir_parameters);
+ if (badvar != NULL) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(&loc, state, "function `%s' parameter `%s' "
+ "qualifiers don't match prototype", name, badvar);
+ }
+
+ if (sig->return_type != return_type) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(&loc, state, "function `%s' return type doesn't "
+ "match prototype", name);
+ }
+
+ if (sig->is_defined) {
+ if (is_definition) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state, "function `%s' redefined", name);
+ } else {
+ /* We just encountered a prototype that exactly matches a
+ * function that's already been defined. This is redundant,
+ * and we should ignore it.
+ */
+ return NULL;
+ }
+ }
+ }
+ }
+
+ /* Verify the return type of main() */
+ if (strcmp(name, "main") == 0) {
+ if (! return_type->is_void()) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "main() must return void");
+ }
+
+ if (!hir_parameters.is_empty()) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "main() must not take any parameters");
+ }
+ }
+
+ /* Finish storing the information about this new function in its signature.
+ */
+ if (sig == NULL) {
+ sig = new(ctx) ir_function_signature(return_type);
+ f->add_signature(sig);
+ }
+
+ sig->replace_parameters(&hir_parameters);
+ signature = sig;
+
+ if (this->return_type->qualifier.flags.q.subroutine_def) {
+ int idx;
+
+ if (this->return_type->qualifier.flags.q.explicit_index) {
+ unsigned qual_index;
+ if (process_qualifier_constant(state, &loc, "index",
+ this->return_type->qualifier.index,
+ &qual_index)) {
+ if (!state->has_explicit_uniform_location()) {
+ _mesa_glsl_error(&loc, state, "subroutine index requires "
+ "GL_ARB_explicit_uniform_location or "
+ "GLSL 4.30");
+ } else if (qual_index >= MAX_SUBROUTINES) {
+ _mesa_glsl_error(&loc, state,
+ "invalid subroutine index (%d); index must "
+ "be a number between 0 and "
+ "GL_MAX_SUBROUTINES - 1 (%d)", qual_index,
+ MAX_SUBROUTINES - 1);
+ } else {
+ f->subroutine_index = qual_index;
+ }
+ }
+ }
+
+ f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length();
+ f->subroutine_types = ralloc_array(state, const struct glsl_type *,
+ f->num_subroutine_types);
+ idx = 0;
+ foreach_list_typed(ast_declaration, decl, link, &this->return_type->qualifier.subroutine_list->declarations) {
+ const struct glsl_type *type;
+ /* the subroutine type must already be declared */
+ type = state->symbols->get_type(decl->identifier);
+ if (!type) {
+ _mesa_glsl_error(& loc, state, "unknown type '%s' in subroutine function definition", decl->identifier);
+ }
+ f->subroutine_types[idx++] = type;
+ }
+ state->subroutines = (ir_function **)reralloc(state, state->subroutines,
+ ir_function *,
+ state->num_subroutines + 1);
+ state->subroutines[state->num_subroutines] = f;
+ state->num_subroutines++;
+
+ }
+
+ if (this->return_type->qualifier.flags.q.subroutine) {
+ if (!state->symbols->add_type(this->identifier, glsl_type::get_subroutine_instance(this->identifier))) {
+ _mesa_glsl_error(& loc, state, "type '%s' previously defined", this->identifier);
+ return NULL;
+ }
+ state->subroutine_types = (ir_function **)reralloc(state, state->subroutine_types,
+ ir_function *,
+ state->num_subroutine_types + 1);
+ 
state->subroutine_types[state->num_subroutine_types] = f;
+ state->num_subroutine_types++;
+
+ f->is_subroutine = true;
+ }
+
+ /* Function declarations (prototypes) do not have r-values.
+ */
+ return NULL;
+}
+
+
+ir_rvalue *
+ast_function_definition::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ prototype->is_definition = true;
+ prototype->hir(instructions, state);
+
+ ir_function_signature *signature = prototype->signature;
+ if (signature == NULL)
+ return NULL;
+
+ assert(state->current_function == NULL);
+ state->current_function = signature;
+ state->found_return = false;
+
+ /* Duplicate parameters declared in the prototype as concrete variables.
+ * Add these to the symbol table.
+ */
+ state->symbols->push_scope();
+ foreach_in_list(ir_variable, var, &signature->parameters) {
+ assert(var->as_variable() != NULL);
+
+ /* The only way a parameter would "exist" is if two parameters have
+ * the same name.
+ */
+ if (state->symbols->name_declared_this_scope(var->name)) {
+ YYLTYPE loc = this->get_location();
+
+ _mesa_glsl_error(& loc, state, "parameter `%s' redeclared", var->name);
+ } else {
+ state->symbols->add_variable(var);
+ }
+ }
+
+ /* Convert the body of the function to HIR. */
+ this->body->hir(&signature->body, state);
+ signature->is_defined = true;
+
+ state->symbols->pop_scope();
+
+ assert(state->current_function == signature);
+ state->current_function = NULL;
+
+ if (!signature->return_type->is_void() && !state->found_return) {
+ YYLTYPE loc = this->get_location();
+ _mesa_glsl_error(& loc, state, "function `%s' has non-void return type "
+ "%s, but no return statement",
+ signature->function_name(),
+ signature->return_type->name);
+ }
+
+ /* Function definitions do not have r-values.
+ */
+ return NULL;
+}
+
+
+ir_rvalue *
+ast_jump_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ void *ctx = state;
+
+ switch (mode) {
+ case ast_return: {
+ ir_return *inst;
+ assert(state->current_function);
+
+ if (opt_return_value) {
+ ir_rvalue *ret = opt_return_value->hir(instructions, state);
+
+ /* The returned value can be NULL if the shader says
+ * 'return foo();' and foo() is a function that returns void.
+ *
+ * NOTE: The GLSL spec doesn't say that this is an error. The type
+ * of the return value is void. If the return type of the function is
+ * also void, then this should compile without error. Seriously.
+ */
+ const glsl_type *const ret_type =
+ (ret == NULL) ? glsl_type::void_type : ret->type;
+
+ /* Implicit conversions are not allowed for return values prior to
+ * ARB_shading_language_420pack.
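+ *
+ * For example (illustrative only): without 420pack,
+ *
+ * float f() { return 1; } // error: int does not match float
+ *
+ * is rejected, while with 420pack the int is implicitly converted.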
+ */ + if (state->current_function->return_type != ret_type) { + YYLTYPE loc = this->get_location(); + + if (state->has_420pack()) { + if (!apply_implicit_conversion(state->current_function->return_type, + ret, state)) { + _mesa_glsl_error(& loc, state, + "could not implicitly convert return value " + "to %s, in function `%s'", + state->current_function->return_type->name, + state->current_function->function_name()); + } + } else { + _mesa_glsl_error(& loc, state, + "`return' with wrong type %s, in function `%s' " + "returning %s", + ret_type->name, + state->current_function->function_name(), + state->current_function->return_type->name); + } + } else if (state->current_function->return_type->base_type == + GLSL_TYPE_VOID) { + YYLTYPE loc = this->get_location(); + + /* The ARB_shading_language_420pack, GLSL ES 3.0, and GLSL 4.20 + * specs add a clarification: + * + * "A void function can only use return without a return argument, even if + * the return argument has void type. Return statements only accept values: + * + * void func1() { } + * void func2() { return func1(); } // illegal return statement" + */ + _mesa_glsl_error(& loc, state, + "void functions can only use `return' without a " + "return argument"); + } + + inst = new(ctx) ir_return(ret); + } else { + if (state->current_function->return_type->base_type != + GLSL_TYPE_VOID) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, + "`return' with no value, in function %s returning " + "non-void", + state->current_function->function_name()); + } + inst = new(ctx) ir_return; + } + + state->found_return = true; + instructions->push_tail(inst); + break; + } + + case ast_discard: + if (state->stage != MESA_SHADER_FRAGMENT) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, + "`discard' may only appear in a fragment shader"); + } + instructions->push_tail(new(ctx) ir_discard); + break; + + case ast_break: + case ast_continue: + if (mode == ast_continue && + state->loop_nesting_ast == NULL) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, "continue may only appear in a loop"); + } else if (mode == ast_break && + state->loop_nesting_ast == NULL && + state->switch_state.switch_nesting_ast == NULL) { + YYLTYPE loc = this->get_location(); + + _mesa_glsl_error(& loc, state, + "break may only appear in a loop or a switch"); + } else { + /* For a loop, inline the for loop expression again, since we don't + * know where near the end of the loop body the normal copy of it is + * going to be placed. Same goes for the condition for a do-while + * loop. + */ + if (state->loop_nesting_ast != NULL && + mode == ast_continue && !state->switch_state.is_switch_innermost) { + if (state->loop_nesting_ast->rest_expression) { + state->loop_nesting_ast->rest_expression->hir(instructions, + state); + } + if (state->loop_nesting_ast->mode == + ast_iteration_statement::ast_do_while) { + state->loop_nesting_ast->condition_to_hir(instructions, state); + } + } + + if (state->switch_state.is_switch_innermost && + mode == ast_continue) { + /* Set 'continue_inside' to true. */ + ir_rvalue *const true_val = new (ctx) ir_constant(true); + ir_dereference_variable *deref_continue_inside_var = + new(ctx) ir_dereference_variable(state->switch_state.continue_inside); + instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var, + true_val)); + + /* Break out from the switch, continue for the loop will + * be called right after switch. 
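+ *
+ * Sketch of the emitted pattern (pseudocode, for illustration):
+ *
+ * continue_inside = true;
+ * break; // leaves the switch's wrapper loop
+ * ...
+ * if (continue_inside) // emitted right after the switch
+ * continue;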
*/
+ ir_loop_jump *const jump =
+ new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+ instructions->push_tail(jump);
+
+ } else if (state->switch_state.is_switch_innermost &&
+ mode == ast_break) {
+ /* Force break out of switch by inserting a break. */
+ ir_loop_jump *const jump =
+ new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+ instructions->push_tail(jump);
+ } else {
+ ir_loop_jump *const jump =
+ new(ctx) ir_loop_jump((mode == ast_break)
+ ? ir_loop_jump::jump_break
+ : ir_loop_jump::jump_continue);
+ instructions->push_tail(jump);
+ }
+ }
+
+ break;
+ }
+
+ /* Jump instructions do not have r-values.
+ */
+ return NULL;
+}
+
+
+ir_rvalue *
+ast_selection_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ void *ctx = state;
+
+ ir_rvalue *const condition = this->condition->hir(instructions, state);
+
+ /* From page 66 (page 72 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "Any expression whose type evaluates to a Boolean can be used as the
+ * conditional expression bool-expression. Vector types are not accepted
+ * as the expression to if."
+ *
+ * The checks are separated so that higher quality diagnostics can be
+ * generated for cases where both rules are violated.
+ */
+ if (!condition->type->is_boolean() || !condition->type->is_scalar()) {
+ YYLTYPE loc = this->condition->get_location();
+
+ _mesa_glsl_error(& loc, state, "if-statement condition must be scalar "
+ "boolean");
+ }
+
+ ir_if *const stmt = new(ctx) ir_if(condition);
+
+ if (then_statement != NULL) {
+ state->symbols->push_scope();
+ then_statement->hir(& stmt->then_instructions, state);
+ state->symbols->pop_scope();
+ }
+
+ if (else_statement != NULL) {
+ state->symbols->push_scope();
+ else_statement->hir(& stmt->else_instructions, state);
+ state->symbols->pop_scope();
+ }
+
+ instructions->push_tail(stmt);
+
+ /* if-statements do not have r-values.
+ */
+ return NULL;
+}
+
+
+ir_rvalue *
+ast_switch_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ void *ctx = state;
+
+ ir_rvalue *const test_expression =
+ this->test_expression->hir(instructions, state);
+
+ /* From page 66 (page 72 of the PDF) of the GLSL 1.50 spec:
+ *
+ * "The type of init-expression in a switch statement must be a
+ * scalar integer."
+ */
+ if (!test_expression->type->is_scalar() ||
+ !test_expression->type->is_integer()) {
+ YYLTYPE loc = this->test_expression->get_location();
+
+ _mesa_glsl_error(& loc,
+ state,
+ "switch-statement expression must be scalar "
+ "integer");
+ }
+
+ /* Track the switch-statement nesting in a stack-like manner.
+ */
+ struct glsl_switch_state saved = state->switch_state;
+
+ state->switch_state.is_switch_innermost = true;
+ state->switch_state.switch_nesting_ast = this;
+ state->switch_state.labels_ht = hash_table_ctor(0, hash_table_pointer_hash,
+ hash_table_pointer_compare);
+ state->switch_state.previous_default = NULL;
+
+ /* Initialize is_fallthru state to false.
+ */
+ ir_rvalue *const is_fallthru_val = new (ctx) ir_constant(false);
+ state->switch_state.is_fallthru_var =
+ new(ctx) ir_variable(glsl_type::bool_type,
+ "switch_is_fallthru_tmp",
+ ir_var_temporary);
+ instructions->push_tail(state->switch_state.is_fallthru_var);
+
+ ir_dereference_variable *deref_is_fallthru_var =
+ new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var);
+ instructions->push_tail(new(ctx) ir_assignment(deref_is_fallthru_var,
+ is_fallthru_val));
+
+ /* Initialize continue_inside state to false.
+ */
+ state->switch_state.continue_inside =
+ new(ctx) ir_variable(glsl_type::bool_type,
+ "continue_inside_tmp",
+ ir_var_temporary);
+ instructions->push_tail(state->switch_state.continue_inside);
+
+ ir_rvalue *const false_val = new (ctx) ir_constant(false);
+ ir_dereference_variable *deref_continue_inside_var =
+ new(ctx) ir_dereference_variable(state->switch_state.continue_inside);
+ instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var,
+ false_val));
+
+ state->switch_state.run_default =
+ new(ctx) ir_variable(glsl_type::bool_type,
+ "run_default_tmp",
+ ir_var_temporary);
+ instructions->push_tail(state->switch_state.run_default);
+
+ /* Loop around the switch is used for flow control. */
+ ir_loop * loop = new(ctx) ir_loop();
+ instructions->push_tail(loop);
+
+ /* Cache test expression.
+ */
+ test_to_hir(&loop->body_instructions, state);
+
+ /* Emit code for body of switch stmt.
+ */
+ body->hir(&loop->body_instructions, state);
+
+ /* Insert a break at the end to exit loop. */
+ ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+ loop->body_instructions.push_tail(jump);
+
+ /* If we are inside a loop, check whether continue was called inside the switch. */
+ if (state->loop_nesting_ast != NULL) {
+ ir_dereference_variable *deref_continue_inside =
+ new(ctx) ir_dereference_variable(state->switch_state.continue_inside);
+ ir_if *irif = new(ctx) ir_if(deref_continue_inside);
+ ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_continue);
+
+ if (state->loop_nesting_ast != NULL) {
+ if (state->loop_nesting_ast->rest_expression) {
+ state->loop_nesting_ast->rest_expression->hir(&irif->then_instructions,
+ state);
+ }
+ if (state->loop_nesting_ast->mode ==
+ ast_iteration_statement::ast_do_while) {
+ state->loop_nesting_ast->condition_to_hir(&irif->then_instructions, state);
+ }
+ }
+ irif->then_instructions.push_tail(jump);
+ instructions->push_tail(irif);
+ }
+
+ hash_table_dtor(state->switch_state.labels_ht);
+
+ state->switch_state = saved;
+
+ /* Switch statements do not have r-values. */
+ return NULL;
+}
+
+
+void
+ast_switch_statement::test_to_hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ void *ctx = state;
+
+ /* Cache value of test expression. */
+ ir_rvalue *const test_val =
+ test_expression->hir(instructions,
+ state);
+
+ state->switch_state.test_var = new(ctx) ir_variable(test_val->type,
+ "switch_test_tmp",
+ ir_var_temporary);
+ ir_dereference_variable *deref_test_var =
+ new(ctx) ir_dereference_variable(state->switch_state.test_var);
+
+ instructions->push_tail(state->switch_state.test_var);
+ instructions->push_tail(new(ctx) ir_assignment(deref_test_var, test_val));
+}
+
+
+ir_rvalue *
+ast_switch_body::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ if (stmts != NULL)
+ stmts->hir(instructions, state);
+
+ /* Switch bodies do not have r-values. */
+ return NULL;
+}
+
+ir_rvalue *
+ast_case_statement_list::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ exec_list default_case, after_default, tmp;
+
+ foreach_list_typed (ast_case_statement, case_stmt, link, & this->cases) {
+ case_stmt->hir(&tmp, state);
+
+ /* Default case. */
+ if (state->switch_state.previous_default && default_case.is_empty()) {
+ default_case.append_list(&tmp);
+ continue;
+ }
+
+ /* If a default case was found, append to the 'after_default' list.
*/ + if (!default_case.is_empty()) + after_default.append_list(&tmp); + else + instructions->append_list(&tmp); + } + + /* Handle the default case. This is done here because default might not be + * the last case. We need to add checks against following cases first to see + * if default should be chosen or not. + */ + if (!default_case.is_empty()) { + + ir_rvalue *const true_val = new (state) ir_constant(true); + ir_dereference_variable *deref_run_default_var = + new(state) ir_dereference_variable(state->switch_state.run_default); + + /* Choose to run default case initially, following conditional + * assignments might change this. + */ + ir_assignment *const init_var = + new(state) ir_assignment(deref_run_default_var, true_val); + instructions->push_tail(init_var); + + /* Default case was the last one, no checks required. */ + if (after_default.is_empty()) { + instructions->append_list(&default_case); + return NULL; + } + + foreach_in_list(ir_instruction, ir, &after_default) { + ir_assignment *assign = ir->as_assignment(); + + if (!assign) + continue; + + /* Clone the check between case label and init expression. */ + ir_expression *exp = (ir_expression*) assign->condition; + ir_expression *clone = exp->clone(state, NULL); + + ir_dereference_variable *deref_var = + new(state) ir_dereference_variable(state->switch_state.run_default); + ir_rvalue *const false_val = new (state) ir_constant(false); + + ir_assignment *const set_false = + new(state) ir_assignment(deref_var, false_val, clone); + + instructions->push_tail(set_false); + } + + /* Append default case and all cases after it. */ + instructions->append_list(&default_case); + instructions->append_list(&after_default); + } + + /* Case statements do not have r-values. */ + return NULL; +} + +ir_rvalue * +ast_case_statement::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + labels->hir(instructions, state); + + /* Guard case statements depending on fallthru state. */ + ir_dereference_variable *const deref_fallthru_guard = + new(state) ir_dereference_variable(state->switch_state.is_fallthru_var); + ir_if *const test_fallthru = new(state) ir_if(deref_fallthru_guard); + + foreach_list_typed (ast_node, stmt, link, & this->stmts) + stmt->hir(& test_fallthru->then_instructions, state); + + instructions->push_tail(test_fallthru); + + /* Case statements do not have r-values. */ + return NULL; +} + + +ir_rvalue * +ast_case_label_list::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + foreach_list_typed (ast_case_label, label, link, & this->labels) + label->hir(instructions, state); + + /* Case labels do not have r-values. */ + return NULL; +} + +ir_rvalue * +ast_case_label::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + + ir_dereference_variable *deref_fallthru_var = + new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var); + + ir_rvalue *const true_val = new(ctx) ir_constant(true); + + /* If not default case, ... */ + if (this->test_value != NULL) { + /* Conditionally set fallthru state based on + * comparison of cached test expression value to case label. 
+ */ + ir_rvalue *const label_rval = this->test_value->hir(instructions, state); + ir_constant *label_const = label_rval->constant_expression_value(); + + if (!label_const) { + YYLTYPE loc = this->test_value->get_location(); + + _mesa_glsl_error(& loc, state, + "switch statement case label must be a " + "constant expression"); + + /* Stuff a dummy value in to allow processing to continue. */ + label_const = new(ctx) ir_constant(0); + } else { + ast_expression *previous_label = (ast_expression *) + hash_table_find(state->switch_state.labels_ht, + (void *)(uintptr_t)label_const->value.u[0]); + + if (previous_label) { + YYLTYPE loc = this->test_value->get_location(); + _mesa_glsl_error(& loc, state, "duplicate case value"); + + loc = previous_label->get_location(); + _mesa_glsl_error(& loc, state, "this is the previous case label"); + } else { + hash_table_insert(state->switch_state.labels_ht, + this->test_value, + (void *)(uintptr_t)label_const->value.u[0]); + } + } + + ir_dereference_variable *deref_test_var = + new(ctx) ir_dereference_variable(state->switch_state.test_var); + + ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal, + label_const, + deref_test_var); + + /* + * From GLSL 4.40 specification section 6.2 ("Selection"): + * + * "The type of the init-expression value in a switch statement must + * be a scalar int or uint. The type of the constant-expression value + * in a case label also must be a scalar int or uint. When any pair + * of these values is tested for "equal value" and the types do not + * match, an implicit conversion will be done to convert the int to a + * uint (see section 4.1.10 “Implicit Conversions”) before the compare + * is done." + */ + if (label_const->type != state->switch_state.test_var->type) { + YYLTYPE loc = this->test_value->get_location(); + + const glsl_type *type_a = label_const->type; + const glsl_type *type_b = state->switch_state.test_var->type; + + /* Check if int->uint implicit conversion is supported. */ + bool integer_conversion_supported = + glsl_type::int_type->can_implicitly_convert_to(glsl_type::uint_type, + state); + + if ((!type_a->is_integer() || !type_b->is_integer()) || + !integer_conversion_supported) { + _mesa_glsl_error(&loc, state, "type mismatch with switch " + "init-expression and case label (%s != %s)", + type_a->name, type_b->name); + } else { + /* Conversion of the case label. */ + if (type_a->base_type == GLSL_TYPE_INT) { + if (!apply_implicit_conversion(glsl_type::uint_type, + test_cond->operands[0], state)) + _mesa_glsl_error(&loc, state, "implicit type conversion error"); + } else { + /* Conversion of the init-expression value. */ + if (!apply_implicit_conversion(glsl_type::uint_type, + test_cond->operands[1], state)) + _mesa_glsl_error(&loc, state, "implicit type conversion error"); + } + } + } + + ir_assignment *set_fallthru_on_test = + new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond); + + instructions->push_tail(set_fallthru_on_test); + } else { /* default case */ + if (state->switch_state.previous_default) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(& loc, state, + "multiple default labels in one switch"); + + loc = state->switch_state.previous_default->get_location(); + _mesa_glsl_error(& loc, state, "this is the first default label"); + } + state->switch_state.previous_default = this; + + /* Set fallthru condition on 'run_default' bool. 
*/
+ ir_dereference_variable *deref_run_default =
+ new(ctx) ir_dereference_variable(state->switch_state.run_default);
+ ir_rvalue *const cond_true = new(ctx) ir_constant(true);
+ ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal,
+ cond_true,
+ deref_run_default);
+
+ /* Set fallthru state. */
+ ir_assignment *set_fallthru =
+ new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond);
+
+ instructions->push_tail(set_fallthru);
+ }
+
+ /* Case statements do not have r-values. */
+ return NULL;
+}
+
+void
+ast_iteration_statement::condition_to_hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ void *ctx = state;
+
+ if (condition != NULL) {
+ ir_rvalue *const cond =
+ condition->hir(instructions, state);
+
+ if ((cond == NULL)
+ || !cond->type->is_boolean() || !cond->type->is_scalar()) {
+ YYLTYPE loc = condition->get_location();
+
+ _mesa_glsl_error(& loc, state,
+ "loop condition must be scalar boolean");
+ } else {
+ /* As the first code in the loop body, generate a block that looks
+ * like 'if (!condition) break;' as the loop termination condition.
+ */
+ ir_rvalue *const not_cond =
+ new(ctx) ir_expression(ir_unop_logic_not, cond);
+
+ ir_if *const if_stmt = new(ctx) ir_if(not_cond);
+
+ ir_jump *const break_stmt =
+ new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
+
+ if_stmt->then_instructions.push_tail(break_stmt);
+ instructions->push_tail(if_stmt);
+ }
+ }
+}
+
+
+ir_rvalue *
+ast_iteration_statement::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ void *ctx = state;
+
+ /* For-loops and while-loops start a new scope, but do-while loops do not.
+ */
+ if (mode != ast_do_while)
+ state->symbols->push_scope();
+
+ if (init_statement != NULL)
+ init_statement->hir(instructions, state);
+
+ ir_loop *const stmt = new(ctx) ir_loop();
+ instructions->push_tail(stmt);
+
+ /* Track the current loop nesting. */
+ ast_iteration_statement *nesting_ast = state->loop_nesting_ast;
+
+ state->loop_nesting_ast = this;
+
+ /* Likewise, indicate that following code is closest to a loop,
+ * NOT closest to a switch.
+ */
+ bool saved_is_switch_innermost = state->switch_state.is_switch_innermost;
+ state->switch_state.is_switch_innermost = false;
+
+ if (mode != ast_do_while)
+ condition_to_hir(&stmt->body_instructions, state);
+
+ if (body != NULL)
+ body->hir(& stmt->body_instructions, state);
+
+ if (rest_expression != NULL)
+ rest_expression->hir(& stmt->body_instructions, state);
+
+ if (mode == ast_do_while)
+ condition_to_hir(&stmt->body_instructions, state);
+
+ if (mode != ast_do_while)
+ state->symbols->pop_scope();
+
+ /* Restore previous nesting before returning. */
+ state->loop_nesting_ast = nesting_ast;
+ state->switch_state.is_switch_innermost = saved_is_switch_innermost;
+
+ /* Loops do not have r-values.
+ */
+ return NULL;
+}
+
+
+/**
+ * Determine if the given type is valid for establishing a default precision
+ * qualifier.
+ *
+ * From GLSL ES 3.00 section 4.5.4 ("Default Precision Qualifiers"):
+ *
+ * "The precision statement
+ *
+ * precision precision-qualifier type;
+ *
+ * can be used to establish a default precision qualifier. The type field
+ * can be either int or float or any of the sampler types, and the
+ * precision-qualifier can be lowp, mediump, or highp."
+ *
+ * GLSL ES 1.00 has similar language.
GLSL 1.30 doesn't allow precision + * qualifiers on sampler types, but this seems like an oversight (since the + * intention of including these in GLSL 1.30 is to allow compatibility with ES + * shaders). So we allow int, float, and all sampler types regardless of GLSL + * version. + */ +static bool +is_valid_default_precision_type(const struct glsl_type *const type) +{ + if (type == NULL) + return false; + + switch (type->base_type) { + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + /* "int" and "float" are valid, but vectors and matrices are not. */ + return type->vector_elements == 1 && type->matrix_columns == 1; + case GLSL_TYPE_SAMPLER: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + return true; + default: + return false; + } +} + + +ir_rvalue * +ast_type_specifier::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + if (this->default_precision == ast_precision_none && this->structure == NULL) + return NULL; + + YYLTYPE loc = this->get_location(); + + /* If this is a precision statement, check that the type to which it is + * applied is either float or int. + * + * From section 4.5.3 of the GLSL 1.30 spec: + * "The precision statement + * precision precision-qualifier type; + * can be used to establish a default precision qualifier. The type + * field can be either int or float [...]. Any other types or + * qualifiers will result in an error. + */ + if (this->default_precision != ast_precision_none) { + if (!state->check_precision_qualifiers_allowed(&loc)) + return NULL; + + if (this->structure != NULL) { + _mesa_glsl_error(&loc, state, + "precision qualifiers do not apply to structures"); + return NULL; + } + + if (this->array_specifier != NULL) { + _mesa_glsl_error(&loc, state, + "default precision statements do not apply to " + "arrays"); + return NULL; + } + + const struct glsl_type *const type = + state->symbols->get_type(this->type_name); + if (!is_valid_default_precision_type(type)) { + _mesa_glsl_error(&loc, state, + "default precision statements apply only to " + "float, int, and opaque types"); + return NULL; + } + + if (state->es_shader) { + /* Section 4.5.3 (Default Precision Qualifiers) of the GLSL ES 1.00 + * spec says: + * + * "Non-precision qualified declarations will use the precision + * qualifier specified in the most recent precision statement + * that is still in scope. The precision statement has the same + * scoping rules as variable declarations. If it is declared + * inside a compound statement, its effect stops at the end of + * the innermost statement it was declared in. Precision + * statements in nested scopes override precision statements in + * outer scopes. Multiple precision statements for the same basic + * type can appear inside the same scope, with later statements + * overriding earlier statements within that scope." + * + * Default precision specifications follow the same scope rules as + * variables. So, we can track the state of the default precision + * qualifiers in the symbol table, and the rules will just work. This + * is a slight abuse of the symbol table, but it has the semantics + * that we want. + */ + state->symbols->add_default_precision_qualifier(this->type_name, + this->default_precision); + } + + /* FINISHME: Translate precision statements into IR. 
*/
+ return NULL;
+ }
+
+ /* _mesa_ast_set_aggregate_type() sets the field so that
+ * process_record_constructor() can do type-checking on C-style initializer
+ * expressions of structs, but ast_struct_specifier should only be translated
+ * to HIR if it is declaring the type of a structure.
+ *
+ * The ->is_declaration field is false for initializers of variables
+ * declared separately from the struct's type definition.
+ *
+ * struct S { ... }; (is_declaration = true)
+ * struct T { ... } t = { ... }; (is_declaration = true)
+ * S s = { ... }; (is_declaration = false)
+ */
+ if (this->structure != NULL && this->structure->is_declaration)
+ return this->structure->hir(instructions, state);
+
+ return NULL;
+}
+
+
+/**
+ * Process a structure or interface block tree into an array of structure fields
+ *
+ * After parsing, where there are some syntax differences, structures and
+ * interface blocks are almost identical. They are similar enough that the
+ * AST for each can be processed the same way into a set of
+ * \c glsl_struct_field to describe the members.
+ *
+ * If we're processing an interface block, var_mode should be the type of the
+ * interface block (ir_var_shader_in, ir_var_shader_out, ir_var_uniform or
+ * ir_var_shader_storage). If we're processing a structure, var_mode should be
+ * ir_var_auto.
+ *
+ * \return
+ * The number of fields processed. A pointer to the array of structure fields
+ * is stored in \c *fields_ret.
+ */
+static unsigned
+ast_process_struct_or_iface_block_members(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state,
+ exec_list *declarations,
+ glsl_struct_field **fields_ret,
+ bool is_interface,
+ enum glsl_matrix_layout matrix_layout,
+ bool allow_reserved_names,
+ ir_variable_mode var_mode,
+ ast_type_qualifier *layout,
+ unsigned block_stream,
+ unsigned expl_location)
+{
+ unsigned decl_count = 0;
+
+ /* Make an initial pass over the list of fields to determine how
+ * many there are. Each element in this list is an ast_declarator_list.
+ * This means that we actually need to count the number of elements in the
+ * 'declarations' list in each of the elements.
+ */
+ foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
+ decl_count += decl_list->declarations.length();
+ }
+
+ /* Allocate storage for the fields and process the field
+ * declarations. As the declarations are processed, try to also convert
+ * the types to HIR. This ensures that structure definitions embedded in
+ * other structure definitions or in interface blocks are processed.
+ */
+ glsl_struct_field *const fields = ralloc_array(state, glsl_struct_field,
+ decl_count);
+
+ bool first_member = true;
+ bool first_member_has_explicit_location;
+
+ unsigned i = 0;
+ foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
+ const char *type_name;
+ YYLTYPE loc = decl_list->get_location();
+
+ decl_list->type->specifier->hir(instructions, state);
+
+ /* Section 10.9 of the GLSL ES 1.00 specification states that
+ * embedded structure definitions have been removed from the language.
+ */
+ if (state->es_shader && decl_list->type->specifier->structure != NULL) {
+ _mesa_glsl_error(&loc, state, "embedded structure definitions are "
+ "not allowed in GLSL ES 1.00");
+ }
+
+ const glsl_type *decl_type =
+ decl_list->type->glsl_type(& type_name, state);
+
+ const struct ast_type_qualifier *const qual =
+ &decl_list->type->qualifier;
+
+ /* From section 4.3.9 of the GLSL 4.40 spec:
+ *
+ * "[In interface blocks] opaque types are not allowed."
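+ *
+ * Hypothetical example of a rejected block member (illustrative only):
+ *
+ * uniform Samplers { sampler2D tex; }; // error: opaque member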
+ * + * It should be impossible for decl_type to be NULL here. Cases that + * might naturally lead to decl_type being NULL, especially for the + * is_interface case, will have resulted in compilation having + * already halted due to a syntax error. + */ + assert(decl_type); + + if (is_interface && decl_type->contains_opaque()) { + _mesa_glsl_error(&loc, state, + "uniform/buffer in non-default interface block contains " + "opaque variable"); + } + + if (decl_type->contains_atomic()) { + /* From section 4.1.7.3 of the GLSL 4.40 spec: + * + * "Members of structures cannot be declared as atomic counter + * types." + */ + _mesa_glsl_error(&loc, state, "atomic counter in structure, " + "shader storage block or uniform block"); + } + + if (decl_type->contains_image()) { + /* FINISHME: Same problem as with atomic counters. + * FINISHME: Request clarification from Khronos and add + * FINISHME: spec quotation here. + */ + _mesa_glsl_error(&loc, state, + "image in structure, shader storage block or " + "uniform block"); + } + + if (qual->flags.q.explicit_binding) { + _mesa_glsl_error(&loc, state, + "binding layout qualifier cannot be applied " + "to struct or interface block members"); + } + + if (is_interface) { + if (!first_member) { + if (!layout->flags.q.explicit_location && + ((first_member_has_explicit_location && + !qual->flags.q.explicit_location) || + (!first_member_has_explicit_location && + qual->flags.q.explicit_location))) { + _mesa_glsl_error(&loc, state, + "when block-level location layout qualifier " + "is not supplied either all members must " + "have a location layout qualifier or all " + "members must not have a location layout " + "qualifier"); + } + } else { + first_member = false; + first_member_has_explicit_location = + qual->flags.q.explicit_location; + } + } + + if (qual->flags.q.std140 || + qual->flags.q.std430 || + qual->flags.q.packed || + qual->flags.q.shared) { + _mesa_glsl_error(&loc, state, + "uniform/shader storage block layout qualifiers " + "std140, std430, packed, and shared can only be " + "applied to uniform/shader storage blocks, not " + "members"); + } + + if (qual->flags.q.constant) { + _mesa_glsl_error(&loc, state, + "const storage qualifier cannot be applied " + "to struct or interface block members"); + } + + /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec: + * + * "A block member may be declared with a stream identifier, but + * the specified stream must match the stream associated with the + * containing block." 
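+ *
+ * Illustrative (hypothetical) geometry shader example:
+ *
+ * layout(stream = 1) out B {
+ * layout(stream = 1) vec4 a; // OK: matches the block
+ * layout(stream = 2) vec4 b; // error: stream mismatch
+ * };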
+ */
+ if (qual->flags.q.explicit_stream) {
+ unsigned qual_stream;
+ if (process_qualifier_constant(state, &loc, "stream",
+ qual->stream, &qual_stream) &&
+ qual_stream != block_stream) {
+ _mesa_glsl_error(&loc, state, "stream layout qualifier on "
+ "interface block member does not match "
+ "the interface block (%u vs %u)", qual_stream,
+ block_stream);
+ }
+ }
+
+ if (qual->flags.q.uniform && qual->has_interpolation()) {
+ _mesa_glsl_error(&loc, state,
+ "interpolation qualifiers cannot be used "
+ "with uniform interface blocks");
+ }
+
+ if ((qual->flags.q.uniform || !is_interface) &&
+ qual->has_auxiliary_storage()) {
+ _mesa_glsl_error(&loc, state,
+ "auxiliary storage qualifiers cannot be used "
+ "in uniform blocks or structures.");
+ }
+
+ if (qual->flags.q.row_major || qual->flags.q.column_major) {
+ if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
+ _mesa_glsl_error(&loc, state,
+ "row_major and column_major can only be "
+ "applied to interface blocks");
+ } else
+ validate_matrix_layout_for_type(state, &loc, decl_type, NULL);
+ }
+
+ if (qual->flags.q.read_only && qual->flags.q.write_only) {
+ _mesa_glsl_error(&loc, state, "buffer variable can't be both "
+ "readonly and writeonly.");
+ }
+
+ foreach_list_typed (ast_declaration, decl, link,
+ &decl_list->declarations) {
+ YYLTYPE loc = decl->get_location();
+
+ if (!allow_reserved_names)
+ validate_identifier(decl->identifier, loc, state);
+
+ const struct glsl_type *field_type =
+ process_array_type(&loc, decl_type, decl->array_specifier, state);
+ validate_array_dimensions(field_type, state, &loc);
+ fields[i].type = field_type;
+ fields[i].name = decl->identifier;
+ fields[i].interpolation =
+ interpret_interpolation_qualifier(qual, var_mode, state, &loc);
+ fields[i].centroid = qual->flags.q.centroid ? 1 : 0;
+ fields[i].sample = qual->flags.q.sample ? 1 : 0;
+ fields[i].patch = qual->flags.q.patch ? 1 : 0;
+ fields[i].precision = qual->precision;
+
+ if (qual->flags.q.explicit_location) {
+ unsigned qual_location;
+ if (process_qualifier_constant(state, &loc, "location",
+ qual->location, &qual_location)) {
+ fields[i].location = VARYING_SLOT_VAR0 + qual_location;
+ expl_location = fields[i].location +
+ fields[i].type->count_attribute_slots(false);
+ }
+ } else {
+ if (layout && layout->flags.q.explicit_location) {
+ fields[i].location = expl_location;
+ expl_location += fields[i].type->count_attribute_slots(false);
+ } else {
+ fields[i].location = -1;
+ }
+ }
+
+ /* Propagate row- / column-major information down the fields of the
+ * structure or interface block. Structures need this data because
+ * the structure may contain a structure that contains ... a matrix
+ * that needs the proper layout.
+ */
+ if (field_type->without_array()->is_matrix()
+ || field_type->without_array()->is_record()) {
+ /* If no layout is specified for the field, inherit the layout
+ * from the block.
+ */
+ fields[i].matrix_layout = matrix_layout;
+
+ if (qual->flags.q.row_major)
+ fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+ else if (qual->flags.q.column_major)
+ fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR;
+
+ /* If we're processing an interface block, the matrix layout must
+ * be decided by this point.
+ */
+ assert(!is_interface
+ || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR
+ || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
+ }
+
+ /* Image qualifiers are allowed on buffer variables, which can only
+ * be defined inside shader storage buffer objects
+ */
+ if (layout && var_mode == ir_var_shader_storage) {
+ /* For readonly and writeonly qualifiers the field definition,
+ * if set, overrides the layout qualifier.
+ */
+ if (qual->flags.q.read_only) {
+ fields[i].image_read_only = true;
+ fields[i].image_write_only = false;
+ } else if (qual->flags.q.write_only) {
+ fields[i].image_read_only = false;
+ fields[i].image_write_only = true;
+ } else {
+ fields[i].image_read_only = layout->flags.q.read_only;
+ fields[i].image_write_only = layout->flags.q.write_only;
+ }
+
+ /* For other qualifiers, we set the flag if either the layout
+ * qualifier or the field qualifier are set
+ */
+ fields[i].image_coherent = qual->flags.q.coherent ||
+ layout->flags.q.coherent;
+ fields[i].image_volatile = qual->flags.q._volatile ||
+ layout->flags.q._volatile;
+ fields[i].image_restrict = qual->flags.q.restrict_flag ||
+ layout->flags.q.restrict_flag;
+ }
+
+ i++;
+ }
+ }
+
+ assert(i == decl_count);
+
+ *fields_ret = fields;
+ return decl_count;
+}
+
+
+ir_rvalue *
+ast_struct_specifier::hir(exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ YYLTYPE loc = this->get_location();
+
+ /* Section 4.1.8 (Structures) of the GLSL 1.10 spec says:
+ *
+ * "Anonymous structures are not supported; so embedded structures must
+ * have a declarator. A name given to an embedded struct is scoped at
+ * the same level as the struct it is embedded in."
+ *
+ * The same section of the GLSL 1.20 spec says:
+ *
+ * "Anonymous structures are not supported. Embedded structures are not
+ * supported.
+ *
+ * struct S { float f; };
+ * struct T {
+ * S; // Error: anonymous structures disallowed
+ * struct { ... }; // Error: embedded structures disallowed
+ * S s; // Okay: nested structures with name are allowed
+ * };"
+ *
+ * The GLSL ES 1.00 and 3.00 specs have similar language and examples. So,
+ * we allow embedded structures in 1.10 only.
+ */ + if (state->language_version != 110 && state->struct_specifier_depth != 0) + _mesa_glsl_error(&loc, state, + "embedded structure declarations are not allowed"); + + state->struct_specifier_depth++; + + unsigned expl_location = 0; + if (layout && layout->flags.q.explicit_location) { + if (!process_qualifier_constant(state, &loc, "location", + layout->location, &expl_location)) { + return NULL; + } else { + expl_location = VARYING_SLOT_VAR0 + expl_location; + } + } + + glsl_struct_field *fields; + unsigned decl_count = + ast_process_struct_or_iface_block_members(instructions, + state, + &this->declarations, + &fields, + false, + GLSL_MATRIX_LAYOUT_INHERITED, + false /* allow_reserved_names */, + ir_var_auto, + layout, + 0, /* for interface only */ + expl_location); + + validate_identifier(this->name, loc, state); + + const glsl_type *t = + glsl_type::get_record_instance(fields, decl_count, this->name); + + if (!state->symbols->add_type(name, t)) { + _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name); + } else { + const glsl_type **s = reralloc(state, state->user_structures, + const glsl_type *, + state->num_user_structures + 1); + if (s != NULL) { + s[state->num_user_structures] = t; + state->user_structures = s; + state->num_user_structures++; + } + } + + state->struct_specifier_depth--; + + /* Structure type definitions do not have r-values. + */ + return NULL; +} + + +/** + * Visitor class which detects whether a given interface block has been used. + */ +class interface_block_usage_visitor : public ir_hierarchical_visitor +{ +public: + interface_block_usage_visitor(ir_variable_mode mode, const glsl_type *block) + : mode(mode), block(block), found(false) + { + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (ir->var->data.mode == mode && ir->var->get_interface_type() == block) { + found = true; + return visit_stop; + } + return visit_continue; + } + + bool usage_found() const + { + return this->found; + } + +private: + ir_variable_mode mode; + const glsl_type *block; + bool found; +}; + +static bool +is_unsized_array_last_element(ir_variable *v) +{ + const glsl_type *interface_type = v->get_interface_type(); + int length = interface_type->length; + + assert(v->type->is_unsized_array()); + + /* Check if it is the last element of the interface */ + if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0) + return true; + return false; +} + +ir_rvalue * +ast_interface_block::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + YYLTYPE loc = this->get_location(); + + /* Interface blocks must be declared at global scope */ + if (state->current_function != NULL) { + _mesa_glsl_error(&loc, state, + "Interface block `%s' must be declared " + "at global scope", + this->block_name); + } + + if (!this->layout.flags.q.buffer && + this->layout.flags.q.std430) { + _mesa_glsl_error(&loc, state, + "std430 storage block layout qualifier is supported " + "only for shader storage blocks"); + } + + /* The ast_interface_block has a list of ast_declarator_lists. We + * need to turn those into ir_variables with an association + * with this uniform block. + */ + enum glsl_interface_packing packing; + if (this->layout.flags.q.shared) { + packing = GLSL_INTERFACE_PACKING_SHARED; + } else if (this->layout.flags.q.packed) { + packing = GLSL_INTERFACE_PACKING_PACKED; + } else if (this->layout.flags.q.std430) { + packing = GLSL_INTERFACE_PACKING_STD430; + } else { + /* The default layout is std140. 
+       */
+      packing = GLSL_INTERFACE_PACKING_STD140;
+   }
+
+   ir_variable_mode var_mode;
+   const char *iface_type_name;
+   if (this->layout.flags.q.in) {
+      var_mode = ir_var_shader_in;
+      iface_type_name = "in";
+   } else if (this->layout.flags.q.out) {
+      var_mode = ir_var_shader_out;
+      iface_type_name = "out";
+   } else if (this->layout.flags.q.uniform) {
+      var_mode = ir_var_uniform;
+      iface_type_name = "uniform";
+   } else if (this->layout.flags.q.buffer) {
+      var_mode = ir_var_shader_storage;
+      iface_type_name = "buffer";
+   } else {
+      var_mode = ir_var_auto;
+      iface_type_name = "UNKNOWN";
+      assert(!"interface block layout qualifier not found!");
+   }
+
+   enum glsl_matrix_layout matrix_layout = GLSL_MATRIX_LAYOUT_INHERITED;
+   if (this->layout.flags.q.row_major)
+      matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR;
+   else if (this->layout.flags.q.column_major)
+      matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR;
+
+   bool redeclaring_per_vertex = strcmp(this->block_name, "gl_PerVertex") == 0;
+   exec_list declared_variables;
+   glsl_struct_field *fields;
+
+   /* Treat an interface block as one level of nesting, so that embedded struct
+    * specifiers will be disallowed.
+    */
+   state->struct_specifier_depth++;
+
+   /* For blocks that accept memory qualifiers (i.e. shader storage), verify
+    * that we don't have incompatible qualifiers
+    */
+   if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) {
+      _mesa_glsl_error(&loc, state,
+                       "Interface block sets both readonly and writeonly");
+   }
+
+   unsigned qual_stream;
+   if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream,
+                                   &qual_stream) ||
+       !validate_stream_qualifier(&loc, state, qual_stream)) {
+      /* If the stream qualifier is invalid it doesn't make sense to continue
+       * on and try to compare stream layouts on member variables against it
+       * so just return early.
+       */
+      return NULL;
+   }
+
+   unsigned expl_location = 0;
+   if (layout.flags.q.explicit_location) {
+      if (!process_qualifier_constant(state, &loc, "location",
+                                      layout.location, &expl_location)) {
+         return NULL;
+      } else {
+         expl_location = VARYING_SLOT_VAR0 + expl_location;
+      }
+   }
+
+   unsigned int num_variables =
+      ast_process_struct_or_iface_block_members(&declared_variables,
+                                                state,
+                                                &this->declarations,
+                                                &fields,
+                                                true,
+                                                matrix_layout,
+                                                redeclaring_per_vertex,
+                                                var_mode,
+                                                &this->layout,
+                                                qual_stream,
+                                                expl_location);
+
+   state->struct_specifier_depth--;
+
+   if (!redeclaring_per_vertex) {
+      validate_identifier(this->block_name, loc, state);
+
+      /* From section 4.3.9 ("Interface Blocks") of the GLSL 4.50 spec:
+       *
+       *     "Block names have no other use within a shader beyond interface
+       *     matching; it is a compile-time error to use a block name at global
+       *     scope for anything other than as a block name."
+       */
+      ir_variable *var = state->symbols->get_variable(this->block_name);
+      if (var && !var->type->is_interface()) {
+         _mesa_glsl_error(&loc, state, "Block name `%s' is "
+                          "already used in the scope.",
+                          this->block_name);
+      }
+   }
+
+   const glsl_type *earlier_per_vertex = NULL;
+   if (redeclaring_per_vertex) {
+      /* Find the previous declaration of gl_PerVertex.  If we're redeclaring
+       * the named interface block gl_in, we can find it by looking at the
+       * previous declaration of gl_in.  Otherwise we can find it by looking
+       * at the previous declaration of any of the built-in outputs,
+       * e.g. gl_Position.
+       *
+       * Also check that the instance name and array-ness of the redeclaration
+       * are correct.
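+       *
+       * For reference, the canonical input redeclaration being checked
+       * for (e.g. in a geometry shader) is:
+       *
+       *    in gl_PerVertex {
+       *       vec4 gl_Position;
+       *    } gl_in[];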
+ */ + switch (var_mode) { + case ir_var_shader_in: + if (ir_variable *earlier_gl_in = + state->symbols->get_variable("gl_in")) { + earlier_per_vertex = earlier_gl_in->get_interface_type(); + } else { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex input not allowed " + "in the %s shader", + _mesa_shader_stage_to_string(state->stage)); + } + if (this->instance_name == NULL || + strcmp(this->instance_name, "gl_in") != 0 || this->array_specifier == NULL || + !this->array_specifier->is_single_dimension()) { + _mesa_glsl_error(&loc, state, + "gl_PerVertex input must be redeclared as " + "gl_in[]"); + } + break; + case ir_var_shader_out: + if (ir_variable *earlier_gl_Position = + state->symbols->get_variable("gl_Position")) { + earlier_per_vertex = earlier_gl_Position->get_interface_type(); + } else if (ir_variable *earlier_gl_out = + state->symbols->get_variable("gl_out")) { + earlier_per_vertex = earlier_gl_out->get_interface_type(); + } else { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex output not " + "allowed in the %s shader", + _mesa_shader_stage_to_string(state->stage)); + } + if (state->stage == MESA_SHADER_TESS_CTRL) { + if (this->instance_name == NULL || + strcmp(this->instance_name, "gl_out") != 0 || this->array_specifier == NULL) { + _mesa_glsl_error(&loc, state, + "gl_PerVertex output must be redeclared as " + "gl_out[]"); + } + } else { + if (this->instance_name != NULL) { + _mesa_glsl_error(&loc, state, + "gl_PerVertex output may not be redeclared with " + "an instance name"); + } + } + break; + default: + _mesa_glsl_error(&loc, state, + "gl_PerVertex must be declared as an input or an " + "output"); + break; + } + + if (earlier_per_vertex == NULL) { + /* An error has already been reported. Bail out to avoid null + * dereferences later in this function. + */ + return NULL; + } + + /* Copy locations from the old gl_PerVertex interface block. */ + for (unsigned i = 0; i < num_variables; i++) { + int j = earlier_per_vertex->field_index(fields[i].name); + if (j == -1) { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex must be a subset " + "of the built-in members of gl_PerVertex"); + } else { + fields[i].location = + earlier_per_vertex->fields.structure[j].location; + fields[i].interpolation = + earlier_per_vertex->fields.structure[j].interpolation; + fields[i].centroid = + earlier_per_vertex->fields.structure[j].centroid; + fields[i].sample = + earlier_per_vertex->fields.structure[j].sample; + fields[i].patch = + earlier_per_vertex->fields.structure[j].patch; + fields[i].precision = + earlier_per_vertex->fields.structure[j].precision; + } + } + + /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10 + * spec: + * + * If a built-in interface block is redeclared, it must appear in + * the shader before any use of any member included in the built-in + * declaration, or a compilation error will result. + * + * This appears to be a clarification to the behaviour established for + * gl_PerVertex by GLSL 1.50, therefore we implement this behaviour + * regardless of GLSL version. 
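+       *
+       * e.g. the following is rejected below, because a member is used
+       * before the block is redeclared:
+       *
+       *    void set_pos() { gl_Position = vec4(0.0); }
+       *    out gl_PerVertex { vec4 gl_Position; };  // error: use precedes this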
+ */ + interface_block_usage_visitor v(var_mode, earlier_per_vertex); + v.run(instructions); + if (v.usage_found()) { + _mesa_glsl_error(&loc, state, + "redeclaration of a built-in interface block must " + "appear before any use of any member of the " + "interface block"); + } + } + + const glsl_type *block_type = + glsl_type::get_interface_instance(fields, + num_variables, + packing, + this->block_name); + + if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) { + YYLTYPE loc = this->get_location(); + _mesa_glsl_error(&loc, state, "interface block `%s' with type `%s' " + "already taken in the current scope", + this->block_name, iface_type_name); + } + + /* Since interface blocks cannot contain statements, it should be + * impossible for the block to generate any instructions. + */ + assert(declared_variables.is_empty()); + + /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec: + * + * Geometry shader input variables get the per-vertex values written + * out by vertex shader output variables of the same names. Since a + * geometry shader operates on a set of vertices, each input varying + * variable (or input block, see interface blocks below) needs to be + * declared as an array. + */ + if (state->stage == MESA_SHADER_GEOMETRY && this->array_specifier == NULL && + var_mode == ir_var_shader_in) { + _mesa_glsl_error(&loc, state, "geometry shader inputs must be arrays"); + } else if ((state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL) && + this->array_specifier == NULL && + var_mode == ir_var_shader_in) { + _mesa_glsl_error(&loc, state, "per-vertex tessellation shader inputs must be arrays"); + } else if (state->stage == MESA_SHADER_TESS_CTRL && + this->array_specifier == NULL && + var_mode == ir_var_shader_out) { + _mesa_glsl_error(&loc, state, "tessellation control shader outputs must be arrays"); + } + + + /* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec + * says: + * + * "If an instance name (instance-name) is used, then it puts all the + * members inside a scope within its own name space, accessed with the + * field selector ( . ) operator (analogously to structures)." + */ + if (this->instance_name) { + if (redeclaring_per_vertex) { + /* When a built-in in an unnamed interface block is redeclared, + * get_variable_being_redeclared() calls + * check_builtin_array_max_size() to make sure that built-in array + * variables aren't redeclared to illegal sizes. But we're looking + * at a redeclaration of a named built-in interface block. So we + * have to manually call check_builtin_array_max_size() for all parts + * of the interface that are arrays. + */ + for (unsigned i = 0; i < num_variables; i++) { + if (fields[i].type->is_array()) { + const unsigned size = fields[i].type->array_size(); + check_builtin_array_max_size(fields[i].name, size, loc, state); + } + } + } else { + validate_identifier(this->instance_name, loc, state); + } + + ir_variable *var; + + if (this->array_specifier != NULL) { + const glsl_type *block_array_type = + process_array_type(&loc, block_type, this->array_specifier, state); + + /* Section 4.3.7 (Interface Blocks) of the GLSL 1.50 spec says: + * + * For uniform blocks declared an array, each individual array + * element corresponds to a separate buffer object backing one + * instance of the block. As the array size indicates the number + * of buffer objects needed, uniform block array declarations + * must specify an array size. 
+ * + * And a few paragraphs later: + * + * Geometry shader input blocks must be declared as arrays and + * follow the array declaration and linking rules for all + * geometry shader inputs. All other input and output block + * arrays must specify an array size. + * + * The same applies to tessellation shaders. + * + * The upshot of this is that the only circumstance where an + * interface array size *doesn't* need to be specified is on a + * geometry shader input, tessellation control shader input, + * tessellation control shader output, and tessellation evaluation + * shader input. + */ + if (block_array_type->is_unsized_array()) { + bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY || + state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL; + bool allow_outputs = state->stage == MESA_SHADER_TESS_CTRL; + + if (this->layout.flags.q.in) { + if (!allow_inputs) + _mesa_glsl_error(&loc, state, + "unsized input block arrays not allowed in " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } else if (this->layout.flags.q.out) { + if (!allow_outputs) + _mesa_glsl_error(&loc, state, + "unsized output block arrays not allowed in " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } else { + /* by elimination, this is a uniform block array */ + _mesa_glsl_error(&loc, state, + "unsized uniform block arrays not allowed in " + "%s shader", + _mesa_shader_stage_to_string(state->stage)); + } + } + + /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec: + * + * * Arrays of arrays of blocks are not allowed + */ + if (state->es_shader && block_array_type->is_array() && + block_array_type->fields.array->is_array()) { + _mesa_glsl_error(&loc, state, + "arrays of arrays interface blocks are " + "not allowed"); + } + + var = new(state) ir_variable(block_array_type, + this->instance_name, + var_mode); + } else { + var = new(state) ir_variable(block_type, + this->instance_name, + var_mode); + } + + var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED + ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout; + + if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) + var->data.read_only = true; + + if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in) + handle_geometry_shader_input_decl(state, loc, var); + else if ((state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL) && var_mode == ir_var_shader_in) + handle_tess_shader_input_decl(state, loc, var); + else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out) + handle_tess_ctrl_shader_output_decl(state, loc, var); + + for (unsigned i = 0; i < num_variables; i++) { + if (fields[i].type->is_unsized_array()) { + if (var_mode == ir_var_shader_storage) { + if (i != (num_variables - 1)) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + fields[i].name); + } + } else { + /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays": + * + * "If an array is declared as the last member of a shader storage + * block and the size is not specified at compile-time, it is + * sized at run-time. In all other cases, arrays are sized only + * at compile-time." 
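+             *
+             * e.g. (GLSL ES 3.10):
+             *
+             *    buffer B {
+             *       uint count;
+             *       float data[];   // ok: last member of the block
+             *    } b;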
+ */ + if (state->es_shader) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + fields[i].name); + } + } + } + } + + if (ir_variable *earlier = + state->symbols->get_variable(this->instance_name)) { + if (!redeclaring_per_vertex) { + _mesa_glsl_error(&loc, state, "`%s' redeclared", + this->instance_name); + } + earlier->data.how_declared = ir_var_declared_normally; + earlier->type = var->type; + earlier->reinit_interface_type(block_type); + delete var; + } else { + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, var->type, + &this->layout); + } + + var->data.stream = qual_stream; + if (layout.flags.q.explicit_location) { + var->data.location = expl_location; + var->data.explicit_location = true; + } + + state->symbols->add_variable(var); + instructions->push_tail(var); + } + } else { + /* In order to have an array size, the block must also be declared with + * an instance name. + */ + assert(this->array_specifier == NULL); + + for (unsigned i = 0; i < num_variables; i++) { + ir_variable *var = + new(state) ir_variable(fields[i].type, + ralloc_strdup(state, fields[i].name), + var_mode); + var->data.interpolation = fields[i].interpolation; + var->data.centroid = fields[i].centroid; + var->data.sample = fields[i].sample; + var->data.patch = fields[i].patch; + var->data.stream = qual_stream; + var->data.location = fields[i].location; + if (fields[i].location != -1) + var->data.explicit_location = true; + var->init_interface_type(block_type); + + if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) + var->data.read_only = true; + + /* Precision qualifiers do not have any meaning in Desktop GLSL */ + if (state->es_shader) { + var->data.precision = + select_gles_precision(fields[i].precision, fields[i].type, + state, &loc); + } + + if (fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED) { + var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED + ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout; + } else { + var->data.matrix_layout = fields[i].matrix_layout; + } + + if (var->data.mode == ir_var_shader_storage) { + var->data.image_read_only = fields[i].image_read_only; + var->data.image_write_only = fields[i].image_write_only; + var->data.image_coherent = fields[i].image_coherent; + var->data.image_volatile = fields[i].image_volatile; + var->data.image_restrict = fields[i].image_restrict; + } + + /* Examine var name here since var may get deleted in the next call */ + bool var_is_gl_id = is_gl_identifier(var->name); + + if (redeclaring_per_vertex) { + ir_variable *earlier = + get_variable_being_redeclared(var, loc, state, + true /* allow_all_redeclarations */); + if (!var_is_gl_id || earlier == NULL) { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex can only " + "include built-in variables"); + } else if (earlier->data.how_declared == ir_var_declared_normally) { + _mesa_glsl_error(&loc, state, + "`%s' has already been redeclared", + earlier->name); + } else { + earlier->data.how_declared = ir_var_declared_in_block; + earlier->reinit_interface_type(block_type); + } + continue; + } + + if (state->symbols->get_variable(var->name) != NULL) + _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name); + + /* Propagate the "binding" keyword into this UBO/SSBO's fields. + * The UBO declaration itself doesn't get an ir_variable unless it + * has an instance name. This is ugly. 
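+       *
+       * e.g. given `layout(binding = 2) uniform U { vec4 v; };' the
+       * binding can only be recorded on the ir_variable created for the
+       * member `v', since the block itself produces none.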
+ */ + if (this->layout.flags.q.explicit_binding) { + apply_explicit_binding(state, &loc, var, + var->get_interface_type(), &this->layout); + } + + if (var->type->is_unsized_array()) { + if (var->is_in_shader_storage_block()) { + if (!is_unsized_array_last_element(var)) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + var->name); + } + var->data.from_ssbo_unsized_array = true; + } else { + /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays": + * + * "If an array is declared as the last member of a shader storage + * block and the size is not specified at compile-time, it is + * sized at run-time. In all other cases, arrays are sized only + * at compile-time." + */ + if (state->es_shader) { + _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " + "only last member of a shader storage block " + "can be defined as unsized array", + var->name); + } + } + } + + state->symbols->add_variable(var); + instructions->push_tail(var); + } + + if (redeclaring_per_vertex && block_type != earlier_per_vertex) { + /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10 spec: + * + * It is also a compilation error ... to redeclare a built-in + * block and then use a member from that built-in block that was + * not included in the redeclaration. + * + * This appears to be a clarification to the behaviour established + * for gl_PerVertex by GLSL 1.50, therefore we implement this + * behaviour regardless of GLSL version. + * + * To prevent the shader from using a member that was not included in + * the redeclaration, we disable any ir_variables that are still + * associated with the old declaration of gl_PerVertex (since we've + * already updated all of the variables contained in the new + * gl_PerVertex to point to it). + * + * As a side effect this will prevent + * validate_intrastage_interface_blocks() from getting confused and + * thinking there are conflicting definitions of gl_PerVertex in the + * shader. + */ + foreach_in_list_safe(ir_instruction, node, instructions) { + ir_variable *const var = node->as_variable(); + if (var != NULL && + var->get_interface_type() == earlier_per_vertex && + var->data.mode == var_mode) { + if (var->data.how_declared == ir_var_declared_normally) { + _mesa_glsl_error(&loc, state, + "redeclaration of gl_PerVertex cannot " + "follow a redeclaration of `%s'", + var->name); + } + state->symbols->disable_variable(var->name); + var->remove(); + } + } + } + } + + return NULL; +} + + +ir_rvalue * +ast_tcs_output_layout::hir(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + YYLTYPE loc = this->get_location(); + + unsigned num_vertices; + if (!state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &num_vertices, + false)) { + /* return here to stop cascading incorrect error messages */ + return NULL; + } + + /* If any shader outputs occurred before this declaration and specified an + * array size, make sure the size they specified is consistent with the + * primitive type. 
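+    *
+    * e.g.:
+    *
+    *    out float per_vertex[4];    // implies an output patch of 4 vertices
+    *    layout(vertices = 3) out;   // error: 3 does not match 4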
+    */
+   if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) {
+      _mesa_glsl_error(&loc, state,
+                       "this tessellation control shader output layout "
+                       "specifies %u vertices, but a previous output "
+                       "is declared with size %u",
+                       num_vertices, state->tcs_output_size);
+      return NULL;
+   }
+
+   state->tcs_output_vertices_specified = true;
+
+   /* If any shader outputs occurred before this declaration and did not
+    * specify an array size, their size is determined now.
+    */
+   foreach_in_list (ir_instruction, node, instructions) {
+      ir_variable *var = node->as_variable();
+      if (var == NULL || var->data.mode != ir_var_shader_out)
+         continue;
+
+      /* Note: Not all tessellation control shader outputs are arrays. */
+      if (!var->type->is_unsized_array() || var->data.patch)
+         continue;
+
+      if (var->data.max_array_access >= num_vertices) {
+         _mesa_glsl_error(&loc, state,
+                          "this tessellation control shader output layout "
+                          "specifies %u vertices, but an access to element "
+                          "%u of output `%s' already exists", num_vertices,
+                          var->data.max_array_access, var->name);
+      } else {
+         var->type = glsl_type::get_array_instance(var->type->fields.array,
+                                                   num_vertices);
+      }
+   }
+
+   return NULL;
+}
+
+
+ir_rvalue *
+ast_gs_input_layout::hir(exec_list *instructions,
+                         struct _mesa_glsl_parse_state *state)
+{
+   YYLTYPE loc = this->get_location();
+
+   /* If any geometry input layout declaration preceded this one, make sure it
+    * was consistent with this one.
+    */
+   if (state->gs_input_prim_type_specified &&
+       state->in_qualifier->prim_type != this->prim_type) {
+      _mesa_glsl_error(&loc, state,
+                       "geometry shader input layout does not match"
+                       " previous declaration");
+      return NULL;
+   }
+
+   /* If any shader inputs occurred before this declaration and specified an
+    * array size, make sure the size they specified is consistent with the
+    * primitive type.
+    */
+   unsigned num_vertices = vertices_per_prim(this->prim_type);
+   if (state->gs_input_size != 0 && state->gs_input_size != num_vertices) {
+      _mesa_glsl_error(&loc, state,
+                       "this geometry shader input layout implies %u vertices"
+                       " per primitive, but a previous input is declared"
+                       " with size %u", num_vertices, state->gs_input_size);
+      return NULL;
+   }
+
+   state->gs_input_prim_type_specified = true;
+
+   /* If any shader inputs occurred before this declaration and did not
+    * specify an array size, their size is determined now.
+    */
+   foreach_in_list(ir_instruction, node, instructions) {
+      ir_variable *var = node->as_variable();
+      if (var == NULL || var->data.mode != ir_var_shader_in)
+         continue;
+
+      /* Note: gl_PrimitiveIDIn has mode ir_var_shader_in, but it's not an
+       * array; skip it.
+       */
+
+      if (var->type->is_unsized_array()) {
+         if (var->data.max_array_access >= num_vertices) {
+            _mesa_glsl_error(&loc, state,
+                             "this geometry shader input layout implies %u"
+                             " vertices, but an access to element %u of input"
+                             " `%s' already exists", num_vertices,
+                             var->data.max_array_access, var->name);
+         } else {
+            var->type = glsl_type::get_array_instance(var->type->fields.array,
+                                                      num_vertices);
+         }
+      }
+   }
+
+   return NULL;
+}
+
+
+ir_rvalue *
+ast_cs_input_layout::hir(exec_list *instructions,
+                         struct _mesa_glsl_parse_state *state)
+{
+   YYLTYPE loc = this->get_location();
+
+   /* From the ARB_compute_shader specification:
+    *
+    *     If the local size of the shader in any dimension is greater
+    *     than the maximum size supported by the implementation for that
+    *     dimension, a compile-time error results.
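+    *
+    * e.g. `layout(local_size_x = 128, local_size_y = 2) in;' requests a
+    * work group of 256 invocations (local_size_z defaults to 1); each
+    * dimension is checked against MAX_COMPUTE_WORK_GROUP_SIZE below.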
+ * + * It is not clear from the spec how the error should be reported if + * the total size of the work group exceeds + * MAX_COMPUTE_WORK_GROUP_INVOCATIONS, but it seems reasonable to + * report it at compile time as well. + */ + GLuint64 total_invocations = 1; + unsigned qual_local_size[3]; + for (int i = 0; i < 3; i++) { + + char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c", + 'x' + i); + /* Infer a local_size of 1 for unspecified dimensions */ + if (this->local_size[i] == NULL) { + qual_local_size[i] = 1; + } else if (!this->local_size[i]-> + process_qualifier_constant(state, local_size_str, + &qual_local_size[i], false)) { + ralloc_free(local_size_str); + return NULL; + } + ralloc_free(local_size_str); + + if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { + _mesa_glsl_error(&loc, state, + "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE" + " (%d)", 'x' + i, + state->ctx->Const.MaxComputeWorkGroupSize[i]); + break; + } + total_invocations *= qual_local_size[i]; + if (total_invocations > + state->ctx->Const.MaxComputeWorkGroupInvocations) { + _mesa_glsl_error(&loc, state, + "product of local_sizes exceeds " + "MAX_COMPUTE_WORK_GROUP_INVOCATIONS (%d)", + state->ctx->Const.MaxComputeWorkGroupInvocations); + break; + } + } + + /* If any compute input layout declaration preceded this one, make sure it + * was consistent with this one. + */ + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) { + if (state->cs_input_local_size[i] != qual_local_size[i]) { + _mesa_glsl_error(&loc, state, + "compute shader input layout does not match" + " previous declaration"); + return NULL; + } + } + } + + state->cs_input_local_size_specified = true; + for (int i = 0; i < 3; i++) + state->cs_input_local_size[i] = qual_local_size[i]; + + /* We may now declare the built-in constant gl_WorkGroupSize (see + * builtin_variable_generator::generate_constants() for why we didn't + * declare it earlier). + */ + ir_variable *var = new(state->symbols) + ir_variable(glsl_type::uvec3_type, "gl_WorkGroupSize", ir_var_auto); + var->data.how_declared = ir_var_declared_implicitly; + var->data.read_only = true; + instructions->push_tail(var); + state->symbols->add_variable(var); + ir_constant_data data; + memset(&data, 0, sizeof(data)); + for (int i = 0; i < 3; i++) + data.u[i] = qual_local_size[i]; + var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data); + var->constant_initializer = + new(var) ir_constant(glsl_type::uvec3_type, &data); + var->data.has_initializer = true; + + return NULL; +} + + +static void +detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, + exec_list *instructions) +{ + bool gl_FragColor_assigned = false; + bool gl_FragData_assigned = false; + bool gl_FragSecondaryColor_assigned = false; + bool gl_FragSecondaryData_assigned = false; + bool user_defined_fs_output_assigned = false; + ir_variable *user_defined_fs_output = NULL; + + /* It would be nice to have proper location information. 
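+ *
+ * As an illustration of what this pass rejects (per the spec text quoted
+ * below), a fragment shader must not contain both:
+ *
+ *    gl_FragColor = vec4(1.0);
+ *    gl_FragData[0] = vec4(1.0);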
*/ + YYLTYPE loc; + memset(&loc, 0, sizeof(loc)); + + foreach_in_list(ir_instruction, node, instructions) { + ir_variable *var = node->as_variable(); + + if (!var || !var->data.assigned) + continue; + + if (strcmp(var->name, "gl_FragColor") == 0) + gl_FragColor_assigned = true; + else if (strcmp(var->name, "gl_FragData") == 0) + gl_FragData_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0) + gl_FragSecondaryColor_assigned = true; + else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0) + gl_FragSecondaryData_assigned = true; + else if (!is_gl_identifier(var->name)) { + if (state->stage == MESA_SHADER_FRAGMENT && + var->data.mode == ir_var_shader_out) { + user_defined_fs_output_assigned = true; + user_defined_fs_output = var; + } + } + } + + /* From the GLSL 1.30 spec: + * + * "If a shader statically assigns a value to gl_FragColor, it + * may not assign a value to any element of gl_FragData. If a + * shader statically writes a value to any element of + * gl_FragData, it may not assign a value to + * gl_FragColor. That is, a shader may assign values to either + * gl_FragColor or gl_FragData, but not both. Multiple shaders + * linked together must also consistently write just one of + * these variables. Similarly, if user declared output + * variables are in use (statically assigned to), then the + * built-in variables gl_FragColor and gl_FragData may not be + * assigned to. These incorrect usages all generate compile + * time errors." + */ + if (gl_FragColor_assigned && gl_FragData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and `gl_FragData'"); + } else if (gl_FragColor_assigned && user_defined_fs_output_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and `%s'", + user_defined_fs_output->name); + } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragSecondaryColorEXT' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragColor' and" + " `gl_FragSecondaryDataEXT'"); + } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragData' and" + " `gl_FragSecondaryColorEXT'"); + } else if (gl_FragData_assigned && user_defined_fs_output_assigned) { + _mesa_glsl_error(&loc, state, "fragment shader writes to both " + "`gl_FragData' and `%s'", + user_defined_fs_output->name); + } + + if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) && + !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(&loc, state, + "Dual source blending requires EXT_blend_func_extended"); + } +} + + +static void +remove_per_vertex_blocks(exec_list *instructions, + _mesa_glsl_parse_state *state, ir_variable_mode mode) +{ + /* Find the gl_PerVertex interface block of the appropriate (in/out) mode, + * if it exists in this shader type. 
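+    * (e.g. a vertex shader that never accesses gl_Position leaves behind
+    * an unused, implicitly declared gl_PerVertex output block, which is
+    * what this pass cleans up)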
+    */
+   const glsl_type *per_vertex = NULL;
+   switch (mode) {
+   case ir_var_shader_in:
+      if (ir_variable *gl_in = state->symbols->get_variable("gl_in"))
+         per_vertex = gl_in->get_interface_type();
+      break;
+   case ir_var_shader_out:
+      if (ir_variable *gl_Position =
+          state->symbols->get_variable("gl_Position")) {
+         per_vertex = gl_Position->get_interface_type();
+      }
+      break;
+   default:
+      assert(!"Unexpected mode");
+      break;
+   }
+
+   /* If we didn't find a built-in gl_PerVertex interface block, then we don't
+    * need to do anything.
+    */
+   if (per_vertex == NULL)
+      return;
+
+   /* If the interface block is used by the shader, then we don't need to do
+    * anything.
+    */
+   interface_block_usage_visitor v(mode, per_vertex);
+   v.run(instructions);
+   if (v.usage_found())
+      return;
+
+   /* Remove any ir_variable declarations that refer to the interface block
+    * we're removing.
+    */
+   foreach_in_list_safe(ir_instruction, node, instructions) {
+      ir_variable *const var = node->as_variable();
+      if (var != NULL && var->get_interface_type() == per_vertex &&
+          var->data.mode == mode) {
+         state->symbols->disable_variable(var->name);
+         var->remove();
+      }
+   }
+}
diff --git a/src/compiler/glsl/ast_type.cpp b/src/compiler/glsl/ast_type.cpp
new file mode 100644
index 00000000000..e0e331152dd
--- /dev/null
+++ b/src/compiler/glsl/ast_type.cpp
@@ -0,0 +1,548 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ast.h"
+
+void
+ast_type_specifier::print(void) const
+{
+   if (structure) {
+      structure->print();
+   } else {
+      printf("%s ", type_name);
+   }
+
+   if (array_specifier) {
+      array_specifier->print();
+   }
+}
+
+bool
+ast_fully_specified_type::has_qualifiers(_mesa_glsl_parse_state *state) const
+{
+   /* 'subroutine' isn't a real qualifier.
+    */
+   ast_type_qualifier subroutine_only;
+   subroutine_only.flags.i = 0;
+   subroutine_only.flags.q.subroutine = 1;
+   subroutine_only.flags.q.subroutine_def = 1;
+   if (state->has_explicit_uniform_location()) {
+      subroutine_only.flags.q.explicit_index = 1;
+   }
+   return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0;
+}
+
+bool ast_type_qualifier::has_interpolation() const
+{
+   return this->flags.q.smooth
+          || this->flags.q.flat
+          || this->flags.q.noperspective;
+}
+
+bool
+ast_type_qualifier::has_layout() const
+{
+   return this->flags.q.origin_upper_left
+          || this->flags.q.pixel_center_integer
+          || this->flags.q.depth_any
+          || this->flags.q.depth_greater
+          || this->flags.q.depth_less
+          || this->flags.q.depth_unchanged
+          || this->flags.q.std140
+          || this->flags.q.std430
+          || this->flags.q.shared
+          || this->flags.q.column_major
+          || this->flags.q.row_major
+          || this->flags.q.packed
+          || this->flags.q.explicit_location
+          || this->flags.q.explicit_image_format
+          || this->flags.q.explicit_index
+          || this->flags.q.explicit_binding
+          || this->flags.q.explicit_offset
+          || this->flags.q.explicit_stream;
+}
+
+bool
+ast_type_qualifier::has_storage() const
+{
+   return this->flags.q.constant
+          || this->flags.q.attribute
+          || this->flags.q.varying
+          || this->flags.q.in
+          || this->flags.q.out
+          || this->flags.q.uniform
+          || this->flags.q.buffer
+          || this->flags.q.shared_storage;
+}
+
+bool
+ast_type_qualifier::has_auxiliary_storage() const
+{
+   return this->flags.q.centroid
+          || this->flags.q.sample
+          || this->flags.q.patch;
+}
+
+const char*
+ast_type_qualifier::interpolation_string() const
+{
+   if (this->flags.q.smooth)
+      return "smooth";
+   else if (this->flags.q.flat)
+      return "flat";
+   else if (this->flags.q.noperspective)
+      return "noperspective";
+   else
+      return NULL;
+}
+
+/**
+ * This function merges both duplicate identifiers within a single layout and
+ * multiple layout qualifiers on a single variable declaration.  The
+ * is_single_layout_merge param is used to differentiate between the two.
+ */
+bool
+ast_type_qualifier::merge_qualifier(YYLTYPE *loc,
+                                    _mesa_glsl_parse_state *state,
+                                    const ast_type_qualifier &q,
+                                    bool is_single_layout_merge)
+{
+   ast_type_qualifier ubo_mat_mask;
+   ubo_mat_mask.flags.i = 0;
+   ubo_mat_mask.flags.q.row_major = 1;
+   ubo_mat_mask.flags.q.column_major = 1;
+
+   ast_type_qualifier ubo_layout_mask;
+   ubo_layout_mask.flags.i = 0;
+   ubo_layout_mask.flags.q.std140 = 1;
+   ubo_layout_mask.flags.q.packed = 1;
+   ubo_layout_mask.flags.q.shared = 1;
+   ubo_layout_mask.flags.q.std430 = 1;
+
+   ast_type_qualifier ubo_binding_mask;
+   ubo_binding_mask.flags.i = 0;
+   ubo_binding_mask.flags.q.explicit_binding = 1;
+   ubo_binding_mask.flags.q.explicit_offset = 1;
+
+   ast_type_qualifier stream_layout_mask;
+   stream_layout_mask.flags.i = 0;
+   stream_layout_mask.flags.q.stream = 1;
+
+   /* Uniform block layout qualifiers get to overwrite each
+    * other (rightmost having priority), while all other
+    * qualifiers currently don't allow duplicates.
+    */
+   ast_type_qualifier allowed_duplicates_mask;
+   allowed_duplicates_mask.flags.i =
+      ubo_mat_mask.flags.i |
+      ubo_layout_mask.flags.i |
+      ubo_binding_mask.flags.i;
+
+   /* Geometry shaders can have several layout qualifiers
+    * assigning different stream values.
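+    *
+    * e.g.:
+    *
+    *    layout(stream = 0) out vec4 a;
+    *    layout(stream = 1) out vec4 b;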
+ */ + if (state->stage == MESA_SHADER_GEOMETRY) + allowed_duplicates_mask.flags.i |= + stream_layout_mask.flags.i; + + if (is_single_layout_merge && !state->has_enhanced_layouts() && + (this->flags.i & q.flags.i & ~allowed_duplicates_mask.flags.i) != 0) { + _mesa_glsl_error(loc, state, + "duplicate layout qualifiers used"); + return false; + } + + if (q.flags.q.prim_type) { + if (this->flags.q.prim_type && this->prim_type != q.prim_type) { + _mesa_glsl_error(loc, state, + "conflicting primitive type qualifiers used"); + return false; + } + this->prim_type = q.prim_type; + } + + if (q.flags.q.max_vertices) { + if (this->max_vertices) { + this->max_vertices->merge_qualifier(q.max_vertices); + } else { + this->max_vertices = q.max_vertices; + } + } + + if (q.flags.q.subroutine_def) { + if (this->flags.q.subroutine_def) { + _mesa_glsl_error(loc, state, + "conflicting subroutine qualifiers used"); + } else { + this->subroutine_list = q.subroutine_list; + } + } + + if (q.flags.q.invocations) { + if (this->invocations) { + this->invocations->merge_qualifier(q.invocations); + } else { + this->invocations = q.invocations; + } + } + + if (state->stage == MESA_SHADER_GEOMETRY && + state->has_explicit_attrib_stream()) { + if (!this->flags.q.explicit_stream) { + if (q.flags.q.stream) { + this->flags.q.stream = 1; + this->stream = q.stream; + } else if (!this->flags.q.stream && this->flags.q.out) { + /* Assign default global stream value */ + this->flags.q.stream = 1; + this->stream = state->out_qualifier->stream; + } + } + } + + if (q.flags.q.vertices) { + if (this->vertices) { + this->vertices->merge_qualifier(q.vertices); + } else { + this->vertices = q.vertices; + } + } + + if (q.flags.q.vertex_spacing) { + if (this->flags.q.vertex_spacing && this->vertex_spacing != q.vertex_spacing) { + _mesa_glsl_error(loc, state, + "conflicting vertex spacing used"); + return false; + } + this->vertex_spacing = q.vertex_spacing; + } + + if (q.flags.q.ordering) { + if (this->flags.q.ordering && this->ordering != q.ordering) { + _mesa_glsl_error(loc, state, + "conflicting ordering used"); + return false; + } + this->ordering = q.ordering; + } + + if (q.flags.q.point_mode) { + if (this->flags.q.point_mode && this->point_mode != q.point_mode) { + _mesa_glsl_error(loc, state, + "conflicting point mode used"); + return false; + } + this->point_mode = q.point_mode; + } + + if ((q.flags.i & ubo_mat_mask.flags.i) != 0) + this->flags.i &= ~ubo_mat_mask.flags.i; + if ((q.flags.i & ubo_layout_mask.flags.i) != 0) + this->flags.i &= ~ubo_layout_mask.flags.i; + + for (int i = 0; i < 3; i++) { + if (q.flags.q.local_size & (1 << i)) { + if (this->local_size[i]) { + this->local_size[i]->merge_qualifier(q.local_size[i]); + } else { + this->local_size[i] = q.local_size[i]; + } + } + } + + this->flags.i |= q.flags.i; + + if (q.flags.q.explicit_location) + this->location = q.location; + + if (q.flags.q.explicit_index) + this->index = q.index; + + if (q.flags.q.explicit_binding) + this->binding = q.binding; + + if (q.flags.q.explicit_offset) + this->offset = q.offset; + + if (q.precision != ast_precision_none) + this->precision = q.precision; + + if (q.flags.q.explicit_image_format) { + this->image_format = q.image_format; + this->image_base_type = q.image_base_type; + } + + return true; +} + +bool +ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc, + _mesa_glsl_parse_state *state, + const ast_type_qualifier &q, + ast_node* &node, bool create_node) +{ + void *mem_ctx = state; + const bool r = this->merge_qualifier(loc, state, q, 
+                                     false);
+
+   if (state->stage == MESA_SHADER_GEOMETRY) {
+      if (q.flags.q.prim_type) {
+         /* Make sure this is a valid output primitive type. */
+         switch (q.prim_type) {
+         case GL_POINTS:
+         case GL_LINE_STRIP:
+         case GL_TRIANGLE_STRIP:
+            break;
+         default:
+            _mesa_glsl_error(loc, state, "invalid geometry shader output "
+                             "primitive type");
+            break;
+         }
+      }
+
+      /* Allow future assignments of global out's stream id value */
+      this->flags.q.explicit_stream = 0;
+   } else if (state->stage == MESA_SHADER_TESS_CTRL) {
+      if (create_node) {
+         node = new(mem_ctx) ast_tcs_output_layout(*loc);
+      }
+   } else {
+      _mesa_glsl_error(loc, state, "out layout qualifiers only valid in "
+                       "tessellation control or geometry shaders");
+   }
+
+   return r;
+}
+
+bool
+ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc,
+                                       _mesa_glsl_parse_state *state,
+                                       const ast_type_qualifier &q,
+                                       ast_node* &node, bool create_node)
+{
+   void *mem_ctx = state;
+   bool create_gs_ast = false;
+   bool create_cs_ast = false;
+   ast_type_qualifier valid_in_mask;
+   valid_in_mask.flags.i = 0;
+
+   switch (state->stage) {
+   case MESA_SHADER_TESS_EVAL:
+      if (q.flags.q.prim_type) {
+         /* Make sure this is a valid input primitive type. */
+         switch (q.prim_type) {
+         case GL_TRIANGLES:
+         case GL_QUADS:
+         case GL_ISOLINES:
+            break;
+         default:
+            _mesa_glsl_error(loc, state,
+                             "invalid tessellation evaluation "
+                             "shader input primitive type");
+            break;
+         }
+      }
+
+      valid_in_mask.flags.q.prim_type = 1;
+      valid_in_mask.flags.q.vertex_spacing = 1;
+      valid_in_mask.flags.q.ordering = 1;
+      valid_in_mask.flags.q.point_mode = 1;
+      break;
+   case MESA_SHADER_GEOMETRY:
+      if (q.flags.q.prim_type) {
+         /* Make sure this is a valid input primitive type. */
+         switch (q.prim_type) {
+         case GL_POINTS:
+         case GL_LINES:
+         case GL_LINES_ADJACENCY:
+         case GL_TRIANGLES:
+         case GL_TRIANGLES_ADJACENCY:
+            break;
+         default:
+            _mesa_glsl_error(loc, state,
+                             "invalid geometry shader input primitive type");
+            break;
+         }
+      }
+
+      create_gs_ast |=
+         q.flags.q.prim_type &&
+         !state->in_qualifier->flags.q.prim_type;
+
+      valid_in_mask.flags.q.prim_type = 1;
+      valid_in_mask.flags.q.invocations = 1;
+      break;
+   case MESA_SHADER_FRAGMENT:
+      valid_in_mask.flags.q.early_fragment_tests = 1;
+      break;
+   case MESA_SHADER_COMPUTE:
+      create_cs_ast |=
+         q.flags.q.local_size != 0 &&
+         state->in_qualifier->flags.q.local_size == 0;
+
+      valid_in_mask.flags.q.local_size = 7;
+      break;
+   default:
+      _mesa_glsl_error(loc, state,
+                       "input layout qualifiers only valid in "
+                       "geometry, fragment and compute shaders");
+      break;
+   }
+
+   /* Generate an error when invalid input layout qualifiers are used. */
+   if ((q.flags.i & ~valid_in_mask.flags.i) != 0) {
+      _mesa_glsl_error(loc, state,
+                       "invalid input layout qualifiers used");
+      return false;
+   }
+
+   /* Input layout qualifiers can be specified multiple
+    * times in separate declarations, as long as they match.
+    */
+   if (this->flags.q.prim_type) {
+      if (q.flags.q.prim_type &&
+          this->prim_type != q.prim_type) {
+         _mesa_glsl_error(loc, state,
+                          "conflicting input primitive %s specified",
+                          state->stage == MESA_SHADER_GEOMETRY ?
+ "type" : "mode"); + } + } else if (q.flags.q.prim_type) { + state->in_qualifier->flags.q.prim_type = 1; + state->in_qualifier->prim_type = q.prim_type; + } + + if (q.flags.q.invocations) { + this->flags.q.invocations = 1; + if (this->invocations) { + this->invocations->merge_qualifier(q.invocations); + } else { + this->invocations = q.invocations; + } + } + + if (q.flags.q.early_fragment_tests) { + state->fs_early_fragment_tests = true; + } + + if (this->flags.q.vertex_spacing) { + if (q.flags.q.vertex_spacing && + this->vertex_spacing != q.vertex_spacing) { + _mesa_glsl_error(loc, state, + "conflicting vertex spacing specified"); + } + } else if (q.flags.q.vertex_spacing) { + this->flags.q.vertex_spacing = 1; + this->vertex_spacing = q.vertex_spacing; + } + + if (this->flags.q.ordering) { + if (q.flags.q.ordering && + this->ordering != q.ordering) { + _mesa_glsl_error(loc, state, + "conflicting ordering specified"); + } + } else if (q.flags.q.ordering) { + this->flags.q.ordering = 1; + this->ordering = q.ordering; + } + + if (this->flags.q.point_mode) { + if (q.flags.q.point_mode && + this->point_mode != q.point_mode) { + _mesa_glsl_error(loc, state, + "conflicting point mode specified"); + } + } else if (q.flags.q.point_mode) { + this->flags.q.point_mode = 1; + this->point_mode = q.point_mode; + } + + if (create_node) { + if (create_gs_ast) { + node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type); + } else if (create_cs_ast) { + node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size); + } + } + + return true; +} + +bool +ast_layout_expression::process_qualifier_constant(struct _mesa_glsl_parse_state *state, + const char *qual_indentifier, + unsigned *value, + bool can_be_zero) +{ + int min_value = 0; + bool first_pass = true; + *value = 0; + + if (!can_be_zero) + min_value = 1; + + for (exec_node *node = layout_const_expressions.head; + !node->is_tail_sentinel(); node = node->next) { + + exec_list dummy_instructions; + ast_node *const_expression = exec_node_data(ast_node, node, link); + + ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); + + ir_constant *const const_int = ir->constant_expression_value(); + if (const_int == NULL || !const_int->type->is_integer()) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s must be an integral constant " + "expression", qual_indentifier); + return false; + } + + if (const_int->value.i[0] < min_value) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s layout qualifier is invalid " + "(%d < %d)", qual_indentifier, + const_int->value.i[0], min_value); + return false; + } + + if (!first_pass && *value != const_int->value.u[0]) { + YYLTYPE loc = const_expression->get_location(); + _mesa_glsl_error(&loc, state, "%s layout qualifier does not " + "match previous declaration (%d vs %d)", + qual_indentifier, *value, const_int->value.i[0]); + return false; + } else { + first_pass = false; + *value = const_int->value.u[0]; + } + + /* If the location is const (and we've verified that + * it is) then no instructions should have been emitted + * when we converted it to HIR. If they were emitted, + * then either the location isn't const after all, or + * we are emitting unnecessary instructions. 
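+       *
+       * e.g. `layout(vertices = 4) out;' folds to a plain constant here;
+       * an expression such as `layout(vertices = 2 + 2) out;' (with
+       * ARB_enhanced_layouts) would likewise have to fold without
+       * leaving any IR behind.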
+       */
+      assert(dummy_instructions.is_empty());
+   }
+
+   return true;
+}
diff --git a/src/compiler/glsl/blob.c b/src/compiler/glsl/blob.c
new file mode 100644
index 00000000000..dd4341be961
--- /dev/null
+++ b/src/compiler/glsl/blob.c
@@ -0,0 +1,323 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <string.h>
+
+#include "main/macros.h"
+#include "util/ralloc.h"
+#include "blob.h"
+
+#define BLOB_INITIAL_SIZE 4096
+
+/* Ensure that \blob will be able to fit an additional object of size
+ * \additional.  The growing (if any) will occur by doubling the existing
+ * allocation.
+ */
+static bool
+grow_to_fit(struct blob *blob, size_t additional)
+{
+   size_t to_allocate;
+   uint8_t *new_data;
+
+   if (blob->size + additional <= blob->allocated)
+      return true;
+
+   if (blob->allocated == 0)
+      to_allocate = BLOB_INITIAL_SIZE;
+   else
+      to_allocate = blob->allocated * 2;
+
+   to_allocate = MAX2(to_allocate, blob->allocated + additional);
+
+   new_data = reralloc_size(blob, blob->data, to_allocate);
+   if (new_data == NULL)
+      return false;
+
+   blob->data = new_data;
+   blob->allocated = to_allocate;
+
+   return true;
+}
+
+/* Align the blob->size so that reading or writing a value at (blob->data +
+ * blob->size) will result in an access aligned to a granularity of \alignment
+ * bytes.
+ *
+ * \return True unless allocation fails
+ */
+static bool
+align_blob(struct blob *blob, size_t alignment)
+{
+   const size_t new_size = ALIGN(blob->size, alignment);
+
+   if (!grow_to_fit(blob, new_size - blob->size))
+      return false;
+
+   blob->size = new_size;
+
+   return true;
+}
+
+static void
+align_blob_reader(struct blob_reader *blob, size_t alignment)
+{
+   blob->current = blob->data + ALIGN(blob->current - blob->data, alignment);
+}
+
+struct blob *
+blob_create(void *mem_ctx)
+{
+   struct blob *blob;
+
+   blob = ralloc(mem_ctx, struct blob);
+   if (blob == NULL)
+      return NULL;
+
+   blob->data = NULL;
+   blob->allocated = 0;
+   blob->size = 0;
+
+   return blob;
+}
+
+bool
+blob_overwrite_bytes(struct blob *blob,
+                     size_t offset,
+                     const void *bytes,
+                     size_t to_write)
+{
+   /* Detect an attempt to overwrite data out of bounds. */
+   if (offset < 0 || blob->size - offset < to_write)
+      return false;
+
+   memcpy(blob->data + offset, bytes, to_write);
+
+   return true;
+}
+
+bool
+blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write)
+{
+   if (!
grow_to_fit(blob, to_write)) + return false; + + memcpy(blob->data + blob->size, bytes, to_write); + blob->size += to_write; + + return true; +} + +uint8_t * +blob_reserve_bytes(struct blob *blob, size_t to_write) +{ + uint8_t *ret; + + if (! grow_to_fit (blob, to_write)) + return NULL; + + ret = blob->data + blob->size; + blob->size += to_write; + + return ret; +} + +bool +blob_write_uint32(struct blob *blob, uint32_t value) +{ + align_blob(blob, sizeof(value)); + + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_overwrite_uint32 (struct blob *blob, + size_t offset, + uint32_t value) +{ + return blob_overwrite_bytes(blob, offset, &value, sizeof(value)); +} + +bool +blob_write_uint64(struct blob *blob, uint64_t value) +{ + align_blob(blob, sizeof(value)); + + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_write_intptr(struct blob *blob, intptr_t value) +{ + align_blob(blob, sizeof(value)); + + return blob_write_bytes(blob, &value, sizeof(value)); +} + +bool +blob_write_string(struct blob *blob, const char *str) +{ + return blob_write_bytes(blob, str, strlen(str) + 1); +} + +void +blob_reader_init(struct blob_reader *blob, uint8_t *data, size_t size) +{ + blob->data = data; + blob->end = data + size; + blob->current = data; + blob->overrun = false; +} + +/* Check that an object of size \size can be read from this blob. + * + * If not, set blob->overrun to indicate that we attempted to read too far. + */ +static bool +ensure_can_read(struct blob_reader *blob, size_t size) +{ + if (blob->current < blob->end && blob->end - blob->current >= size) + return true; + + blob->overrun = true; + + return false; +} + +void * +blob_read_bytes(struct blob_reader *blob, size_t size) +{ + void *ret; + + if (! ensure_can_read (blob, size)) + return NULL; + + ret = blob->current; + + blob->current += size; + + return ret; +} + +void +blob_copy_bytes(struct blob_reader *blob, uint8_t *dest, size_t size) +{ + uint8_t *bytes; + + bytes = blob_read_bytes(blob, size); + if (bytes == NULL) + return; + + memcpy(dest, bytes, size); +} + +/* These next three read functions have identical form. If we add any beyond + * these first three we should probably switch to generating these with a + * preprocessor macro. +*/ +uint32_t +blob_read_uint32(struct blob_reader *blob) +{ + uint32_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((uint32_t*) blob->current); + + blob->current += size; + + return ret; +} + +uint64_t +blob_read_uint64(struct blob_reader *blob) +{ + uint64_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((uint64_t*) blob->current); + + blob->current += size; + + return ret; +} + +intptr_t +blob_read_intptr(struct blob_reader *blob) +{ + intptr_t ret; + int size = sizeof(ret); + + align_blob_reader(blob, size); + + if (! ensure_can_read(blob, size)) + return 0; + + ret = *((intptr_t *) blob->current); + + blob->current += size; + + return ret; +} + +char * +blob_read_string(struct blob_reader *blob) +{ + int size; + char *ret; + uint8_t *nul; + + /* If we're already at the end, then this is an overrun. */ + if (blob->current >= blob->end) { + blob->overrun = true; + return NULL; + } + + /* Similarly, if there is no zero byte in the data remaining in this blob, + * we also consider that an overrun. 
+    */
+   nul = memchr(blob->current, 0, blob->end - blob->current);
+
+   if (nul == NULL) {
+      blob->overrun = true;
+      return NULL;
+   }
+
+   size = nul - blob->current + 1;
+
+   assert(ensure_can_read(blob, size));
+
+   ret = (char *) blob->current;
+
+   blob->current += size;
+
+   return ret;
+}
diff --git a/src/compiler/glsl/blob.h b/src/compiler/glsl/blob.h
new file mode 100644
index 00000000000..ec903ec140f
--- /dev/null
+++ b/src/compiler/glsl/blob.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef BLOB_H
+#define BLOB_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+/* The blob functions implement a simple, low-level API for serializing and
+ * deserializing.
+ *
+ * All objects written to a blob will be serialized directly, (without any
+ * additional meta-data to describe the data written).  Therefore, it is the
+ * caller's responsibility to ensure that any data can be read later, (either
+ * by knowing exactly what data is expected, or by writing to the blob
+ * sufficient meta-data to describe what has been written).
+ *
+ * A blob is efficient in that it dynamically grows by doubling in size, so
+ * allocation costs are logarithmic.
+ */
+
+struct blob {
+   /* The data actually written to the blob. */
+   uint8_t *data;
+
+   /** Number of bytes that have been allocated for \c data. */
+   size_t allocated;
+
+   /** The number of bytes that have actual data written to them. */
+   size_t size;
+};
+
+/* When done reading, the caller can ensure that everything was consumed by
+ * checking the following:
+ *
+ * 1. blob->current should be equal to blob->end, (if not, too little was
+ *    read).
+ *
+ * 2. blob->overrun should be false, (otherwise, too much was read).
+ */
+struct blob_reader {
+   uint8_t *data;
+   uint8_t *end;
+   uint8_t *current;
+   bool overrun;
+};
+
+/**
+ * Create a new, empty blob, belonging to \mem_ctx.
+ *
+ * \return The new blob, (or NULL in case of allocation failure).
+ */
+struct blob *
+blob_create (void *mem_ctx);
+
+/**
+ * Add some unstructured, fixed-size data to a blob.
+ *
+ * \return True unless allocation failed.
+ */
+bool
+blob_write_bytes (struct blob *blob, const void *bytes, size_t to_write);
+
+/**
+ * Reserve space in \blob for a number of bytes.
+ *
+ * Space will be allocated within the blob for these bytes, but the bytes will
+ * be left uninitialized.
The caller is expected to use the return value to + * write directly (and immediately) to these bytes. + * + * \note The return value is valid immediately upon return, but can be + * invalidated by any other call to a blob function. So the caller should call + * blob_reserve_bytes immediately before writing through the returned pointer. + * + * This function is intended to be used when interfacing with an existing API + * that is not aware of the blob API, (so that blob_write_bytes cannot be + * called). + * + * \return A pointer to space allocated within \blob to which \to_write bytes + * can be written, (or NULL in case of any allocation error). + */ +uint8_t * +blob_reserve_bytes (struct blob *blob, size_t to_write); + +/** + * Overwrite some data previously written to the blob. + * + * Writes data to an existing portion of the blob at an offset of \offset. + * This data range must have previously been written to the blob by one of the + * blob_write_* calls. + * + * For example usage, see blob_overwrite_uint32. + * + * \return True unless the requested offset or offset+to_write lie outside + * the current blob's size. + */ +bool +blob_overwrite_bytes (struct blob *blob, + size_t offset, + const void *bytes, + size_t to_write); + +/** + * Add a uint32_t to a blob. + * + * \note This function will only write to a uint32_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint32 (struct blob *blob, uint32_t value); + +/** + * Overwrite a uint32_t previously written to the blob. + * + * Writes a uint32_t value to an existing portion of the blob at an offset of + * \offset. This data range must have previously been written to the blob by + * one of the blob_write_* calls. + * + * The expected usage is something like the following pattern: + * + * size_t offset; + * + * offset = blob->size; + * blob_write_uint32 (blob, 0); // placeholder + * ... various blob write calls, writing N items ... + * blob_overwrite_uint32 (blob, offset, N); + * + * \return True unless the requested offset or offset+sizeof(value) lie + * outside the current blob's size. + */ +bool +blob_overwrite_uint32 (struct blob *blob, + size_t offset, + uint32_t value); + +/** + * Add a uint64_t to a blob. + * + * \note This function will only write to a uint64_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_uint64 (struct blob *blob, uint64_t value); + +/** + * Add an intptr_t to a blob. + * + * \note This function will only write to an intptr_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be added to the + * blob if this write follows some unaligned write (such as + * blob_write_string). + * + * \return True unless allocation failed. + */ +bool +blob_write_intptr (struct blob *blob, intptr_t value); + +/** + * Add a NULL-terminated string to a blob, (including the NULL terminator). + * + * \return True unless allocation failed. + */ +bool +blob_write_string (struct blob *blob, const char *str); + +/** + * Start reading a blob, (initializing the contents of \blob for reading).
+ * + * After this call, the caller can use the various blob_read_* functions to + * read elements from the data array. + * + * For all of the blob_read_* functions, if there is insufficient data + * remaining, the functions will do nothing, (perhaps returning default values + * such as 0). The caller can detect this by noting that the blob_reader's + * current value is unchanged before and after the call. + */ +void +blob_reader_init (struct blob_reader *blob, uint8_t *data, size_t size); + +/** + * Read some unstructured, fixed-size data from the current location, (and + * update the current location to just past this data). + * + * \note The memory returned belongs to the data underlying the blob reader. The + * caller must copy the data in order to use it after the lifetime of the data + * underlying the blob reader. + * + * \return The bytes read (see note above about memory lifetime). + */ +void * +blob_read_bytes (struct blob_reader *blob, size_t size); + +/** + * Read some unstructured, fixed-size data from the current location, copying + * it to \dest (and update the current location to just past this data) + */ +void +blob_copy_bytes (struct blob_reader *blob, uint8_t *dest, size_t size); + +/** + * Read a uint32_t from the current location, (and update the current location + * to just past this uint32_t). + * + * \note This function will only read from a uint32_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint32_t read + */ +uint32_t +blob_read_uint32 (struct blob_reader *blob); + +/** + * Read a uint64_t from the current location, (and update the current location + * to just past this uint64_t). + * + * \note This function will only read from a uint64_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The uint64_t read + */ +uint64_t +blob_read_uint64 (struct blob_reader *blob); + +/** + * Read an intptr_t value from the current location, (and update the + * current location to just past this intptr_t). + * + * \note This function will only read from an intptr_t-aligned offset from the + * beginning of the blob's data, so some padding bytes may be skipped. + * + * \return The intptr_t read + */ +intptr_t +blob_read_intptr (struct blob_reader *blob); + +/** + * Read a NULL-terminated string from the current location, (and update the + * current location to just past this string). + * + * \note The memory returned belongs to the data underlying the blob reader. The + * caller must copy the string in order to use the string after the lifetime + * of the data underlying the blob reader. + * + * \return The string read (see note above about memory lifetime). However, if + * there is no NULL byte remaining within the blob, this function returns + * NULL. 
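+ *
+ * As a rough end-to-end illustration of the whole API (an editorial sketch,
+ * not part of the original header; it assumes a valid ralloc context
+ * mem_ctx and omits error handling):
+ *
+ *    struct blob *b = blob_create (mem_ctx);
+ *    blob_write_uint32 (b, 42);
+ *    blob_write_string (b, "hello");
+ *
+ *    struct blob_reader r;
+ *    blob_reader_init (&r, b->data, b->size);
+ *    uint32_t v = blob_read_uint32 (&r);   // 42
+ *    char *s = blob_read_string (&r);      // points into b->data
+ *    assert (r.current == r.end && !r.overrun);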
+ */ +char * +blob_read_string (struct blob_reader *blob); + +#ifdef __cplusplus +} +#endif + +#endif /* BLOB_H */ diff --git a/src/compiler/glsl/builtin_functions.cpp b/src/compiler/glsl/builtin_functions.cpp new file mode 100644 index 00000000000..95e86df1cdd --- /dev/null +++ b/src/compiler/glsl/builtin_functions.cpp @@ -0,0 +1,5502 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file builtin_functions.cpp + * + * Support for GLSL built-in functions. + * + * This file is split into several main components: + * + * 1. Availability predicates + * + * A series of small functions that check whether the current shader + * supports the version/extensions required to expose a built-in. + * + * 2. Core builtin_builder class functionality + * + * 3. Lists of built-in functions + * + * The builtin_builder::create_builtins() function contains lists of all + * built-in function signatures, where they're available, what types they + * take, and so on. + * + * 4. Implementations of built-in function signatures + * + * A series of functions which create ir_function_signatures and emit IR + * via ir_builder to implement them. + * + * 5. External API + * + * A few functions the rest of the compiler can use to interact with the + * built-in function module. For example, searching for a built-in by + * name and parameters. 
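+ *
+ *    As a hedged sketch of that external API (these entry points are
+ *    presumably defined toward the end of this file; exact signatures may
+ *    differ):
+ *
+ *       _mesa_glsl_initialize_builtin_functions();
+ *       ir_function_signature *sig =
+ *          _mesa_glsl_find_builtin_function(state, "texture",
+ *                                           &actual_parameters);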
+ */ + +#include <stdarg.h> +#include <stdio.h> +#include "main/core.h" /* for struct gl_shader */ +#include "main/shaderobj.h" +#include "ir_builder.h" +#include "glsl_parser_extras.h" +#include "program/prog_instruction.h" +#include <math.h> + +#define M_PIf ((float) M_PI) +#define M_PI_2f ((float) M_PI_2) +#define M_PI_4f ((float) M_PI_4) + +using namespace ir_builder; + +/** + * Availability predicates: + * @{ + */ +static bool +always_available(const _mesa_glsl_parse_state *) +{ + return true; +} + +static bool +compatibility_vs_only(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_VERTEX && + state->language_version <= 130 && + !state->es_shader; +} + +static bool +fs_only(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT; +} + +static bool +gs_only(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_GEOMETRY; +} + +static bool +v110(const _mesa_glsl_parse_state *state) +{ + return !state->es_shader; +} + +static bool +v110_fs_only(const _mesa_glsl_parse_state *state) +{ + return !state->es_shader && state->stage == MESA_SHADER_FRAGMENT; +} + +static bool +v120(const _mesa_glsl_parse_state *state) +{ + return state->is_version(120, 300); +} + +static bool +v130(const _mesa_glsl_parse_state *state) +{ + return state->is_version(130, 300); +} + +static bool +v130_fs_only(const _mesa_glsl_parse_state *state) +{ + return state->is_version(130, 300) && + state->stage == MESA_SHADER_FRAGMENT; +} + +static bool +v140(const _mesa_glsl_parse_state *state) +{ + return state->is_version(140, 0); +} + +static bool +v140_or_es3(const _mesa_glsl_parse_state *state) +{ + return state->is_version(140, 300); +} + +static bool +v400_fs_only(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) && + state->stage == MESA_SHADER_FRAGMENT; +} + +static bool +es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(0, 310); +} + +static bool +texture_rectangle(const _mesa_glsl_parse_state *state) +{ + return state->ARB_texture_rectangle_enable; +} + +static bool +texture_external(const _mesa_glsl_parse_state *state) +{ + return state->OES_EGL_image_external_enable; +} + +/** True if texturing functions with explicit LOD are allowed. */ +static bool +lod_exists_in_stage(const _mesa_glsl_parse_state *state) +{ + /* Texturing functions with "Lod" in their name exist: + * - In the vertex shader stage (for all languages) + * - In any stage for GLSL 1.30+ or GLSL ES 3.00 + * - In any stage for desktop GLSL with ARB_shader_texture_lod enabled. + * + * Since ARB_shader_texture_lod can only be enabled on desktop GLSL, we + * don't need to explicitly check state->es_shader.
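+ *
+ * For example (an illustrative note, not from the original comment):
+ * texture2DLod() is exposed unconditionally in a GLSL 1.10 vertex shader,
+ * but in a GLSL 1.10 fragment shader only when ARB_shader_texture_lod is
+ * enabled.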
+ */ + return state->stage == MESA_SHADER_VERTEX || + state->is_version(130, 300) || + state->ARB_shader_texture_lod_enable; +} + +static bool +v110_lod(const _mesa_glsl_parse_state *state) +{ + return !state->es_shader && lod_exists_in_stage(state); +} + +static bool +shader_texture_lod(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_texture_lod_enable; +} + +static bool +shader_texture_lod_and_rect(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_texture_lod_enable && + state->ARB_texture_rectangle_enable; +} + +static bool +shader_bit_encoding(const _mesa_glsl_parse_state *state) +{ + return state->is_version(330, 300) || + state->ARB_shader_bit_encoding_enable || + state->ARB_gpu_shader5_enable; +} + +static bool +shader_integer_mix(const _mesa_glsl_parse_state *state) +{ + return state->is_version(450, 310) || + (v130(state) && state->EXT_shader_integer_mix_enable); +} + +static bool +shader_packing_or_es3(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shading_language_packing_enable || + state->is_version(420, 300); +} + +static bool +shader_packing_or_es3_or_gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shading_language_packing_enable || + state->ARB_gpu_shader5_enable || + state->is_version(400, 300); +} + +static bool +gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) || state->ARB_gpu_shader5_enable; +} + +static bool +gpu_shader5_or_es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 310) || state->ARB_gpu_shader5_enable; +} + +static bool +shader_packing_or_es31_or_gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shading_language_packing_enable || + state->ARB_gpu_shader5_enable || + state->is_version(400, 310); +} + +static bool +fs_gpu_shader5(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(400, 0) || state->ARB_gpu_shader5_enable); +} + + +static bool +texture_array_lod(const _mesa_glsl_parse_state *state) +{ + return lod_exists_in_stage(state) && + state->EXT_texture_array_enable; +} + +static bool +fs_texture_array(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + state->EXT_texture_array_enable; +} + +static bool +texture_array(const _mesa_glsl_parse_state *state) +{ + return state->EXT_texture_array_enable; +} + +static bool +texture_multisample(const _mesa_glsl_parse_state *state) +{ + return state->is_version(150, 310) || + state->ARB_texture_multisample_enable; +} + +static bool +texture_multisample_array(const _mesa_glsl_parse_state *state) +{ + return state->is_version(150, 320) || + state->ARB_texture_multisample_enable || + state->OES_texture_storage_multisample_2d_array_enable; +} + +static bool +texture_samples_identical(const _mesa_glsl_parse_state *state) +{ + return texture_multisample(state) && + state->EXT_shader_samples_identical_enable; +} + +static bool +texture_samples_identical_array(const _mesa_glsl_parse_state *state) +{ + return texture_multisample_array(state) && + state->EXT_shader_samples_identical_enable; +} + +static bool +fs_texture_cube_map_array(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(400, 0) || + state->ARB_texture_cube_map_array_enable); +} + +static bool +texture_cube_map_array(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) || + state->ARB_texture_cube_map_array_enable; +} + +static 
bool +texture_query_levels(const _mesa_glsl_parse_state *state) +{ + return state->is_version(430, 0) || + state->ARB_texture_query_levels_enable; +} + +static bool +texture_query_lod(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + state->ARB_texture_query_lod_enable; +} + +static bool +texture_gather(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 0) || + state->ARB_texture_gather_enable || + state->ARB_gpu_shader5_enable; +} + +static bool +texture_gather_or_es31(const _mesa_glsl_parse_state *state) +{ + return state->is_version(400, 310) || + state->ARB_texture_gather_enable || + state->ARB_gpu_shader5_enable; +} + +/* Only ARB_texture_gather but not GLSL 4.0 or ARB_gpu_shader5. + * used for relaxation of const offset requirements. + */ +static bool +texture_gather_only_or_es31(const _mesa_glsl_parse_state *state) +{ + return !state->is_version(400, 0) && + !state->ARB_gpu_shader5_enable && + (state->ARB_texture_gather_enable || + state->is_version(0, 310)); +} + +/* Desktop GL or OES_standard_derivatives + fragment shader only */ +static bool +fs_oes_derivatives(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(110, 300) || + state->OES_standard_derivatives_enable); +} + +static bool +fs_derivative_control(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (state->is_version(450, 0) || + state->ARB_derivative_control_enable); +} + +static bool +tex1d_lod(const _mesa_glsl_parse_state *state) +{ + return !state->es_shader && lod_exists_in_stage(state); +} + +/** True if sampler3D exists */ +static bool +tex3d(const _mesa_glsl_parse_state *state) +{ + /* sampler3D exists in all desktop GLSL versions, GLSL ES 1.00 with the + * OES_texture_3D extension, and in GLSL ES 3.00. 
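+ *
+ * For example (illustrative): a GLSL ES 1.00 shader must declare
+ * "#extension GL_OES_texture_3D : enable" before sampler3D may be used.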
+ */ + return !state->es_shader || + state->OES_texture_3D_enable || + state->language_version >= 300; +} + +static bool +fs_tex3d(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_FRAGMENT && + (!state->es_shader || state->OES_texture_3D_enable); +} + +static bool +tex3d_lod(const _mesa_glsl_parse_state *state) +{ + return tex3d(state) && lod_exists_in_stage(state); +} + +static bool +shader_atomic_counters(const _mesa_glsl_parse_state *state) +{ + return state->has_atomic_counters(); +} + +static bool +shader_clock(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_clock_enable; +} + +static bool +shader_storage_buffer_object(const _mesa_glsl_parse_state *state) +{ + return state->has_shader_storage_buffer_objects(); +} + +static bool +shader_trinary_minmax(const _mesa_glsl_parse_state *state) +{ + return state->AMD_shader_trinary_minmax_enable; +} + +static bool +shader_image_load_store(const _mesa_glsl_parse_state *state) +{ + return (state->is_version(420, 310) || + state->ARB_shader_image_load_store_enable); +} + +static bool +shader_image_atomic(const _mesa_glsl_parse_state *state) +{ + return (state->is_version(420, 0) || + state->ARB_shader_image_load_store_enable); +} + +static bool +shader_image_size(const _mesa_glsl_parse_state *state) +{ + return state->is_version(430, 310) || + state->ARB_shader_image_size_enable; +} + +static bool +shader_samples(const _mesa_glsl_parse_state *state) +{ + return state->is_version(450, 0) || + state->ARB_shader_texture_image_samples_enable; +} + +static bool +gs_streams(const _mesa_glsl_parse_state *state) +{ + return gpu_shader5(state) && gs_only(state); +} + +static bool +fp64(const _mesa_glsl_parse_state *state) +{ + return state->has_double(); +} + +static bool +compute_shader(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_COMPUTE; +} + +static bool +buffer_atomics_supported(const _mesa_glsl_parse_state *state) +{ + return compute_shader(state) || shader_storage_buffer_object(state); +} + +static bool +barrier_supported(const _mesa_glsl_parse_state *state) +{ + return compute_shader(state) || + state->stage == MESA_SHADER_TESS_CTRL; +} + +/** @} */ + +/******************************************************************************/ + +namespace { + +/** + * builtin_builder: A singleton object representing the core of the built-in + * function module. + * + * It generates IR for every built-in function signature, and organizes them + * into functions. + */ +class builtin_builder { +public: + builtin_builder(); + ~builtin_builder(); + + void initialize(); + void release(); + ir_function_signature *find(_mesa_glsl_parse_state *state, + const char *name, exec_list *actual_parameters); + + /** + * A shader to hold all the built-in signatures; created by this module. + * + * This includes signatures for every built-in, regardless of version or + * enabled extensions. The availability predicate associated with each + * signature allows matching_signature() to filter out the irrelevant ones. + */ + gl_shader *shader; + +private: + void *mem_ctx; + + /** Global variables used by built-in functions. */ + ir_variable *gl_ModelViewProjectionMatrix; + ir_variable *gl_Vertex; + + void create_shader(); + void create_intrinsics(); + void create_builtins(); + + /** + * IR builder helpers: + * + * These convenience functions assist in emitting IR, but don't necessarily + * fit in ir_builder itself. Many of them rely on having a mem_ctx class + * member available. 
+ */ + ir_variable *in_var(const glsl_type *type, const char *name); + ir_variable *out_var(const glsl_type *type, const char *name); + ir_constant *imm(float f, unsigned vector_elements=1); + ir_constant *imm(bool b, unsigned vector_elements=1); + ir_constant *imm(int i, unsigned vector_elements=1); + ir_constant *imm(unsigned u, unsigned vector_elements=1); + ir_constant *imm(double d, unsigned vector_elements=1); + ir_constant *imm(const glsl_type *type, const ir_constant_data &); + ir_dereference_variable *var_ref(ir_variable *var); + ir_dereference_array *array_ref(ir_variable *var, int i); + ir_swizzle *matrix_elt(ir_variable *var, int col, int row); + + ir_expression *asin_expr(ir_variable *x); + void do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x); + + /** + * Call function \param f with parameters specified as the linked + * list \param params of \c ir_variable objects. \param ret should + * point to the ir_variable that will hold the function return + * value, or be \c NULL if the function has void return type. + */ + ir_call *call(ir_function *f, ir_variable *ret, exec_list params); + + /** Create a new function and add the given signatures. */ + void add_function(const char *name, ...); + + typedef ir_function_signature *(builtin_builder::*image_prototype_ctr)(const glsl_type *image_type, + unsigned num_arguments, + unsigned flags); + + enum image_function_flags { + IMAGE_FUNCTION_EMIT_STUB = (1 << 0), + IMAGE_FUNCTION_RETURNS_VOID = (1 << 1), + IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2), + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3), + IMAGE_FUNCTION_READ_ONLY = (1 << 4), + IMAGE_FUNCTION_WRITE_ONLY = (1 << 5), + IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6), + IMAGE_FUNCTION_MS_ONLY = (1 << 7), + }; + + /** + * Create a new image built-in function for all known image types. + * \p flags is a bitfield of \c image_function_flags flags. + */ + void add_image_function(const char *name, + const char *intrinsic_name, + image_prototype_ctr prototype, + unsigned num_arguments, + unsigned flags); + + /** + * Create new functions for all known image built-ins and types. + * If \p glsl is \c true, use the GLSL built-in names and emit code + * to call into the actual compiler intrinsic. If \p glsl is + * false, emit a function prototype with no body for each image + * intrinsic name. 
+ */ + void add_image_functions(bool glsl); + + ir_function_signature *new_sig(const glsl_type *return_type, + builtin_available_predicate avail, + int num_params, ...); + + /** + * Function signature generators: + * @{ + */ + ir_function_signature *unop(builtin_available_predicate avail, + ir_expression_operation opcode, + const glsl_type *return_type, + const glsl_type *param_type); + ir_function_signature *binop(builtin_available_predicate avail, + ir_expression_operation opcode, + const glsl_type *return_type, + const glsl_type *param0_type, + const glsl_type *param1_type); + +#define B0(X) ir_function_signature *_##X(); +#define B1(X) ir_function_signature *_##X(const glsl_type *); +#define B2(X) ir_function_signature *_##X(const glsl_type *, const glsl_type *); +#define B3(X) ir_function_signature *_##X(const glsl_type *, const glsl_type *, const glsl_type *); +#define BA1(X) ir_function_signature *_##X(builtin_available_predicate, const glsl_type *); +#define BA2(X) ir_function_signature *_##X(builtin_available_predicate, const glsl_type *, const glsl_type *); + B1(radians) + B1(degrees) + B1(sin) + B1(cos) + B1(tan) + B1(asin) + B1(acos) + B1(atan2) + B1(atan) + B1(sinh) + B1(cosh) + B1(tanh) + B1(asinh) + B1(acosh) + B1(atanh) + B1(pow) + B1(exp) + B1(log) + B1(exp2) + B1(log2) + BA1(sqrt) + BA1(inversesqrt) + BA1(abs) + BA1(sign) + BA1(floor) + BA1(trunc) + BA1(round) + BA1(roundEven) + BA1(ceil) + BA1(fract) + B2(mod) + BA1(modf) + BA2(min) + BA2(max) + BA2(clamp) + BA2(mix_lrp) + ir_function_signature *_mix_sel(builtin_available_predicate avail, + const glsl_type *val_type, + const glsl_type *blend_type); + BA2(step) + BA2(smoothstep) + BA1(isnan) + BA1(isinf) + B1(floatBitsToInt) + B1(floatBitsToUint) + B1(intBitsToFloat) + B1(uintBitsToFloat) + ir_function_signature *_packUnorm2x16(builtin_available_predicate avail); + ir_function_signature *_packSnorm2x16(builtin_available_predicate avail); + ir_function_signature *_packUnorm4x8(builtin_available_predicate avail); + ir_function_signature *_packSnorm4x8(builtin_available_predicate avail); + ir_function_signature *_unpackUnorm2x16(builtin_available_predicate avail); + ir_function_signature *_unpackSnorm2x16(builtin_available_predicate avail); + ir_function_signature *_unpackUnorm4x8(builtin_available_predicate avail); + ir_function_signature *_unpackSnorm4x8(builtin_available_predicate avail); + ir_function_signature *_packHalf2x16(builtin_available_predicate avail); + ir_function_signature *_unpackHalf2x16(builtin_available_predicate avail); + ir_function_signature *_packDouble2x32(builtin_available_predicate avail); + ir_function_signature *_unpackDouble2x32(builtin_available_predicate avail); + + BA1(length) + BA1(distance); + BA1(dot); + BA1(cross); + BA1(normalize); + B0(ftransform); + BA1(faceforward); + BA1(reflect); + BA1(refract); + BA1(matrixCompMult); + BA1(outerProduct); + BA1(determinant_mat2); + BA1(determinant_mat3); + BA1(determinant_mat4); + BA1(inverse_mat2); + BA1(inverse_mat3); + BA1(inverse_mat4); + BA1(transpose); + BA1(lessThan); + BA1(lessThanEqual); + BA1(greaterThan); + BA1(greaterThanEqual); + BA1(equal); + BA1(notEqual); + B1(any); + B1(all); + B1(not); + BA2(textureSize); + B1(textureSamples); + +/** Flags to _texture() */ +#define TEX_PROJECT 1 +#define TEX_OFFSET 2 +#define TEX_COMPONENT 4 +#define TEX_OFFSET_NONCONST 8 +#define TEX_OFFSET_ARRAY 16 + + ir_function_signature *_texture(ir_texture_opcode opcode, + builtin_available_predicate avail, + const glsl_type *return_type, + const glsl_type 
*sampler_type, + const glsl_type *coord_type, + int flags = 0); + B0(textureCubeArrayShadow); + ir_function_signature *_texelFetch(builtin_available_predicate avail, + const glsl_type *return_type, + const glsl_type *sampler_type, + const glsl_type *coord_type, + const glsl_type *offset_type = NULL); + + B0(EmitVertex) + B0(EndPrimitive) + ir_function_signature *_EmitStreamVertex(builtin_available_predicate avail, + const glsl_type *stream_type); + ir_function_signature *_EndStreamPrimitive(builtin_available_predicate avail, + const glsl_type *stream_type); + B0(barrier) + + BA2(textureQueryLod); + B1(textureQueryLevels); + BA2(textureSamplesIdentical); + B1(dFdx); + B1(dFdy); + B1(fwidth); + B1(dFdxCoarse); + B1(dFdyCoarse); + B1(fwidthCoarse); + B1(dFdxFine); + B1(dFdyFine); + B1(fwidthFine); + B1(noise1); + B1(noise2); + B1(noise3); + B1(noise4); + + B1(bitfieldExtract) + B1(bitfieldInsert) + B1(bitfieldReverse) + B1(bitCount) + B1(findLSB) + B1(findMSB) + BA1(fma) + B2(ldexp) + B2(frexp) + B2(dfrexp) + B1(uaddCarry) + B1(usubBorrow) + B1(mulExtended) + B1(interpolateAtCentroid) + B1(interpolateAtOffset) + B1(interpolateAtSample) + + ir_function_signature *_atomic_counter_intrinsic(builtin_available_predicate avail); + ir_function_signature *_atomic_counter_op(const char *intrinsic, + builtin_available_predicate avail); + + ir_function_signature *_atomic_intrinsic2(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_op2(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_intrinsic3(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_atomic_op3(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type); + + B1(min3) + B1(max3) + B1(mid3) + + ir_function_signature *_image_prototype(const glsl_type *image_type, + unsigned num_arguments, + unsigned flags); + ir_function_signature *_image_size_prototype(const glsl_type *image_type, + unsigned num_arguments, + unsigned flags); + ir_function_signature *_image_samples_prototype(const glsl_type *image_type, + unsigned num_arguments, + unsigned flags); + ir_function_signature *_image(image_prototype_ctr prototype, + const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags); + + ir_function_signature *_memory_barrier_intrinsic( + builtin_available_predicate avail); + ir_function_signature *_memory_barrier(const char *intrinsic_name, + builtin_available_predicate avail); + + ir_function_signature *_shader_clock_intrinsic(builtin_available_predicate avail, + const glsl_type *type); + ir_function_signature *_shader_clock(builtin_available_predicate avail, + const glsl_type *type); + +#undef B0 +#undef B1 +#undef B2 +#undef B3 +#undef BA1 +#undef BA2 + /** @} */ +}; + +} /* anonymous namespace */ + +/** + * Core builtin_builder functionality: + * @{ + */ +builtin_builder::builtin_builder() + : shader(NULL), + gl_ModelViewProjectionMatrix(NULL), + gl_Vertex(NULL) +{ + mem_ctx = NULL; +} + +builtin_builder::~builtin_builder() +{ + ralloc_free(mem_ctx); +} + +ir_function_signature * +builtin_builder::find(_mesa_glsl_parse_state *state, + const char *name, exec_list *actual_parameters) +{ + /* The shader currently being compiled requested a built-in function; + * it needs to link against builtin_builder::shader in order to get them. 
+ * + * Even if we don't find a matching signature, we still need to do this so + * that the "no matching signature" error will list potential candidates + * from the available built-ins. + */ + state->uses_builtin_functions = true; + + ir_function *f = shader->symbols->get_function(name); + if (f == NULL) + return NULL; + + ir_function_signature *sig = + f->matching_signature(state, actual_parameters, true); + if (sig == NULL) + return NULL; + + return sig; +} + +void +builtin_builder::initialize() +{ + /* If already initialized, don't do it again. */ + if (mem_ctx != NULL) + return; + + mem_ctx = ralloc_context(NULL); + create_shader(); + create_intrinsics(); + create_builtins(); +} + +void +builtin_builder::release() +{ + ralloc_free(mem_ctx); + mem_ctx = NULL; + + ralloc_free(shader); + shader = NULL; +} + +void +builtin_builder::create_shader() +{ + /* The target doesn't actually matter. There's no target for generic + * GLSL utility code that could be linked against any stage, so just + * arbitrarily pick GL_VERTEX_SHADER. + */ + shader = _mesa_new_shader(NULL, 0, GL_VERTEX_SHADER); + shader->symbols = new(mem_ctx) glsl_symbol_table; + + gl_ModelViewProjectionMatrix = + new(mem_ctx) ir_variable(glsl_type::mat4_type, + "gl_ModelViewProjectionMatrix", + ir_var_uniform); + + shader->symbols->add_variable(gl_ModelViewProjectionMatrix); + + gl_Vertex = in_var(glsl_type::vec4_type, "gl_Vertex"); + shader->symbols->add_variable(gl_Vertex); +} + +/** @} */ + +/** + * Create ir_function and ir_function_signature objects for each + * intrinsic. + */ +void +builtin_builder::create_intrinsics() +{ + add_function("__intrinsic_atomic_read", + _atomic_counter_intrinsic(shader_atomic_counters), + NULL); + add_function("__intrinsic_atomic_increment", + _atomic_counter_intrinsic(shader_atomic_counters), + NULL); + add_function("__intrinsic_atomic_predecrement", + _atomic_counter_intrinsic(shader_atomic_counters), + NULL); + + add_function("__intrinsic_atomic_add", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_min", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_max", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_and", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_or", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_xor", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_exchange", + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic2(buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("__intrinsic_atomic_comp_swap", + _atomic_intrinsic3(buffer_atomics_supported, + glsl_type::uint_type), + _atomic_intrinsic3(buffer_atomics_supported, + glsl_type::int_type), + NULL); + + add_image_functions(false); + + 
add_function("__intrinsic_memory_barrier", + _memory_barrier_intrinsic(shader_image_load_store), + NULL); + add_function("__intrinsic_group_memory_barrier", + _memory_barrier_intrinsic(compute_shader), + NULL); + add_function("__intrinsic_memory_barrier_atomic_counter", + _memory_barrier_intrinsic(compute_shader), + NULL); + add_function("__intrinsic_memory_barrier_buffer", + _memory_barrier_intrinsic(compute_shader), + NULL); + add_function("__intrinsic_memory_barrier_image", + _memory_barrier_intrinsic(compute_shader), + NULL); + add_function("__intrinsic_memory_barrier_shared", + _memory_barrier_intrinsic(compute_shader), + NULL); + + add_function("__intrinsic_shader_clock", + _shader_clock_intrinsic(shader_clock, + glsl_type::uvec2_type), + NULL); +} + +/** + * Create ir_function and ir_function_signature objects for each built-in. + * + * Contains a list of every available built-in. + */ +void +builtin_builder::create_builtins() +{ +#define F(NAME) \ + add_function(#NAME, \ + _##NAME(glsl_type::float_type), \ + _##NAME(glsl_type::vec2_type), \ + _##NAME(glsl_type::vec3_type), \ + _##NAME(glsl_type::vec4_type), \ + NULL); + +#define FD(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FD130(NAME) \ + add_function(#NAME, \ + _##NAME(v130, glsl_type::float_type), \ + _##NAME(v130, glsl_type::vec2_type), \ + _##NAME(v130, glsl_type::vec3_type), \ + _##NAME(v130, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FDGS5(NAME) \ + add_function(#NAME, \ + _##NAME(gpu_shader5, glsl_type::float_type), \ + _##NAME(gpu_shader5, glsl_type::vec2_type), \ + _##NAME(gpu_shader5, glsl_type::vec3_type), \ + _##NAME(gpu_shader5, glsl_type::vec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FI(NAME) \ + add_function(#NAME, \ + _##NAME(glsl_type::float_type), \ + _##NAME(glsl_type::vec2_type), \ + _##NAME(glsl_type::vec3_type), \ + _##NAME(glsl_type::vec4_type), \ + _##NAME(glsl_type::int_type), \ + _##NAME(glsl_type::ivec2_type), \ + _##NAME(glsl_type::ivec3_type), \ + _##NAME(glsl_type::ivec4_type), \ + NULL); + +#define FID(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + _##NAME(always_available, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec2_type), \ + _##NAME(always_available, glsl_type::ivec3_type), \ + _##NAME(always_available, glsl_type::ivec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FIUD(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + 
_##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + \ + _##NAME(always_available, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec2_type), \ + _##NAME(always_available, glsl_type::ivec3_type), \ + _##NAME(always_available, glsl_type::ivec4_type), \ + \ + _##NAME(v130, glsl_type::uint_type), \ + _##NAME(v130, glsl_type::uvec2_type), \ + _##NAME(v130, glsl_type::uvec3_type), \ + _##NAME(v130, glsl_type::uvec4_type), \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define IU(NAME) \ + add_function(#NAME, \ + _##NAME(glsl_type::int_type), \ + _##NAME(glsl_type::ivec2_type), \ + _##NAME(glsl_type::ivec3_type), \ + _##NAME(glsl_type::ivec4_type), \ + \ + _##NAME(glsl_type::uint_type), \ + _##NAME(glsl_type::uvec2_type), \ + _##NAME(glsl_type::uvec3_type), \ + _##NAME(glsl_type::uvec4_type), \ + NULL); + +#define FIUBD(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type), \ + \ + _##NAME(always_available, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec2_type), \ + _##NAME(always_available, glsl_type::ivec3_type), \ + _##NAME(always_available, glsl_type::ivec4_type), \ + \ + _##NAME(v130, glsl_type::uint_type), \ + _##NAME(v130, glsl_type::uvec2_type), \ + _##NAME(v130, glsl_type::uvec3_type), \ + _##NAME(v130, glsl_type::uvec4_type), \ + \ + _##NAME(always_available, glsl_type::bool_type), \ + _##NAME(always_available, glsl_type::bvec2_type), \ + _##NAME(always_available, glsl_type::bvec3_type), \ + _##NAME(always_available, glsl_type::bvec4_type), \ + \ + _##NAME(fp64, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type), \ + NULL); + +#define FIUD2_MIXED(NAME) \ + add_function(#NAME, \ + _##NAME(always_available, glsl_type::float_type, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec2_type, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec3_type, glsl_type::float_type), \ + _##NAME(always_available, glsl_type::vec4_type, glsl_type::float_type), \ + \ + _##NAME(always_available, glsl_type::vec2_type, glsl_type::vec2_type), \ + _##NAME(always_available, glsl_type::vec3_type, glsl_type::vec3_type), \ + _##NAME(always_available, glsl_type::vec4_type, glsl_type::vec4_type), \ + \ + _##NAME(always_available, glsl_type::int_type, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec2_type, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec3_type, glsl_type::int_type), \ + _##NAME(always_available, glsl_type::ivec4_type, glsl_type::int_type), \ + \ + _##NAME(always_available, glsl_type::ivec2_type, glsl_type::ivec2_type), \ + _##NAME(always_available, glsl_type::ivec3_type, glsl_type::ivec3_type), \ + _##NAME(always_available, glsl_type::ivec4_type, glsl_type::ivec4_type), \ + \ + _##NAME(v130, glsl_type::uint_type, glsl_type::uint_type), \ + _##NAME(v130, glsl_type::uvec2_type, glsl_type::uint_type), \ + _##NAME(v130, glsl_type::uvec3_type, glsl_type::uint_type), \ + _##NAME(v130, glsl_type::uvec4_type, glsl_type::uint_type), \ + \ + _##NAME(v130, glsl_type::uvec2_type, glsl_type::uvec2_type), \ + _##NAME(v130, 
glsl_type::uvec3_type, glsl_type::uvec3_type), \ + _##NAME(v130, glsl_type::uvec4_type, glsl_type::uvec4_type), \ + \ + _##NAME(fp64, glsl_type::double_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec3_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec4_type, glsl_type::double_type), \ + _##NAME(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), \ + _##NAME(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), \ + _##NAME(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), \ + NULL); + + F(radians) + F(degrees) + F(sin) + F(cos) + F(tan) + F(asin) + F(acos) + + add_function("atan", + _atan(glsl_type::float_type), + _atan(glsl_type::vec2_type), + _atan(glsl_type::vec3_type), + _atan(glsl_type::vec4_type), + _atan2(glsl_type::float_type), + _atan2(glsl_type::vec2_type), + _atan2(glsl_type::vec3_type), + _atan2(glsl_type::vec4_type), + NULL); + + F(sinh) + F(cosh) + F(tanh) + F(asinh) + F(acosh) + F(atanh) + F(pow) + F(exp) + F(log) + F(exp2) + F(log2) + FD(sqrt) + FD(inversesqrt) + FID(abs) + FID(sign) + FD(floor) + FD(trunc) + FD(round) + FD(roundEven) + FD(ceil) + FD(fract) + + add_function("mod", + _mod(glsl_type::float_type, glsl_type::float_type), + _mod(glsl_type::vec2_type, glsl_type::float_type), + _mod(glsl_type::vec3_type, glsl_type::float_type), + _mod(glsl_type::vec4_type, glsl_type::float_type), + + _mod(glsl_type::vec2_type, glsl_type::vec2_type), + _mod(glsl_type::vec3_type, glsl_type::vec3_type), + _mod(glsl_type::vec4_type, glsl_type::vec4_type), + + _mod(glsl_type::double_type, glsl_type::double_type), + _mod(glsl_type::dvec2_type, glsl_type::double_type), + _mod(glsl_type::dvec3_type, glsl_type::double_type), + _mod(glsl_type::dvec4_type, glsl_type::double_type), + + _mod(glsl_type::dvec2_type, glsl_type::dvec2_type), + _mod(glsl_type::dvec3_type, glsl_type::dvec3_type), + _mod(glsl_type::dvec4_type, glsl_type::dvec4_type), + NULL); + + FD(modf) + + FIUD2_MIXED(min) + FIUD2_MIXED(max) + FIUD2_MIXED(clamp) + + add_function("mix", + _mix_lrp(always_available, glsl_type::float_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec2_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec3_type, glsl_type::float_type), + _mix_lrp(always_available, glsl_type::vec4_type, glsl_type::float_type), + + _mix_lrp(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _mix_lrp(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _mix_lrp(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + + _mix_lrp(fp64, glsl_type::double_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec2_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec3_type, glsl_type::double_type), + _mix_lrp(fp64, glsl_type::dvec4_type, glsl_type::double_type), + + _mix_lrp(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _mix_lrp(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _mix_lrp(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), + + _mix_sel(v130, glsl_type::float_type, glsl_type::bool_type), + _mix_sel(v130, glsl_type::vec2_type, glsl_type::bvec2_type), + _mix_sel(v130, glsl_type::vec3_type, glsl_type::bvec3_type), + _mix_sel(v130, glsl_type::vec4_type, glsl_type::bvec4_type), + + _mix_sel(fp64, glsl_type::double_type, glsl_type::bool_type), + _mix_sel(fp64, glsl_type::dvec2_type, glsl_type::bvec2_type), + _mix_sel(fp64, glsl_type::dvec3_type, glsl_type::bvec3_type), + _mix_sel(fp64, glsl_type::dvec4_type, 
glsl_type::bvec4_type), + + _mix_sel(shader_integer_mix, glsl_type::int_type, glsl_type::bool_type), + _mix_sel(shader_integer_mix, glsl_type::ivec2_type, glsl_type::bvec2_type), + _mix_sel(shader_integer_mix, glsl_type::ivec3_type, glsl_type::bvec3_type), + _mix_sel(shader_integer_mix, glsl_type::ivec4_type, glsl_type::bvec4_type), + + _mix_sel(shader_integer_mix, glsl_type::uint_type, glsl_type::bool_type), + _mix_sel(shader_integer_mix, glsl_type::uvec2_type, glsl_type::bvec2_type), + _mix_sel(shader_integer_mix, glsl_type::uvec3_type, glsl_type::bvec3_type), + _mix_sel(shader_integer_mix, glsl_type::uvec4_type, glsl_type::bvec4_type), + + _mix_sel(shader_integer_mix, glsl_type::bool_type, glsl_type::bool_type), + _mix_sel(shader_integer_mix, glsl_type::bvec2_type, glsl_type::bvec2_type), + _mix_sel(shader_integer_mix, glsl_type::bvec3_type, glsl_type::bvec3_type), + _mix_sel(shader_integer_mix, glsl_type::bvec4_type, glsl_type::bvec4_type), + NULL); + + add_function("step", + _step(always_available, glsl_type::float_type, glsl_type::float_type), + _step(always_available, glsl_type::float_type, glsl_type::vec2_type), + _step(always_available, glsl_type::float_type, glsl_type::vec3_type), + _step(always_available, glsl_type::float_type, glsl_type::vec4_type), + + _step(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _step(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _step(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + _step(fp64, glsl_type::double_type, glsl_type::double_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec2_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec3_type), + _step(fp64, glsl_type::double_type, glsl_type::dvec4_type), + + _step(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _step(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _step(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), + NULL); + + add_function("smoothstep", + _smoothstep(always_available, glsl_type::float_type, glsl_type::float_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec2_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec3_type), + _smoothstep(always_available, glsl_type::float_type, glsl_type::vec4_type), + + _smoothstep(always_available, glsl_type::vec2_type, glsl_type::vec2_type), + _smoothstep(always_available, glsl_type::vec3_type, glsl_type::vec3_type), + _smoothstep(always_available, glsl_type::vec4_type, glsl_type::vec4_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::double_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec2_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec3_type), + _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec4_type), + + _smoothstep(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), + _smoothstep(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), + _smoothstep(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), + NULL); + + FD130(isnan) + FD130(isinf) + + F(floatBitsToInt) + F(floatBitsToUint) + add_function("intBitsToFloat", + _intBitsToFloat(glsl_type::int_type), + _intBitsToFloat(glsl_type::ivec2_type), + _intBitsToFloat(glsl_type::ivec3_type), + _intBitsToFloat(glsl_type::ivec4_type), + NULL); + add_function("uintBitsToFloat", + _uintBitsToFloat(glsl_type::uint_type), + _uintBitsToFloat(glsl_type::uvec2_type), + _uintBitsToFloat(glsl_type::uvec3_type), + _uintBitsToFloat(glsl_type::uvec4_type), + NULL); + + add_function("packUnorm2x16", 
_packUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); + add_function("packSnorm2x16", _packSnorm2x16(shader_packing_or_es3), NULL); + add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("packSnorm4x8", _packSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("unpackUnorm2x16", _unpackUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); + add_function("unpackSnorm2x16", _unpackSnorm2x16(shader_packing_or_es3), NULL); + add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); + add_function("packHalf2x16", _packHalf2x16(shader_packing_or_es3), NULL); + add_function("unpackHalf2x16", _unpackHalf2x16(shader_packing_or_es3), NULL); + add_function("packDouble2x32", _packDouble2x32(fp64), NULL); + add_function("unpackDouble2x32", _unpackDouble2x32(fp64), NULL); + + + FD(length) + FD(distance) + FD(dot) + + add_function("cross", _cross(always_available, glsl_type::vec3_type), + _cross(fp64, glsl_type::dvec3_type), NULL); + + FD(normalize) + add_function("ftransform", _ftransform(), NULL); + FD(faceforward) + FD(reflect) + FD(refract) + // ... + add_function("matrixCompMult", + _matrixCompMult(always_available, glsl_type::mat2_type), + _matrixCompMult(always_available, glsl_type::mat3_type), + _matrixCompMult(always_available, glsl_type::mat4_type), + _matrixCompMult(always_available, glsl_type::mat2x3_type), + _matrixCompMult(always_available, glsl_type::mat2x4_type), + _matrixCompMult(always_available, glsl_type::mat3x2_type), + _matrixCompMult(always_available, glsl_type::mat3x4_type), + _matrixCompMult(always_available, glsl_type::mat4x2_type), + _matrixCompMult(always_available, glsl_type::mat4x3_type), + _matrixCompMult(fp64, glsl_type::dmat2_type), + _matrixCompMult(fp64, glsl_type::dmat3_type), + _matrixCompMult(fp64, glsl_type::dmat4_type), + _matrixCompMult(fp64, glsl_type::dmat2x3_type), + _matrixCompMult(fp64, glsl_type::dmat2x4_type), + _matrixCompMult(fp64, glsl_type::dmat3x2_type), + _matrixCompMult(fp64, glsl_type::dmat3x4_type), + _matrixCompMult(fp64, glsl_type::dmat4x2_type), + _matrixCompMult(fp64, glsl_type::dmat4x3_type), + NULL); + add_function("outerProduct", + _outerProduct(v120, glsl_type::mat2_type), + _outerProduct(v120, glsl_type::mat3_type), + _outerProduct(v120, glsl_type::mat4_type), + _outerProduct(v120, glsl_type::mat2x3_type), + _outerProduct(v120, glsl_type::mat2x4_type), + _outerProduct(v120, glsl_type::mat3x2_type), + _outerProduct(v120, glsl_type::mat3x4_type), + _outerProduct(v120, glsl_type::mat4x2_type), + _outerProduct(v120, glsl_type::mat4x3_type), + _outerProduct(fp64, glsl_type::dmat2_type), + _outerProduct(fp64, glsl_type::dmat3_type), + _outerProduct(fp64, glsl_type::dmat4_type), + _outerProduct(fp64, glsl_type::dmat2x3_type), + _outerProduct(fp64, glsl_type::dmat2x4_type), + _outerProduct(fp64, glsl_type::dmat3x2_type), + _outerProduct(fp64, glsl_type::dmat3x4_type), + _outerProduct(fp64, glsl_type::dmat4x2_type), + _outerProduct(fp64, glsl_type::dmat4x3_type), + NULL); + add_function("determinant", + _determinant_mat2(v120, glsl_type::mat2_type), + _determinant_mat3(v120, glsl_type::mat3_type), + _determinant_mat4(v120, glsl_type::mat4_type), + _determinant_mat2(fp64, glsl_type::dmat2_type), + _determinant_mat3(fp64, glsl_type::dmat3_type), + _determinant_mat4(fp64, glsl_type::dmat4_type), + + NULL); + add_function("inverse", + 
_inverse_mat2(v140_or_es3, glsl_type::mat2_type), + _inverse_mat3(v140_or_es3, glsl_type::mat3_type), + _inverse_mat4(v140_or_es3, glsl_type::mat4_type), + _inverse_mat2(fp64, glsl_type::dmat2_type), + _inverse_mat3(fp64, glsl_type::dmat3_type), + _inverse_mat4(fp64, glsl_type::dmat4_type), + NULL); + add_function("transpose", + _transpose(v120, glsl_type::mat2_type), + _transpose(v120, glsl_type::mat3_type), + _transpose(v120, glsl_type::mat4_type), + _transpose(v120, glsl_type::mat2x3_type), + _transpose(v120, glsl_type::mat2x4_type), + _transpose(v120, glsl_type::mat3x2_type), + _transpose(v120, glsl_type::mat3x4_type), + _transpose(v120, glsl_type::mat4x2_type), + _transpose(v120, glsl_type::mat4x3_type), + _transpose(fp64, glsl_type::dmat2_type), + _transpose(fp64, glsl_type::dmat3_type), + _transpose(fp64, glsl_type::dmat4_type), + _transpose(fp64, glsl_type::dmat2x3_type), + _transpose(fp64, glsl_type::dmat2x4_type), + _transpose(fp64, glsl_type::dmat3x2_type), + _transpose(fp64, glsl_type::dmat3x4_type), + _transpose(fp64, glsl_type::dmat4x2_type), + _transpose(fp64, glsl_type::dmat4x3_type), + NULL); + FIUD(lessThan) + FIUD(lessThanEqual) + FIUD(greaterThan) + FIUD(greaterThanEqual) + FIUBD(notEqual) + FIUBD(equal) + + add_function("any", + _any(glsl_type::bvec2_type), + _any(glsl_type::bvec3_type), + _any(glsl_type::bvec4_type), + NULL); + + add_function("all", + _all(glsl_type::bvec2_type), + _all(glsl_type::bvec3_type), + _all(glsl_type::bvec4_type), + NULL); + + add_function("not", + _not(glsl_type::bvec2_type), + _not(glsl_type::bvec3_type), + _not(glsl_type::bvec4_type), + NULL); + + add_function("textureSize", + _textureSize(v130, glsl_type::int_type, glsl_type::sampler1D_type), + _textureSize(v130, glsl_type::int_type, glsl_type::isampler1D_type), + _textureSize(v130, glsl_type::int_type, glsl_type::usampler1D_type), + + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2D_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::isampler2D_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::usampler2D_type), + + _textureSize(v130, glsl_type::ivec3_type, glsl_type::sampler3D_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::isampler3D_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::usampler3D_type), + + _textureSize(v130, glsl_type::ivec2_type, glsl_type::samplerCube_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::isamplerCube_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::usamplerCube_type), + + _textureSize(v130, glsl_type::int_type, glsl_type::sampler1DShadow_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2DShadow_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::samplerCubeShadow_type), + + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler1DArray_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::isampler1DArray_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::usampler1DArray_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::sampler2DArray_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::isampler2DArray_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::usampler2DArray_type), + + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler1DArrayShadow_type), + _textureSize(v130, glsl_type::ivec3_type, glsl_type::sampler2DArrayShadow_type), + + _textureSize(texture_cube_map_array, glsl_type::ivec3_type, glsl_type::samplerCubeArray_type), + _textureSize(texture_cube_map_array, 
glsl_type::ivec3_type, glsl_type::isamplerCubeArray_type), + _textureSize(texture_cube_map_array, glsl_type::ivec3_type, glsl_type::usamplerCubeArray_type), + _textureSize(texture_cube_map_array, glsl_type::ivec3_type, glsl_type::samplerCubeArrayShadow_type), + + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2DRect_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::isampler2DRect_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::usampler2DRect_type), + _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2DRectShadow_type), + + _textureSize(v140, glsl_type::int_type, glsl_type::samplerBuffer_type), + _textureSize(v140, glsl_type::int_type, glsl_type::isamplerBuffer_type), + _textureSize(v140, glsl_type::int_type, glsl_type::usamplerBuffer_type), + _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::sampler2DMS_type), + _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::isampler2DMS_type), + _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::usampler2DMS_type), + + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::sampler2DMSArray_type), + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::isampler2DMSArray_type), + _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type), + NULL); + + add_function("textureSamples", + _textureSamples(glsl_type::sampler2DMS_type), + _textureSamples(glsl_type::isampler2DMS_type), + _textureSamples(glsl_type::usampler2DMS_type), + + _textureSamples(glsl_type::sampler2DMSArray_type), + _textureSamples(glsl_type::isampler2DMSArray_type), + _textureSamples(glsl_type::usampler2DMSArray_type), + NULL); + + add_function("texture", + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::samplerCubeShadow_type, glsl_type::vec4_type), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, 
glsl_type::usampler1DArray_type, glsl_type::vec2_type), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_tex, texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_tex, texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_tex, texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type), + /* samplerCubeArrayShadow is special; it has an extra parameter + * for the shadow comparator since there is no vec5 type. + */ + _textureCubeArrayShadow(), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::samplerCubeShadow_type, glsl_type::vec4_type), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type), + + _texture(ir_txb, v130_fs_only, 
glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_txb, fs_texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txb, fs_texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txb, fs_texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("textureLod", + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_txl, texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txl, texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txl, texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("textureOffset", + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, 
glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, 
TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + NULL); + + add_function("textureProj", + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + 
_texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texelFetch", + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::int_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::ivec2_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::ivec3_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::ivec2_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::ivec2_type), + _texelFetch(v130, 
glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::ivec2_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::ivec3_type), + + _texelFetch(v140, glsl_type::vec4_type, glsl_type::samplerBuffer_type, glsl_type::int_type), + _texelFetch(v140, glsl_type::ivec4_type, glsl_type::isamplerBuffer_type, glsl_type::int_type), + _texelFetch(v140, glsl_type::uvec4_type, glsl_type::usamplerBuffer_type, glsl_type::int_type), + + _texelFetch(texture_multisample, glsl_type::vec4_type, glsl_type::sampler2DMS_type, glsl_type::ivec2_type), + _texelFetch(texture_multisample, glsl_type::ivec4_type, glsl_type::isampler2DMS_type, glsl_type::ivec2_type), + _texelFetch(texture_multisample, glsl_type::uvec4_type, glsl_type::usampler2DMS_type, glsl_type::ivec2_type), + + _texelFetch(texture_multisample_array, glsl_type::vec4_type, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), + _texelFetch(texture_multisample_array, glsl_type::ivec4_type, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), + _texelFetch(texture_multisample_array, glsl_type::uvec4_type, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), + NULL); + + add_function("texelFetchOffset", + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::int_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::int_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::int_type, glsl_type::int_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::ivec3_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::ivec3_type, glsl_type::ivec3_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::ivec3_type, glsl_type::ivec3_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::ivec2_type, glsl_type::ivec2_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::ivec2_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::ivec2_type, glsl_type::int_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::ivec2_type, glsl_type::int_type), + + _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::ivec3_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::ivec3_type, glsl_type::ivec2_type), + _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::ivec3_type, 
glsl_type::ivec2_type), + + NULL); + + add_function("textureProjOffset", + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::vec4_type, 
glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + NULL); + + add_function("textureLodOffset", + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, 
glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + NULL); + + add_function("textureProjLod", + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("textureProjLodOffset", + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, 
glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + NULL); + + add_function("textureGrad", + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::samplerCubeShadow_type, glsl_type::vec4_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_txd, texture_cube_map_array, glsl_type::vec4_type, 
glsl_type::samplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txd, texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_txd, texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type), + NULL); + + add_function("textureGradOffset", + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type, TEX_OFFSET), + NULL); + + add_function("textureProjGrad", + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, 
glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("textureProjGradOffset", + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, 
glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET), + NULL); + + add_function("EmitVertex", _EmitVertex(), NULL); + add_function("EndPrimitive", _EndPrimitive(), NULL); + add_function("EmitStreamVertex", + _EmitStreamVertex(gs_streams, glsl_type::uint_type), + _EmitStreamVertex(gs_streams, glsl_type::int_type), + NULL); + add_function("EndStreamPrimitive", + _EndStreamPrimitive(gs_streams, glsl_type::uint_type), + _EndStreamPrimitive(gs_streams, glsl_type::int_type), + NULL); + add_function("barrier", _barrier(), NULL); + + add_function("textureQueryLOD", + _textureQueryLod(texture_query_lod, glsl_type::sampler1D_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler1D_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler1D_type, glsl_type::float_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler2D_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler2D_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler3D_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler3D_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::samplerCube_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, 
glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler1DArray_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler1DArray_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler1DArray_type, glsl_type::float_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::isampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::usampler2DArray_type, glsl_type::vec2_type), + + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type), + + _textureQueryLod(texture_query_lod, glsl_type::sampler1DShadow_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type), + _textureQueryLod(texture_query_lod, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type), + _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("textureQueryLod", + _textureQueryLod(v400_fs_only, glsl_type::sampler1D_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler1D_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler1D_type, glsl_type::float_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler2D_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler2D_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler3D_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler3D_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler3D_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::samplerCube_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler1DArray_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler1DArray_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler1DArray_type, glsl_type::float_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::isampler2DArray_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::usampler2DArray_type, glsl_type::vec2_type), + + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type), + + _textureQueryLod(v400_fs_only, glsl_type::sampler1DShadow_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler2DShadow_type, 
glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type), + _textureQueryLod(v400_fs_only, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type), + _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("textureQueryLevels", + _textureQueryLevels(glsl_type::sampler1D_type), + _textureQueryLevels(glsl_type::sampler2D_type), + _textureQueryLevels(glsl_type::sampler3D_type), + _textureQueryLevels(glsl_type::samplerCube_type), + _textureQueryLevels(glsl_type::sampler1DArray_type), + _textureQueryLevels(glsl_type::sampler2DArray_type), + _textureQueryLevels(glsl_type::samplerCubeArray_type), + _textureQueryLevels(glsl_type::sampler1DShadow_type), + _textureQueryLevels(glsl_type::sampler2DShadow_type), + _textureQueryLevels(glsl_type::samplerCubeShadow_type), + _textureQueryLevels(glsl_type::sampler1DArrayShadow_type), + _textureQueryLevels(glsl_type::sampler2DArrayShadow_type), + _textureQueryLevels(glsl_type::samplerCubeArrayShadow_type), + + _textureQueryLevels(glsl_type::isampler1D_type), + _textureQueryLevels(glsl_type::isampler2D_type), + _textureQueryLevels(glsl_type::isampler3D_type), + _textureQueryLevels(glsl_type::isamplerCube_type), + _textureQueryLevels(glsl_type::isampler1DArray_type), + _textureQueryLevels(glsl_type::isampler2DArray_type), + _textureQueryLevels(glsl_type::isamplerCubeArray_type), + + _textureQueryLevels(glsl_type::usampler1D_type), + _textureQueryLevels(glsl_type::usampler2D_type), + _textureQueryLevels(glsl_type::usampler3D_type), + _textureQueryLevels(glsl_type::usamplerCube_type), + _textureQueryLevels(glsl_type::usampler1DArray_type), + _textureQueryLevels(glsl_type::usampler2DArray_type), + _textureQueryLevels(glsl_type::usamplerCubeArray_type), + + NULL); + + add_function("textureSamplesIdenticalEXT", + _textureSamplesIdentical(texture_samples_identical, glsl_type::sampler2DMS_type, glsl_type::ivec2_type), + _textureSamplesIdentical(texture_samples_identical, glsl_type::isampler2DMS_type, glsl_type::ivec2_type), + _textureSamplesIdentical(texture_samples_identical, glsl_type::usampler2DMS_type, glsl_type::ivec2_type), + + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type), + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type), + _textureSamplesIdentical(texture_samples_identical_array, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type), + NULL); + + add_function("texture1D", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + NULL); + + add_function("texture1DArray", + _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + NULL); + + add_function("texture1DProj", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, 
TEX_PROJECT), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture1DLod", + _texture(ir_txl, tex1d_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + NULL); + + add_function("texture1DArrayLod", + _texture(ir_txl, texture_array_lod, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), + NULL); + + add_function("texture1DProjLod", + _texture(ir_txl, tex1d_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txl, tex1d_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture2D", + _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_tex, texture_external, glsl_type::vec4_type, glsl_type::samplerExternalOES_type, glsl_type::vec2_type), + NULL); + + add_function("texture2DArray", + _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + NULL); + + add_function("texture2DProj", + _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_tex, texture_external, glsl_type::vec4_type, glsl_type::samplerExternalOES_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, texture_external, glsl_type::vec4_type, glsl_type::samplerExternalOES_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture2DLod", + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + NULL); + + add_function("texture2DArrayLod", + _texture(ir_txl, texture_array_lod, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + NULL); + + add_function("texture2DProjLod", + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture3D", + _texture(ir_tex, tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + _texture(ir_txb, fs_tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + NULL); + + add_function("texture3DProj", + _texture(ir_tex, tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, fs_tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture3DLod", + _texture(ir_txl, tex3d_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + NULL); + + add_function("texture3DProjLod", + _texture(ir_txl, tex3d_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + 
NULL); + + add_function("textureCube", + _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + NULL); + + add_function("textureCubeLod", + _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + NULL); + + add_function("texture2DRect", + _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), + NULL); + + add_function("texture2DRectProj", + _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow1D", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow1DArray", + _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2D", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2DArray", + _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type), + _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type), + NULL); + + add_function("shadow1DProj", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow2DProj", + _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow1DLod", + _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2DLod", + _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow1DArrayLod", + _texture(ir_txl, texture_array_lod, glsl_type::vec4_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow1DProjLod", + _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow2DProjLod", + _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow2DRect", + _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2DRectProj", + _texture(ir_tex, texture_rectangle, 
glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture1DGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), + NULL); + + add_function("texture1DProjGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT), + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture2DGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + NULL); + + add_function("texture2DProjGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture3DGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), + NULL); + + add_function("texture3DProjGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("textureCubeGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + NULL); + + add_function("shadow1DGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow1DProjGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow2DGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2DProjGradARB", + _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("texture2DRectGradARB", + _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), + NULL); + + add_function("texture2DRectProjGradARB", + _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT), + _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("shadow2DRectGradARB", + _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type), + NULL); + + add_function("shadow2DRectProjGradARB", + _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT), + NULL); + + add_function("textureGather", + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, 
glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), + + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), + + _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), + _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), + + _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), + _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), + + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type), + _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5_or_es31, 
glsl_type::vec4_type, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec4_type), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type), + NULL); + + add_function("textureGatherOffset", + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET), + + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + 
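+                /* The gpu_shader5 overloads below take offsets that need not
+                 * be constant expressions (TEX_OFFSET_NONCONST); the trailing
+                 * es31 shadow overloads keep the constant-offset TEX_OFFSET
+                 * form.
+                 */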
+ _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST), + + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET), + _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET), + NULL); + + add_function("textureGatherOffsets", + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, 
TEX_OFFSET_ARRAY), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT), + + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY), + _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY), + NULL); + + F(dFdx) + F(dFdy) + F(fwidth) + F(dFdxCoarse) + F(dFdyCoarse) + F(fwidthCoarse) + F(dFdxFine) + F(dFdyFine) + F(fwidthFine) + F(noise1) + F(noise2) + F(noise3) + F(noise4) + + IU(bitfieldExtract) + IU(bitfieldInsert) + IU(bitfieldReverse) + IU(bitCount) + IU(findLSB) + IU(findMSB) + FDGS5(fma) + + add_function("ldexp", + _ldexp(glsl_type::float_type, glsl_type::int_type), + _ldexp(glsl_type::vec2_type, glsl_type::ivec2_type), + _ldexp(glsl_type::vec3_type, glsl_type::ivec3_type), + _ldexp(glsl_type::vec4_type, glsl_type::ivec4_type), + _ldexp(glsl_type::double_type, glsl_type::int_type), + _ldexp(glsl_type::dvec2_type, glsl_type::ivec2_type), + _ldexp(glsl_type::dvec3_type, glsl_type::ivec3_type), + _ldexp(glsl_type::dvec4_type, glsl_type::ivec4_type), + NULL); + + add_function("frexp", + _frexp(glsl_type::float_type, glsl_type::int_type), + _frexp(glsl_type::vec2_type, glsl_type::ivec2_type), + _frexp(glsl_type::vec3_type, glsl_type::ivec3_type), + _frexp(glsl_type::vec4_type, glsl_type::ivec4_type), + _dfrexp(glsl_type::double_type, glsl_type::int_type), + _dfrexp(glsl_type::dvec2_type, glsl_type::ivec2_type), + _dfrexp(glsl_type::dvec3_type, glsl_type::ivec3_type), + _dfrexp(glsl_type::dvec4_type, glsl_type::ivec4_type), + NULL); + add_function("uaddCarry", + _uaddCarry(glsl_type::uint_type), + _uaddCarry(glsl_type::uvec2_type), + _uaddCarry(glsl_type::uvec3_type), + _uaddCarry(glsl_type::uvec4_type), + NULL); + add_function("usubBorrow", + _usubBorrow(glsl_type::uint_type), + _usubBorrow(glsl_type::uvec2_type), + _usubBorrow(glsl_type::uvec3_type), + _usubBorrow(glsl_type::uvec4_type), + NULL); + add_function("imulExtended", + _mulExtended(glsl_type::int_type), + _mulExtended(glsl_type::ivec2_type), + _mulExtended(glsl_type::ivec3_type), + _mulExtended(glsl_type::ivec4_type), + NULL); + add_function("umulExtended", + _mulExtended(glsl_type::uint_type), + _mulExtended(glsl_type::uvec2_type), + _mulExtended(glsl_type::uvec3_type), + _mulExtended(glsl_type::uvec4_type), + NULL); + add_function("interpolateAtCentroid", + _interpolateAtCentroid(glsl_type::float_type), + _interpolateAtCentroid(glsl_type::vec2_type), + _interpolateAtCentroid(glsl_type::vec3_type), + _interpolateAtCentroid(glsl_type::vec4_type), + NULL); + add_function("interpolateAtOffset", + _interpolateAtOffset(glsl_type::float_type), + _interpolateAtOffset(glsl_type::vec2_type), + _interpolateAtOffset(glsl_type::vec3_type), + _interpolateAtOffset(glsl_type::vec4_type), + NULL); + add_function("interpolateAtSample", + _interpolateAtSample(glsl_type::float_type), + _interpolateAtSample(glsl_type::vec2_type), + _interpolateAtSample(glsl_type::vec3_type), + 
_interpolateAtSample(glsl_type::vec4_type), + NULL); + + add_function("atomicCounter", + _atomic_counter_op("__intrinsic_atomic_read", + shader_atomic_counters), + NULL); + add_function("atomicCounterIncrement", + _atomic_counter_op("__intrinsic_atomic_increment", + shader_atomic_counters), + NULL); + add_function("atomicCounterDecrement", + _atomic_counter_op("__intrinsic_atomic_predecrement", + shader_atomic_counters), + NULL); + + add_function("atomicAdd", + _atomic_op2("__intrinsic_atomic_add", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_add", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicMin", + _atomic_op2("__intrinsic_atomic_min", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_min", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicMax", + _atomic_op2("__intrinsic_atomic_max", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_max", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicAnd", + _atomic_op2("__intrinsic_atomic_and", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_and", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicOr", + _atomic_op2("__intrinsic_atomic_or", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_or", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicXor", + _atomic_op2("__intrinsic_atomic_xor", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_xor", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicExchange", + _atomic_op2("__intrinsic_atomic_exchange", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op2("__intrinsic_atomic_exchange", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + add_function("atomicCompSwap", + _atomic_op3("__intrinsic_atomic_comp_swap", + buffer_atomics_supported, + glsl_type::uint_type), + _atomic_op3("__intrinsic_atomic_comp_swap", + buffer_atomics_supported, + glsl_type::int_type), + NULL); + + add_function("min3", + _min3(glsl_type::float_type), + _min3(glsl_type::vec2_type), + _min3(glsl_type::vec3_type), + _min3(glsl_type::vec4_type), + + _min3(glsl_type::int_type), + _min3(glsl_type::ivec2_type), + _min3(glsl_type::ivec3_type), + _min3(glsl_type::ivec4_type), + + _min3(glsl_type::uint_type), + _min3(glsl_type::uvec2_type), + _min3(glsl_type::uvec3_type), + _min3(glsl_type::uvec4_type), + NULL); + + add_function("max3", + _max3(glsl_type::float_type), + _max3(glsl_type::vec2_type), + _max3(glsl_type::vec3_type), + _max3(glsl_type::vec4_type), + + _max3(glsl_type::int_type), + _max3(glsl_type::ivec2_type), + _max3(glsl_type::ivec3_type), + _max3(glsl_type::ivec4_type), + + _max3(glsl_type::uint_type), + _max3(glsl_type::uvec2_type), + _max3(glsl_type::uvec3_type), + _max3(glsl_type::uvec4_type), + NULL); + + add_function("mid3", + _mid3(glsl_type::float_type), + _mid3(glsl_type::vec2_type), + _mid3(glsl_type::vec3_type), + _mid3(glsl_type::vec4_type), + + _mid3(glsl_type::int_type), + _mid3(glsl_type::ivec2_type), + _mid3(glsl_type::ivec3_type), + _mid3(glsl_type::ivec4_type), + + _mid3(glsl_type::uint_type), + _mid3(glsl_type::uvec2_type), + _mid3(glsl_type::uvec3_type), + _mid3(glsl_type::uvec4_type), + NULL); + + add_image_functions(true); + + add_function("memoryBarrier", 
+ _memory_barrier("__intrinsic_memory_barrier", + shader_image_load_store), + NULL); + add_function("groupMemoryBarrier", + _memory_barrier("__intrinsic_group_memory_barrier", + compute_shader), + NULL); + add_function("memoryBarrierAtomicCounter", + _memory_barrier("__intrinsic_memory_barrier_atomic_counter", + compute_shader), + NULL); + add_function("memoryBarrierBuffer", + _memory_barrier("__intrinsic_memory_barrier_buffer", + compute_shader), + NULL); + add_function("memoryBarrierImage", + _memory_barrier("__intrinsic_memory_barrier_image", + compute_shader), + NULL); + add_function("memoryBarrierShared", + _memory_barrier("__intrinsic_memory_barrier_shared", + compute_shader), + NULL); + + add_function("clock2x32ARB", + _shader_clock(shader_clock, + glsl_type::uvec2_type), + NULL); + +#undef F +#undef FI +#undef FIUD +#undef FIUBD +#undef FIU2_MIXED +} + +void +builtin_builder::add_function(const char *name, ...) +{ + va_list ap; + + ir_function *f = new(mem_ctx) ir_function(name); + + va_start(ap, name); + while (true) { + ir_function_signature *sig = va_arg(ap, ir_function_signature *); + if (sig == NULL) + break; + + if (false) { + exec_list stuff; + stuff.push_tail(sig); + validate_ir_tree(&stuff); + } + + f->add_signature(sig); + } + va_end(ap); + + shader->symbols->add_function(f); +} + +void +builtin_builder::add_image_function(const char *name, + const char *intrinsic_name, + image_prototype_ctr prototype, + unsigned num_arguments, + unsigned flags) +{ + static const glsl_type *const types[] = { + glsl_type::image1D_type, + glsl_type::image2D_type, + glsl_type::image3D_type, + glsl_type::image2DRect_type, + glsl_type::imageCube_type, + glsl_type::imageBuffer_type, + glsl_type::image1DArray_type, + glsl_type::image2DArray_type, + glsl_type::imageCubeArray_type, + glsl_type::image2DMS_type, + glsl_type::image2DMSArray_type, + glsl_type::iimage1D_type, + glsl_type::iimage2D_type, + glsl_type::iimage3D_type, + glsl_type::iimage2DRect_type, + glsl_type::iimageCube_type, + glsl_type::iimageBuffer_type, + glsl_type::iimage1DArray_type, + glsl_type::iimage2DArray_type, + glsl_type::iimageCubeArray_type, + glsl_type::iimage2DMS_type, + glsl_type::iimage2DMSArray_type, + glsl_type::uimage1D_type, + glsl_type::uimage2D_type, + glsl_type::uimage3D_type, + glsl_type::uimage2DRect_type, + glsl_type::uimageCube_type, + glsl_type::uimageBuffer_type, + glsl_type::uimage1DArray_type, + glsl_type::uimage2DArray_type, + glsl_type::uimageCubeArray_type, + glsl_type::uimage2DMS_type, + glsl_type::uimage2DMSArray_type + }; + + ir_function *f = new(mem_ctx) ir_function(name); + + for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) { + if ((types[i]->sampler_type != GLSL_TYPE_FLOAT || + (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) && + (types[i]->sampler_dimensionality == GLSL_SAMPLER_DIM_MS || + !(flags & IMAGE_FUNCTION_MS_ONLY))) + f->add_signature(_image(prototype, types[i], intrinsic_name, + num_arguments, flags)); + } + + shader->symbols->add_function(f); +} + +void +builtin_builder::add_image_functions(bool glsl) +{ + const unsigned flags = (glsl ? IMAGE_FUNCTION_EMIT_STUB : 0); + + add_image_function(glsl ? "imageLoad" : "__intrinsic_image_load", + "__intrinsic_image_load", + &builtin_builder::_image_prototype, 0, + (flags | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | + IMAGE_FUNCTION_READ_ONLY)); + + add_image_function(glsl ? 
"imageStore" : "__intrinsic_image_store", + "__intrinsic_image_store", + &builtin_builder::_image_prototype, 1, + (flags | IMAGE_FUNCTION_RETURNS_VOID | + IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | + IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | + IMAGE_FUNCTION_WRITE_ONLY)); + + const unsigned atom_flags = flags | IMAGE_FUNCTION_AVAIL_ATOMIC; + + add_image_function(glsl ? "imageAtomicAdd" : "__intrinsic_image_atomic_add", + "__intrinsic_image_atomic_add", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function(glsl ? "imageAtomicMin" : "__intrinsic_image_atomic_min", + "__intrinsic_image_atomic_min", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function(glsl ? "imageAtomicMax" : "__intrinsic_image_atomic_max", + "__intrinsic_image_atomic_max", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function(glsl ? "imageAtomicAnd" : "__intrinsic_image_atomic_and", + "__intrinsic_image_atomic_and", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function(glsl ? "imageAtomicOr" : "__intrinsic_image_atomic_or", + "__intrinsic_image_atomic_or", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function(glsl ? "imageAtomicXor" : "__intrinsic_image_atomic_xor", + "__intrinsic_image_atomic_xor", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function((glsl ? "imageAtomicExchange" : + "__intrinsic_image_atomic_exchange"), + "__intrinsic_image_atomic_exchange", + &builtin_builder::_image_prototype, 1, atom_flags); + + add_image_function((glsl ? "imageAtomicCompSwap" : + "__intrinsic_image_atomic_comp_swap"), + "__intrinsic_image_atomic_comp_swap", + &builtin_builder::_image_prototype, 2, atom_flags); + + add_image_function(glsl ? "imageSize" : "__intrinsic_image_size", + "__intrinsic_image_size", + &builtin_builder::_image_size_prototype, 1, + flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE); + + add_image_function(glsl ? "imageSamples" : "__intrinsic_image_samples", + "__intrinsic_image_samples", + &builtin_builder::_image_samples_prototype, 1, + flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | + IMAGE_FUNCTION_MS_ONLY); +} + +ir_variable * +builtin_builder::in_var(const glsl_type *type, const char *name) +{ + return new(mem_ctx) ir_variable(type, name, ir_var_function_in); +} + +ir_variable * +builtin_builder::out_var(const glsl_type *type, const char *name) +{ + return new(mem_ctx) ir_variable(type, name, ir_var_function_out); +} + +ir_constant * +builtin_builder::imm(bool b, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(b, vector_elements); +} + +ir_constant * +builtin_builder::imm(float f, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(f, vector_elements); +} + +ir_constant * +builtin_builder::imm(int i, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(i, vector_elements); +} + +ir_constant * +builtin_builder::imm(unsigned u, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(u, vector_elements); +} + +ir_constant * +builtin_builder::imm(double d, unsigned vector_elements) +{ + return new(mem_ctx) ir_constant(d, vector_elements); +} + +ir_constant * +builtin_builder::imm(const glsl_type *type, const ir_constant_data &data) +{ + return new(mem_ctx) ir_constant(type, &data); +} + +#define IMM_FP(type, val) (type->base_type == GLSL_TYPE_DOUBLE) ? 
imm(val) : imm((float)val) + +ir_dereference_variable * +builtin_builder::var_ref(ir_variable *var) +{ + return new(mem_ctx) ir_dereference_variable(var); +} + +ir_dereference_array * +builtin_builder::array_ref(ir_variable *var, int idx) +{ + return new(mem_ctx) ir_dereference_array(var, imm(idx)); +} + +/** Return an element of a matrix */ +ir_swizzle * +builtin_builder::matrix_elt(ir_variable *var, int column, int row) +{ + return swizzle(array_ref(var, column), row, 1); +} + +/** + * Implementations of built-in functions: + * @{ + */ +ir_function_signature * +builtin_builder::new_sig(const glsl_type *return_type, + builtin_available_predicate avail, + int num_params, + ...) +{ + va_list ap; + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(return_type, avail); + + exec_list plist; + va_start(ap, num_params); + for (int i = 0; i < num_params; i++) { + plist.push_tail(va_arg(ap, ir_variable *)); + } + va_end(ap); + + sig->replace_parameters(&plist); + return sig; +} + +#define MAKE_SIG(return_type, avail, ...) \ + ir_function_signature *sig = \ + new_sig(return_type, avail, __VA_ARGS__); \ + ir_factory body(&sig->body, mem_ctx); \ + sig->is_defined = true; + +#define MAKE_INTRINSIC(return_type, avail, ...) \ + ir_function_signature *sig = \ + new_sig(return_type, avail, __VA_ARGS__); \ + sig->is_intrinsic = true; + +ir_function_signature * +builtin_builder::unop(builtin_available_predicate avail, + ir_expression_operation opcode, + const glsl_type *return_type, + const glsl_type *param_type) +{ + ir_variable *x = in_var(param_type, "x"); + MAKE_SIG(return_type, avail, 1, x); + body.emit(ret(expr(opcode, x))); + return sig; +} + +#define UNOP(NAME, OPCODE, AVAIL) \ +ir_function_signature * \ +builtin_builder::_##NAME(const glsl_type *type) \ +{ \ + return unop(&AVAIL, OPCODE, type, type); \ +} + +#define UNOPA(NAME, OPCODE) \ +ir_function_signature * \ +builtin_builder::_##NAME(builtin_available_predicate avail, const glsl_type *type) \ +{ \ + return unop(avail, OPCODE, type, type); \ +} + +ir_function_signature * +builtin_builder::binop(builtin_available_predicate avail, + ir_expression_operation opcode, + const glsl_type *return_type, + const glsl_type *param0_type, + const glsl_type *param1_type) +{ + ir_variable *x = in_var(param0_type, "x"); + ir_variable *y = in_var(param1_type, "y"); + MAKE_SIG(return_type, avail, 2, x, y); + body.emit(ret(expr(opcode, x, y))); + return sig; +} + +#define BINOP(NAME, OPCODE, AVAIL) \ +ir_function_signature * \ +builtin_builder::_##NAME(const glsl_type *return_type, \ + const glsl_type *param0_type, \ + const glsl_type *param1_type) \ +{ \ + return binop(&AVAIL, OPCODE, return_type, param0_type, param1_type); \ +} + +/** + * Angle and Trigonometry Functions @{ + */ + +ir_function_signature * +builtin_builder::_radians(const glsl_type *type) +{ + ir_variable *degrees = in_var(type, "degrees"); + MAKE_SIG(type, always_available, 1, degrees); + body.emit(ret(mul(degrees, imm(0.0174532925f)))); + return sig; +} + +ir_function_signature * +builtin_builder::_degrees(const glsl_type *type) +{ + ir_variable *radians = in_var(type, "radians"); + MAKE_SIG(type, always_available, 1, radians); + body.emit(ret(mul(radians, imm(57.29578f)))); + return sig; +} + +UNOP(sin, ir_unop_sin, always_available) +UNOP(cos, ir_unop_cos, always_available) + +ir_function_signature * +builtin_builder::_tan(const glsl_type *type) +{ + ir_variable *theta = in_var(type, "theta"); + MAKE_SIG(type, always_available, 1, theta); + body.emit(ret(div(sin(theta), 
cos(theta))));
+   return sig;
+}
+
+ir_expression *
+builtin_builder::asin_expr(ir_variable *x)
+{
+   return mul(sign(x),
+              sub(imm(M_PI_2f),
+                  mul(sqrt(sub(imm(1.0f), abs(x))),
+                      add(imm(M_PI_2f),
+                          mul(abs(x),
+                              add(imm(M_PI_4f - 1.0f),
+                                  mul(abs(x),
+                                      add(imm(0.086566724f),
+                                          mul(abs(x), imm(-0.03102955f))))))))));
+}
+
+ir_call *
+builtin_builder::call(ir_function *f, ir_variable *ret, exec_list params)
+{
+   exec_list actual_params;
+
+   foreach_in_list(ir_variable, var, &params) {
+      actual_params.push_tail(var_ref(var));
+   }
+
+   ir_function_signature *sig =
+      f->exact_matching_signature(NULL, &actual_params);
+   if (!sig)
+      return NULL;
+
+   ir_dereference_variable *deref =
+      (sig->return_type->is_void() ? NULL : var_ref(ret));
+
+   return new(mem_ctx) ir_call(sig, deref, &actual_params);
+}
+
+ir_function_signature *
+builtin_builder::_asin(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   MAKE_SIG(type, always_available, 1, x);
+
+   body.emit(ret(asin_expr(x)));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_acos(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   MAKE_SIG(type, always_available, 1, x);
+
+   body.emit(ret(sub(imm(M_PI_2f), asin_expr(x))));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_atan2(const glsl_type *type)
+{
+   ir_variable *vec_y = in_var(type, "vec_y");
+   ir_variable *vec_x = in_var(type, "vec_x");
+   MAKE_SIG(type, always_available, 2, vec_y, vec_x);
+
+   ir_variable *vec_result = body.make_temp(type, "vec_result");
+   ir_variable *r = body.make_temp(glsl_type::float_type, "r");
+   for (int i = 0; i < type->vector_elements; i++) {
+      ir_variable *y = body.make_temp(glsl_type::float_type, "y");
+      ir_variable *x = body.make_temp(glsl_type::float_type, "x");
+      body.emit(assign(y, swizzle(vec_y, i, 1)));
+      body.emit(assign(x, swizzle(vec_x, i, 1)));
+
+      /* If |x| >= 1.0e-8 * |y|: */
+      ir_if *outer_if =
+         new(mem_ctx) ir_if(greater(abs(x), mul(imm(1.0e-8f), abs(y))));
+
+      ir_factory outer_then(&outer_if->then_instructions, mem_ctx);
+
+      /* Then...call atan(y/x) */
+      do_atan(outer_then, glsl_type::float_type, r, div(y, x));
+
+      /* ...and fix it up: */
+      ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f)));
+      inner_if->then_instructions.push_tail(
+         if_tree(gequal(y, imm(0.0f)),
+                 assign(r, add(r, imm(M_PIf))),
+                 assign(r, sub(r, imm(M_PIf)))));
+      outer_then.emit(inner_if);
+
+      /* Else...
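+       * (reached when |x| <= 1.0e-8 * |y|, where y/x could overflow):
+       * atan(y/x) approaches +/-pi/2, so set r = sign(y) * pi/2 directly.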
*/ + outer_if->else_instructions.push_tail( + assign(r, mul(sign(y), imm(M_PI_2f)))); + + body.emit(outer_if); + + body.emit(assign(vec_result, r, 1 << i)); + } + body.emit(ret(vec_result)); + + return sig; +} + +void +builtin_builder::do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x) +{ + /* + * range-reduction, first step: + * + * / y_over_x if |y_over_x| <= 1.0; + * x = < + * \ 1.0 / y_over_x otherwise + */ + ir_variable *x = body.make_temp(type, "atan_x"); + body.emit(assign(x, div(min2(abs(y_over_x), + imm(1.0f)), + max2(abs(y_over_x), + imm(1.0f))))); + + /* + * approximate atan by evaluating polynomial: + * + * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + + * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + + * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 + */ + ir_variable *tmp = body.make_temp(type, "atan_tmp"); + body.emit(assign(tmp, mul(x, x))); + body.emit(assign(tmp, mul(add(mul(sub(mul(add(mul(sub(mul(add(mul(imm(-0.0121323213173444f), + tmp), + imm(0.0536813784310406f)), + tmp), + imm(0.1173503194786851f)), + tmp), + imm(0.1938924977115610f)), + tmp), + imm(0.3326756418091246f)), + tmp), + imm(0.9999793128310355f)), + x))); + + /* range-reduction fixup */ + body.emit(assign(tmp, add(tmp, + mul(b2f(greater(abs(y_over_x), + imm(1.0f, type->components()))), + add(mul(tmp, + imm(-2.0f)), + imm(M_PI_2f)))))); + + /* sign fixup */ + body.emit(assign(res, mul(tmp, sign(y_over_x)))); +} + +ir_function_signature * +builtin_builder::_atan(const glsl_type *type) +{ + ir_variable *y_over_x = in_var(type, "y_over_x"); + MAKE_SIG(type, always_available, 1, y_over_x); + + ir_variable *tmp = body.make_temp(type, "tmp"); + do_atan(body, type, tmp, y_over_x); + body.emit(ret(tmp)); + + return sig; +} + +ir_function_signature * +builtin_builder::_sinh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + /* 0.5 * (e^x - e^(-x)) */ + body.emit(ret(mul(imm(0.5f), sub(exp(x), exp(neg(x)))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_cosh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + /* 0.5 * (e^x + e^(-x)) */ + body.emit(ret(mul(imm(0.5f), add(exp(x), exp(neg(x)))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_tanh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + /* (e^x - e^(-x)) / (e^x + e^(-x)) */ + body.emit(ret(div(sub(exp(x), exp(neg(x))), + add(exp(x), exp(neg(x)))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_asinh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + body.emit(ret(mul(sign(x), log(add(abs(x), sqrt(add(mul(x, x), + imm(1.0f)))))))); + return sig; +} + +ir_function_signature * +builtin_builder::_acosh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + body.emit(ret(log(add(x, sqrt(sub(mul(x, x), imm(1.0f))))))); + return sig; +} + +ir_function_signature * +builtin_builder::_atanh(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, v130, 1, x); + + body.emit(ret(mul(imm(0.5f), log(div(add(imm(1.0f), x), + sub(imm(1.0f), x)))))); + return sig; +} +/** @} */ + +/** + * Exponential Functions @{ + */ + +ir_function_signature * +builtin_builder::_pow(const glsl_type *type) +{ + return binop(always_available, ir_binop_pow, type, type, type); +} + +UNOP(exp, ir_unop_exp, always_available) 
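On the atan approximation defined above: do_atan() reduces its argument to the interval [0, 1] and then evaluates a degree-11 odd polynomial, and the deeply nested add()/mul() chain is Horner's method. A minimal standalone C++ sketch of the same computation (not part of the patch; it assumes only <cmath>, and the helper name atan_ref is hypothetical), handy for checking the coefficients against atanf():

   #include <cmath>

   static float atan_ref(float y_over_x)
   {
      /* Range reduction, as in do_atan(): evaluate on x in [0, 1]. */
      const float a = fabsf(y_over_x);
      const float x = fminf(a, 1.0f) / fmaxf(a, 1.0f);
      const float t = x * x;

      /* Horner form of the polynomial quoted in the do_atan() comment. */
      float p = -0.0121323213173444f;
      p = p * t + 0.0536813784310406f;
      p = p * t - 0.1173503194786851f;
      p = p * t + 0.1938924977115610f;
      p = p * t - 0.3326756418091246f;
      p = p * t + 0.9999793128310355f;
      p *= x;

      /* Fixup: atan(1/u) == pi/2 - atan(u) for u > 0; then restore sign. */
      if (a > 1.0f)
         p = 1.57079632679489662f - p;
      return copysignf(p, y_over_x);
   }

The two emitted fixups correspond line for line: the b2f() term rewrites the result as pi/2 - result exactly when |y_over_x| > 1, and the final multiply by sign() restores the odd symmetry of atan.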
+UNOP(log, ir_unop_log, always_available)
+UNOP(exp2, ir_unop_exp2, always_available)
+UNOP(log2, ir_unop_log2, always_available)
+UNOPA(sqrt, ir_unop_sqrt)
+UNOPA(inversesqrt, ir_unop_rsq)
+
+/** @} */
+
+UNOPA(abs, ir_unop_abs)
+UNOPA(sign, ir_unop_sign)
+UNOPA(floor, ir_unop_floor)
+UNOPA(trunc, ir_unop_trunc)
+UNOPA(round, ir_unop_round_even)
+UNOPA(roundEven, ir_unop_round_even)
+UNOPA(ceil, ir_unop_ceil)
+UNOPA(fract, ir_unop_fract)
+
+ir_function_signature *
+builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
+{
+   return binop(always_available, ir_binop_mod, x_type, x_type, y_type);
+}
+
+ir_function_signature *
+builtin_builder::_modf(builtin_available_predicate avail, const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   ir_variable *i = out_var(type, "i");
+   MAKE_SIG(type, avail, 2, x, i);
+
+   ir_variable *t = body.make_temp(type, "t");
+   body.emit(assign(t, expr(ir_unop_trunc, x)));
+   body.emit(assign(i, t));
+   body.emit(ret(sub(x, t)));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_min(builtin_available_predicate avail,
+                      const glsl_type *x_type, const glsl_type *y_type)
+{
+   return binop(avail, ir_binop_min, x_type, x_type, y_type);
+}
+
+ir_function_signature *
+builtin_builder::_max(builtin_available_predicate avail,
+                      const glsl_type *x_type, const glsl_type *y_type)
+{
+   return binop(avail, ir_binop_max, x_type, x_type, y_type);
+}
+
+ir_function_signature *
+builtin_builder::_clamp(builtin_available_predicate avail,
+                        const glsl_type *val_type, const glsl_type *bound_type)
+{
+   ir_variable *x = in_var(val_type, "x");
+   ir_variable *minVal = in_var(bound_type, "minVal");
+   ir_variable *maxVal = in_var(bound_type, "maxVal");
+   MAKE_SIG(val_type, avail, 3, x, minVal, maxVal);
+
+   body.emit(ret(clamp(x, minVal, maxVal)));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_mix_lrp(builtin_available_predicate avail, const glsl_type *val_type, const glsl_type *blend_type)
+{
+   ir_variable *x = in_var(val_type, "x");
+   ir_variable *y = in_var(val_type, "y");
+   ir_variable *a = in_var(blend_type, "a");
+   MAKE_SIG(val_type, avail, 3, x, y, a);
+
+   body.emit(ret(lrp(x, y, a)));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_mix_sel(builtin_available_predicate avail,
+                          const glsl_type *val_type,
+                          const glsl_type *blend_type)
+{
+   ir_variable *x = in_var(val_type, "x");
+   ir_variable *y = in_var(val_type, "y");
+   ir_variable *a = in_var(blend_type, "a");
+   MAKE_SIG(val_type, avail, 3, x, y, a);
+
+   /* csel matches the ternary operator in that a selector of true chooses the
+    * first argument. This differs from mix(x, y, false), which chooses the
+    * second argument (to remain consistent with the interpolating version of
+    * mix(), which takes a blend factor from 0.0 to 1.0, where 0.0 yields only x).
+    *
+    * To handle the behavior mismatch, reverse the x and y arguments.
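+    * For example, mix(x, y, true) must select y, which is why the emitted
+    * expression below is csel(a, y, x) rather than csel(a, x, y).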
+ */ + body.emit(ret(csel(a, y, x))); + + return sig; +} + +ir_function_signature * +builtin_builder::_step(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type) +{ + ir_variable *edge = in_var(edge_type, "edge"); + ir_variable *x = in_var(x_type, "x"); + MAKE_SIG(x_type, avail, 2, edge, x); + + ir_variable *t = body.make_temp(x_type, "t"); + if (x_type->vector_elements == 1) { + /* Both are floats */ + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(x, edge))))); + else + body.emit(assign(t, b2f(gequal(x, edge)))); + } else if (edge_type->vector_elements == 1) { + /* x is a vector but edge is a float */ + for (int i = 0; i < x_type->vector_elements; i++) { + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), edge))), 1 << i)); + else + body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i)); + } + } else { + /* Both are vectors */ + for (int i = 0; i < x_type->vector_elements; i++) { + if (edge_type->base_type == GLSL_TYPE_DOUBLE) + body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1)))), + 1 << i)); + else + body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))), + 1 << i)); + + } + } + body.emit(ret(t)); + + return sig; +} + +ir_function_signature * +builtin_builder::_smoothstep(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type) +{ + ir_variable *edge0 = in_var(edge_type, "edge0"); + ir_variable *edge1 = in_var(edge_type, "edge1"); + ir_variable *x = in_var(x_type, "x"); + MAKE_SIG(x_type, avail, 3, edge0, edge1, x); + + /* From the GLSL 1.10 specification: + * + * genType t; + * t = clamp((x - edge0) / (edge1 - edge0), 0, 1); + * return t * t * (3 - 2 * t); + */ + + ir_variable *t = body.make_temp(x_type, "t"); + body.emit(assign(t, clamp(div(sub(x, edge0), sub(edge1, edge0)), + IMM_FP(x_type, 0.0), IMM_FP(x_type, 1.0)))); + + body.emit(ret(mul(t, mul(t, sub(IMM_FP(x_type, 3.0), mul(IMM_FP(x_type, 2.0), t)))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_isnan(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); + + body.emit(ret(nequal(x, x))); + + return sig; +} + +ir_function_signature * +builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); + + ir_constant_data infinities; + for (int i = 0; i < type->vector_elements; i++) { + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + infinities.f[i] = INFINITY; + break; + case GLSL_TYPE_DOUBLE: + infinities.d[i] = INFINITY; + break; + default: + unreachable("unknown type"); + } + } + + body.emit(ret(equal(abs(x), imm(type, infinities)))); + + return sig; +} + +ir_function_signature * +builtin_builder::_floatBitsToInt(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::ivec(type->vector_elements), shader_bit_encoding, 1, x); + body.emit(ret(bitcast_f2i(x))); + return sig; +} + +ir_function_signature * +builtin_builder::_floatBitsToUint(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::uvec(type->vector_elements), shader_bit_encoding, 1, x); + body.emit(ret(bitcast_f2u(x))); + return sig; +} + +ir_function_signature * +builtin_builder::_intBitsToFloat(const glsl_type *type) +{ + ir_variable *x = in_var(type, 
"x"); + MAKE_SIG(glsl_type::vec(type->vector_elements), shader_bit_encoding, 1, x); + body.emit(ret(bitcast_i2f(x))); + return sig; +} + +ir_function_signature * +builtin_builder::_uintBitsToFloat(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(glsl_type::vec(type->vector_elements), shader_bit_encoding, 1, x); + body.emit(ret(bitcast_u2f(x))); + return sig; +} + +ir_function_signature * +builtin_builder::_packUnorm2x16(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::vec2_type, "v"); + MAKE_SIG(glsl_type::uint_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_unorm_2x16, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_packSnorm2x16(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::vec2_type, "v"); + MAKE_SIG(glsl_type::uint_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_snorm_2x16, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_packUnorm4x8(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::vec4_type, "v"); + MAKE_SIG(glsl_type::uint_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_unorm_4x8, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_packSnorm4x8(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::vec4_type, "v"); + MAKE_SIG(glsl_type::uint_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_snorm_4x8, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackUnorm2x16(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::uint_type, "p"); + MAKE_SIG(glsl_type::vec2_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_unorm_2x16, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackSnorm2x16(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::uint_type, "p"); + MAKE_SIG(glsl_type::vec2_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_snorm_2x16, p))); + return sig; +} + + +ir_function_signature * +builtin_builder::_unpackUnorm4x8(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::uint_type, "p"); + MAKE_SIG(glsl_type::vec4_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_unorm_4x8, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackSnorm4x8(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::uint_type, "p"); + MAKE_SIG(glsl_type::vec4_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_snorm_4x8, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_packHalf2x16(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::vec2_type, "v"); + MAKE_SIG(glsl_type::uint_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_half_2x16, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackHalf2x16(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::uint_type, "p"); + MAKE_SIG(glsl_type::vec2_type, avail, 1, p); + body.emit(ret(expr(ir_unop_unpack_half_2x16, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_packDouble2x32(builtin_available_predicate avail) +{ + ir_variable *v = in_var(glsl_type::uvec2_type, "v"); + MAKE_SIG(glsl_type::double_type, avail, 1, v); + body.emit(ret(expr(ir_unop_pack_double_2x32, v))); + return sig; +} + +ir_function_signature * +builtin_builder::_unpackDouble2x32(builtin_available_predicate avail) +{ + ir_variable *p = in_var(glsl_type::double_type, "p"); + MAKE_SIG(glsl_type::uvec2_type, avail, 1, 
p); + body.emit(ret(expr(ir_unop_unpack_double_2x32, p))); + return sig; +} + +ir_function_signature * +builtin_builder::_length(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type->get_base_type(), avail, 1, x); + + body.emit(ret(sqrt(dot(x, x)))); + + return sig; +} + +ir_function_signature * +builtin_builder::_distance(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *p0 = in_var(type, "p0"); + ir_variable *p1 = in_var(type, "p1"); + MAKE_SIG(type->get_base_type(), avail, 2, p0, p1); + + if (type->vector_elements == 1) { + body.emit(ret(abs(sub(p0, p1)))); + } else { + ir_variable *p = body.make_temp(type, "p"); + body.emit(assign(p, sub(p0, p1))); + body.emit(ret(sqrt(dot(p, p)))); + } + + return sig; +} + +ir_function_signature * +builtin_builder::_dot(builtin_available_predicate avail, const glsl_type *type) +{ + if (type->vector_elements == 1) + return binop(avail, ir_binop_mul, type, type, type); + + return binop(avail, ir_binop_dot, + type->get_base_type(), type, type); +} + +ir_function_signature * +builtin_builder::_cross(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *a = in_var(type, "a"); + ir_variable *b = in_var(type, "b"); + MAKE_SIG(type, avail, 2, a, b); + + int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0); + int zxy = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, 0); + + body.emit(ret(sub(mul(swizzle(a, yzx, 3), swizzle(b, zxy, 3)), + mul(swizzle(a, zxy, 3), swizzle(b, yzx, 3))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_normalize(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + MAKE_SIG(type, avail, 1, x); + + if (type->vector_elements == 1) { + body.emit(ret(sign(x))); + } else { + body.emit(ret(mul(x, rsq(dot(x, x))))); + } + + return sig; +} + +ir_function_signature * +builtin_builder::_ftransform() +{ + MAKE_SIG(glsl_type::vec4_type, compatibility_vs_only, 0); + + body.emit(ret(new(mem_ctx) ir_expression(ir_binop_mul, + glsl_type::vec4_type, + var_ref(gl_ModelViewProjectionMatrix), + var_ref(gl_Vertex)))); + + /* FINISHME: Once the ir_expression() constructor handles type inference + * for matrix operations, we can simplify this to: + * + * body.emit(ret(mul(gl_ModelViewProjectionMatrix, gl_Vertex))); + */ + return sig; +} + +ir_function_signature * +builtin_builder::_faceforward(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *N = in_var(type, "N"); + ir_variable *I = in_var(type, "I"); + ir_variable *Nref = in_var(type, "Nref"); + MAKE_SIG(type, avail, 3, N, I, Nref); + + body.emit(if_tree(less(dot(Nref, I), IMM_FP(type, 0.0)), + ret(N), ret(neg(N)))); + + return sig; +} + +ir_function_signature * +builtin_builder::_reflect(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *I = in_var(type, "I"); + ir_variable *N = in_var(type, "N"); + MAKE_SIG(type, avail, 2, I, N); + + /* I - 2 * dot(N, I) * N */ + body.emit(ret(sub(I, mul(IMM_FP(type, 2.0), mul(dot(N, I), N))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_refract(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *I = in_var(type, "I"); + ir_variable *N = in_var(type, "N"); + ir_variable *eta = in_var(type->get_base_type(), "eta"); + MAKE_SIG(type, avail, 3, I, N, eta); + + ir_variable *n_dot_i = body.make_temp(type->get_base_type(), "n_dot_i"); + body.emit(assign(n_dot_i, dot(N, I))); + + /* From the GLSL 1.10 
specification: + * k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) + * if (k < 0.0) + * return genType(0.0) + * else + * return eta * I - (eta * dot(N, I) + sqrt(k)) * N + */ + ir_variable *k = body.make_temp(type->get_base_type(), "k"); + body.emit(assign(k, sub(IMM_FP(type, 1.0), + mul(eta, mul(eta, sub(IMM_FP(type, 1.0), + mul(n_dot_i, n_dot_i))))))); + body.emit(if_tree(less(k, IMM_FP(type, 0.0)), + ret(ir_constant::zero(mem_ctx, type)), + ret(sub(mul(eta, I), + mul(add(mul(eta, n_dot_i), sqrt(k)), N))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_matrixCompMult(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + MAKE_SIG(type, avail, 2, x, y); + + ir_variable *z = body.make_temp(type, "z"); + for (int i = 0; i < type->matrix_columns; i++) { + body.emit(assign(array_ref(z, i), mul(array_ref(x, i), array_ref(y, i)))); + } + body.emit(ret(z)); + + return sig; +} + +ir_function_signature * +builtin_builder::_outerProduct(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *c; + ir_variable *r; + + if (type->base_type == GLSL_TYPE_DOUBLE) { + r = in_var(glsl_type::dvec(type->matrix_columns), "r"); + c = in_var(glsl_type::dvec(type->vector_elements), "c"); + } else { + r = in_var(glsl_type::vec(type->matrix_columns), "r"); + c = in_var(glsl_type::vec(type->vector_elements), "c"); + } + MAKE_SIG(type, avail, 2, c, r); + + ir_variable *m = body.make_temp(type, "m"); + for (int i = 0; i < type->matrix_columns; i++) { + body.emit(assign(array_ref(m, i), mul(c, swizzle(r, i, 1)))); + } + body.emit(ret(m)); + + return sig; +} + +ir_function_signature * +builtin_builder::_transpose(builtin_available_predicate avail, const glsl_type *orig_type) +{ + const glsl_type *transpose_type = + glsl_type::get_instance(orig_type->base_type, + orig_type->matrix_columns, + orig_type->vector_elements); + + ir_variable *m = in_var(orig_type, "m"); + MAKE_SIG(transpose_type, avail, 1, m); + + ir_variable *t = body.make_temp(transpose_type, "t"); + for (int i = 0; i < orig_type->matrix_columns; i++) { + for (int j = 0; j < orig_type->vector_elements; j++) { + body.emit(assign(array_ref(t, j), + matrix_elt(m, i, j), + 1 << i)); + } + } + body.emit(ret(t)); + + return sig; +} + +ir_function_signature * +builtin_builder::_determinant_mat2(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type->get_base_type(), avail, 1, m); + + body.emit(ret(sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)), + mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_determinant_mat3(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type->get_base_type(), avail, 1, m); + + ir_expression *f1 = + sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 1))); + + ir_expression *f2 = + sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 0))); + + ir_expression *f3 = + sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), + mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 0))); + + body.emit(ret(add(sub(mul(matrix_elt(m, 0, 0), f1), + mul(matrix_elt(m, 0, 1), f2)), + mul(matrix_elt(m, 0, 2), f3)))); + + return sig; +} + +ir_function_signature * +builtin_builder::_determinant_mat4(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = 
in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(btype, avail, 1, m); + + ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00"); + ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01"); + ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02"); + ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03"); + ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04"); + ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05"); + ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06"); + ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07"); + ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08"); + ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09"); + ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10"); + ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11"); + ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12"); + ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13"); + ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14"); + ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15"); + ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16"); + ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17"); + ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18"); + + body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3))))); + body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3))))); + body.emit(assign(SubFactor02, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 2))))); + body.emit(assign(SubFactor03, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 3))))); + body.emit(assign(SubFactor04, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 2))))); + body.emit(assign(SubFactor05, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 1))))); + body.emit(assign(SubFactor06, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor07, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor08, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor09, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor10, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor11, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor12, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 1))))); + body.emit(assign(SubFactor13, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 2), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor14, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor15, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor16, 
sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); + + ir_variable *adj_0 = body.make_temp(btype == glsl_type::float_type ? glsl_type::vec4_type : glsl_type::dvec4_type, "adj_0"); + + body.emit(assign(adj_0, + add(sub(mul(matrix_elt(m, 1, 1), SubFactor00), + mul(matrix_elt(m, 1, 2), SubFactor01)), + mul(matrix_elt(m, 1, 3), SubFactor02)), + WRITEMASK_X)); + body.emit(assign(adj_0, neg( + add(sub(mul(matrix_elt(m, 1, 0), SubFactor00), + mul(matrix_elt(m, 1, 2), SubFactor03)), + mul(matrix_elt(m, 1, 3), SubFactor04))), + WRITEMASK_Y)); + body.emit(assign(adj_0, + add(sub(mul(matrix_elt(m, 1, 0), SubFactor01), + mul(matrix_elt(m, 1, 1), SubFactor03)), + mul(matrix_elt(m, 1, 3), SubFactor05)), + WRITEMASK_Z)); + body.emit(assign(adj_0, neg( + add(sub(mul(matrix_elt(m, 1, 0), SubFactor02), + mul(matrix_elt(m, 1, 1), SubFactor04)), + mul(matrix_elt(m, 1, 2), SubFactor05))), + WRITEMASK_W)); + + body.emit(ret(dot(array_ref(m, 0), adj_0))); + + return sig; +} + +ir_function_signature * +builtin_builder::_inverse_mat2(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + MAKE_SIG(type, avail, 1, m); + + ir_variable *adj = body.make_temp(type, "adj"); + body.emit(assign(array_ref(adj, 0), matrix_elt(m, 1, 1), 1 << 0)); + body.emit(assign(array_ref(adj, 0), neg(matrix_elt(m, 0, 1)), 1 << 1)); + body.emit(assign(array_ref(adj, 1), neg(matrix_elt(m, 1, 0)), 1 << 0)); + body.emit(assign(array_ref(adj, 1), matrix_elt(m, 0, 0), 1 << 1)); + + ir_expression *det = + sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)), + mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))); + + body.emit(ret(div(adj, det))); + return sig; +} + +ir_function_signature * +builtin_builder::_inverse_mat3(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(type, avail, 1, m); + + ir_variable *f11_22_21_12 = body.make_temp(btype, "f11_22_21_12"); + ir_variable *f10_22_20_12 = body.make_temp(btype, "f10_22_20_12"); + ir_variable *f10_21_20_11 = body.make_temp(btype, "f10_21_20_11"); + + body.emit(assign(f11_22_21_12, + sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 2))))); + body.emit(assign(f10_22_20_12, + sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); + body.emit(assign(f10_21_20_11, + sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), + mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); + + ir_variable *adj = body.make_temp(type, "adj"); + body.emit(assign(array_ref(adj, 0), f11_22_21_12, WRITEMASK_X)); + body.emit(assign(array_ref(adj, 1), neg(f10_22_20_12), WRITEMASK_X)); + body.emit(assign(array_ref(adj, 2), f10_21_20_11, WRITEMASK_X)); + + body.emit(assign(array_ref(adj, 0), neg( + sub(mul(matrix_elt(m, 0, 1), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 2, 1), matrix_elt(m, 0, 2)))), + WRITEMASK_Y)); + body.emit(assign(array_ref(adj, 1), + sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 2, 2)), + mul(matrix_elt(m, 2, 0), matrix_elt(m, 0, 2))), + WRITEMASK_Y)); + body.emit(assign(array_ref(adj, 2), neg( + sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 2, 1)), + mul(matrix_elt(m, 2, 
0), matrix_elt(m, 0, 1)))), + WRITEMASK_Y)); + + body.emit(assign(array_ref(adj, 0), + sub(mul(matrix_elt(m, 0, 1), matrix_elt(m, 1, 2)), + mul(matrix_elt(m, 1, 1), matrix_elt(m, 0, 2))), + WRITEMASK_Z)); + body.emit(assign(array_ref(adj, 1), neg( + sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 2)), + mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 2)))), + WRITEMASK_Z)); + body.emit(assign(array_ref(adj, 2), + sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)), + mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))), + WRITEMASK_Z)); + + ir_expression *det = + add(sub(mul(matrix_elt(m, 0, 0), f11_22_21_12), + mul(matrix_elt(m, 0, 1), f10_22_20_12)), + mul(matrix_elt(m, 0, 2), f10_21_20_11)); + + body.emit(ret(div(adj, det))); + + return sig; +} + +ir_function_signature * +builtin_builder::_inverse_mat4(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *m = in_var(type, "m"); + const glsl_type *btype = type->get_base_type(); + MAKE_SIG(type, avail, 1, m); + + ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00"); + ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01"); + ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02"); + ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03"); + ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04"); + ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05"); + ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06"); + ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07"); + ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08"); + ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09"); + ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10"); + ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11"); + ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12"); + ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13"); + ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14"); + ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15"); + ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16"); + ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17"); + ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18"); + + body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3))))); + body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3))))); + body.emit(assign(SubFactor02, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 2))))); + body.emit(assign(SubFactor03, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 3))))); + body.emit(assign(SubFactor04, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 2))))); + body.emit(assign(SubFactor05, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 1))))); + body.emit(assign(SubFactor06, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor07, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor08, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor09, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 3)), 
mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor10, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor11, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor12, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 1))))); + body.emit(assign(SubFactor13, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 2), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor14, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor15, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor16, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 3))))); + body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); + body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); + + ir_variable *adj = body.make_temp(btype == glsl_type::float_type ? glsl_type::mat4_type : glsl_type::dmat4_type, "adj"); + body.emit(assign(array_ref(adj, 0), + add(sub(mul(matrix_elt(m, 1, 1), SubFactor00), + mul(matrix_elt(m, 1, 2), SubFactor01)), + mul(matrix_elt(m, 1, 3), SubFactor02)), + WRITEMASK_X)); + body.emit(assign(array_ref(adj, 1), neg( + add(sub(mul(matrix_elt(m, 1, 0), SubFactor00), + mul(matrix_elt(m, 1, 2), SubFactor03)), + mul(matrix_elt(m, 1, 3), SubFactor04))), + WRITEMASK_X)); + body.emit(assign(array_ref(adj, 2), + add(sub(mul(matrix_elt(m, 1, 0), SubFactor01), + mul(matrix_elt(m, 1, 1), SubFactor03)), + mul(matrix_elt(m, 1, 3), SubFactor05)), + WRITEMASK_X)); + body.emit(assign(array_ref(adj, 3), neg( + add(sub(mul(matrix_elt(m, 1, 0), SubFactor02), + mul(matrix_elt(m, 1, 1), SubFactor04)), + mul(matrix_elt(m, 1, 2), SubFactor05))), + WRITEMASK_X)); + + body.emit(assign(array_ref(adj, 0), neg( + add(sub(mul(matrix_elt(m, 0, 1), SubFactor00), + mul(matrix_elt(m, 0, 2), SubFactor01)), + mul(matrix_elt(m, 0, 3), SubFactor02))), + WRITEMASK_Y)); + body.emit(assign(array_ref(adj, 1), + add(sub(mul(matrix_elt(m, 0, 0), SubFactor00), + mul(matrix_elt(m, 0, 2), SubFactor03)), + mul(matrix_elt(m, 0, 3), SubFactor04)), + WRITEMASK_Y)); + body.emit(assign(array_ref(adj, 2), neg( + add(sub(mul(matrix_elt(m, 0, 0), SubFactor01), + mul(matrix_elt(m, 0, 1), SubFactor03)), + mul(matrix_elt(m, 0, 3), SubFactor05))), + WRITEMASK_Y)); + body.emit(assign(array_ref(adj, 3), + add(sub(mul(matrix_elt(m, 0, 0), SubFactor02), + mul(matrix_elt(m, 0, 1), SubFactor04)), + mul(matrix_elt(m, 0, 2), SubFactor05)), + WRITEMASK_Y)); + + body.emit(assign(array_ref(adj, 0), + add(sub(mul(matrix_elt(m, 0, 1), SubFactor06), + mul(matrix_elt(m, 0, 2), SubFactor07)), + mul(matrix_elt(m, 0, 3), SubFactor08)), + WRITEMASK_Z)); + body.emit(assign(array_ref(adj, 1), neg( + add(sub(mul(matrix_elt(m, 0, 0), SubFactor06), + mul(matrix_elt(m, 0, 2), SubFactor09)), + mul(matrix_elt(m, 0, 3), SubFactor10))), + WRITEMASK_Z)); + body.emit(assign(array_ref(adj, 2), + add(sub(mul(matrix_elt(m, 0, 0), SubFactor11), + mul(matrix_elt(m, 0, 1), SubFactor09)), + mul(matrix_elt(m, 0, 3), SubFactor12)), + WRITEMASK_Z)); + body.emit(assign(array_ref(adj, 3), neg( + add(sub(mul(matrix_elt(m, 0, 0), 
SubFactor08), + mul(matrix_elt(m, 0, 1), SubFactor10)), + mul(matrix_elt(m, 0, 2), SubFactor12))), + WRITEMASK_Z)); + + body.emit(assign(array_ref(adj, 0), neg( + add(sub(mul(matrix_elt(m, 0, 1), SubFactor13), + mul(matrix_elt(m, 0, 2), SubFactor14)), + mul(matrix_elt(m, 0, 3), SubFactor15))), + WRITEMASK_W)); + body.emit(assign(array_ref(adj, 1), + add(sub(mul(matrix_elt(m, 0, 0), SubFactor13), + mul(matrix_elt(m, 0, 2), SubFactor16)), + mul(matrix_elt(m, 0, 3), SubFactor17)), + WRITEMASK_W)); + body.emit(assign(array_ref(adj, 2), neg( + add(sub(mul(matrix_elt(m, 0, 0), SubFactor14), + mul(matrix_elt(m, 0, 1), SubFactor16)), + mul(matrix_elt(m, 0, 3), SubFactor18))), + WRITEMASK_W)); + body.emit(assign(array_ref(adj, 3), + add(sub(mul(matrix_elt(m, 0, 0), SubFactor15), + mul(matrix_elt(m, 0, 1), SubFactor17)), + mul(matrix_elt(m, 0, 2), SubFactor18)), + WRITEMASK_W)); + + ir_expression *det = + add(mul(matrix_elt(m, 0, 0), matrix_elt(adj, 0, 0)), + add(mul(matrix_elt(m, 0, 1), matrix_elt(adj, 1, 0)), + add(mul(matrix_elt(m, 0, 2), matrix_elt(adj, 2, 0)), + mul(matrix_elt(m, 0, 3), matrix_elt(adj, 3, 0))))); + + body.emit(ret(div(adj, det))); + + return sig; +} + + +ir_function_signature * +builtin_builder::_lessThan(builtin_available_predicate avail, + const glsl_type *type) +{ + return binop(avail, ir_binop_less, + glsl_type::bvec(type->vector_elements), type, type); +} + +ir_function_signature * +builtin_builder::_lessThanEqual(builtin_available_predicate avail, + const glsl_type *type) +{ + return binop(avail, ir_binop_lequal, + glsl_type::bvec(type->vector_elements), type, type); +} + +ir_function_signature * +builtin_builder::_greaterThan(builtin_available_predicate avail, + const glsl_type *type) +{ + return binop(avail, ir_binop_greater, + glsl_type::bvec(type->vector_elements), type, type); +} + +ir_function_signature * +builtin_builder::_greaterThanEqual(builtin_available_predicate avail, + const glsl_type *type) +{ + return binop(avail, ir_binop_gequal, + glsl_type::bvec(type->vector_elements), type, type); +} + +ir_function_signature * +builtin_builder::_equal(builtin_available_predicate avail, + const glsl_type *type) +{ + return binop(avail, ir_binop_equal, + glsl_type::bvec(type->vector_elements), type, type); +} + +ir_function_signature * +builtin_builder::_notEqual(builtin_available_predicate avail, + const glsl_type *type) +{ + return binop(avail, ir_binop_nequal, + glsl_type::bvec(type->vector_elements), type, type); +} + +ir_function_signature * +builtin_builder::_any(const glsl_type *type) +{ + ir_variable *v = in_var(type, "v"); + MAKE_SIG(glsl_type::bool_type, always_available, 1, v); + + const unsigned vec_elem = v->type->vector_elements; + body.emit(ret(expr(ir_binop_any_nequal, v, imm(false, vec_elem)))); + + return sig; +} + +ir_function_signature * +builtin_builder::_all(const glsl_type *type) +{ + ir_variable *v = in_var(type, "v"); + MAKE_SIG(glsl_type::bool_type, always_available, 1, v); + + const unsigned vec_elem = v->type->vector_elements; + body.emit(ret(expr(ir_binop_all_equal, v, imm(true, vec_elem)))); + + return sig; +} + +UNOP(not, ir_unop_logic_not, always_available) + +static bool +has_lod(const glsl_type *sampler_type) +{ + assert(sampler_type->is_sampler()); + + switch (sampler_type->sampler_dimensionality) { + case GLSL_SAMPLER_DIM_RECT: + case GLSL_SAMPLER_DIM_BUF: + case GLSL_SAMPLER_DIM_MS: + return false; + default: + return true; + } +} + +ir_function_signature * +builtin_builder::_textureSize(builtin_available_predicate avail, + const 
glsl_type *return_type, + const glsl_type *sampler_type) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + /* The sampler always exists; add optional lod later. */ + MAKE_SIG(return_type, avail, 1, s); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_txs); + tex->set_sampler(new(mem_ctx) ir_dereference_variable(s), return_type); + + if (has_lod(sampler_type)) { + ir_variable *lod = in_var(glsl_type::int_type, "lod"); + sig->parameters.push_tail(lod); + tex->lod_info.lod = var_ref(lod); + } else { + tex->lod_info.lod = imm(0u); + } + + body.emit(ret(tex)); + + return sig; +} + +ir_function_signature * +builtin_builder::_textureSamples(const glsl_type *sampler_type) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + MAKE_SIG(glsl_type::int_type, shader_samples, 1, s); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_texture_samples); + tex->set_sampler(new(mem_ctx) ir_dereference_variable(s), glsl_type::int_type); + body.emit(ret(tex)); + + return sig; +} + +ir_function_signature * +builtin_builder::_texture(ir_texture_opcode opcode, + builtin_available_predicate avail, + const glsl_type *return_type, + const glsl_type *sampler_type, + const glsl_type *coord_type, + int flags) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + ir_variable *P = in_var(coord_type, "P"); + /* The sampler and coordinate always exist; add optional parameters later. */ + MAKE_SIG(return_type, avail, 2, s, P); + + ir_texture *tex = new(mem_ctx) ir_texture(opcode); + tex->set_sampler(var_ref(s), return_type); + + const int coord_size = sampler_type->coordinate_components(); + + if (coord_size == coord_type->vector_elements) { + tex->coordinate = var_ref(P); + } else { + /* The incoming coordinate also has the projector or shadow comparitor, + * so we need to swizzle those away. + */ + tex->coordinate = swizzle_for_size(P, coord_size); + } + + /* The projector is always in the last component. */ + if (flags & TEX_PROJECT) + tex->projector = swizzle(P, coord_type->vector_elements - 1, 1); + + if (sampler_type->sampler_shadow) { + if (opcode == ir_tg4) { + /* gather has refz as a separate parameter, immediately after the + * coordinate + */ + ir_variable *refz = in_var(glsl_type::float_type, "refz"); + sig->parameters.push_tail(refz); + tex->shadow_comparitor = var_ref(refz); + } else { + /* The shadow comparitor is normally in the Z component, but a few types + * have sufficiently large coordinates that it's in W. + */ + tex->shadow_comparitor = swizzle(P, MAX2(coord_size, SWIZZLE_Z), 1); + } + } + + if (opcode == ir_txl) { + ir_variable *lod = in_var(glsl_type::float_type, "lod"); + sig->parameters.push_tail(lod); + tex->lod_info.lod = var_ref(lod); + } else if (opcode == ir_txd) { + int grad_size = coord_size - (sampler_type->sampler_array ? 1 : 0); + ir_variable *dPdx = in_var(glsl_type::vec(grad_size), "dPdx"); + ir_variable *dPdy = in_var(glsl_type::vec(grad_size), "dPdy"); + sig->parameters.push_tail(dPdx); + sig->parameters.push_tail(dPdy); + tex->lod_info.grad.dPdx = var_ref(dPdx); + tex->lod_info.grad.dPdy = var_ref(dPdy); + } + + if (flags & (TEX_OFFSET | TEX_OFFSET_NONCONST)) { + int offset_size = coord_size - (sampler_type->sampler_array ? 1 : 0); + ir_variable *offset = + new(mem_ctx) ir_variable(glsl_type::ivec(offset_size), "offset", + (flags & TEX_OFFSET) ? 
ir_var_const_in : ir_var_function_in); + sig->parameters.push_tail(offset); + tex->offset = var_ref(offset); + } + + if (flags & TEX_OFFSET_ARRAY) { + ir_variable *offsets = + new(mem_ctx) ir_variable(glsl_type::get_array_instance(glsl_type::ivec2_type, 4), + "offsets", ir_var_const_in); + sig->parameters.push_tail(offsets); + tex->offset = var_ref(offsets); + } + + if (opcode == ir_tg4) { + if (flags & TEX_COMPONENT) { + ir_variable *component = + new(mem_ctx) ir_variable(glsl_type::int_type, "comp", ir_var_const_in); + sig->parameters.push_tail(component); + tex->lod_info.component = var_ref(component); + } + else { + tex->lod_info.component = imm(0); + } + } + + /* The "bias" parameter comes /after/ the "offset" parameter, which is + * inconsistent with both textureLodOffset and textureGradOffset. + */ + if (opcode == ir_txb) { + ir_variable *bias = in_var(glsl_type::float_type, "bias"); + sig->parameters.push_tail(bias); + tex->lod_info.bias = var_ref(bias); + } + + body.emit(ret(tex)); + + return sig; +} + +ir_function_signature * +builtin_builder::_textureCubeArrayShadow() +{ + ir_variable *s = in_var(glsl_type::samplerCubeArrayShadow_type, "sampler"); + ir_variable *P = in_var(glsl_type::vec4_type, "P"); + ir_variable *compare = in_var(glsl_type::float_type, "compare"); + MAKE_SIG(glsl_type::float_type, texture_cube_map_array, 3, s, P, compare); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_tex); + tex->set_sampler(var_ref(s), glsl_type::float_type); + + tex->coordinate = var_ref(P); + tex->shadow_comparitor = var_ref(compare); + + body.emit(ret(tex)); + + return sig; +} + +ir_function_signature * +builtin_builder::_texelFetch(builtin_available_predicate avail, + const glsl_type *return_type, + const glsl_type *sampler_type, + const glsl_type *coord_type, + const glsl_type *offset_type) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + ir_variable *P = in_var(coord_type, "P"); + /* The sampler and coordinate always exist; add optional parameters later. */ + MAKE_SIG(return_type, avail, 2, s, P); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_txf); + tex->coordinate = var_ref(P); + tex->set_sampler(var_ref(s), return_type); + + if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { + ir_variable *sample = in_var(glsl_type::int_type, "sample"); + sig->parameters.push_tail(sample); + tex->lod_info.sample_index = var_ref(sample); + tex->op = ir_txf_ms; + } else if (has_lod(sampler_type)) { + ir_variable *lod = in_var(glsl_type::int_type, "lod"); + sig->parameters.push_tail(lod); + tex->lod_info.lod = var_ref(lod); + } else { + tex->lod_info.lod = imm(0u); + } + + if (offset_type != NULL) { + ir_variable *offset = + new(mem_ctx) ir_variable(offset_type, "offset", ir_var_const_in); + sig->parameters.push_tail(offset); + tex->offset = var_ref(offset); + } + + body.emit(ret(tex)); + + return sig; +} + +ir_function_signature * +builtin_builder::_EmitVertex() +{ + MAKE_SIG(glsl_type::void_type, gs_only, 0); + + ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1); + body.emit(new(mem_ctx) ir_emit_vertex(stream)); + + return sig; +} + +ir_function_signature * +builtin_builder::_EmitStreamVertex(builtin_available_predicate avail, + const glsl_type *stream_type) +{ + /* Section 8.12 (Geometry Shader Functions) of the GLSL 4.0 spec says: + * + * "Emit the current values of output variables to the current output + * primitive on stream stream. The argument to stream must be a constant + * integral expression." 
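+ *
+ * An illustrative GLSL-level sketch of that restriction (names assumed,
+ * not part of this patch):
+ *
+ *    EmitStreamVertex(1);   // OK: a constant integral expression
+ *    EmitStreamVertex(s);   // valid only if 's' is a constant expression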
+ */ + ir_variable *stream = + new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in); + + MAKE_SIG(glsl_type::void_type, avail, 1, stream); + + body.emit(new(mem_ctx) ir_emit_vertex(var_ref(stream))); + + return sig; +} + +ir_function_signature * +builtin_builder::_EndPrimitive() +{ + MAKE_SIG(glsl_type::void_type, gs_only, 0); + + ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1); + body.emit(new(mem_ctx) ir_end_primitive(stream)); + + return sig; +} + +ir_function_signature * +builtin_builder::_EndStreamPrimitive(builtin_available_predicate avail, + const glsl_type *stream_type) +{ + /* Section 8.12 (Geometry Shader Functions) of the GLSL 4.0 spec says: + * + * "Completes the current output primitive on stream stream and starts + * a new one. The argument to stream must be a constant integral + * expression." + */ + ir_variable *stream = + new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in); + + MAKE_SIG(glsl_type::void_type, avail, 1, stream); + + body.emit(new(mem_ctx) ir_end_primitive(var_ref(stream))); + + return sig; +} + +ir_function_signature * +builtin_builder::_barrier() +{ + MAKE_SIG(glsl_type::void_type, barrier_supported, 0); + + body.emit(new(mem_ctx) ir_barrier()); + return sig; +} + +ir_function_signature * +builtin_builder::_textureQueryLod(builtin_available_predicate avail, + const glsl_type *sampler_type, + const glsl_type *coord_type) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + ir_variable *coord = in_var(coord_type, "coord"); + /* The sampler and coordinate always exist; add optional parameters later. */ + MAKE_SIG(glsl_type::vec2_type, avail, 2, s, coord); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_lod); + tex->coordinate = var_ref(coord); + tex->set_sampler(var_ref(s), glsl_type::vec2_type); + + body.emit(ret(tex)); + + return sig; +} + +ir_function_signature * +builtin_builder::_textureQueryLevels(const glsl_type *sampler_type) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + const glsl_type *return_type = glsl_type::int_type; + MAKE_SIG(return_type, texture_query_levels, 1, s); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_query_levels); + tex->set_sampler(var_ref(s), return_type); + + body.emit(ret(tex)); + + return sig; +} + +ir_function_signature * +builtin_builder::_textureSamplesIdentical(builtin_available_predicate avail, + const glsl_type *sampler_type, + const glsl_type *coord_type) +{ + ir_variable *s = in_var(sampler_type, "sampler"); + ir_variable *P = in_var(coord_type, "P"); + const glsl_type *return_type = glsl_type::bool_type; + MAKE_SIG(return_type, avail, 2, s, P); + + ir_texture *tex = new(mem_ctx) ir_texture(ir_samples_identical); + tex->coordinate = var_ref(P); + tex->set_sampler(var_ref(s), return_type); + + body.emit(ret(tex)); + + return sig; +} + +UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives) +UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control) +UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control) +UNOP(dFdy, ir_unop_dFdy, fs_oes_derivatives) +UNOP(dFdyCoarse, ir_unop_dFdy_coarse, fs_derivative_control) +UNOP(dFdyFine, ir_unop_dFdy_fine, fs_derivative_control) + +ir_function_signature * +builtin_builder::_fwidth(const glsl_type *type) +{ + ir_variable *p = in_var(type, "p"); + MAKE_SIG(type, fs_oes_derivatives, 1, p); + + body.emit(ret(add(abs(expr(ir_unop_dFdx, p)), abs(expr(ir_unop_dFdy, p))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_fwidthCoarse(const glsl_type *type) +{ + ir_variable *p = in_var(type, "p"); + MAKE_SIG(type, 
fs_derivative_control, 1, p); + + body.emit(ret(add(abs(expr(ir_unop_dFdx_coarse, p)), + abs(expr(ir_unop_dFdy_coarse, p))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_fwidthFine(const glsl_type *type) +{ + ir_variable *p = in_var(type, "p"); + MAKE_SIG(type, fs_derivative_control, 1, p); + + body.emit(ret(add(abs(expr(ir_unop_dFdx_fine, p)), + abs(expr(ir_unop_dFdy_fine, p))))); + + return sig; +} + +ir_function_signature * +builtin_builder::_noise1(const glsl_type *type) +{ + return unop(v110, ir_unop_noise, glsl_type::float_type, type); +} + +ir_function_signature * +builtin_builder::_noise2(const glsl_type *type) +{ + ir_variable *p = in_var(type, "p"); + MAKE_SIG(glsl_type::vec2_type, v110, 1, p); + + ir_constant_data b_offset; + b_offset.f[0] = 601.0f; + b_offset.f[1] = 313.0f; + b_offset.f[2] = 29.0f; + b_offset.f[3] = 277.0f; + + ir_variable *a = body.make_temp(glsl_type::float_type, "a"); + ir_variable *b = body.make_temp(glsl_type::float_type, "b"); + ir_variable *t = body.make_temp(glsl_type::vec2_type, "t"); + body.emit(assign(a, expr(ir_unop_noise, p))); + body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, b_offset))))); + body.emit(assign(t, a, WRITEMASK_X)); + body.emit(assign(t, b, WRITEMASK_Y)); + body.emit(ret(t)); + + return sig; +} + +ir_function_signature * +builtin_builder::_noise3(const glsl_type *type) +{ + ir_variable *p = in_var(type, "p"); + MAKE_SIG(glsl_type::vec3_type, v110, 1, p); + + ir_constant_data b_offset; + b_offset.f[0] = 601.0f; + b_offset.f[1] = 313.0f; + b_offset.f[2] = 29.0f; + b_offset.f[3] = 277.0f; + + ir_constant_data c_offset; + c_offset.f[0] = 1559.0f; + c_offset.f[1] = 113.0f; + c_offset.f[2] = 1861.0f; + c_offset.f[3] = 797.0f; + + ir_variable *a = body.make_temp(glsl_type::float_type, "a"); + ir_variable *b = body.make_temp(glsl_type::float_type, "b"); + ir_variable *c = body.make_temp(glsl_type::float_type, "c"); + ir_variable *t = body.make_temp(glsl_type::vec3_type, "t"); + body.emit(assign(a, expr(ir_unop_noise, p))); + body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, b_offset))))); + body.emit(assign(c, expr(ir_unop_noise, add(p, imm(type, c_offset))))); + body.emit(assign(t, a, WRITEMASK_X)); + body.emit(assign(t, b, WRITEMASK_Y)); + body.emit(assign(t, c, WRITEMASK_Z)); + body.emit(ret(t)); + + return sig; +} + +ir_function_signature * +builtin_builder::_noise4(const glsl_type *type) +{ + ir_variable *p = in_var(type, "p"); + MAKE_SIG(glsl_type::vec4_type, v110, 1, p); + + ir_variable *_p = body.make_temp(type, "_p"); + + ir_constant_data p_offset; + p_offset.f[0] = 1559.0f; + p_offset.f[1] = 113.0f; + p_offset.f[2] = 1861.0f; + p_offset.f[3] = 797.0f; + + body.emit(assign(_p, add(p, imm(type, p_offset)))); + + ir_constant_data offset; + offset.f[0] = 601.0f; + offset.f[1] = 313.0f; + offset.f[2] = 29.0f; + offset.f[3] = 277.0f; + + ir_variable *a = body.make_temp(glsl_type::float_type, "a"); + ir_variable *b = body.make_temp(glsl_type::float_type, "b"); + ir_variable *c = body.make_temp(glsl_type::float_type, "c"); + ir_variable *d = body.make_temp(glsl_type::float_type, "d"); + ir_variable *t = body.make_temp(glsl_type::vec4_type, "t"); + body.emit(assign(a, expr(ir_unop_noise, p))); + body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, offset))))); + body.emit(assign(c, expr(ir_unop_noise, _p))); + body.emit(assign(d, expr(ir_unop_noise, add(_p, imm(type, offset))))); + body.emit(assign(t, a, WRITEMASK_X)); + body.emit(assign(t, b, WRITEMASK_Y)); + body.emit(assign(t, c, WRITEMASK_Z)); + 
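/* a, b, c and d hold four decorrelated 1D noise values, sampled at p,
+    * p + offset, _p and _p + offset respectively; the writemasked
+    * assignments pack them into the vec4 result. */
+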
body.emit(assign(t, d, WRITEMASK_W)); + body.emit(ret(t)); + + return sig; +} + +ir_function_signature * +builtin_builder::_bitfieldExtract(const glsl_type *type) +{ + bool is_uint = type->base_type == GLSL_TYPE_UINT; + ir_variable *value = in_var(type, "value"); + ir_variable *offset = in_var(glsl_type::int_type, "offset"); + ir_variable *bits = in_var(glsl_type::int_type, "bits"); + MAKE_SIG(type, gpu_shader5_or_es31, 3, value, offset, bits); + + operand cast_offset = is_uint ? i2u(offset) : operand(offset); + operand cast_bits = is_uint ? i2u(bits) : operand(bits); + + body.emit(ret(expr(ir_triop_bitfield_extract, value, + swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements), + swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements)))); + + return sig; +} + +ir_function_signature * +builtin_builder::_bitfieldInsert(const glsl_type *type) +{ + bool is_uint = type->base_type == GLSL_TYPE_UINT; + ir_variable *base = in_var(type, "base"); + ir_variable *insert = in_var(type, "insert"); + ir_variable *offset = in_var(glsl_type::int_type, "offset"); + ir_variable *bits = in_var(glsl_type::int_type, "bits"); + MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits); + + operand cast_offset = is_uint ? i2u(offset) : operand(offset); + operand cast_bits = is_uint ? i2u(bits) : operand(bits); + + body.emit(ret(bitfield_insert(base, insert, + swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements), + swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements)))); + + return sig; +} + +UNOP(bitfieldReverse, ir_unop_bitfield_reverse, gpu_shader5_or_es31) + +ir_function_signature * +builtin_builder::_bitCount(const glsl_type *type) +{ + return unop(gpu_shader5_or_es31, ir_unop_bit_count, + glsl_type::ivec(type->vector_elements), type); +} + +ir_function_signature * +builtin_builder::_findLSB(const glsl_type *type) +{ + return unop(gpu_shader5_or_es31, ir_unop_find_lsb, + glsl_type::ivec(type->vector_elements), type); +} + +ir_function_signature * +builtin_builder::_findMSB(const glsl_type *type) +{ + return unop(gpu_shader5_or_es31, ir_unop_find_msb, + glsl_type::ivec(type->vector_elements), type); +} + +ir_function_signature * +builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type) +{ + ir_variable *a = in_var(type, "a"); + ir_variable *b = in_var(type, "b"); + ir_variable *c = in_var(type, "c"); + MAKE_SIG(type, avail, 3, a, b, c); + + body.emit(ret(ir_builder::fma(a, b, c))); + + return sig; +} + +ir_function_signature * +builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type) +{ + return binop(x_type->base_type == GLSL_TYPE_DOUBLE ? 
fp64 : gpu_shader5_or_es31,
+                ir_binop_ldexp, x_type, x_type, exp_type);
+}
+
+ir_function_signature *
+builtin_builder::_dfrexp(const glsl_type *x_type, const glsl_type *exp_type)
+{
+   ir_variable *x = in_var(x_type, "x");
+   ir_variable *exponent = out_var(exp_type, "exp");
+   MAKE_SIG(x_type, fp64, 2, x, exponent);
+
+   body.emit(assign(exponent, expr(ir_unop_frexp_exp, x)));
+
+   body.emit(ret(expr(ir_unop_frexp_sig, x)));
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_frexp(const glsl_type *x_type, const glsl_type *exp_type)
+{
+   ir_variable *x = in_var(x_type, "x");
+   ir_variable *exponent = out_var(exp_type, "exp");
+   MAKE_SIG(x_type, gpu_shader5_or_es31, 2, x, exponent);
+
+   const unsigned vec_elem = x_type->vector_elements;
+   const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
+   const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1);
+
+   /* Single-precision floating-point values are stored as
+    * 1 sign bit;
+    * 8 exponent bits;
+    * 23 mantissa bits.
+    *
+    * An exponent shift of 23 will shift the mantissa out, leaving only the
+    * exponent and sign bit (which itself may be zero, if the absolute value
+    * was taken before the bitcast and shift).
+    */
+   ir_constant *exponent_shift = imm(23);
+   ir_constant *exponent_bias = imm(-126, vec_elem);
+
+   ir_constant *sign_mantissa_mask = imm(0x807fffffu, vec_elem);
+
+   /* Exponent of floating-point values in the range [0.5, 1.0). */
+   ir_constant *exponent_value = imm(0x3f000000u, vec_elem);
+
+   ir_variable *is_not_zero = body.make_temp(bvec, "is_not_zero");
+   body.emit(assign(is_not_zero, nequal(abs(x), imm(0.0f, vec_elem))));
+
+   /* Since abs(x) ensures that the sign bit is zero, we don't need to bitcast
+    * to unsigned integers to ensure that 1 bits aren't shifted in.
+    */
+   body.emit(assign(exponent, rshift(bitcast_f2i(abs(x)), exponent_shift)));
+   body.emit(assign(exponent, add(exponent, csel(is_not_zero, exponent_bias,
+                                                 imm(0, vec_elem)))));
+
+   ir_variable *bits = body.make_temp(uvec, "bits");
+   body.emit(assign(bits, bitcast_f2u(x)));
+   body.emit(assign(bits, bit_and(bits, sign_mantissa_mask)));
+   body.emit(assign(bits, bit_or(bits, csel(is_not_zero, exponent_value,
+                                            imm(0u, vec_elem)))));
+   body.emit(ret(bitcast_u2f(bits)));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_uaddCarry(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   ir_variable *y = in_var(type, "y");
+   ir_variable *carry = out_var(type, "carry");
+   MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, carry);
+
+   body.emit(assign(carry, ir_builder::carry(x, y)));
+   body.emit(ret(add(x, y)));
+
+   return sig;
+}
+
+ir_function_signature *
+builtin_builder::_usubBorrow(const glsl_type *type)
+{
+   ir_variable *x = in_var(type, "x");
+   ir_variable *y = in_var(type, "y");
+   ir_variable *borrow = out_var(type, "borrow");
+   MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, borrow);
+
+   body.emit(assign(borrow, ir_builder::borrow(x, y)));
+   body.emit(ret(sub(x, y)));
+
+   return sig;
+}
+
+/**
+ * For both imulExtended() and umulExtended() built-ins.
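+ *
+ * A small illustrative GLSL-level usage sketch (variable names assumed):
+ *
+ *    uint hi, lo;
+ *    umulExtended(a, b, hi, lo);   // hi:lo form the full 64-bit product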
+ */ +ir_function_signature * +builtin_builder::_mulExtended(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + ir_variable *msb = out_var(type, "msb"); + ir_variable *lsb = out_var(type, "lsb"); + MAKE_SIG(glsl_type::void_type, gpu_shader5_or_es31, 4, x, y, msb, lsb); + + body.emit(assign(msb, imul_high(x, y))); + body.emit(assign(lsb, mul(x, y))); + + return sig; +} + +ir_function_signature * +builtin_builder::_interpolateAtCentroid(const glsl_type *type) +{ + ir_variable *interpolant = in_var(type, "interpolant"); + interpolant->data.must_be_shader_input = 1; + MAKE_SIG(type, fs_gpu_shader5, 1, interpolant); + + body.emit(ret(interpolate_at_centroid(interpolant))); + + return sig; +} + +ir_function_signature * +builtin_builder::_interpolateAtOffset(const glsl_type *type) +{ + ir_variable *interpolant = in_var(type, "interpolant"); + interpolant->data.must_be_shader_input = 1; + ir_variable *offset = in_var(glsl_type::vec2_type, "offset"); + MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, offset); + + body.emit(ret(interpolate_at_offset(interpolant, offset))); + + return sig; +} + +ir_function_signature * +builtin_builder::_interpolateAtSample(const glsl_type *type) +{ + ir_variable *interpolant = in_var(type, "interpolant"); + interpolant->data.must_be_shader_input = 1; + ir_variable *sample_num = in_var(glsl_type::int_type, "sample_num"); + MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, sample_num); + + body.emit(ret(interpolate_at_sample(interpolant, sample_num))); + + return sig; +} + +ir_function_signature * +builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail) +{ + ir_variable *counter = in_var(glsl_type::atomic_uint_type, "counter"); + MAKE_INTRINSIC(glsl_type::uint_type, avail, 1, counter); + return sig; +} + +ir_function_signature * +builtin_builder::_atomic_intrinsic2(builtin_available_predicate avail, + const glsl_type *type) +{ + ir_variable *atomic = in_var(type, "atomic"); + ir_variable *data = in_var(type, "data"); + MAKE_INTRINSIC(type, avail, 2, atomic, data); + return sig; +} + +ir_function_signature * +builtin_builder::_atomic_intrinsic3(builtin_available_predicate avail, + const glsl_type *type) +{ + ir_variable *atomic = in_var(type, "atomic"); + ir_variable *data1 = in_var(type, "data1"); + ir_variable *data2 = in_var(type, "data2"); + MAKE_INTRINSIC(type, avail, 3, atomic, data1, data2); + return sig; +} + +ir_function_signature * +builtin_builder::_atomic_counter_op(const char *intrinsic, + builtin_available_predicate avail) +{ + ir_variable *counter = in_var(glsl_type::atomic_uint_type, "atomic_counter"); + MAKE_SIG(glsl_type::uint_type, avail, 1, counter); + + ir_variable *retval = body.make_temp(glsl_type::uint_type, "atomic_retval"); + body.emit(call(shader->symbols->get_function(intrinsic), retval, + sig->parameters)); + body.emit(ret(retval)); + return sig; +} + +ir_function_signature * +builtin_builder::_atomic_op2(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type) +{ + ir_variable *atomic = in_var(type, "atomic_var"); + ir_variable *data = in_var(type, "atomic_data"); + MAKE_SIG(type, avail, 2, atomic, data); + + ir_variable *retval = body.make_temp(type, "atomic_retval"); + body.emit(call(shader->symbols->get_function(intrinsic), retval, + sig->parameters)); + body.emit(ret(retval)); + return sig; +} + +ir_function_signature * +builtin_builder::_atomic_op3(const char *intrinsic, + builtin_available_predicate avail, + const glsl_type *type) +{ + 
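/* Same wrapper pattern as _atomic_op2 above: declare the user-visible
+    * parameters, emit a call to the named __intrinsic_* function with the
+    * same parameter list, and return its result.  This three-operand
+    * variant serves built-ins such as atomicCompSwap(mem, compare, data). */
+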
ir_variable *atomic = in_var(type, "atomic_var"); + ir_variable *data1 = in_var(type, "atomic_data1"); + ir_variable *data2 = in_var(type, "atomic_data2"); + MAKE_SIG(type, avail, 3, atomic, data1, data2); + + ir_variable *retval = body.make_temp(type, "atomic_retval"); + body.emit(call(shader->symbols->get_function(intrinsic), retval, + sig->parameters)); + body.emit(ret(retval)); + return sig; +} + +ir_function_signature * +builtin_builder::_min3(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + ir_variable *z = in_var(type, "z"); + MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z); + + ir_expression *min3 = min2(x, min2(y,z)); + body.emit(ret(min3)); + + return sig; +} + +ir_function_signature * +builtin_builder::_max3(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + ir_variable *z = in_var(type, "z"); + MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z); + + ir_expression *max3 = max2(x, max2(y,z)); + body.emit(ret(max3)); + + return sig; +} + +ir_function_signature * +builtin_builder::_mid3(const glsl_type *type) +{ + ir_variable *x = in_var(type, "x"); + ir_variable *y = in_var(type, "y"); + ir_variable *z = in_var(type, "z"); + MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z); + + ir_expression *mid3 = max2(min2(x, y), max2(min2(x, z), min2(y, z))); + body.emit(ret(mid3)); + + return sig; +} + +ir_function_signature * +builtin_builder::_image_prototype(const glsl_type *image_type, + unsigned num_arguments, + unsigned flags) +{ + const glsl_type *data_type = glsl_type::get_instance( + image_type->sampler_type, + (flags & IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE ? 4 : 1), + 1); + const glsl_type *ret_type = (flags & IMAGE_FUNCTION_RETURNS_VOID ? + glsl_type::void_type : data_type); + + /* Addressing arguments that are always present. */ + ir_variable *image = in_var(image_type, "image"); + ir_variable *coord = in_var( + glsl_type::ivec(image_type->coordinate_components()), "coord"); + + const builtin_available_predicate avail = + (flags & IMAGE_FUNCTION_AVAIL_ATOMIC ? shader_image_atomic : + shader_image_load_store); + ir_function_signature *sig = new_sig(ret_type, avail, 2, image, coord); + + /* Sample index for multisample images. */ + if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) + sig->parameters.push_tail(in_var(glsl_type::int_type, "sample")); + + /* Data arguments. */ + for (unsigned i = 0; i < num_arguments; ++i) { + char *arg_name = ralloc_asprintf(NULL, "arg%d", i); + sig->parameters.push_tail(in_var(data_type, arg_name)); + ralloc_free(arg_name); + } + + /* Set the maximal set of qualifiers allowed for this image + * built-in. Function calls with arguments having fewer + * qualifiers than present in the prototype are allowed by the + * spec, but not with more, i.e. this will make the compiler + * accept everything that needs to be accepted, and reject cases + * like loads from write-only or stores to read-only images. 
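+ *
+ * Concretely (an illustrative example, not spec text): an argument
+ * declared 'coherent readonly' is accepted by the imageLoad() prototype,
+ * since every qualifier it carries appears there, while imageStore()
+ * rejects it, because the write-only store prototype does not include
+ * the 'readonly' qualifier.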
+ */ + image->data.image_read_only = (flags & IMAGE_FUNCTION_READ_ONLY) != 0; + image->data.image_write_only = (flags & IMAGE_FUNCTION_WRITE_ONLY) != 0; + image->data.image_coherent = true; + image->data.image_volatile = true; + image->data.image_restrict = true; + + return sig; +} + +ir_function_signature * +builtin_builder::_image_size_prototype(const glsl_type *image_type, + unsigned /* num_arguments */, + unsigned /* flags */) +{ + const glsl_type *ret_type; + unsigned num_components = image_type->coordinate_components(); + + /* From the ARB_shader_image_size extension: + * "Cube images return the dimensions of one face." + */ + if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && + !image_type->sampler_array) { + num_components = 2; + } + + /* FIXME: Add the highp precision qualifier for GLES 3.10 when it is + * supported by mesa. + */ + ret_type = glsl_type::get_instance(GLSL_TYPE_INT, num_components, 1); + + ir_variable *image = in_var(image_type, "image"); + ir_function_signature *sig = new_sig(ret_type, shader_image_size, 1, image); + + /* Set the maximal set of qualifiers allowed for this image + * built-in. Function calls with arguments having fewer + * qualifiers than present in the prototype are allowed by the + * spec, but not with more, i.e. this will make the compiler + * accept everything that needs to be accepted, and reject cases + * like loads from write-only or stores to read-only images. + */ + image->data.image_read_only = true; + image->data.image_write_only = true; + image->data.image_coherent = true; + image->data.image_volatile = true; + image->data.image_restrict = true; + + return sig; +} + +ir_function_signature * +builtin_builder::_image_samples_prototype(const glsl_type *image_type, + unsigned /* num_arguments */, + unsigned /* flags */) +{ + ir_variable *image = in_var(image_type, "image"); + ir_function_signature *sig = + new_sig(glsl_type::int_type, shader_samples, 1, image); + + /* Set the maximal set of qualifiers allowed for this image + * built-in. Function calls with arguments having fewer + * qualifiers than present in the prototype are allowed by the + * spec, but not with more, i.e. this will make the compiler + * accept everything that needs to be accepted, and reject cases + * like loads from write-only or stores to read-only images. 
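+ *
+ * A hypothetical GLSL-level consequence (declarations assumed): because
+ * both read_only and write_only are set below, any access qualifier on
+ * the caller's image is accepted for this metadata query:
+ *
+ *    writeonly uniform image2DMS img;
+ *    int n = imageSamples(img);   // legal: no texel is read or written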
+ */ + image->data.image_read_only = true; + image->data.image_write_only = true; + image->data.image_coherent = true; + image->data.image_volatile = true; + image->data.image_restrict = true; + + return sig; +} + +ir_function_signature * +builtin_builder::_image(image_prototype_ctr prototype, + const glsl_type *image_type, + const char *intrinsic_name, + unsigned num_arguments, + unsigned flags) +{ + ir_function_signature *sig = (this->*prototype)(image_type, + num_arguments, flags); + + if (flags & IMAGE_FUNCTION_EMIT_STUB) { + ir_factory body(&sig->body, mem_ctx); + ir_function *f = shader->symbols->get_function(intrinsic_name); + + if (flags & IMAGE_FUNCTION_RETURNS_VOID) { + body.emit(call(f, NULL, sig->parameters)); + } else { + ir_variable *ret_val = + body.make_temp(sig->return_type, "_ret_val"); + body.emit(call(f, ret_val, sig->parameters)); + body.emit(ret(ret_val)); + } + + sig->is_defined = true; + + } else { + sig->is_intrinsic = true; + } + + return sig; +} + +ir_function_signature * +builtin_builder::_memory_barrier_intrinsic(builtin_available_predicate avail) +{ + MAKE_INTRINSIC(glsl_type::void_type, avail, 0); + return sig; +} + +ir_function_signature * +builtin_builder::_memory_barrier(const char *intrinsic_name, + builtin_available_predicate avail) +{ + MAKE_SIG(glsl_type::void_type, avail, 0); + body.emit(call(shader->symbols->get_function(intrinsic_name), + NULL, sig->parameters)); + return sig; +} + +ir_function_signature * +builtin_builder::_shader_clock_intrinsic(builtin_available_predicate avail, + const glsl_type *type) +{ + MAKE_INTRINSIC(type, avail, 0); + return sig; +} + +ir_function_signature * +builtin_builder::_shader_clock(builtin_available_predicate avail, + const glsl_type *type) +{ + MAKE_SIG(type, avail, 0); + + ir_variable *retval = body.make_temp(type, "clock_retval"); + + body.emit(call(shader->symbols->get_function("__intrinsic_shader_clock"), + retval, sig->parameters)); + body.emit(ret(retval)); + return sig; +} + +/** @} */ + +/******************************************************************************/ + +/* The singleton instance of builtin_builder. */ +static builtin_builder builtins; +static mtx_t builtins_lock = _MTX_INITIALIZER_NP; + +/** + * External API (exposing the built-in module to the rest of the compiler): + * @{ + */ +void +_mesa_glsl_initialize_builtin_functions() +{ + mtx_lock(&builtins_lock); + builtins.initialize(); + mtx_unlock(&builtins_lock); +} + +void +_mesa_glsl_release_builtin_functions() +{ + mtx_lock(&builtins_lock); + builtins.release(); + mtx_unlock(&builtins_lock); +} + +ir_function_signature * +_mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state, + const char *name, exec_list *actual_parameters) +{ + ir_function_signature * s; + mtx_lock(&builtins_lock); + s = builtins.find(state, name, actual_parameters); + mtx_unlock(&builtins_lock); + return s; +} + +ir_function * +_mesa_glsl_find_builtin_function_by_name(const char *name) +{ + ir_function *f; + mtx_lock(&builtins_lock); + f = builtins.shader->symbols->get_function(name); + mtx_unlock(&builtins_lock); + return f; +} + +gl_shader * +_mesa_glsl_get_builtin_function_shader() +{ + return builtins.shader; +} + + +/** + * Get the function signature for main from a shader + */ +ir_function_signature * +_mesa_get_main_function_signature(gl_shader *sh) +{ + ir_function *const f = sh->symbols->get_function("main"); + if (f != NULL) { + exec_list void_parameters; + + /* Look for the 'void main()' signature and ensure that it's defined. 
+ * This keeps the linker from accidentally picking a shader that just
+ * contains a prototype for main.
+ *
+ * We don't have to check for multiple definitions of main (in multiple
+ * shaders) because that would have already been caught above.
+ */
+      ir_function_signature *sig =
+         f->matching_signature(NULL, &void_parameters, false);
+      if ((sig != NULL) && sig->is_defined) {
+         return sig;
+      }
+   }
+
+   return NULL;
+}
+
+/** @} */
diff --git a/src/compiler/glsl/builtin_types.cpp b/src/compiler/glsl/builtin_types.cpp
new file mode 100644
index 00000000000..ee24bd5e411
--- /dev/null
+++ b/src/compiler/glsl/builtin_types.cpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file builtin_types.cpp
+ *
+ * The glsl_type class has static members to represent all the built-in types
+ * (such as the glsl_type::_float_type flyweight) as well as convenience pointer
+ * accessors (such as glsl_type::float_type).  Those global variables are
+ * declared and initialized in this file.
+ *
+ * This also contains _mesa_glsl_initialize_types(), a function which populates
+ * a symbol table with the available built-in types for a particular language
+ * version and set of enabled extensions.
+ */
+
+#include "compiler/glsl_types.h"
+#include "glsl_parser_extras.h"
+#include "util/macros.h"
+
+/**
+ * Declarations of type flyweights (glsl_type::_foo_type) and
+ * convenience pointers (glsl_type::foo_type).
+ * @{
+ */
+#define DECL_TYPE(NAME, ...)
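+
+/* Illustration (not part of the original sources): for
+ * STRUCT_TYPE(gl_DepthRangeParameters), the macro defined just below
+ * expands roughly to:
+ *
+ *    const glsl_type glsl_type::_struct_gl_DepthRangeParameters_type =
+ *       glsl_type(gl_DepthRangeParameters_fields,
+ *                 ARRAY_SIZE(gl_DepthRangeParameters_fields),
+ *                 "gl_DepthRangeParameters");
+ *    const glsl_type *const glsl_type::struct_gl_DepthRangeParameters_type =
+ *       &glsl_type::_struct_gl_DepthRangeParameters_type;
+ *
+ * DECL_TYPE is left empty here, so including builtin_type_macros.h at the
+ * end of this block presumably instantiates only the struct types.
+ */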
+
+#define STRUCT_TYPE(NAME)                                        \
+   const glsl_type glsl_type::_struct_##NAME##_type =            \
+      glsl_type(NAME##_fields, ARRAY_SIZE(NAME##_fields), #NAME); \
+   const glsl_type *const glsl_type::struct_##NAME##_type =      \
+      &glsl_type::_struct_##NAME##_type;
+
+static const struct glsl_struct_field gl_DepthRangeParameters_fields[] = {
+   glsl_struct_field(glsl_type::float_type, "near"),
+   glsl_struct_field(glsl_type::float_type, "far"),
+   glsl_struct_field(glsl_type::float_type, "diff"),
+};
+
+static const struct glsl_struct_field gl_PointParameters_fields[] = {
+   glsl_struct_field(glsl_type::float_type, "size"),
+   glsl_struct_field(glsl_type::float_type, "sizeMin"),
+   glsl_struct_field(glsl_type::float_type, "sizeMax"),
+   glsl_struct_field(glsl_type::float_type, "fadeThresholdSize"),
+   glsl_struct_field(glsl_type::float_type, "distanceConstantAttenuation"),
+   glsl_struct_field(glsl_type::float_type, "distanceLinearAttenuation"),
+   glsl_struct_field(glsl_type::float_type, "distanceQuadraticAttenuation"),
+};
+
+static const struct glsl_struct_field gl_MaterialParameters_fields[] = {
+   glsl_struct_field(glsl_type::vec4_type, "emission"),
+   glsl_struct_field(glsl_type::vec4_type, "ambient"),
+   glsl_struct_field(glsl_type::vec4_type, "diffuse"),
+   glsl_struct_field(glsl_type::vec4_type, "specular"),
+   glsl_struct_field(glsl_type::float_type, "shininess"),
+};
+
+static const struct glsl_struct_field gl_LightSourceParameters_fields[] = {
+   glsl_struct_field(glsl_type::vec4_type, "ambient"),
+   glsl_struct_field(glsl_type::vec4_type, "diffuse"),
+   glsl_struct_field(glsl_type::vec4_type, "specular"),
+   glsl_struct_field(glsl_type::vec4_type, "position"),
+   glsl_struct_field(glsl_type::vec4_type, "halfVector"),
+   glsl_struct_field(glsl_type::vec3_type, "spotDirection"),
+   glsl_struct_field(glsl_type::float_type, "spotExponent"),
+   glsl_struct_field(glsl_type::float_type, "spotCutoff"),
+   glsl_struct_field(glsl_type::float_type, "spotCosCutoff"),
+   glsl_struct_field(glsl_type::float_type, "constantAttenuation"),
+   glsl_struct_field(glsl_type::float_type, "linearAttenuation"),
+   glsl_struct_field(glsl_type::float_type, "quadraticAttenuation"),
+};
+
+static const struct glsl_struct_field gl_LightModelParameters_fields[] = {
+   glsl_struct_field(glsl_type::vec4_type, "ambient"),
+};
+
+static const struct glsl_struct_field gl_LightModelProducts_fields[] = {
+   glsl_struct_field(glsl_type::vec4_type, "sceneColor"),
+};
+
+static const struct glsl_struct_field gl_LightProducts_fields[] = {
+   glsl_struct_field(glsl_type::vec4_type, "ambient"),
+   glsl_struct_field(glsl_type::vec4_type, "diffuse"),
+   glsl_struct_field(glsl_type::vec4_type, "specular"),
+};
+
+static const struct glsl_struct_field gl_FogParameters_fields[] = {
+   glsl_struct_field(glsl_type::vec4_type, "color"),
+   glsl_struct_field(glsl_type::float_type, "density"),
+   glsl_struct_field(glsl_type::float_type, "start"),
+   glsl_struct_field(glsl_type::float_type, "end"),
+   glsl_struct_field(glsl_type::float_type, "scale"),
+};
+
+#include "compiler/builtin_type_macros.h"
+/** @} */
+
+/**
+ * Code to populate a symbol table with the built-in types available in a
+ * particular shading language version.  The table below tags every type
+ * with the GLSL/GLSL ES versions where it was introduced.
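+ *
+ * As a purely illustrative example: T(uint, 130, 300) below expands to
+ * the entry { glsl_type::uint_type, 130, 300 }, so 'uint' is registered
+ * only for GLSL >= 1.30 or GLSL ES >= 3.00.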
+ * + * @{ + */ +#define T(TYPE, MIN_GL, MIN_ES) \ + { glsl_type::TYPE##_type, MIN_GL, MIN_ES }, + +static const struct builtin_type_versions { + const glsl_type *const type; + int min_gl; + int min_es; +} builtin_type_versions[] = { + T(void, 110, 100) + T(bool, 110, 100) + T(bvec2, 110, 100) + T(bvec3, 110, 100) + T(bvec4, 110, 100) + T(int, 110, 100) + T(ivec2, 110, 100) + T(ivec3, 110, 100) + T(ivec4, 110, 100) + T(uint, 130, 300) + T(uvec2, 130, 300) + T(uvec3, 130, 300) + T(uvec4, 130, 300) + T(float, 110, 100) + T(vec2, 110, 100) + T(vec3, 110, 100) + T(vec4, 110, 100) + T(mat2, 110, 100) + T(mat3, 110, 100) + T(mat4, 110, 100) + T(mat2x3, 120, 300) + T(mat2x4, 120, 300) + T(mat3x2, 120, 300) + T(mat3x4, 120, 300) + T(mat4x2, 120, 300) + T(mat4x3, 120, 300) + + T(double, 400, 999) + T(dvec2, 400, 999) + T(dvec3, 400, 999) + T(dvec4, 400, 999) + T(dmat2, 400, 999) + T(dmat3, 400, 999) + T(dmat4, 400, 999) + T(dmat2x3, 400, 999) + T(dmat2x4, 400, 999) + T(dmat3x2, 400, 999) + T(dmat3x4, 400, 999) + T(dmat4x2, 400, 999) + T(dmat4x3, 400, 999) + + T(sampler1D, 110, 999) + T(sampler2D, 110, 100) + T(sampler3D, 110, 300) + T(samplerCube, 110, 100) + T(sampler1DArray, 130, 999) + T(sampler2DArray, 130, 300) + T(samplerCubeArray, 400, 999) + T(sampler2DRect, 140, 999) + T(samplerBuffer, 140, 999) + T(sampler2DMS, 150, 310) + T(sampler2DMSArray, 150, 999) + + T(isampler1D, 130, 999) + T(isampler2D, 130, 300) + T(isampler3D, 130, 300) + T(isamplerCube, 130, 300) + T(isampler1DArray, 130, 999) + T(isampler2DArray, 130, 300) + T(isamplerCubeArray, 400, 999) + T(isampler2DRect, 140, 999) + T(isamplerBuffer, 140, 999) + T(isampler2DMS, 150, 310) + T(isampler2DMSArray, 150, 999) + + T(usampler1D, 130, 999) + T(usampler2D, 130, 300) + T(usampler3D, 130, 300) + T(usamplerCube, 130, 300) + T(usampler1DArray, 130, 999) + T(usampler2DArray, 130, 300) + T(usamplerCubeArray, 400, 999) + T(usampler2DRect, 140, 999) + T(usamplerBuffer, 140, 999) + T(usampler2DMS, 150, 310) + T(usampler2DMSArray, 150, 999) + + T(sampler1DShadow, 110, 999) + T(sampler2DShadow, 110, 300) + T(samplerCubeShadow, 130, 300) + T(sampler1DArrayShadow, 130, 999) + T(sampler2DArrayShadow, 130, 300) + T(samplerCubeArrayShadow, 400, 999) + T(sampler2DRectShadow, 140, 999) + + T(struct_gl_DepthRangeParameters, 110, 100) + + T(image1D, 420, 999) + T(image2D, 420, 310) + T(image3D, 420, 310) + T(image2DRect, 420, 999) + T(imageCube, 420, 310) + T(imageBuffer, 420, 999) + T(image1DArray, 420, 999) + T(image2DArray, 420, 310) + T(imageCubeArray, 420, 999) + T(image2DMS, 420, 999) + T(image2DMSArray, 420, 999) + T(iimage1D, 420, 999) + T(iimage2D, 420, 310) + T(iimage3D, 420, 310) + T(iimage2DRect, 420, 999) + T(iimageCube, 420, 310) + T(iimageBuffer, 420, 999) + T(iimage1DArray, 420, 999) + T(iimage2DArray, 420, 310) + T(iimageCubeArray, 420, 999) + T(iimage2DMS, 420, 999) + T(iimage2DMSArray, 420, 999) + T(uimage1D, 420, 999) + T(uimage2D, 420, 310) + T(uimage3D, 420, 310) + T(uimage2DRect, 420, 999) + T(uimageCube, 420, 310) + T(uimageBuffer, 420, 999) + T(uimage1DArray, 420, 999) + T(uimage2DArray, 420, 310) + T(uimageCubeArray, 420, 999) + T(uimage2DMS, 420, 999) + T(uimage2DMSArray, 420, 999) + + T(atomic_uint, 420, 310) +}; + +static const glsl_type *const deprecated_types[] = { + glsl_type::struct_gl_PointParameters_type, + glsl_type::struct_gl_MaterialParameters_type, + glsl_type::struct_gl_LightSourceParameters_type, + glsl_type::struct_gl_LightModelParameters_type, + glsl_type::struct_gl_LightModelProducts_type, + 
glsl_type::struct_gl_LightProducts_type, + glsl_type::struct_gl_FogParameters_type, +}; + +static inline void +add_type(glsl_symbol_table *symbols, const glsl_type *const type) +{ + symbols->add_type(type->name, type); +} + +/** + * Populate the symbol table with available built-in types. + */ +void +_mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) +{ + struct glsl_symbol_table *symbols = state->symbols; + + for (unsigned i = 0; i < ARRAY_SIZE(builtin_type_versions); i++) { + const struct builtin_type_versions *const t = &builtin_type_versions[i]; + if (state->is_version(t->min_gl, t->min_es)) { + add_type(symbols, t->type); + } + } + + /* Add deprecated structure types. While these were deprecated in 1.30, + * they're still present. We've removed them in 1.40+ (OpenGL 3.1+). + */ + if (!state->es_shader && state->language_version < 140) { + for (unsigned i = 0; i < ARRAY_SIZE(deprecated_types); i++) { + add_type(symbols, deprecated_types[i]); + } + } + + /* Add types for enabled extensions. They may have already been added + * by the version-based loop, but attempting to add them a second time + * is harmless. + */ + if (state->ARB_texture_cube_map_array_enable) { + add_type(symbols, glsl_type::samplerCubeArray_type); + add_type(symbols, glsl_type::samplerCubeArrayShadow_type); + add_type(symbols, glsl_type::isamplerCubeArray_type); + add_type(symbols, glsl_type::usamplerCubeArray_type); + } + + if (state->ARB_texture_multisample_enable || + state->OES_texture_storage_multisample_2d_array_enable) { + add_type(symbols, glsl_type::sampler2DMS_type); + add_type(symbols, glsl_type::isampler2DMS_type); + add_type(symbols, glsl_type::usampler2DMS_type); + add_type(symbols, glsl_type::sampler2DMSArray_type); + add_type(symbols, glsl_type::isampler2DMSArray_type); + add_type(symbols, glsl_type::usampler2DMSArray_type); + } + + if (state->ARB_texture_rectangle_enable) { + add_type(symbols, glsl_type::sampler2DRect_type); + add_type(symbols, glsl_type::sampler2DRectShadow_type); + } + + if (state->EXT_texture_array_enable) { + add_type(symbols, glsl_type::sampler1DArray_type); + add_type(symbols, glsl_type::sampler2DArray_type); + add_type(symbols, glsl_type::sampler1DArrayShadow_type); + add_type(symbols, glsl_type::sampler2DArrayShadow_type); + } + + if (state->OES_EGL_image_external_enable) { + add_type(symbols, glsl_type::samplerExternalOES_type); + } + + if (state->OES_texture_3D_enable) { + add_type(symbols, glsl_type::sampler3D_type); + } + + if (state->ARB_shader_image_load_store_enable) { + add_type(symbols, glsl_type::image1D_type); + add_type(symbols, glsl_type::image2D_type); + add_type(symbols, glsl_type::image3D_type); + add_type(symbols, glsl_type::image2DRect_type); + add_type(symbols, glsl_type::imageCube_type); + add_type(symbols, glsl_type::imageBuffer_type); + add_type(symbols, glsl_type::image1DArray_type); + add_type(symbols, glsl_type::image2DArray_type); + add_type(symbols, glsl_type::imageCubeArray_type); + add_type(symbols, glsl_type::image2DMS_type); + add_type(symbols, glsl_type::image2DMSArray_type); + add_type(symbols, glsl_type::iimage1D_type); + add_type(symbols, glsl_type::iimage2D_type); + add_type(symbols, glsl_type::iimage3D_type); + add_type(symbols, glsl_type::iimage2DRect_type); + add_type(symbols, glsl_type::iimageCube_type); + add_type(symbols, glsl_type::iimageBuffer_type); + add_type(symbols, glsl_type::iimage1DArray_type); + add_type(symbols, glsl_type::iimage2DArray_type); + add_type(symbols, glsl_type::iimageCubeArray_type); + 
add_type(symbols, glsl_type::iimage2DMS_type); + add_type(symbols, glsl_type::iimage2DMSArray_type); + add_type(symbols, glsl_type::uimage1D_type); + add_type(symbols, glsl_type::uimage2D_type); + add_type(symbols, glsl_type::uimage3D_type); + add_type(symbols, glsl_type::uimage2DRect_type); + add_type(symbols, glsl_type::uimageCube_type); + add_type(symbols, glsl_type::uimageBuffer_type); + add_type(symbols, glsl_type::uimage1DArray_type); + add_type(symbols, glsl_type::uimage2DArray_type); + add_type(symbols, glsl_type::uimageCubeArray_type); + add_type(symbols, glsl_type::uimage2DMS_type); + add_type(symbols, glsl_type::uimage2DMSArray_type); + } + + if (state->has_atomic_counters()) { + add_type(symbols, glsl_type::atomic_uint_type); + } + + if (state->ARB_gpu_shader_fp64_enable) { + add_type(symbols, glsl_type::double_type); + add_type(symbols, glsl_type::dvec2_type); + add_type(symbols, glsl_type::dvec3_type); + add_type(symbols, glsl_type::dvec4_type); + add_type(symbols, glsl_type::dmat2_type); + add_type(symbols, glsl_type::dmat3_type); + add_type(symbols, glsl_type::dmat4_type); + add_type(symbols, glsl_type::dmat2x3_type); + add_type(symbols, glsl_type::dmat2x4_type); + add_type(symbols, glsl_type::dmat3x2_type); + add_type(symbols, glsl_type::dmat3x4_type); + add_type(symbols, glsl_type::dmat4x2_type); + add_type(symbols, glsl_type::dmat4x3_type); + } +} +/** @} */ diff --git a/src/compiler/glsl/builtin_variables.cpp b/src/compiler/glsl/builtin_variables.cpp new file mode 100644 index 00000000000..ccc04c00cea --- /dev/null +++ b/src/compiler/glsl/builtin_variables.cpp @@ -0,0 +1,1394 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "ir.h" +#include "ir_builder.h" +#include "linker.h" +#include "glsl_parser_extras.h" +#include "glsl_symbol_table.h" +#include "main/core.h" +#include "main/uniforms.h" +#include "program/prog_statevars.h" +#include "program/prog_instruction.h" + +using namespace ir_builder; + +static const struct gl_builtin_uniform_element gl_NumSamples_elements[] = { + {NULL, {STATE_NUM_SAMPLES, 0, 0}, SWIZZLE_XXXX} +}; + +static const struct gl_builtin_uniform_element gl_DepthRange_elements[] = { + {"near", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_XXXX}, + {"far", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_YYYY}, + {"diff", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_ZZZZ}, +}; + +static const struct gl_builtin_uniform_element gl_ClipPlane_elements[] = { + {NULL, {STATE_CLIPPLANE, 0, 0}, SWIZZLE_XYZW} +}; + +static const struct gl_builtin_uniform_element gl_Point_elements[] = { + {"size", {STATE_POINT_SIZE}, SWIZZLE_XXXX}, + {"sizeMin", {STATE_POINT_SIZE}, SWIZZLE_YYYY}, + {"sizeMax", {STATE_POINT_SIZE}, SWIZZLE_ZZZZ}, + {"fadeThresholdSize", {STATE_POINT_SIZE}, SWIZZLE_WWWW}, + {"distanceConstantAttenuation", {STATE_POINT_ATTENUATION}, SWIZZLE_XXXX}, + {"distanceLinearAttenuation", {STATE_POINT_ATTENUATION}, SWIZZLE_YYYY}, + {"distanceQuadraticAttenuation", {STATE_POINT_ATTENUATION}, SWIZZLE_ZZZZ}, +}; + +static const struct gl_builtin_uniform_element gl_FrontMaterial_elements[] = { + {"emission", {STATE_MATERIAL, 0, STATE_EMISSION}, SWIZZLE_XYZW}, + {"ambient", {STATE_MATERIAL, 0, STATE_AMBIENT}, SWIZZLE_XYZW}, + {"diffuse", {STATE_MATERIAL, 0, STATE_DIFFUSE}, SWIZZLE_XYZW}, + {"specular", {STATE_MATERIAL, 0, STATE_SPECULAR}, SWIZZLE_XYZW}, + {"shininess", {STATE_MATERIAL, 0, STATE_SHININESS}, SWIZZLE_XXXX}, +}; + +static const struct gl_builtin_uniform_element gl_BackMaterial_elements[] = { + {"emission", {STATE_MATERIAL, 1, STATE_EMISSION}, SWIZZLE_XYZW}, + {"ambient", {STATE_MATERIAL, 1, STATE_AMBIENT}, SWIZZLE_XYZW}, + {"diffuse", {STATE_MATERIAL, 1, STATE_DIFFUSE}, SWIZZLE_XYZW}, + {"specular", {STATE_MATERIAL, 1, STATE_SPECULAR}, SWIZZLE_XYZW}, + {"shininess", {STATE_MATERIAL, 1, STATE_SHININESS}, SWIZZLE_XXXX}, +}; + +static const struct gl_builtin_uniform_element gl_LightSource_elements[] = { + {"ambient", {STATE_LIGHT, 0, STATE_AMBIENT}, SWIZZLE_XYZW}, + {"diffuse", {STATE_LIGHT, 0, STATE_DIFFUSE}, SWIZZLE_XYZW}, + {"specular", {STATE_LIGHT, 0, STATE_SPECULAR}, SWIZZLE_XYZW}, + {"position", {STATE_LIGHT, 0, STATE_POSITION}, SWIZZLE_XYZW}, + {"halfVector", {STATE_LIGHT, 0, STATE_HALF_VECTOR}, SWIZZLE_XYZW}, + {"spotDirection", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, + MAKE_SWIZZLE4(SWIZZLE_X, + SWIZZLE_Y, + SWIZZLE_Z, + SWIZZLE_Z)}, + {"spotCosCutoff", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW}, + {"spotCutoff", {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX}, + {"spotExponent", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW}, + {"constantAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX}, + {"linearAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY}, + {"quadraticAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ}, +}; + +static const struct gl_builtin_uniform_element gl_LightModel_elements[] = { + {"ambient", {STATE_LIGHTMODEL_AMBIENT, 0}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_FrontLightModelProduct_elements[] = { + {"sceneColor", {STATE_LIGHTMODEL_SCENECOLOR, 0}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_BackLightModelProduct_elements[] = { + {"sceneColor", 
{STATE_LIGHTMODEL_SCENECOLOR, 1}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_FrontLightProduct_elements[] = { + {"ambient", {STATE_LIGHTPROD, 0, 0, STATE_AMBIENT}, SWIZZLE_XYZW}, + {"diffuse", {STATE_LIGHTPROD, 0, 0, STATE_DIFFUSE}, SWIZZLE_XYZW}, + {"specular", {STATE_LIGHTPROD, 0, 0, STATE_SPECULAR}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_BackLightProduct_elements[] = { + {"ambient", {STATE_LIGHTPROD, 0, 1, STATE_AMBIENT}, SWIZZLE_XYZW}, + {"diffuse", {STATE_LIGHTPROD, 0, 1, STATE_DIFFUSE}, SWIZZLE_XYZW}, + {"specular", {STATE_LIGHTPROD, 0, 1, STATE_SPECULAR}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_TextureEnvColor_elements[] = { + {NULL, {STATE_TEXENV_COLOR, 0}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_EyePlaneS_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_S}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_EyePlaneT_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_T}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_EyePlaneR_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_R}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_EyePlaneQ_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_Q}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_ObjectPlaneS_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_S}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_ObjectPlaneT_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_T}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_ObjectPlaneR_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_R}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_ObjectPlaneQ_elements[] = { + {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_Q}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_Fog_elements[] = { + {"color", {STATE_FOG_COLOR}, SWIZZLE_XYZW}, + {"density", {STATE_FOG_PARAMS}, SWIZZLE_XXXX}, + {"start", {STATE_FOG_PARAMS}, SWIZZLE_YYYY}, + {"end", {STATE_FOG_PARAMS}, SWIZZLE_ZZZZ}, + {"scale", {STATE_FOG_PARAMS}, SWIZZLE_WWWW}, +}; + +static const struct gl_builtin_uniform_element gl_NormalScale_elements[] = { + {NULL, {STATE_NORMAL_SCALE}, SWIZZLE_XXXX}, +}; + +static const struct gl_builtin_uniform_element gl_FogParamsOptimizedMESA_elements[] = { + {NULL, {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_CurrentAttribVertMESA_elements[] = { + {NULL, {STATE_INTERNAL, STATE_CURRENT_ATTRIB, 0}, SWIZZLE_XYZW}, +}; + +static const struct gl_builtin_uniform_element gl_CurrentAttribFragMESA_elements[] = { + {NULL, {STATE_INTERNAL, STATE_CURRENT_ATTRIB_MAYBE_VP_CLAMPED, 0}, SWIZZLE_XYZW}, +}; + +#define MATRIX(name, statevar, modifier) \ + static const struct gl_builtin_uniform_element name ## _elements[] = { \ + { NULL, { statevar, 0, 0, 0, modifier}, SWIZZLE_XYZW }, \ + { NULL, { statevar, 0, 1, 1, modifier}, SWIZZLE_XYZW }, \ + { NULL, { statevar, 0, 2, 2, modifier}, SWIZZLE_XYZW }, \ + { NULL, { statevar, 0, 3, 3, modifier}, SWIZZLE_XYZW }, \ + } + +MATRIX(gl_ModelViewMatrix, + STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE); +MATRIX(gl_ModelViewMatrixInverse, + STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS); +MATRIX(gl_ModelViewMatrixTranspose, + STATE_MODELVIEW_MATRIX, 0); 
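+ +/* Note: the pairing of names and modifiers above and below is deliberate, + * not a mix-up. The state-variable mechanism fetches matrix *rows*, while a + * GLSL mat4 is assembled from *columns*; fetching the rows of the transpose + * therefore yields the columns. Hence the plain gl_*Matrix uniforms are + * declared with STATE_MATRIX_TRANSPOSE, and the explicit *Transpose + * variants read the untransposed rows (modifier 0). + */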
+MATRIX(gl_ModelViewMatrixInverseTranspose, + STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE); + +MATRIX(gl_ProjectionMatrix, + STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE); +MATRIX(gl_ProjectionMatrixInverse, + STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS); +MATRIX(gl_ProjectionMatrixTranspose, + STATE_PROJECTION_MATRIX, 0); +MATRIX(gl_ProjectionMatrixInverseTranspose, + STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE); + +MATRIX(gl_ModelViewProjectionMatrix, + STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE); +MATRIX(gl_ModelViewProjectionMatrixInverse, + STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS); +MATRIX(gl_ModelViewProjectionMatrixTranspose, + STATE_MVP_MATRIX, 0); +MATRIX(gl_ModelViewProjectionMatrixInverseTranspose, + STATE_MVP_MATRIX, STATE_MATRIX_INVERSE); + +MATRIX(gl_TextureMatrix, + STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE); +MATRIX(gl_TextureMatrixInverse, + STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS); +MATRIX(gl_TextureMatrixTranspose, + STATE_TEXTURE_MATRIX, 0); +MATRIX(gl_TextureMatrixInverseTranspose, + STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE); + +static const struct gl_builtin_uniform_element gl_NormalMatrix_elements[] = { + { NULL, { STATE_MODELVIEW_MATRIX, 0, 0, 0, STATE_MATRIX_INVERSE}, + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) }, + { NULL, { STATE_MODELVIEW_MATRIX, 0, 1, 1, STATE_MATRIX_INVERSE}, + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) }, + { NULL, { STATE_MODELVIEW_MATRIX, 0, 2, 2, STATE_MATRIX_INVERSE}, + MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) }, +}; + +#undef MATRIX + +#define STATEVAR(name) {#name, name ## _elements, ARRAY_SIZE(name ## _elements)} + +static const struct gl_builtin_uniform_desc _mesa_builtin_uniform_desc[] = { + STATEVAR(gl_NumSamples), + STATEVAR(gl_DepthRange), + STATEVAR(gl_ClipPlane), + STATEVAR(gl_Point), + STATEVAR(gl_FrontMaterial), + STATEVAR(gl_BackMaterial), + STATEVAR(gl_LightSource), + STATEVAR(gl_LightModel), + STATEVAR(gl_FrontLightModelProduct), + STATEVAR(gl_BackLightModelProduct), + STATEVAR(gl_FrontLightProduct), + STATEVAR(gl_BackLightProduct), + STATEVAR(gl_TextureEnvColor), + STATEVAR(gl_EyePlaneS), + STATEVAR(gl_EyePlaneT), + STATEVAR(gl_EyePlaneR), + STATEVAR(gl_EyePlaneQ), + STATEVAR(gl_ObjectPlaneS), + STATEVAR(gl_ObjectPlaneT), + STATEVAR(gl_ObjectPlaneR), + STATEVAR(gl_ObjectPlaneQ), + STATEVAR(gl_Fog), + + STATEVAR(gl_ModelViewMatrix), + STATEVAR(gl_ModelViewMatrixInverse), + STATEVAR(gl_ModelViewMatrixTranspose), + STATEVAR(gl_ModelViewMatrixInverseTranspose), + + STATEVAR(gl_ProjectionMatrix), + STATEVAR(gl_ProjectionMatrixInverse), + STATEVAR(gl_ProjectionMatrixTranspose), + STATEVAR(gl_ProjectionMatrixInverseTranspose), + + STATEVAR(gl_ModelViewProjectionMatrix), + STATEVAR(gl_ModelViewProjectionMatrixInverse), + STATEVAR(gl_ModelViewProjectionMatrixTranspose), + STATEVAR(gl_ModelViewProjectionMatrixInverseTranspose), + + STATEVAR(gl_TextureMatrix), + STATEVAR(gl_TextureMatrixInverse), + STATEVAR(gl_TextureMatrixTranspose), + STATEVAR(gl_TextureMatrixInverseTranspose), + + STATEVAR(gl_NormalMatrix), + STATEVAR(gl_NormalScale), + + STATEVAR(gl_FogParamsOptimizedMESA), + STATEVAR(gl_CurrentAttribVertMESA), + STATEVAR(gl_CurrentAttribFragMESA), + + {NULL, NULL, 0} +}; + + +namespace { + +/** + * Data structure that accumulates fields for the gl_PerVertex interface + * block. 
+ */ +class per_vertex_accumulator +{ +public: + per_vertex_accumulator(); + void add_field(int slot, const glsl_type *type, const char *name); + const glsl_type *construct_interface_instance() const; + +private: + glsl_struct_field fields[10]; + unsigned num_fields; +}; + + +per_vertex_accumulator::per_vertex_accumulator() + : fields(), + num_fields(0) +{ +} + + +void +per_vertex_accumulator::add_field(int slot, const glsl_type *type, + const char *name) +{ + assert(this->num_fields < ARRAY_SIZE(this->fields)); + this->fields[this->num_fields].type = type; + this->fields[this->num_fields].name = name; + this->fields[this->num_fields].matrix_layout = GLSL_MATRIX_LAYOUT_INHERITED; + this->fields[this->num_fields].location = slot; + this->fields[this->num_fields].interpolation = INTERP_QUALIFIER_NONE; + this->fields[this->num_fields].centroid = 0; + this->fields[this->num_fields].sample = 0; + this->fields[this->num_fields].patch = 0; + this->fields[this->num_fields].precision = GLSL_PRECISION_NONE; + this->num_fields++; +} + + +const glsl_type * +per_vertex_accumulator::construct_interface_instance() const +{ + return glsl_type::get_interface_instance(this->fields, this->num_fields, + GLSL_INTERFACE_PACKING_STD140, + "gl_PerVertex"); +} + + +class builtin_variable_generator +{ +public: + builtin_variable_generator(exec_list *instructions, + struct _mesa_glsl_parse_state *state); + void generate_constants(); + void generate_uniforms(); + void generate_vs_special_vars(); + void generate_tcs_special_vars(); + void generate_tes_special_vars(); + void generate_gs_special_vars(); + void generate_fs_special_vars(); + void generate_cs_special_vars(); + void generate_varyings(); + +private: + const glsl_type *array(const glsl_type *base, unsigned elements) + { + return glsl_type::get_array_instance(base, elements); + } + + const glsl_type *type(const char *name) + { + return symtab->get_type(name); + } + + ir_variable *add_input(int slot, const glsl_type *type, const char *name) + { + return add_variable(name, type, ir_var_shader_in, slot); + } + + ir_variable *add_output(int slot, const glsl_type *type, const char *name) + { + return add_variable(name, type, ir_var_shader_out, slot); + } + + ir_variable *add_index_output(int slot, int index, const glsl_type *type, const char *name) + { + return add_index_variable(name, type, ir_var_shader_out, slot, index); + } + + ir_variable *add_system_value(int slot, const glsl_type *type, + const char *name) + { + return add_variable(name, type, ir_var_system_value, slot); + } + + ir_variable *add_variable(const char *name, const glsl_type *type, + enum ir_variable_mode mode, int slot); + ir_variable *add_index_variable(const char *name, const glsl_type *type, + enum ir_variable_mode mode, int slot, int index); + ir_variable *add_uniform(const glsl_type *type, const char *name); + ir_variable *add_const(const char *name, int value); + ir_variable *add_const_ivec3(const char *name, int x, int y, int z); + void add_varying(int slot, const glsl_type *type, const char *name); + + exec_list * const instructions; + struct _mesa_glsl_parse_state * const state; + glsl_symbol_table * const symtab; + + /** + * True if compatibility-profile-only variables should be included. (In + * desktop GL, these are always included when the GLSL version is 1.30 + * or below.)
+ */ + const bool compatibility; + + const glsl_type * const bool_t; + const glsl_type * const int_t; + const glsl_type * const uint_t; + const glsl_type * const float_t; + const glsl_type * const vec2_t; + const glsl_type * const vec3_t; + const glsl_type * const vec4_t; + const glsl_type * const uvec3_t; + const glsl_type * const mat3_t; + const glsl_type * const mat4_t; + + per_vertex_accumulator per_vertex_in; + per_vertex_accumulator per_vertex_out; +}; + + +builtin_variable_generator::builtin_variable_generator( + exec_list *instructions, struct _mesa_glsl_parse_state *state) + : instructions(instructions), state(state), symtab(state->symbols), + compatibility(!state->is_version(140, 100)), + bool_t(glsl_type::bool_type), int_t(glsl_type::int_type), + uint_t(glsl_type::uint_type), + float_t(glsl_type::float_type), vec2_t(glsl_type::vec2_type), + vec3_t(glsl_type::vec3_type), vec4_t(glsl_type::vec4_type), + uvec3_t(glsl_type::uvec3_type), + mat3_t(glsl_type::mat3_type), mat4_t(glsl_type::mat4_type) +{ +} + +ir_variable * +builtin_variable_generator::add_index_variable(const char *name, + const glsl_type *type, + enum ir_variable_mode mode, int slot, int index) +{ + ir_variable *var = new(symtab) ir_variable(type, name, mode); + var->data.how_declared = ir_var_declared_implicitly; + + switch (var->data.mode) { + case ir_var_auto: + case ir_var_shader_in: + case ir_var_uniform: + case ir_var_system_value: + var->data.read_only = true; + break; + case ir_var_shader_out: + case ir_var_shader_storage: + break; + default: + /* The only variables that are added using this function should be + * uniforms, shader storage, shader inputs, shader outputs, constants + * (which use ir_var_auto), and system values. + */ + assert(0); + break; + } + + var->data.location = slot; + var->data.explicit_location = (slot >= 0); + var->data.explicit_index = 1; + var->data.index = index; + + /* Once the variable is created and initialized, add it to the symbol table + * and add the declaration to the IR stream. + */ + instructions->push_tail(var); + + symtab->add_variable(var); + return var; +} + +ir_variable * +builtin_variable_generator::add_variable(const char *name, + const glsl_type *type, + enum ir_variable_mode mode, int slot) +{ + ir_variable *var = new(symtab) ir_variable(type, name, mode); + var->data.how_declared = ir_var_declared_implicitly; + + switch (var->data.mode) { + case ir_var_auto: + case ir_var_shader_in: + case ir_var_uniform: + case ir_var_system_value: + var->data.read_only = true; + break; + case ir_var_shader_out: + case ir_var_shader_storage: + break; + default: + /* The only variables that are added using this function should be + * uniforms, shader storage, shader inputs, shader outputs, constants + * (which use ir_var_auto), and system values. + */ + assert(0); + break; + } + + var->data.location = slot; + var->data.explicit_location = (slot >= 0); + var->data.explicit_index = 0; + + /* Once the variable is created and initialized, add it to the symbol table + * and add the declaration to the IR stream.
+ */ + instructions->push_tail(var); + + symtab->add_variable(var); + return var; +} + + +ir_variable * +builtin_variable_generator::add_uniform(const glsl_type *type, + const char *name) +{ + ir_variable *const uni = add_variable(name, type, ir_var_uniform, -1); + + unsigned i; + for (i = 0; _mesa_builtin_uniform_desc[i].name != NULL; i++) { + if (strcmp(_mesa_builtin_uniform_desc[i].name, name) == 0) { + break; + } + } + + assert(_mesa_builtin_uniform_desc[i].name != NULL); + const struct gl_builtin_uniform_desc* const statevar = + &_mesa_builtin_uniform_desc[i]; + + const unsigned array_count = type->is_array() ? type->length : 1; + + ir_state_slot *slots = + uni->allocate_state_slots(array_count * statevar->num_elements); + + for (unsigned a = 0; a < array_count; a++) { + for (unsigned j = 0; j < statevar->num_elements; j++) { + const struct gl_builtin_uniform_element *element = + &statevar->elements[j]; + + memcpy(slots->tokens, element->tokens, sizeof(element->tokens)); + if (type->is_array()) { + if (strcmp(name, "gl_CurrentAttribVertMESA") == 0 || + strcmp(name, "gl_CurrentAttribFragMESA") == 0) { + slots->tokens[2] = a; + } else { + slots->tokens[1] = a; + } + } + + slots->swizzle = element->swizzle; + slots++; + } + } + + return uni; +} + + +ir_variable * +builtin_variable_generator::add_const(const char *name, int value) +{ + ir_variable *const var = add_variable(name, glsl_type::int_type, + ir_var_auto, -1); + var->constant_value = new(var) ir_constant(value); + var->constant_initializer = new(var) ir_constant(value); + var->data.has_initializer = true; + return var; +} + + +ir_variable * +builtin_variable_generator::add_const_ivec3(const char *name, int x, int y, + int z) +{ + ir_variable *const var = add_variable(name, glsl_type::ivec3_type, + ir_var_auto, -1); + ir_constant_data data; + memset(&data, 0, sizeof(data)); + data.i[0] = x; + data.i[1] = y; + data.i[2] = z; + var->constant_value = new(var) ir_constant(glsl_type::ivec3_type, &data); + var->constant_initializer = + new(var) ir_constant(glsl_type::ivec3_type, &data); + var->data.has_initializer = true; + return var; +} + + +void +builtin_variable_generator::generate_constants() +{ + add_const("gl_MaxVertexAttribs", state->Const.MaxVertexAttribs); + add_const("gl_MaxVertexTextureImageUnits", + state->Const.MaxVertexTextureImageUnits); + add_const("gl_MaxCombinedTextureImageUnits", + state->Const.MaxCombinedTextureImageUnits); + add_const("gl_MaxTextureImageUnits", state->Const.MaxTextureImageUnits); + add_const("gl_MaxDrawBuffers", state->Const.MaxDrawBuffers); + + /* Max uniforms/varyings: GLSL ES counts these in units of vectors; desktop + * GL counts them in units of "components" or "floats". + */ + if (state->es_shader) { + add_const("gl_MaxVertexUniformVectors", + state->Const.MaxVertexUniformComponents / 4); + add_const("gl_MaxFragmentUniformVectors", + state->Const.MaxFragmentUniformComponents / 4); + + /* In GLSL ES 3.00, gl_MaxVaryingVectors was split out to separate + * vertex and fragment shader constants. 
+ */ + if (state->is_version(0, 300)) { + add_const("gl_MaxVertexOutputVectors", + state->ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4); + add_const("gl_MaxFragmentInputVectors", + state->ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents / 4); + } else { + add_const("gl_MaxVaryingVectors", + state->ctx->Const.MaxVarying); + } + + /* EXT_blend_func_extended brings a built-in constant + * for determining the number of dual-source draw buffers + */ + if (state->EXT_blend_func_extended_enable) { + add_const("gl_MaxDualSourceDrawBuffersEXT", + state->Const.MaxDualSourceDrawBuffers); + } + } else { + add_const("gl_MaxVertexUniformComponents", + state->Const.MaxVertexUniformComponents); + + /* Note: gl_MaxVaryingFloats was deprecated in GLSL 1.30+, but not + * removed + */ + add_const("gl_MaxVaryingFloats", state->ctx->Const.MaxVarying * 4); + + add_const("gl_MaxFragmentUniformComponents", + state->Const.MaxFragmentUniformComponents); + } + + /* Texel offsets were introduced in ARB_shading_language_420pack (which + * requires desktop GLSL version 130), and adopted into desktop GLSL + * version 4.20 and GLSL ES version 3.00. + */ + if ((state->is_version(130, 0) && + state->ARB_shading_language_420pack_enable) || + state->is_version(420, 300)) { + add_const("gl_MinProgramTexelOffset", + state->Const.MinProgramTexelOffset); + add_const("gl_MaxProgramTexelOffset", + state->Const.MaxProgramTexelOffset); + } + + if (state->is_version(130, 0)) { + add_const("gl_MaxClipDistances", state->Const.MaxClipPlanes); + add_const("gl_MaxVaryingComponents", state->ctx->Const.MaxVarying * 4); + } + + if (state->has_geometry_shader()) { + add_const("gl_MaxVertexOutputComponents", + state->Const.MaxVertexOutputComponents); + add_const("gl_MaxGeometryInputComponents", + state->Const.MaxGeometryInputComponents); + add_const("gl_MaxGeometryOutputComponents", + state->Const.MaxGeometryOutputComponents); + add_const("gl_MaxFragmentInputComponents", + state->Const.MaxFragmentInputComponents); + add_const("gl_MaxGeometryTextureImageUnits", + state->Const.MaxGeometryTextureImageUnits); + add_const("gl_MaxGeometryOutputVertices", + state->Const.MaxGeometryOutputVertices); + add_const("gl_MaxGeometryTotalOutputComponents", + state->Const.MaxGeometryTotalOutputComponents); + add_const("gl_MaxGeometryUniformComponents", + state->Const.MaxGeometryUniformComponents); + + /* Note: the GLSL 1.50-4.40 specs require + * gl_MaxGeometryVaryingComponents to be present, and to be at least 64. + * But they do not define what it means (and there does not appear to be + * any corresponding constant in the GL specs). However, + * ARB_geometry_shader4 defines MAX_GEOMETRY_VARYING_COMPONENTS_ARB to + * be the maximum number of components available for use as geometry + * outputs. So we assume this is a synonym for + * gl_MaxGeometryOutputComponents. + */ + add_const("gl_MaxGeometryVaryingComponents", + state->Const.MaxGeometryOutputComponents); + } + + if (compatibility) { + /* Note: gl_MaxLights stopped being listed as an explicit constant in + * GLSL 1.30, however it continues to be referred to (as a minimum size + * for compatibility-mode uniforms) all the way up through GLSL 4.30, so + * this seems like it was probably an oversight. + */ + add_const("gl_MaxLights", state->Const.MaxLights); + + add_const("gl_MaxClipPlanes", state->Const.MaxClipPlanes); + + /* Note: gl_MaxTextureUnits wasn't made compatibility-only until GLSL + * 1.50, however this seems like it was probably an oversight.
+ */ + add_const("gl_MaxTextureUnits", state->Const.MaxTextureUnits); + + /* Note: gl_MaxTextureCoords was left out of GLSL 1.40, but it was + * re-introduced in GLSL 1.50, so this seems like it was probably an + * oversight. + */ + add_const("gl_MaxTextureCoords", state->Const.MaxTextureCoords); + } + + if (state->has_atomic_counters()) { + add_const("gl_MaxVertexAtomicCounters", + state->Const.MaxVertexAtomicCounters); + add_const("gl_MaxFragmentAtomicCounters", + state->Const.MaxFragmentAtomicCounters); + add_const("gl_MaxCombinedAtomicCounters", + state->Const.MaxCombinedAtomicCounters); + add_const("gl_MaxAtomicCounterBindings", + state->Const.MaxAtomicBufferBindings); + + if (state->has_geometry_shader()) { + add_const("gl_MaxGeometryAtomicCounters", + state->Const.MaxGeometryAtomicCounters); + } + if (!state->es_shader) { + add_const("gl_MaxTessControlAtomicCounters", + state->Const.MaxTessControlAtomicCounters); + add_const("gl_MaxTessEvaluationAtomicCounters", + state->Const.MaxTessEvaluationAtomicCounters); + } + } + + if (state->is_version(420, 310)) { + add_const("gl_MaxVertexAtomicCounterBuffers", + state->Const.MaxVertexAtomicCounterBuffers); + add_const("gl_MaxFragmentAtomicCounterBuffers", + state->Const.MaxFragmentAtomicCounterBuffers); + add_const("gl_MaxCombinedAtomicCounterBuffers", + state->Const.MaxCombinedAtomicCounterBuffers); + add_const("gl_MaxAtomicCounterBufferSize", + state->Const.MaxAtomicCounterBufferSize); + + if (state->has_geometry_shader()) { + add_const("gl_MaxGeometryAtomicCounterBuffers", + state->Const.MaxGeometryAtomicCounterBuffers); + } + if (!state->es_shader) { + add_const("gl_MaxTessControlAtomicCounterBuffers", + state->Const.MaxTessControlAtomicCounterBuffers); + add_const("gl_MaxTessEvaluationAtomicCounterBuffers", + state->Const.MaxTessEvaluationAtomicCounterBuffers); + } + } + + if (state->is_version(430, 310) || state->ARB_compute_shader_enable) { + add_const("gl_MaxComputeAtomicCounterBuffers", MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS); + add_const("gl_MaxComputeAtomicCounters", MAX_COMPUTE_ATOMIC_COUNTERS); + add_const("gl_MaxComputeImageUniforms", MAX_COMPUTE_IMAGE_UNIFORMS); + add_const("gl_MaxComputeTextureImageUnits", MAX_COMPUTE_TEXTURE_IMAGE_UNITS); + add_const("gl_MaxComputeUniformComponents", MAX_COMPUTE_UNIFORM_COMPONENTS); + + add_const_ivec3("gl_MaxComputeWorkGroupCount", + state->Const.MaxComputeWorkGroupCount[0], + state->Const.MaxComputeWorkGroupCount[1], + state->Const.MaxComputeWorkGroupCount[2]); + add_const_ivec3("gl_MaxComputeWorkGroupSize", + state->Const.MaxComputeWorkGroupSize[0], + state->Const.MaxComputeWorkGroupSize[1], + state->Const.MaxComputeWorkGroupSize[2]); + + /* From the GLSL 4.40 spec, section 7.1 (Built-In Language Variables): + * + * The built-in constant gl_WorkGroupSize is a compute-shader + * constant containing the local work-group size of the shader. The + * size of the work group in the X, Y, and Z dimensions is stored in + * the x, y, and z components. The constant values in + * gl_WorkGroupSize will match those specified in the required + * local_size_x, local_size_y, and local_size_z layout qualifiers + * for the current shader. This is a constant so that it can be + * used to size arrays of memory that can be shared within the local + * work group. It is a compile-time error to use gl_WorkGroupSize + * in a shader that does not declare a fixed local group size, or + * before that shader has declared a fixed local group size, using + * local_size_x, local_size_y, and local_size_z.
+ * + * To prevent the shader from trying to refer to gl_WorkGroupSize before + * the layout declaration, we don't define it here. Instead we define it + * in ast_cs_input_layout::hir(). + */ + } + + if (state->is_version(420, 310) || + state->ARB_shader_image_load_store_enable) { + add_const("gl_MaxImageUnits", + state->Const.MaxImageUnits); + add_const("gl_MaxVertexImageUniforms", + state->Const.MaxVertexImageUniforms); + add_const("gl_MaxFragmentImageUniforms", + state->Const.MaxFragmentImageUniforms); + add_const("gl_MaxCombinedImageUniforms", + state->Const.MaxCombinedImageUniforms); + + if (state->has_geometry_shader()) { + add_const("gl_MaxGeometryImageUniforms", + state->Const.MaxGeometryImageUniforms); + } + + if (!state->es_shader) { + add_const("gl_MaxCombinedImageUnitsAndFragmentOutputs", + state->Const.MaxCombinedShaderOutputResources); + add_const("gl_MaxImageSamples", + state->Const.MaxImageSamples); + } + + if (state->is_version(450, 310)) { + add_const("gl_MaxCombinedShaderOutputResources", + state->Const.MaxCombinedShaderOutputResources); + } + + if (state->is_version(400, 0) || + state->ARB_tessellation_shader_enable) { + add_const("gl_MaxTessControlImageUniforms", + state->Const.MaxTessControlImageUniforms); + add_const("gl_MaxTessEvaluationImageUniforms", + state->Const.MaxTessEvaluationImageUniforms); + } + } + + if (state->is_version(410, 0) || + state->ARB_viewport_array_enable) + add_const("gl_MaxViewports", state->Const.MaxViewports); + + if (state->is_version(400, 0) || + state->ARB_tessellation_shader_enable) { + add_const("gl_MaxPatchVertices", state->Const.MaxPatchVertices); + add_const("gl_MaxTessGenLevel", state->Const.MaxTessGenLevel); + add_const("gl_MaxTessControlInputComponents", state->Const.MaxTessControlInputComponents); + add_const("gl_MaxTessControlOutputComponents", state->Const.MaxTessControlOutputComponents); + add_const("gl_MaxTessControlTextureImageUnits", state->Const.MaxTessControlTextureImageUnits); + add_const("gl_MaxTessEvaluationInputComponents", state->Const.MaxTessEvaluationInputComponents); + add_const("gl_MaxTessEvaluationOutputComponents", state->Const.MaxTessEvaluationOutputComponents); + add_const("gl_MaxTessEvaluationTextureImageUnits", state->Const.MaxTessEvaluationTextureImageUnits); + add_const("gl_MaxTessPatchComponents", state->Const.MaxTessPatchComponents); + add_const("gl_MaxTessControlTotalOutputComponents", state->Const.MaxTessControlTotalOutputComponents); + add_const("gl_MaxTessControlUniformComponents", state->Const.MaxTessControlUniformComponents); + add_const("gl_MaxTessEvaluationUniformComponents", state->Const.MaxTessEvaluationUniformComponents); + } +} + + +/** + * Generate uniform variables (which exist in all types of shaders).
+ */ +void +builtin_variable_generator::generate_uniforms() +{ + if (state->is_version(400, 0) || state->ARB_sample_shading_enable) + add_uniform(int_t, "gl_NumSamples"); + add_uniform(type("gl_DepthRangeParameters"), "gl_DepthRange"); + add_uniform(array(vec4_t, VERT_ATTRIB_MAX), "gl_CurrentAttribVertMESA"); + add_uniform(array(vec4_t, VARYING_SLOT_MAX), "gl_CurrentAttribFragMESA"); + + if (compatibility) { + add_uniform(mat4_t, "gl_ModelViewMatrix"); + add_uniform(mat4_t, "gl_ProjectionMatrix"); + add_uniform(mat4_t, "gl_ModelViewProjectionMatrix"); + add_uniform(mat3_t, "gl_NormalMatrix"); + add_uniform(mat4_t, "gl_ModelViewMatrixInverse"); + add_uniform(mat4_t, "gl_ProjectionMatrixInverse"); + add_uniform(mat4_t, "gl_ModelViewProjectionMatrixInverse"); + add_uniform(mat4_t, "gl_ModelViewMatrixTranspose"); + add_uniform(mat4_t, "gl_ProjectionMatrixTranspose"); + add_uniform(mat4_t, "gl_ModelViewProjectionMatrixTranspose"); + add_uniform(mat4_t, "gl_ModelViewMatrixInverseTranspose"); + add_uniform(mat4_t, "gl_ProjectionMatrixInverseTranspose"); + add_uniform(mat4_t, "gl_ModelViewProjectionMatrixInverseTranspose"); + add_uniform(float_t, "gl_NormalScale"); + add_uniform(type("gl_LightModelParameters"), "gl_LightModel"); + add_uniform(vec4_t, "gl_FogParamsOptimizedMESA"); + + const glsl_type *const mat4_array_type = + array(mat4_t, state->Const.MaxTextureCoords); + add_uniform(mat4_array_type, "gl_TextureMatrix"); + add_uniform(mat4_array_type, "gl_TextureMatrixInverse"); + add_uniform(mat4_array_type, "gl_TextureMatrixTranspose"); + add_uniform(mat4_array_type, "gl_TextureMatrixInverseTranspose"); + + add_uniform(array(vec4_t, state->Const.MaxClipPlanes), "gl_ClipPlane"); + add_uniform(type("gl_PointParameters"), "gl_Point"); + + const glsl_type *const material_parameters_type = + type("gl_MaterialParameters"); + add_uniform(material_parameters_type, "gl_FrontMaterial"); + add_uniform(material_parameters_type, "gl_BackMaterial"); + + add_uniform(array(type("gl_LightSourceParameters"), + state->Const.MaxLights), + "gl_LightSource"); + + const glsl_type *const light_model_products_type = + type("gl_LightModelProducts"); + add_uniform(light_model_products_type, "gl_FrontLightModelProduct"); + add_uniform(light_model_products_type, "gl_BackLightModelProduct"); + + const glsl_type *const light_products_type = + array(type("gl_LightProducts"), state->Const.MaxLights); + add_uniform(light_products_type, "gl_FrontLightProduct"); + add_uniform(light_products_type, "gl_BackLightProduct"); + + add_uniform(array(vec4_t, state->Const.MaxTextureUnits), + "gl_TextureEnvColor"); + + const glsl_type *const texcoords_vec4 = + array(vec4_t, state->Const.MaxTextureCoords); + add_uniform(texcoords_vec4, "gl_EyePlaneS"); + add_uniform(texcoords_vec4, "gl_EyePlaneT"); + add_uniform(texcoords_vec4, "gl_EyePlaneR"); + add_uniform(texcoords_vec4, "gl_EyePlaneQ"); + add_uniform(texcoords_vec4, "gl_ObjectPlaneS"); + add_uniform(texcoords_vec4, "gl_ObjectPlaneT"); + add_uniform(texcoords_vec4, "gl_ObjectPlaneR"); + add_uniform(texcoords_vec4, "gl_ObjectPlaneQ"); + + add_uniform(type("gl_FogParameters"), "gl_Fog"); + } +} + + +/** + * Generate variables which only exist in vertex shaders. 
+ */ +void +builtin_variable_generator::generate_vs_special_vars() +{ + ir_variable *var; + + if (state->is_version(130, 300)) + add_system_value(SYSTEM_VALUE_VERTEX_ID, int_t, "gl_VertexID"); + if (state->ARB_draw_instanced_enable) + add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB"); + if (state->ARB_draw_instanced_enable || state->is_version(140, 300)) + add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceID"); + if (state->ARB_shader_draw_parameters_enable) { + add_system_value(SYSTEM_VALUE_BASE_VERTEX, int_t, "gl_BaseVertexARB"); + add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, "gl_BaseInstanceARB"); + add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB"); + } + if (state->AMD_vertex_shader_layer_enable) { + var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + if (state->AMD_vertex_shader_viewport_index_enable) { + var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + if (compatibility) { + add_input(VERT_ATTRIB_POS, vec4_t, "gl_Vertex"); + add_input(VERT_ATTRIB_NORMAL, vec3_t, "gl_Normal"); + add_input(VERT_ATTRIB_COLOR0, vec4_t, "gl_Color"); + add_input(VERT_ATTRIB_COLOR1, vec4_t, "gl_SecondaryColor"); + add_input(VERT_ATTRIB_TEX0, vec4_t, "gl_MultiTexCoord0"); + add_input(VERT_ATTRIB_TEX1, vec4_t, "gl_MultiTexCoord1"); + add_input(VERT_ATTRIB_TEX2, vec4_t, "gl_MultiTexCoord2"); + add_input(VERT_ATTRIB_TEX3, vec4_t, "gl_MultiTexCoord3"); + add_input(VERT_ATTRIB_TEX4, vec4_t, "gl_MultiTexCoord4"); + add_input(VERT_ATTRIB_TEX5, vec4_t, "gl_MultiTexCoord5"); + add_input(VERT_ATTRIB_TEX6, vec4_t, "gl_MultiTexCoord6"); + add_input(VERT_ATTRIB_TEX7, vec4_t, "gl_MultiTexCoord7"); + add_input(VERT_ATTRIB_FOG, float_t, "gl_FogCoord"); + } +} + + +/** + * Generate variables which only exist in tessellation control shaders. + */ +void +builtin_variable_generator::generate_tcs_special_vars() +{ + add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); + add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn"); + add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID"); + + add_output(VARYING_SLOT_TESS_LEVEL_OUTER, array(float_t, 4), + "gl_TessLevelOuter")->data.patch = 1; + add_output(VARYING_SLOT_TESS_LEVEL_INNER, array(float_t, 2), + "gl_TessLevelInner")->data.patch = 1; +} + + +/** + * Generate variables which only exist in tessellation evaluation shaders. + */ +void +builtin_variable_generator::generate_tes_special_vars() +{ + add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); + add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn"); + add_system_value(SYSTEM_VALUE_TESS_COORD, vec3_t, "gl_TessCoord"); + add_system_value(SYSTEM_VALUE_TESS_LEVEL_OUTER, array(float_t, 4), + "gl_TessLevelOuter"); + add_system_value(SYSTEM_VALUE_TESS_LEVEL_INNER, array(float_t, 2), + "gl_TessLevelInner"); +} + + +/** + * Generate variables which only exist in geometry shaders. 
+ */ +void +builtin_variable_generator::generate_gs_special_vars() +{ + ir_variable *var; + + var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + if (state->is_version(410, 0) || state->ARB_viewport_array_enable) { + var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) + add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID"); + + /* Although gl_PrimitiveID appears in tessellation control and tessellation + * evaluation shaders, it has a different function there than it has in + * geometry shaders, so we treat it (and its counterpart gl_PrimitiveIDIn) + * as special geometry shader variables. + * + * Note that although the general convention of suffixing geometry shader + * input varyings with "In" was not adopted into GLSL 1.50, it is used in + * the specific case of gl_PrimitiveIDIn. So we don't need to treat + * gl_PrimitiveIDIn as an {ARB,EXT}_geometry_shader4-only variable. + */ + var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveIDIn"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + var = add_output(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; +} + + +/** + * Generate variables which only exist in fragment shaders. + */ +void +builtin_variable_generator::generate_fs_special_vars() +{ + ir_variable *var; + + if (this->state->ctx->Const.GLSLFragCoordIsSysVal) + add_system_value(SYSTEM_VALUE_FRAG_COORD, vec4_t, "gl_FragCoord"); + else + add_input(VARYING_SLOT_POS, vec4_t, "gl_FragCoord"); + + if (this->state->ctx->Const.GLSLFrontFacingIsSysVal) + add_system_value(SYSTEM_VALUE_FRONT_FACE, bool_t, "gl_FrontFacing"); + else + add_input(VARYING_SLOT_FACE, bool_t, "gl_FrontFacing"); + + if (state->is_version(120, 100)) + add_input(VARYING_SLOT_PNTC, vec2_t, "gl_PointCoord"); + + if (state->has_geometry_shader()) { + var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + + /* gl_FragColor and gl_FragData were deprecated starting in desktop GLSL + * 1.30, and were relegated to the compatibility profile in GLSL 4.20. + * They were removed from GLSL ES 3.00. + */ + if (compatibility || !state->is_version(420, 300)) { + add_output(FRAG_RESULT_COLOR, vec4_t, "gl_FragColor"); + add_output(FRAG_RESULT_DATA0, + array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData"); + } + + if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) { + /* We make an assumption here that there will only ever be one dual-source + * draw buffer. In case this assumption is ever proven to be false, make + * sure to assert here since we don't handle this case. + * In practice, this issue will never arise since no hardware will support + * more than one. + */ + assert(state->Const.MaxDualSourceDrawBuffers <= 1); + add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, "gl_SecondaryFragColorEXT"); + add_index_output(FRAG_RESULT_DATA0, 1, + array(vec4_t, state->Const.MaxDualSourceDrawBuffers), + "gl_SecondaryFragDataEXT"); + } + + /* gl_FragDepth has always been in desktop GLSL, but did not appear in GLSL + * ES 1.00.
+ */ + if (state->is_version(110, 300)) + add_output(FRAG_RESULT_DEPTH, float_t, "gl_FragDepth"); + + if (state->ARB_shader_stencil_export_enable) { + ir_variable *const var = + add_output(FRAG_RESULT_STENCIL, int_t, "gl_FragStencilRefARB"); + if (state->ARB_shader_stencil_export_warn) + var->enable_extension_warning("GL_ARB_shader_stencil_export"); + } + + if (state->AMD_shader_stencil_export_enable) { + ir_variable *const var = + add_output(FRAG_RESULT_STENCIL, int_t, "gl_FragStencilRefAMD"); + if (state->AMD_shader_stencil_export_warn) + var->enable_extension_warning("GL_AMD_shader_stencil_export"); + } + + if (state->is_version(400, 0) || state->ARB_sample_shading_enable) { + add_system_value(SYSTEM_VALUE_SAMPLE_ID, int_t, "gl_SampleID"); + add_system_value(SYSTEM_VALUE_SAMPLE_POS, vec2_t, "gl_SamplePosition"); + /* From the ARB_sample_shading specification: + * "The number of elements in the array is ceil(<s>/32), where + * <s> is the maximum number of color samples supported by the + * implementation." + * Since no drivers expose more than 32x MSAA, we can simply set + * the array size to 1 rather than computing it. + */ + add_output(FRAG_RESULT_SAMPLE_MASK, array(int_t, 1), "gl_SampleMask"); + } + + if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) { + add_system_value(SYSTEM_VALUE_SAMPLE_MASK_IN, array(int_t, 1), "gl_SampleMaskIn"); + } + + if (state->is_version(430, 0) || state->ARB_fragment_layer_viewport_enable) { + var = add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + var = add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); + var->data.interpolation = INTERP_QUALIFIER_FLAT; + } + + if (state->is_version(450, 310)/* || state->ARB_ES3_1_compatibility_enable*/) + add_system_value(SYSTEM_VALUE_HELPER_INVOCATION, bool_t, "gl_HelperInvocation"); +} + + +/** + * Generate variables which only exist in compute shaders. + */ +void +builtin_variable_generator::generate_cs_special_vars() +{ + add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, uvec3_t, + "gl_LocalInvocationID"); + add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID"); + add_system_value(SYSTEM_VALUE_NUM_WORK_GROUPS, uvec3_t, "gl_NumWorkGroups"); + add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0); + add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0); +} + + +/** + * Add a single "varying" variable. The variable's type and direction (input + * or output) are adjusted as appropriate for the type of shader being + * compiled. + */ +void +builtin_variable_generator::add_varying(int slot, const glsl_type *type, + const char *name) +{ + switch (state->stage) { + case MESA_SHADER_TESS_CTRL: + case MESA_SHADER_TESS_EVAL: + case MESA_SHADER_GEOMETRY: + this->per_vertex_in.add_field(slot, type, name); + /* FALLTHROUGH */ + case MESA_SHADER_VERTEX: + this->per_vertex_out.add_field(slot, type, name); + break; + case MESA_SHADER_FRAGMENT: + add_input(slot, type, name); + break; + case MESA_SHADER_COMPUTE: + /* Compute shaders don't have varyings. */ + break; + } +} + + +/** + * Generate variables that are used to communicate data from one shader stage + * to the next ("varyings"). + */ +void +builtin_variable_generator::generate_varyings() +{ + /* gl_Position and gl_PointSize are not visible from fragment shaders.
*/ + if (state->stage != MESA_SHADER_FRAGMENT) { + add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position"); + add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize"); + } + + if (state->is_version(130, 0)) { + add_varying(VARYING_SLOT_CLIP_DIST0, array(float_t, 0), + "gl_ClipDistance"); + } + + if (compatibility) { + add_varying(VARYING_SLOT_TEX0, array(vec4_t, 0), "gl_TexCoord"); + add_varying(VARYING_SLOT_FOGC, float_t, "gl_FogFragCoord"); + if (state->stage == MESA_SHADER_FRAGMENT) { + add_varying(VARYING_SLOT_COL0, vec4_t, "gl_Color"); + add_varying(VARYING_SLOT_COL1, vec4_t, "gl_SecondaryColor"); + } else { + add_varying(VARYING_SLOT_CLIP_VERTEX, vec4_t, "gl_ClipVertex"); + add_varying(VARYING_SLOT_COL0, vec4_t, "gl_FrontColor"); + add_varying(VARYING_SLOT_BFC0, vec4_t, "gl_BackColor"); + add_varying(VARYING_SLOT_COL1, vec4_t, "gl_FrontSecondaryColor"); + add_varying(VARYING_SLOT_BFC1, vec4_t, "gl_BackSecondaryColor"); + } + } + + /* Section 7.1 (Built-In Language Variables) of the GLSL 4.00 spec + * says: + * + * "In the tessellation control language, built-in variables are + * intrinsically declared as: + * + * in gl_PerVertex { + * vec4 gl_Position; + * float gl_PointSize; + * float gl_ClipDistance[]; + * } gl_in[gl_MaxPatchVertices];" + */ + if (state->stage == MESA_SHADER_TESS_CTRL || + state->stage == MESA_SHADER_TESS_EVAL) { + const glsl_type *per_vertex_in_type = + this->per_vertex_in.construct_interface_instance(); + add_variable("gl_in", array(per_vertex_in_type, state->Const.MaxPatchVertices), + ir_var_shader_in, -1); + } + if (state->stage == MESA_SHADER_GEOMETRY) { + const glsl_type *per_vertex_in_type = + this->per_vertex_in.construct_interface_instance(); + add_variable("gl_in", array(per_vertex_in_type, 0), + ir_var_shader_in, -1); + } + if (state->stage == MESA_SHADER_TESS_CTRL) { + const glsl_type *per_vertex_out_type = + this->per_vertex_out.construct_interface_instance(); + add_variable("gl_out", array(per_vertex_out_type, 0), + ir_var_shader_out, -1); + } + if (state->stage == MESA_SHADER_VERTEX || + state->stage == MESA_SHADER_TESS_EVAL || + state->stage == MESA_SHADER_GEOMETRY) { + const glsl_type *per_vertex_out_type = + this->per_vertex_out.construct_interface_instance(); + const glsl_struct_field *fields = per_vertex_out_type->fields.structure; + for (unsigned i = 0; i < per_vertex_out_type->length; i++) { + ir_variable *var = + add_variable(fields[i].name, fields[i].type, ir_var_shader_out, + fields[i].location); + var->data.interpolation = fields[i].interpolation; + var->data.centroid = fields[i].centroid; + var->data.sample = fields[i].sample; + var->data.patch = fields[i].patch; + var->data.precision = fields[i].precision; + var->init_interface_type(per_vertex_out_type); + } + } +} + + +}; /* Anonymous namespace */ + + +void +_mesa_glsl_initialize_variables(exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + builtin_variable_generator gen(instructions, state); + + gen.generate_constants(); + gen.generate_uniforms(); + + gen.generate_varyings(); + + switch (state->stage) { + case MESA_SHADER_VERTEX: + gen.generate_vs_special_vars(); + break; + case MESA_SHADER_TESS_CTRL: + gen.generate_tcs_special_vars(); + break; + case MESA_SHADER_TESS_EVAL: + gen.generate_tes_special_vars(); + break; + case MESA_SHADER_GEOMETRY: + gen.generate_gs_special_vars(); + break; + case MESA_SHADER_FRAGMENT: + gen.generate_fs_special_vars(); + break; + case MESA_SHADER_COMPUTE: + gen.generate_cs_special_vars(); + break; + } +} + + +/** + * Initialize compute 
shader variables with values that are derived from other + * compute shader variables. + */ +static void +initialize_cs_derived_variables(gl_shader *shader, + ir_function_signature *const main_sig) +{ + assert(shader->Stage == MESA_SHADER_COMPUTE); + + ir_variable *gl_GlobalInvocationID = + shader->symbols->get_variable("gl_GlobalInvocationID"); + assert(gl_GlobalInvocationID); + ir_variable *gl_WorkGroupID = + shader->symbols->get_variable("gl_WorkGroupID"); + assert(gl_WorkGroupID); + ir_variable *gl_WorkGroupSize = + shader->symbols->get_variable("gl_WorkGroupSize"); + if (gl_WorkGroupSize == NULL) { + void *const mem_ctx = ralloc_parent(shader->ir); + gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type, + "gl_WorkGroupSize", + ir_var_auto); + gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly; + gl_WorkGroupSize->data.read_only = true; + shader->ir->push_head(gl_WorkGroupSize); + } + ir_variable *gl_LocalInvocationID = + shader->symbols->get_variable("gl_LocalInvocationID"); + assert(gl_LocalInvocationID); + + /* gl_GlobalInvocationID = + * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID + */ + ir_instruction *inst = + assign(gl_GlobalInvocationID, + add(mul(gl_WorkGroupID, gl_WorkGroupSize), + gl_LocalInvocationID)); + main_sig->body.push_head(inst); + + /* gl_LocalInvocationIndex = + * gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + + * gl_LocalInvocationID.y * gl_WorkGroupSize.x + + * gl_LocalInvocationID.x; + */ + ir_expression *index_z = + mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)), + swizzle_y(gl_WorkGroupSize)); + ir_expression *index_y = + mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)); + ir_expression *index_y_plus_z = add(index_y, index_z); + operand index_x(swizzle_x(gl_LocalInvocationID)); + ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x); + ir_variable *gl_LocalInvocationIndex = + shader->symbols->get_variable("gl_LocalInvocationIndex"); + assert(gl_LocalInvocationIndex); + inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z); + main_sig->body.push_head(inst); +} + + +/** + * Initialize builtin variables with values based on other builtin variables. + * These are initialized in the main function. + */ +void +_mesa_glsl_initialize_derived_variables(gl_shader *shader) +{ + /* We only need to set CS variables currently. */ + if (shader->Stage != MESA_SHADER_COMPUTE) + return; + + ir_function_signature *const main_sig = + _mesa_get_main_function_signature(shader); + if (main_sig == NULL) + return; + + initialize_cs_derived_variables(shader, main_sig); +} diff --git a/src/compiler/glsl/glcpp/.gitignore b/src/compiler/glsl/glcpp/.gitignore new file mode 100644 index 00000000000..24a7119caa4 --- /dev/null +++ b/src/compiler/glsl/glcpp/.gitignore @@ -0,0 +1,6 @@ +glcpp +glcpp-lex.c +glcpp-parse.output +glcpp-parse.c +glcpp-parse.h +tests/*.out diff --git a/src/compiler/glsl/glcpp/README b/src/compiler/glsl/glcpp/README new file mode 100644 index 00000000000..0637935e28b --- /dev/null +++ b/src/compiler/glsl/glcpp/README @@ -0,0 +1,30 @@ +glcpp -- GLSL "C" preprocessor + +This is a simple preprocessor designed to provide the preprocessing
The requirements for this preprocessor are +specified in the GLSL 1.30 specification available from: + +http://www.opengl.org/registry/doc/GLSLangSpec.Full.1.30.10.pdf + +This specification is not precise on some semantics, (for example, +#define and #if), defining these merely "as is standard for C++ +preprocessors". To fill in these details, I've been using a draft of +the C99 standard as available from: + +http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf + +Any downstream compiler accepting output from glcpp should be prepared +to encounter and deal with the following preprocessor directives: + + #line + #pragma + #extension + +All other directives will be handled according to the GLSL specification +and will not appear in the output. + +Known limitations +----------------- +A file that ends with a function-like macro name as the last +non-whitespace token will result in a parse error, (where it should be +passed through as is). \ No newline at end of file diff --git a/src/compiler/glsl/glcpp/glcpp-lex.l b/src/compiler/glsl/glcpp/glcpp-lex.l new file mode 100644 index 00000000000..fa9aa506912 --- /dev/null +++ b/src/compiler/glsl/glcpp/glcpp-lex.l @@ -0,0 +1,577 @@ +%{ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +#include "glcpp.h" +#include "glcpp-parse.h" + +/* Flex annoyingly generates some functions without making them + * static. Let's declare them here. */ +int glcpp_get_column (yyscan_t yyscanner); +void glcpp_set_column (int column_no, yyscan_t yyscanner); + +#ifdef _MSC_VER +#define YY_NO_UNISTD_H +#endif + +#define YY_NO_INPUT + +#define YY_USER_ACTION \ + do { \ + if (parser->has_new_line_number) \ + yylineno = parser->new_line_number; \ + if (parser->has_new_source_number) \ + yylloc->source = parser->new_source_number; \ + yylloc->first_column = yycolumn + 1; \ + yylloc->first_line = yylloc->last_line = yylineno; \ + yycolumn += yyleng; \ + yylloc->last_column = yycolumn + 1; \ + parser->has_new_line_number = 0; \ + parser->has_new_source_number = 0; \ + } while(0); + +#define YY_USER_INIT \ + do { \ + yylineno = 1; \ + yycolumn = 0; \ + yylloc->source = 0; \ + } while(0) + +/* It's ugly to have macros that have return statements inside of + * them, but flex-based lexer generation is all built around the + * return statement.
+ * + * To mitigate the ugliness, we defer as much of the logic as possible + * to an actual function, not a macro (see + * glcpp_lex_update_state_per_token) and we make the word RETURN + * prominent in all of the macros which may return. + * + * The most-commonly-used macro is RETURN_TOKEN which will perform all + * necessary state updates based on the provided token, then + * conditionally return the token. It will not return a token if the + * parser is currently skipping tokens, (such as within #if + * 0...#else). + * + * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that + * makes the token returning unconditional. This is needed for things + * like #if and the tokens of its condition, (since these must be + * evaluated by the parser even when otherwise skipping). + * + * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top + * of RETURN_TOKEN that performs a string copy of yytext before the + * return. + */ +#define RETURN_TOKEN_NEVER_SKIP(token) \ + do { \ + if (glcpp_lex_update_state_per_token (parser, token)) \ + return token; \ + } while (0) + +#define RETURN_TOKEN(token) \ + do { \ + if (! parser->skipping) { \ + RETURN_TOKEN_NEVER_SKIP(token); \ + } \ + } while(0) + +#define RETURN_STRING_TOKEN(token) \ + do { \ + if (! parser->skipping) { \ + yylval->str = ralloc_strdup (yyextra, yytext); \ + RETURN_TOKEN_NEVER_SKIP (token); \ + } \ + } while(0) + + +/* Update all state necessary for each token being returned. + * + * Here we'll be tracking newlines and spaces so that the lexer can + * alter its behavior as necessary, (for example, '#' has special + * significance if it is the first non-whitespace, non-comment token + * in a line, but does not otherwise). + * + * NOTE: If this function returns FALSE, then no token should be + * returned at all. This is used to suppress duplicate SPACE tokens. + */ +static int +glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token) +{ + /* After the first non-space token in a line, we won't + * allow any '#' to introduce a directive. */ + if (token == NEWLINE) { + parser->first_non_space_token_this_line = 1; + } else if (token != SPACE) { + parser->first_non_space_token_this_line = 0; + } + + /* Track newlines just to know whether a newline needs + * to be inserted if end-of-file comes early. */ + if (token == NEWLINE) { + parser->last_token_was_newline = 1; + } else { + parser->last_token_was_newline = 0; + } + + /* Track spaces to avoid emitting multiple SPACE + * tokens in a row. */ + if (token == SPACE) { + if (! parser->last_token_was_space) { + parser->last_token_was_space = 1; + return 1; + } else { + parser->last_token_was_space = 1; + return 0; + } + } else { + parser->last_token_was_space = 0; + return 1; + } +} + + +%} + +%option bison-bridge bison-locations reentrant noyywrap +%option extra-type="glcpp_parser_t *" +%option prefix="glcpp_" +%option stack +%option never-interactive +%option warn nodefault + + /* Note: When adding any start conditions to this list, you must also + * update the "Internal compiler error" catch-all rule near the end of + * this file. */ + +%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE + +SPACE [[:space:]] +NONSPACE [^[:space:]] +HSPACE [ \t] +HASH # +NEWLINE (\r\n|\n\r|\r|\n) +IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* +PP_NUMBER [.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])* +PUNCTUATION [][(){}.&*~!/%<>^|;,=+-] + +/* The OTHER class is simply a catch-all for things that the CPP +parser just doesn't care about.
Since flex regular expressions that +match longer strings take priority over those matching shorter +strings, we have to be careful to avoid OTHER matching and hiding +something that CPP does care about. So we simply exclude all +characters that appear in any other expressions. */ + +OTHER [^][_#[:space:]#a-zA-Z0-9(){}.&*~!/%<>^|;,=+-] + +DIGITS [0-9][0-9]* +DECIMAL_INTEGER [1-9][0-9]*[uU]? +OCTAL_INTEGER 0[0-7]*[uU]? +HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]? + +%% + + glcpp_parser_t *parser = yyextra; + + /* When we lex a multi-line comment, we replace it (as + * specified) with a single space. But if the comment spanned + * multiple lines, then subsequent parsing stages will not + * count correct line numbers. To avoid this problem we keep + * track of all newlines that were commented out by a + * multi-line comment, and we emit a NEWLINE token for each at + * the next legal opportunity, (which is when the lexer would + * be emitting a NEWLINE token anyway). + */ + if (YY_START == NEWLINE_CATCHUP) { + if (parser->commented_newlines) + parser->commented_newlines--; + if (parser->commented_newlines == 0) + BEGIN INITIAL; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); + } + + /* Set up the parser->skipping bit here before doing any lexing. + * + * This bit controls whether tokens are skipped, (as implemented by + * RETURN_TOKEN), such as between "#if 0" and "#endif". + * + * The parser maintains a skip_stack indicating whether we should be + * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif) will + * push and pop items from the stack. + * + * Here are the rules for determining whether we are skipping: + * + * 1. If the skip stack is NULL, we are outside of all #if blocks + * and we are not skipping. + * + * 2. If the skip stack is non-NULL, the type of the top node in + * the stack determines whether to skip. A type of + * SKIP_NO_SKIP is used for blocks wheere we are emitting + * tokens, (such as between #if 1 and #endif, or after the + * #else of an #if 0, etc.). + * + * 3. The lexing_directive bit overrides the skip stack. This bit + * is set when we are actively lexing the expression for a + * pre-processor condition, (such as #if, #elif, or #else). In + * this case, even if otherwise skipping, we need to emit the + * tokens for this condition so that the parser can evaluate + * the expression. (For, #else, there's no expression, but we + * emit tokens so the parser can generate a nice error message + * if there are any tokens here). + */ + if (parser->skip_stack && + parser->skip_stack->type != SKIP_NO_SKIP && + ! parser->lexing_directive) + { + parser->skipping = 1; + } else { + parser->skipping = 0; + } + + /* Single-line comments */ +"//"[^\r\n]* { +} + + /* Multi-line comments */ +"/*" { yy_push_state(COMMENT, yyscanner); } +[^*\r\n]* +[^*\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } +"*"+[^*/\r\n]* +"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } +"*"+"/" { + yy_pop_state(yyscanner); + /* In the start condition, we don't want any SPACE token. */ + if (yyextra->space_tokens && YY_START != HASH) + RETURN_TOKEN (SPACE); +} + +{HASH} { + + /* If the '#' is the first non-whitespace, non-comment token on this + * line, then it introduces a directive, switch to the start + * condition. + * + * Otherwise, this is just punctuation, so return the HASH_TOKEN + * token. 
*/ + if (parser->first_non_space_token_this_line) { + BEGIN HASH; + } + + RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN); +} + +version{HSPACE}+ { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_STRING_TOKEN (VERSION_TOKEN); +} + + /* Swallow empty #pragma directives, (to avoid confusing the + * downstream compiler). + * + * Note: We use a simple regular expression for the lookahead + * here. Specifically, we cannot use the complete {NEWLINE} expression + * since it uses alternation and we've found that there's a flex bug + * where using alternation in the lookahead portion of a pattern + * triggers a buffer overrun. */ +pragma{HSPACE}*/[\r\n] { + BEGIN INITIAL; +} + + /* glcpp doesn't handle #extension, #version, or #pragma directives. + * Simply pass them through to the main compiler's lexer/parser. */ +(extension|pragma)[^\r\n]* { + BEGIN INITIAL; + RETURN_STRING_TOKEN (PRAGMA); +} + +line{HSPACE}+ { + BEGIN INITIAL; + RETURN_TOKEN (LINE); +} + +{NEWLINE} { + BEGIN INITIAL; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); +} + + /* For the pre-processor directives, we return these tokens + * even when we are otherwise skipping. */ +ifdef { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (IFDEF); +} + +ifndef { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (IFNDEF); +} + +if/[^_a-zA-Z0-9] { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (IF); +} + +elif/[^_a-zA-Z0-9] { + BEGIN INITIAL; + yyextra->lexing_directive = 1; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (ELIF); +} + +else { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (ELSE); +} + +endif { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_TOKEN_NEVER_SKIP (ENDIF); +} + +error[^\r\n]* { + BEGIN INITIAL; + RETURN_STRING_TOKEN (ERROR_TOKEN); +} + + /* After we see a "#define" we enter the start state + * for the lexer. Within we are looking for the first + * identifier and specifically checking whether the identifier + * is followed by a '(' or not, (to lex either a + * FUNC_IDENTIFIER or an OBJ_IDENITIFIER token). + * + * While in the state we also need to explicitly + * handle a few other things that may appear before the + * identifier: + * + * * Comments, (handled above with the main support for + * comments). + * + * * Whitespace (simply ignored) + * + * * Anything else, (not an identifier, not a comment, + * and not whitespace). This will generate an error. + */ +define{HSPACE}* { + if (! parser->skipping) { + BEGIN DEFINE; + yyextra->space_tokens = 0; + RETURN_TOKEN (DEFINE_TOKEN); + } +} + +undef { + BEGIN INITIAL; + yyextra->space_tokens = 0; + RETURN_TOKEN (UNDEF); +} + +{HSPACE}+ { + /* Nothing to do here. Importantly, don't leave the + * start condition, since it's legal to have space between the + * '#' and the directive.. */ +} + + /* This will catch any non-directive garbage after a HASH */ +{NONSPACE} { + BEGIN INITIAL; + RETURN_TOKEN (GARBAGE); +} + + /* An identifier immediately followed by '(' */ +{IDENTIFIER}/"(" { + BEGIN INITIAL; + RETURN_STRING_TOKEN (FUNC_IDENTIFIER); +} + + /* An identifier not immediately followed by '(' */ +{IDENTIFIER} { + BEGIN INITIAL; + RETURN_STRING_TOKEN (OBJ_IDENTIFIER); +} + + /* Whitespace */ +{HSPACE}+ { + /* Just ignore it. Nothing to do here. */ +} + + /* '/' not followed by '*', so not a comment. This is an error. 
*/ +[/][^*]{NONSPACE}* { + BEGIN INITIAL; + glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); + RETURN_STRING_TOKEN (INTEGER_STRING); +} + + /* A character that can't start an identifier, comment, or + * space. This is an error. */ +[^_a-zA-Z/[:space:]]{NONSPACE}* { + BEGIN INITIAL; + glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); + RETURN_STRING_TOKEN (INTEGER_STRING); +} + +{DECIMAL_INTEGER} { + RETURN_STRING_TOKEN (INTEGER_STRING); +} + +{OCTAL_INTEGER} { + RETURN_STRING_TOKEN (INTEGER_STRING); +} + +{HEXADECIMAL_INTEGER} { + RETURN_STRING_TOKEN (INTEGER_STRING); +} + +"<<" { + RETURN_TOKEN (LEFT_SHIFT); +} + +">>" { + RETURN_TOKEN (RIGHT_SHIFT); +} + +"<=" { + RETURN_TOKEN (LESS_OR_EQUAL); +} + +">=" { + RETURN_TOKEN (GREATER_OR_EQUAL); +} + +"==" { + RETURN_TOKEN (EQUAL); +} + +"!=" { + RETURN_TOKEN (NOT_EQUAL); +} + +"&&" { + RETURN_TOKEN (AND); +} + +"||" { + RETURN_TOKEN (OR); +} + +"++" { + RETURN_TOKEN (PLUS_PLUS); +} + +"--" { + RETURN_TOKEN (MINUS_MINUS); +} + +"##" { + if (! parser->skipping) { + if (parser->is_gles) + glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES"); + RETURN_TOKEN (PASTE); + } +} + +"defined" { + RETURN_TOKEN (DEFINED); +} + +{IDENTIFIER} { + RETURN_STRING_TOKEN (IDENTIFIER); +} + +{PP_NUMBER} { + RETURN_STRING_TOKEN (OTHER); +} + +{PUNCTUATION} { + RETURN_TOKEN (yytext[0]); +} + +{OTHER}+ { + RETURN_STRING_TOKEN (OTHER); +} + +{HSPACE} { + if (yyextra->space_tokens) { + RETURN_TOKEN (SPACE); + } +} + + /* We preserve all newlines, even between #if 0..#endif, so no + skipping.. */ +<*>{NEWLINE} { + if (parser->commented_newlines) { + BEGIN NEWLINE_CATCHUP; + } else { + BEGIN INITIAL; + } + yyextra->space_tokens = 1; + yyextra->lexing_directive = 0; + yylineno++; + yycolumn = 0; + RETURN_TOKEN_NEVER_SKIP (NEWLINE); +} + +<> { + if (YY_START == COMMENT) + glcpp_error(yylloc, yyextra, "Unterminated comment"); + BEGIN DONE; /* Don't keep matching this rule forever. */ + yyextra->lexing_directive = 0; + if (! parser->last_token_was_newline) + RETURN_TOKEN (NEWLINE); +} + + /* This is a catch-all to avoid the annoying default flex action which + * matches any character and prints it. If any input ever matches this + * rule, then we have made a mistake above and need to fix one or more + * of the preceding patterns to match that input. */ + +<*>. { + glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext); + + /* We don't actually use the UNREACHABLE start condition. We + only have this block here so that we can pretend to call some + generated functions, (to avoid "defined but not used" + warnings. 
*/ + if (YY_START == UNREACHABLE) { + unput('.'); + yy_top_state(yyextra); + } +} + +%% + +void +glcpp_lex_set_source_string(glcpp_parser_t *parser, const char *shader) +{ + yy_scan_string(shader, parser->scanner); +} diff --git a/src/compiler/glsl/glcpp/glcpp-parse.y b/src/compiler/glsl/glcpp/glcpp-parse.y new file mode 100644 index 00000000000..ef1a6575aaa --- /dev/null +++ b/src/compiler/glsl/glcpp/glcpp-parse.y @@ -0,0 +1,2557 @@ +%{ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "glcpp.h" +#include "main/core.h" /* for struct gl_extensions */ +#include "main/mtypes.h" /* for gl_api enum */ + +static void +yyerror (YYLTYPE *locp, glcpp_parser_t *parser, const char *error); + +static void +_define_object_macro (glcpp_parser_t *parser, + YYLTYPE *loc, + const char *macro, + token_list_t *replacements); + +static void +_define_function_macro (glcpp_parser_t *parser, + YYLTYPE *loc, + const char *macro, + string_list_t *parameters, + token_list_t *replacements); + +static string_list_t * +_string_list_create (void *ctx); + +static void +_string_list_append_item (string_list_t *list, const char *str); + +static int +_string_list_contains (string_list_t *list, const char *member, int *index); + +static const char * +_string_list_has_duplicate (string_list_t *list); + +static int +_string_list_length (string_list_t *list); + +static int +_string_list_equal (string_list_t *a, string_list_t *b); + +static argument_list_t * +_argument_list_create (void *ctx); + +static void +_argument_list_append (argument_list_t *list, token_list_t *argument); + +static int +_argument_list_length (argument_list_t *list); + +static token_list_t * +_argument_list_member_at (argument_list_t *list, int index); + +/* Note: This function ralloc_steal()s the str pointer. 
*/ +static token_t * +_token_create_str (void *ctx, int type, char *str); + +static token_t * +_token_create_ival (void *ctx, int type, int ival); + +static token_list_t * +_token_list_create (void *ctx); + +static void +_token_list_append (token_list_t *list, token_t *token); + +static void +_token_list_append_list (token_list_t *list, token_list_t *tail); + +static int +_token_list_equal_ignoring_space (token_list_t *a, token_list_t *b); + +static void +_parser_active_list_push (glcpp_parser_t *parser, + const char *identifier, + token_node_t *marker); + +static void +_parser_active_list_pop (glcpp_parser_t *parser); + +static int +_parser_active_list_contains (glcpp_parser_t *parser, const char *identifier); + +typedef enum { + EXPANSION_MODE_IGNORE_DEFINED, + EXPANSION_MODE_EVALUATE_DEFINED +} expansion_mode_t; + +/* Expand list, and begin lexing from the result (after first + * prefixing a token of type 'head_token_type'). + */ +static void +_glcpp_parser_expand_and_lex_from (glcpp_parser_t *parser, + int head_token_type, + token_list_t *list, + expansion_mode_t mode); + +/* Perform macro expansion in-place on the given list. */ +static void +_glcpp_parser_expand_token_list (glcpp_parser_t *parser, + token_list_t *list, + expansion_mode_t mode); + +static void +_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, + token_list_t *list); + +static void +_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, YYLTYPE *loc, + int condition); + +static void +_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, YYLTYPE *loc, + const char *type, int condition); + +static void +_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser, YYLTYPE *loc); + +static void +_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t version, + const char *ident, bool explicitly_set); + +static int +glcpp_parser_lex (YYSTYPE *yylval, YYLTYPE *yylloc, glcpp_parser_t *parser); + +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); + +static void +add_builtin_define(glcpp_parser_t *parser, const char *name, int value); + +%} + +%pure-parser +%error-verbose + +%locations +%initial-action { + @$.first_line = 1; + @$.first_column = 1; + @$.last_line = 1; + @$.last_column = 1; + @$.source = 0; +} + +%parse-param {glcpp_parser_t *parser} +%lex-param {glcpp_parser_t *parser} + +%expect 0 + + /* We use HASH_TOKEN, DEFINE_TOKEN and VERSION_TOKEN (as opposed to + * HASH, DEFINE, and VERSION) to avoid conflicts with other symbols, + * (such as the and start conditions in the lexer). 
*/ +%token DEFINED ELIF_EXPANDED HASH_TOKEN DEFINE_TOKEN FUNC_IDENTIFIER OBJ_IDENTIFIER ELIF ELSE ENDIF ERROR_TOKEN IF IFDEF IFNDEF LINE PRAGMA UNDEF VERSION_TOKEN GARBAGE IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING LINE_EXPANDED NEWLINE OTHER PLACEHOLDER SPACE PLUS_PLUS MINUS_MINUS +%token PASTE +%type INTEGER operator SPACE integer_constant +%type expression +%type IDENTIFIER FUNC_IDENTIFIER OBJ_IDENTIFIER INTEGER_STRING OTHER ERROR_TOKEN PRAGMA +%type identifier_list +%type preprocessing_token +%type pp_tokens replacement_list text_line +%left OR +%left AND +%left '|' +%left '^' +%left '&' +%left EQUAL NOT_EQUAL +%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL +%left LEFT_SHIFT RIGHT_SHIFT +%left '+' '-' +%left '*' '/' '%' +%right UNARY + +%debug + +%% + +input: + /* empty */ +| input line +; + +line: + control_line +| SPACE control_line +| text_line { + _glcpp_parser_print_expanded_token_list (parser, $1); + ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n"); + ralloc_free ($1); + } +| expanded_line +; + +expanded_line: + IF_EXPANDED expression NEWLINE { + if (parser->is_gles && $2.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); + _glcpp_parser_skip_stack_push_if (parser, & @1, $2.value); + } +| ELIF_EXPANDED expression NEWLINE { + if (parser->is_gles && $2.undefined_macro) + glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); + _glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value); + } +| LINE_EXPANDED integer_constant NEWLINE { + parser->has_new_line_number = 1; + parser->new_line_number = $2; + ralloc_asprintf_rewrite_tail (&parser->output, + &parser->output_length, + "#line %" PRIiMAX "\n", + $2); + } +| LINE_EXPANDED integer_constant integer_constant NEWLINE { + parser->has_new_line_number = 1; + parser->new_line_number = $2; + parser->has_new_source_number = 1; + parser->new_source_number = $3; + ralloc_asprintf_rewrite_tail (&parser->output, + &parser->output_length, + "#line %" PRIiMAX " %" PRIiMAX "\n", + $2, $3); + } +; + +define: + OBJ_IDENTIFIER replacement_list NEWLINE { + _define_object_macro (parser, & @1, $1, $2); + } +| FUNC_IDENTIFIER '(' ')' replacement_list NEWLINE { + _define_function_macro (parser, & @1, $1, NULL, $4); + } +| FUNC_IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE { + _define_function_macro (parser, & @1, $1, $3, $5); + } +; + +control_line: + control_line_success { + ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n"); + } +| control_line_error +| HASH_TOKEN LINE { + glcpp_parser_resolve_implicit_version(parser); + } pp_tokens NEWLINE { + + if (parser->skip_stack == NULL || + parser->skip_stack->type == SKIP_NO_SKIP) + { + _glcpp_parser_expand_and_lex_from (parser, + LINE_EXPANDED, $4, + EXPANSION_MODE_IGNORE_DEFINED); + } + } +; + +control_line_success: + HASH_TOKEN DEFINE_TOKEN { + glcpp_parser_resolve_implicit_version(parser); + } define +| HASH_TOKEN UNDEF { + glcpp_parser_resolve_implicit_version(parser); + } IDENTIFIER NEWLINE { + macro_t *macro; + if (strcmp("__LINE__", $4) == 0 + || strcmp("__FILE__", $4) == 0 + || strcmp("__VERSION__", $4) == 0 + || strncmp("GL_", $4, 3) == 0) + glcpp_error(& @1, parser, "Built-in (pre-defined)" + " macro names cannot be undefined."); + + macro = hash_table_find (parser->defines, $4); + if (macro) { + hash_table_remove (parser->defines, $4); + ralloc_free (macro); + } + ralloc_free ($4); + } +| HASH_TOKEN IF { + 
glcpp_parser_resolve_implicit_version(parser); + } pp_tokens NEWLINE { + /* Be careful to only evaluate the 'if' expression if + * we are not skipping. When we are skipping, we + * simply push a new 0-valued 'if' onto the skip + * stack. + * + * This avoids generating diagnostics for invalid + * expressions that are being skipped. */ + if (parser->skip_stack == NULL || + parser->skip_stack->type == SKIP_NO_SKIP) + { + _glcpp_parser_expand_and_lex_from (parser, + IF_EXPANDED, $4, + EXPANSION_MODE_EVALUATE_DEFINED); + } + else + { + _glcpp_parser_skip_stack_push_if (parser, & @1, 0); + parser->skip_stack->type = SKIP_TO_ENDIF; + } + } +| HASH_TOKEN IF NEWLINE { + /* #if without an expression is only an error if we + * are not skipping */ + if (parser->skip_stack == NULL || + parser->skip_stack->type == SKIP_NO_SKIP) + { + glcpp_error(& @1, parser, "#if with no expression"); + } + _glcpp_parser_skip_stack_push_if (parser, & @1, 0); + } +| HASH_TOKEN IFDEF { + glcpp_parser_resolve_implicit_version(parser); + } IDENTIFIER junk NEWLINE { + macro_t *macro = hash_table_find (parser->defines, $4); + ralloc_free ($4); + _glcpp_parser_skip_stack_push_if (parser, & @1, macro != NULL); + } +| HASH_TOKEN IFNDEF { + glcpp_parser_resolve_implicit_version(parser); + } IDENTIFIER junk NEWLINE { + macro_t *macro = hash_table_find (parser->defines, $4); + ralloc_free ($4); + _glcpp_parser_skip_stack_push_if (parser, & @3, macro == NULL); + } +| HASH_TOKEN ELIF pp_tokens NEWLINE { + /* Be careful to only evaluate the 'elif' expression + * if we are not skipping. When we are skipping, we + * simply change to a 0-valued 'elif' on the skip + * stack. + * + * This avoids generating diagnostics for invalid + * expressions that are being skipped. */ + if (parser->skip_stack && + parser->skip_stack->type == SKIP_TO_ELSE) + { + _glcpp_parser_expand_and_lex_from (parser, + ELIF_EXPANDED, $3, + EXPANSION_MODE_EVALUATE_DEFINED); + } + else if (parser->skip_stack && + parser->skip_stack->has_else) + { + glcpp_error(& @1, parser, "#elif after #else"); + } + else + { + _glcpp_parser_skip_stack_change_if (parser, & @1, + "elif", 0); + } + } +| HASH_TOKEN ELIF NEWLINE { + /* #elif without an expression is an error unless we + * are skipping. 
*/ + if (parser->skip_stack && + parser->skip_stack->type == SKIP_TO_ELSE) + { + glcpp_error(& @1, parser, "#elif with no expression"); + } + else if (parser->skip_stack && + parser->skip_stack->has_else) + { + glcpp_error(& @1, parser, "#elif after #else"); + } + else + { + _glcpp_parser_skip_stack_change_if (parser, & @1, + "elif", 0); + glcpp_warning(& @1, parser, "ignoring illegal #elif without expression"); + } + } +| HASH_TOKEN ELSE { parser->lexing_directive = 1; } NEWLINE { + if (parser->skip_stack && + parser->skip_stack->has_else) + { + glcpp_error(& @1, parser, "multiple #else"); + } + else + { + _glcpp_parser_skip_stack_change_if (parser, & @1, "else", 1); + if (parser->skip_stack) + parser->skip_stack->has_else = true; + } + } +| HASH_TOKEN ENDIF { + _glcpp_parser_skip_stack_pop (parser, & @1); + } NEWLINE +| HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE { + if (parser->version_resolved) { + glcpp_error(& @1, parser, "#version must appear on the first line"); + } + _glcpp_parser_handle_version_declaration(parser, $3, NULL, true); + } +| HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE { + if (parser->version_resolved) { + glcpp_error(& @1, parser, "#version must appear on the first line"); + } + _glcpp_parser_handle_version_declaration(parser, $3, $4, true); + } +| HASH_TOKEN NEWLINE { + glcpp_parser_resolve_implicit_version(parser); + } +| HASH_TOKEN PRAGMA NEWLINE { + ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "#%s", $2); + } +; + +control_line_error: + HASH_TOKEN ERROR_TOKEN NEWLINE { + glcpp_error(& @1, parser, "#%s", $2); + } +| HASH_TOKEN DEFINE_TOKEN NEWLINE { + glcpp_error (& @1, parser, "#define without macro name"); + } +| HASH_TOKEN GARBAGE pp_tokens NEWLINE { + glcpp_error (& @1, parser, "Illegal non-directive after #"); + } +; + +integer_constant: + INTEGER_STRING { + if (strlen ($1) >= 3 && strncmp ($1, "0x", 2) == 0) { + $$ = strtoll ($1 + 2, NULL, 16); + } else if ($1[0] == '0') { + $$ = strtoll ($1, NULL, 8); + } else { + $$ = strtoll ($1, NULL, 10); + } + } +| INTEGER { + $$ = $1; + } + +expression: + integer_constant { + $$.value = $1; + $$.undefined_macro = NULL; + } +| IDENTIFIER { + $$.value = 0; + if (parser->is_gles) + $$.undefined_macro = ralloc_strdup (parser, $1); + else + $$.undefined_macro = NULL; + } +| expression OR expression { + $$.value = $1.value || $3.value; + + /* Short-circuit: Only flag undefined from right side + * if left side evaluates to false. + */ + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else if (! $1.value) + $$.undefined_macro = $3.undefined_macro; + } +| expression AND expression { + $$.value = $1.value && $3.value; + + /* Short-circuit: Only flag undefined from right-side + * if left side evaluates to true. 
+ */ + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else if ($1.value) + $$.undefined_macro = $3.undefined_macro; + } +| expression '|' expression { + $$.value = $1.value | $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression '^' expression { + $$.value = $1.value ^ $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression '&' expression { + $$.value = $1.value & $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression NOT_EQUAL expression { + $$.value = $1.value != $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression EQUAL expression { + $$.value = $1.value == $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression GREATER_OR_EQUAL expression { + $$.value = $1.value >= $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression LESS_OR_EQUAL expression { + $$.value = $1.value <= $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression '>' expression { + $$.value = $1.value > $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression '<' expression { + $$.value = $1.value < $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression RIGHT_SHIFT expression { + $$.value = $1.value >> $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression LEFT_SHIFT expression { + $$.value = $1.value << $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression '-' expression { + $$.value = $1.value - $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression '+' expression { + $$.value = $1.value + $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression '%' expression { + if ($3.value == 0) { + yyerror (& @1, parser, + "zero modulus in preprocessor directive"); + } else { + $$.value = $1.value % $3.value; + } + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression '/' expression { + if ($3.value == 0) { + yyerror (& @1, parser, + "division by 0 in preprocessor directive"); + } else { + $$.value = $1.value / $3.value; + } + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| expression '*' expression { + $$.value = $1.value * $3.value; + if ($1.undefined_macro) + $$.undefined_macro = $1.undefined_macro; + else + $$.undefined_macro = $3.undefined_macro; + } +| '!' expression %prec UNARY { + $$.value = ! 
$2.value; + $$.undefined_macro = $2.undefined_macro; + } +| '~' expression %prec UNARY { + $$.value = ~ $2.value; + $$.undefined_macro = $2.undefined_macro; + } +| '-' expression %prec UNARY { + $$.value = - $2.value; + $$.undefined_macro = $2.undefined_macro; + } +| '+' expression %prec UNARY { + $$.value = + $2.value; + $$.undefined_macro = $2.undefined_macro; + } +| '(' expression ')' { + $$ = $2; + } +; + +identifier_list: + IDENTIFIER { + $$ = _string_list_create (parser); + _string_list_append_item ($$, $1); + ralloc_steal ($$, $1); + } +| identifier_list ',' IDENTIFIER { + $$ = $1; + _string_list_append_item ($$, $3); + ralloc_steal ($$, $3); + } +; + +text_line: + NEWLINE { $$ = NULL; } +| pp_tokens NEWLINE +; + +replacement_list: + /* empty */ { $$ = NULL; } +| pp_tokens +; + +junk: + /* empty */ +| pp_tokens { + glcpp_error(&@1, parser, "extra tokens at end of directive"); + } +; + +pp_tokens: + preprocessing_token { + parser->space_tokens = 1; + $$ = _token_list_create (parser); + _token_list_append ($$, $1); + } +| pp_tokens preprocessing_token { + $$ = $1; + _token_list_append ($$, $2); + } +; + +preprocessing_token: + IDENTIFIER { + $$ = _token_create_str (parser, IDENTIFIER, $1); + $$->location = yylloc; + } +| INTEGER_STRING { + $$ = _token_create_str (parser, INTEGER_STRING, $1); + $$->location = yylloc; + } +| operator { + $$ = _token_create_ival (parser, $1, $1); + $$->location = yylloc; + } +| DEFINED { + $$ = _token_create_ival (parser, DEFINED, DEFINED); + $$->location = yylloc; + } +| OTHER { + $$ = _token_create_str (parser, OTHER, $1); + $$->location = yylloc; + } +| SPACE { + $$ = _token_create_ival (parser, SPACE, SPACE); + $$->location = yylloc; + } +; + +operator: + '[' { $$ = '['; } +| ']' { $$ = ']'; } +| '(' { $$ = '('; } +| ')' { $$ = ')'; } +| '{' { $$ = '{'; } +| '}' { $$ = '}'; } +| '.' { $$ = '.'; } +| '&' { $$ = '&'; } +| '*' { $$ = '*'; } +| '+' { $$ = '+'; } +| '-' { $$ = '-'; } +| '~' { $$ = '~'; } +| '!' { $$ = '!'; } +| '/' { $$ = '/'; } +| '%' { $$ = '%'; } +| LEFT_SHIFT { $$ = LEFT_SHIFT; } +| RIGHT_SHIFT { $$ = RIGHT_SHIFT; } +| '<' { $$ = '<'; } +| '>' { $$ = '>'; } +| LESS_OR_EQUAL { $$ = LESS_OR_EQUAL; } +| GREATER_OR_EQUAL { $$ = GREATER_OR_EQUAL; } +| EQUAL { $$ = EQUAL; } +| NOT_EQUAL { $$ = NOT_EQUAL; } +| '^' { $$ = '^'; } +| '|' { $$ = '|'; } +| AND { $$ = AND; } +| OR { $$ = OR; } +| ';' { $$ = ';'; } +| ',' { $$ = ','; } +| '=' { $$ = '='; } +| PASTE { $$ = PASTE; } +| PLUS_PLUS { $$ = PLUS_PLUS; } +| MINUS_MINUS { $$ = MINUS_MINUS; } +; + +%% + +string_list_t * +_string_list_create (void *ctx) +{ + string_list_t *list; + + list = ralloc (ctx, string_list_t); + list->head = NULL; + list->tail = NULL; + + return list; +} + +void +_string_list_append_item (string_list_t *list, const char *str) +{ + string_node_t *node; + + node = ralloc (list, string_node_t); + node->str = ralloc_strdup (node, str); + + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; +} + +int +_string_list_contains (string_list_t *list, const char *member, int *index) +{ + string_node_t *node; + int i; + + if (list == NULL) + return 0; + + for (i = 0, node = list->head; node; i++, node = node->next) { + if (strcmp (node->str, member) == 0) { + if (index) + *index = i; + return 1; + } + } + + return 0; +} + +/* Return duplicate string in list (if any), NULL otherwise. 
*/ +const char * +_string_list_has_duplicate (string_list_t *list) +{ + string_node_t *node, *dup; + + if (list == NULL) + return NULL; + + for (node = list->head; node; node = node->next) { + for (dup = node->next; dup; dup = dup->next) { + if (strcmp (node->str, dup->str) == 0) + return node->str; + } + } + + return NULL; +} + +int +_string_list_length (string_list_t *list) +{ + int length = 0; + string_node_t *node; + + if (list == NULL) + return 0; + + for (node = list->head; node; node = node->next) + length++; + + return length; +} + +int +_string_list_equal (string_list_t *a, string_list_t *b) +{ + string_node_t *node_a, *node_b; + + if (a == NULL && b == NULL) + return 1; + + if (a == NULL || b == NULL) + return 0; + + for (node_a = a->head, node_b = b->head; + node_a && node_b; + node_a = node_a->next, node_b = node_b->next) + { + if (strcmp (node_a->str, node_b->str)) + return 0; + } + + /* Catch the case of lists being different lengths, (which + * would cause the loop above to terminate after the shorter + * list). */ + return node_a == node_b; +} + +argument_list_t * +_argument_list_create (void *ctx) +{ + argument_list_t *list; + + list = ralloc (ctx, argument_list_t); + list->head = NULL; + list->tail = NULL; + + return list; +} + +void +_argument_list_append (argument_list_t *list, token_list_t *argument) +{ + argument_node_t *node; + + node = ralloc (list, argument_node_t); + node->argument = argument; + + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; +} + +int +_argument_list_length (argument_list_t *list) +{ + int length = 0; + argument_node_t *node; + + if (list == NULL) + return 0; + + for (node = list->head; node; node = node->next) + length++; + + return length; +} + +token_list_t * +_argument_list_member_at (argument_list_t *list, int index) +{ + argument_node_t *node; + int i; + + if (list == NULL) + return NULL; + + node = list->head; + for (i = 0; i < index; i++) { + node = node->next; + if (node == NULL) + break; + } + + if (node) + return node->argument; + + return NULL; +} + +/* Note: This function ralloc_steal()s the str pointer. 
*/ +token_t * +_token_create_str (void *ctx, int type, char *str) +{ + token_t *token; + + token = ralloc (ctx, token_t); + token->type = type; + token->value.str = str; + + ralloc_steal (token, str); + + return token; +} + +token_t * +_token_create_ival (void *ctx, int type, int ival) +{ + token_t *token; + + token = ralloc (ctx, token_t); + token->type = type; + token->value.ival = ival; + + return token; +} + +token_list_t * +_token_list_create (void *ctx) +{ + token_list_t *list; + + list = ralloc (ctx, token_list_t); + list->head = NULL; + list->tail = NULL; + list->non_space_tail = NULL; + + return list; +} + +void +_token_list_append (token_list_t *list, token_t *token) +{ + token_node_t *node; + + node = ralloc (list, token_node_t); + node->token = token; + node->next = NULL; + + if (list->head == NULL) { + list->head = node; + } else { + list->tail->next = node; + } + + list->tail = node; + if (token->type != SPACE) + list->non_space_tail = node; +} + +void +_token_list_append_list (token_list_t *list, token_list_t *tail) +{ + if (tail == NULL || tail->head == NULL) + return; + + if (list->head == NULL) { + list->head = tail->head; + } else { + list->tail->next = tail->head; + } + + list->tail = tail->tail; + list->non_space_tail = tail->non_space_tail; +} + +static token_list_t * +_token_list_copy (void *ctx, token_list_t *other) +{ + token_list_t *copy; + token_node_t *node; + + if (other == NULL) + return NULL; + + copy = _token_list_create (ctx); + for (node = other->head; node; node = node->next) { + token_t *new_token = ralloc (copy, token_t); + *new_token = *node->token; + _token_list_append (copy, new_token); + } + + return copy; +} + +static void +_token_list_trim_trailing_space (token_list_t *list) +{ + token_node_t *tail, *next; + + if (list->non_space_tail) { + tail = list->non_space_tail->next; + list->non_space_tail->next = NULL; + list->tail = list->non_space_tail; + + while (tail) { + next = tail->next; + ralloc_free (tail); + tail = next; + } + } +} + +static int +_token_list_is_empty_ignoring_space (token_list_t *l) +{ + token_node_t *n; + + if (l == NULL) + return 1; + + n = l->head; + while (n != NULL && n->token->type == SPACE) + n = n->next; + + return n == NULL; +} + +int +_token_list_equal_ignoring_space (token_list_t *a, token_list_t *b) +{ + token_node_t *node_a, *node_b; + + if (a == NULL || b == NULL) { + int a_empty = _token_list_is_empty_ignoring_space(a); + int b_empty = _token_list_is_empty_ignoring_space(b); + return a_empty == b_empty; + } + + node_a = a->head; + node_b = b->head; + + while (1) + { + if (node_a == NULL && node_b == NULL) + break; + + if (node_a == NULL || node_b == NULL) + return 0; + /* Make sure whitespace appears in the same places in both. + * It need not be exactly the same amount of whitespace, + * though. 
+ */ + if (node_a->token->type == SPACE + && node_b->token->type == SPACE) { + while (node_a && node_a->token->type == SPACE) + node_a = node_a->next; + while (node_b && node_b->token->type == SPACE) + node_b = node_b->next; + continue; + } + + if (node_a->token->type != node_b->token->type) + return 0; + + switch (node_a->token->type) { + case INTEGER: + if (node_a->token->value.ival != + node_b->token->value.ival) + { + return 0; + } + break; + case IDENTIFIER: + case INTEGER_STRING: + case OTHER: + if (strcmp (node_a->token->value.str, + node_b->token->value.str)) + { + return 0; + } + break; + } + + node_a = node_a->next; + node_b = node_b->next; + } + + return 1; +} + +static void +_token_print (char **out, size_t *len, token_t *token) +{ + if (token->type < 256) { + ralloc_asprintf_rewrite_tail (out, len, "%c", token->type); + return; + } + + switch (token->type) { + case INTEGER: + ralloc_asprintf_rewrite_tail (out, len, "%" PRIiMAX, token->value.ival); + break; + case IDENTIFIER: + case INTEGER_STRING: + case OTHER: + ralloc_asprintf_rewrite_tail (out, len, "%s", token->value.str); + break; + case SPACE: + ralloc_asprintf_rewrite_tail (out, len, " "); + break; + case LEFT_SHIFT: + ralloc_asprintf_rewrite_tail (out, len, "<<"); + break; + case RIGHT_SHIFT: + ralloc_asprintf_rewrite_tail (out, len, ">>"); + break; + case LESS_OR_EQUAL: + ralloc_asprintf_rewrite_tail (out, len, "<="); + break; + case GREATER_OR_EQUAL: + ralloc_asprintf_rewrite_tail (out, len, ">="); + break; + case EQUAL: + ralloc_asprintf_rewrite_tail (out, len, "=="); + break; + case NOT_EQUAL: + ralloc_asprintf_rewrite_tail (out, len, "!="); + break; + case AND: + ralloc_asprintf_rewrite_tail (out, len, "&&"); + break; + case OR: + ralloc_asprintf_rewrite_tail (out, len, "||"); + break; + case PASTE: + ralloc_asprintf_rewrite_tail (out, len, "##"); + break; + case PLUS_PLUS: + ralloc_asprintf_rewrite_tail (out, len, "++"); + break; + case MINUS_MINUS: + ralloc_asprintf_rewrite_tail (out, len, "--"); + break; + case DEFINED: + ralloc_asprintf_rewrite_tail (out, len, "defined"); + break; + case PLACEHOLDER: + /* Nothing to print. */ + break; + default: + assert(!"Error: Don't know how to print token."); + + break; + } +} + +/* Return a new token (ralloc()ed off of 'token') formed by pasting + * 'token' and 'other'. Note that this function may return 'token' or + * 'other' directly rather than allocating anything new. + * + * Caution: Only very cursory error-checking is performed to see if + * the final result is a valid single token. */ +static token_t * +_token_paste (glcpp_parser_t *parser, token_t *token, token_t *other) +{ + token_t *combined = NULL; + + /* Pasting a placeholder onto anything makes no change. */ + if (other->type == PLACEHOLDER) + return token; + + /* When 'token' is a placeholder, just return 'other'. */ + if (token->type == PLACEHOLDER) + return other; + + /* A very few single-character punctuators can be combined + * with another to form a multi-character punctuator. 
*/ + switch (token->type) { + case '<': + if (other->type == '<') + combined = _token_create_ival (token, LEFT_SHIFT, LEFT_SHIFT); + else if (other->type == '=') + combined = _token_create_ival (token, LESS_OR_EQUAL, LESS_OR_EQUAL); + break; + case '>': + if (other->type == '>') + combined = _token_create_ival (token, RIGHT_SHIFT, RIGHT_SHIFT); + else if (other->type == '=') + combined = _token_create_ival (token, GREATER_OR_EQUAL, GREATER_OR_EQUAL); + break; + case '=': + if (other->type == '=') + combined = _token_create_ival (token, EQUAL, EQUAL); + break; + case '!': + if (other->type == '=') + combined = _token_create_ival (token, NOT_EQUAL, NOT_EQUAL); + break; + case '&': + if (other->type == '&') + combined = _token_create_ival (token, AND, AND); + break; + case '|': + if (other->type == '|') + combined = _token_create_ival (token, OR, OR); + break; + } + + if (combined != NULL) { + /* Inherit the location from the first token */ + combined->location = token->location; + return combined; + } + + /* Two string-valued (or integer) tokens can usually just be + * mashed together. (We also handle a string followed by an + * integer here as well.) + * + * There are some exceptions here. Notably, if the first token + * is an integer (or a string representing an integer), then + * the second token must also be an integer or must be a + * string representing an integer that begins with a digit. + */ + if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING || token->type == INTEGER) && + (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING || other->type == INTEGER)) + { + char *str; + int combined_type; + + /* Check that pasting onto an integer doesn't create a + * non-integer, (that is, only digits can be + * pasted. */ + if (token->type == INTEGER_STRING || token->type == INTEGER) + { + switch (other->type) { + case INTEGER_STRING: + if (other->value.str[0] < '0' || + other->value.str[0] > '9') + goto FAIL; + break; + case INTEGER: + if (other->value.ival < 0) + goto FAIL; + break; + default: + goto FAIL; + } + } + + if (token->type == INTEGER) + str = ralloc_asprintf (token, "%" PRIiMAX, + token->value.ival); + else + str = ralloc_strdup (token, token->value.str); + + + if (other->type == INTEGER) + ralloc_asprintf_append (&str, "%" PRIiMAX, + other->value.ival); + else + ralloc_strcat (&str, other->value.str); + + /* New token is same type as original token, unless we + * started with an integer, in which case we will be + * creating an integer-string. 
*/ + combined_type = token->type; + if (combined_type == INTEGER) + combined_type = INTEGER_STRING; + + combined = _token_create_str (token, combined_type, str); + combined->location = token->location; + return combined; + } + + FAIL: + glcpp_error (&token->location, parser, ""); + ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "Pasting \""); + _token_print (&parser->info_log, &parser->info_log_length, token); + ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" and \""); + _token_print (&parser->info_log, &parser->info_log_length, other); + ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" does not give a valid preprocessing token.\n"); + + return token; +} + +static void +_token_list_print (glcpp_parser_t *parser, token_list_t *list) +{ + token_node_t *node; + + if (list == NULL) + return; + + for (node = list->head; node; node = node->next) + _token_print (&parser->output, &parser->output_length, node->token); +} + +void +yyerror (YYLTYPE *locp, glcpp_parser_t *parser, const char *error) +{ + glcpp_error(locp, parser, "%s", error); +} + +static void add_builtin_define(glcpp_parser_t *parser, + const char *name, int value) +{ + token_t *tok; + token_list_t *list; + + tok = _token_create_ival (parser, INTEGER, value); + + list = _token_list_create(parser); + _token_list_append(list, tok); + _define_object_macro(parser, NULL, name, list); +} + +glcpp_parser_t * +glcpp_parser_create (const struct gl_extensions *extensions, gl_api api) +{ + glcpp_parser_t *parser; + + parser = ralloc (NULL, glcpp_parser_t); + + glcpp_lex_init_extra (parser, &parser->scanner); + parser->defines = hash_table_ctor (32, hash_table_string_hash, + hash_table_string_compare); + parser->active = NULL; + parser->lexing_directive = 0; + parser->space_tokens = 1; + parser->last_token_was_newline = 0; + parser->last_token_was_space = 0; + parser->first_non_space_token_this_line = 1; + parser->newline_as_space = 0; + parser->in_control_line = 0; + parser->paren_count = 0; + parser->commented_newlines = 0; + + parser->skip_stack = NULL; + parser->skipping = 0; + + parser->lex_from_list = NULL; + parser->lex_from_node = NULL; + + parser->output = ralloc_strdup(parser, ""); + parser->output_length = 0; + parser->info_log = ralloc_strdup(parser, ""); + parser->info_log_length = 0; + parser->error = 0; + + parser->extensions = extensions; + parser->api = api; + parser->version_resolved = false; + + parser->has_new_line_number = 0; + parser->new_line_number = 1; + parser->has_new_source_number = 0; + parser->new_source_number = 0; + + return parser; +} + +void +glcpp_parser_destroy (glcpp_parser_t *parser) +{ + glcpp_lex_destroy (parser->scanner); + hash_table_dtor (parser->defines); + ralloc_free (parser); +} + +typedef enum function_status +{ + FUNCTION_STATUS_SUCCESS, + FUNCTION_NOT_A_FUNCTION, + FUNCTION_UNBALANCED_PARENTHESES +} function_status_t; + +/* Find a set of function-like macro arguments by looking for a + * balanced set of parentheses. + * + * When called, 'node' should be the opening-parenthesis token, (or + * perhaps preceeding SPACE tokens). Upon successful return *last will + * be the last consumed node, (corresponding to the closing right + * parenthesis). + * + * Return values: + * + * FUNCTION_STATUS_SUCCESS: + * + * Successfully parsed a set of function arguments. + * + * FUNCTION_NOT_A_FUNCTION: + * + * Macro name not followed by a '('. 
This is not an error, but + * simply that the macro name should be treated as a non-macro. + * + * FUNCTION_UNBALANCED_PARENTHESES + * + * Macro name is not followed by a balanced set of parentheses. + */ +static function_status_t +_arguments_parse (argument_list_t *arguments, + token_node_t *node, + token_node_t **last) +{ + token_list_t *argument; + int paren_count; + + node = node->next; + + /* Ignore whitespace before first parenthesis. */ + while (node && node->token->type == SPACE) + node = node->next; + + if (node == NULL || node->token->type != '(') + return FUNCTION_NOT_A_FUNCTION; + + node = node->next; + + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); + + for (paren_count = 1; node; node = node->next) { + if (node->token->type == '(') + { + paren_count++; + } + else if (node->token->type == ')') + { + paren_count--; + if (paren_count == 0) + break; + } + + if (node->token->type == ',' && + paren_count == 1) + { + _token_list_trim_trailing_space (argument); + argument = _token_list_create (arguments); + _argument_list_append (arguments, argument); + } + else { + if (argument->head == NULL) { + /* Don't treat initial whitespace as + * part of the argument. */ + if (node->token->type == SPACE) + continue; + } + _token_list_append (argument, node->token); + } + } + + if (paren_count) + return FUNCTION_UNBALANCED_PARENTHESES; + + *last = node; + + return FUNCTION_STATUS_SUCCESS; +} + +static token_list_t * +_token_list_create_with_one_ival (void *ctx, int type, int ival) +{ + token_list_t *list; + token_t *node; + + list = _token_list_create (ctx); + node = _token_create_ival (list, type, ival); + _token_list_append (list, node); + + return list; +} + +static token_list_t * +_token_list_create_with_one_space (void *ctx) +{ + return _token_list_create_with_one_ival (ctx, SPACE, SPACE); +} + +static token_list_t * +_token_list_create_with_one_integer (void *ctx, int ival) +{ + return _token_list_create_with_one_ival (ctx, INTEGER, ival); +} + +/* Evaluate a DEFINED token node (based on subsequent tokens in the list). + * + * Note: This function must only be called when "node" is a DEFINED token, + * (and will abort with an assertion failure otherwise). + * + * If "node" is followed, (ignoring any SPACE tokens), by an IDENTIFIER token + * (optionally preceded and followed by '(' and ')' tokens) then the following + * occurs: + * + * If the identifier is a defined macro, this function returns 1. + * + * If the identifier is not a defined macro, this function returns 0. + * + * In either case, *last will be updated to the last node in the list + * consumed by the evaluation, (either the token of the identifier or the + * token of the closing parenthesis). + * + * In all other cases, (such as "node is the final node of the list", or + * "missing closing parenthesis", etc.), this function generates a + * preprocessor error, returns -1 and *last will not be set. + */ +static int +_glcpp_parser_evaluate_defined (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last) +{ + token_node_t *argument, *defined = node; + + assert (node->token->type == DEFINED); + + node = node->next; + + /* Ignore whitespace after DEFINED token. */ + while (node && node->token->type == SPACE) + node = node->next; + + if (node == NULL) + goto FAIL; + + if (node->token->type == IDENTIFIER || node->token->type == OTHER) { + argument = node; + } else if (node->token->type == '(') { + node = node->next; + + /* Ignore whitespace after '(' token. 
*/ + while (node && node->token->type == SPACE) + node = node->next; + + if (node == NULL || (node->token->type != IDENTIFIER && + node->token->type != OTHER)) + { + goto FAIL; + } + + argument = node; + + node = node->next; + + /* Ignore whitespace after identifier, before ')' token. */ + while (node && node->token->type == SPACE) + node = node->next; + + if (node == NULL || node->token->type != ')') + goto FAIL; + } else { + goto FAIL; + } + + *last = node; + + return hash_table_find (parser->defines, + argument->token->value.str) ? 1 : 0; + +FAIL: + glcpp_error (&defined->token->location, parser, + "\"defined\" not followed by an identifier"); + return -1; +} + +/* Evaluate all DEFINED nodes in a given list, modifying the list in place. + */ +static void +_glcpp_parser_evaluate_defined_in_list (glcpp_parser_t *parser, + token_list_t *list) +{ + token_node_t *node, *node_prev, *replacement, *last = NULL; + int value; + + if (list == NULL) + return; + + node_prev = NULL; + node = list->head; + + while (node) { + + if (node->token->type != DEFINED) + goto NEXT; + + value = _glcpp_parser_evaluate_defined (parser, node, &last); + if (value == -1) + goto NEXT; + + replacement = ralloc (list, token_node_t); + replacement->token = _token_create_ival (list, INTEGER, value); + + /* Splice replacement node into list, replacing from "node" + * through "last". */ + if (node_prev) + node_prev->next = replacement; + else + list->head = replacement; + replacement->next = last->next; + if (last == list->tail) + list->tail = replacement; + + node = replacement; + + NEXT: + node_prev = node; + node = node->next; + } +} + +/* Perform macro expansion on 'list', placing the resulting tokens + * into a new list which is initialized with a first token of type + * 'head_token_type'. Then begin lexing from the resulting list, + * (return to the current lexing source when this list is exhausted). + * + * See the documentation of _glcpp_parser_expand_token_list for a description + * of the "mode" parameter. + */ +static void +_glcpp_parser_expand_and_lex_from (glcpp_parser_t *parser, + int head_token_type, + token_list_t *list, + expansion_mode_t mode) +{ + token_list_t *expanded; + token_t *token; + + expanded = _token_list_create (parser); + token = _token_create_ival (parser, head_token_type, head_token_type); + _token_list_append (expanded, token); + _glcpp_parser_expand_token_list (parser, list, mode); + _token_list_append_list (expanded, list); + glcpp_parser_lex_from (parser, expanded); +} + +static void +_glcpp_parser_apply_pastes (glcpp_parser_t *parser, token_list_t *list) +{ + token_node_t *node; + + node = list->head; + while (node) + { + token_node_t *next_non_space; + + /* Look ahead for a PASTE token, skipping space. */ + next_non_space = node->next; + while (next_non_space && next_non_space->token->type == SPACE) + next_non_space = next_non_space->next; + + if (next_non_space == NULL) + break; + + if (next_non_space->token->type != PASTE) { + node = next_non_space; + continue; + } + + /* Now find the next non-space token after the PASTE. 
*/ + next_non_space = next_non_space->next; + while (next_non_space && next_non_space->token->type == SPACE) + next_non_space = next_non_space->next; + + if (next_non_space == NULL) { + yyerror (&node->token->location, parser, "'##' cannot appear at either end of a macro expansion\n"); + return; + } + + node->token = _token_paste (parser, node->token, next_non_space->token); + node->next = next_non_space->next; + if (next_non_space == list->tail) + list->tail = node; + } + + list->non_space_tail = list->tail; +} + +/* This is a helper function that's essentially part of the + * implementation of _glcpp_parser_expand_node. It shouldn't be called + * except for by that function. + * + * Returns NULL if node is a simple token with no expansion, (that is, + * although 'node' corresponds to an identifier defined as a + * function-like macro, it is not followed with a parenthesized + * argument list). + * + * Compute the complete expansion of node (which is a function-like + * macro) and subsequent nodes which are arguments. + * + * Returns the token list that results from the expansion and sets + * *last to the last node in the list that was consumed by the + * expansion. Specifically, *last will be set as follows: as the + * token of the closing right parenthesis. + * + * See the documentation of _glcpp_parser_expand_token_list for a description + * of the "mode" parameter. + */ +static token_list_t * +_glcpp_parser_expand_function (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last, + expansion_mode_t mode) +{ + macro_t *macro; + const char *identifier; + argument_list_t *arguments; + function_status_t status; + token_list_t *substituted; + int parameter_index; + + identifier = node->token->value.str; + + macro = hash_table_find (parser->defines, identifier); + + assert (macro->is_function); + + arguments = _argument_list_create (parser); + status = _arguments_parse (arguments, node, last); + + switch (status) { + case FUNCTION_STATUS_SUCCESS: + break; + case FUNCTION_NOT_A_FUNCTION: + return NULL; + case FUNCTION_UNBALANCED_PARENTHESES: + glcpp_error (&node->token->location, parser, "Macro %s call has unbalanced parentheses\n", identifier); + return NULL; + } + + /* Replace a macro defined as empty with a SPACE token. */ + if (macro->replacements == NULL) { + ralloc_free (arguments); + return _token_list_create_with_one_space (parser); + } + + if (! ((_argument_list_length (arguments) == + _string_list_length (macro->parameters)) || + (_string_list_length (macro->parameters) == 0 && + _argument_list_length (arguments) == 1 && + arguments->head->argument->head == NULL))) + { + glcpp_error (&node->token->location, parser, + "Error: macro %s invoked with %d arguments (expected %d)\n", + identifier, + _argument_list_length (arguments), + _string_list_length (macro->parameters)); + return NULL; + } + + /* Perform argument substitution on the replacement list. */ + substituted = _token_list_create (arguments); + + for (node = macro->replacements->head; node; node = node->next) + { + if (node->token->type == IDENTIFIER && + _string_list_contains (macro->parameters, + node->token->value.str, + ¶meter_index)) + { + token_list_t *argument; + argument = _argument_list_member_at (arguments, + parameter_index); + /* Before substituting, we expand the argument + * tokens, or append a placeholder token for + * an empty argument. 
*/ + if (argument->head) { + token_list_t *expanded_argument; + expanded_argument = _token_list_copy (parser, + argument); + _glcpp_parser_expand_token_list (parser, + expanded_argument, + mode); + _token_list_append_list (substituted, + expanded_argument); + } else { + token_t *new_token; + + new_token = _token_create_ival (substituted, + PLACEHOLDER, + PLACEHOLDER); + _token_list_append (substituted, new_token); + } + } else { + _token_list_append (substituted, node->token); + } + } + + /* After argument substitution, and before further expansion + * below, implement token pasting. */ + + _token_list_trim_trailing_space (substituted); + + _glcpp_parser_apply_pastes (parser, substituted); + + return substituted; +} + +/* Compute the complete expansion of node, (and subsequent nodes after + * 'node' in the case that 'node' is a function-like macro and + * subsequent nodes are arguments). + * + * Returns NULL if node is a simple token with no expansion. + * + * Otherwise, returns the token list that results from the expansion + * and sets *last to the last node in the list that was consumed by + * the expansion. Specifically, *last will be set as follows: + * + * As 'node' in the case of object-like macro expansion. + * + * As the token of the closing right parenthesis in the case of + * function-like macro expansion. + * + * See the documentation of _glcpp_parser_expand_token_list for a description + * of the "mode" parameter. + */ +static token_list_t * +_glcpp_parser_expand_node (glcpp_parser_t *parser, + token_node_t *node, + token_node_t **last, + expansion_mode_t mode) +{ + token_t *token = node->token; + const char *identifier; + macro_t *macro; + + /* We only expand identifiers */ + if (token->type != IDENTIFIER) { + return NULL; + } + + *last = node; + identifier = token->value.str; + + /* Special handling for __LINE__ and __FILE__, (not through + * the hash table). */ + if (strcmp(identifier, "__LINE__") == 0) + return _token_list_create_with_one_integer (parser, node->token->location.first_line); + + if (strcmp(identifier, "__FILE__") == 0) + return _token_list_create_with_one_integer (parser, node->token->location.source); + + /* Look up this identifier in the hash table. */ + macro = hash_table_find (parser->defines, identifier); + + /* Not a macro, so no expansion needed. */ + if (macro == NULL) + return NULL; + + /* Finally, don't expand this macro if we're already actively + * expanding it, (to avoid infinite recursion). */ + if (_parser_active_list_contains (parser, identifier)) { + /* We change the token type here from IDENTIFIER to + * OTHER to prevent any future expansion of this + * unexpanded token. */ + char *str; + token_list_t *expansion; + token_t *final; + + str = ralloc_strdup (parser, token->value.str); + final = _token_create_str (parser, OTHER, str); + expansion = _token_list_create (parser); + _token_list_append (expansion, final); + return expansion; + } + + if (! macro->is_function) + { + token_list_t *replacement; + + /* Replace a macro defined as empty with a SPACE token. */ + if (macro->replacements == NULL) + return _token_list_create_with_one_space (parser); + + replacement = _token_list_copy (parser, macro->replacements); + _glcpp_parser_apply_pastes (parser, replacement); + return replacement; + } + + return _glcpp_parser_expand_function (parser, node, last, mode); +} + +/* Push a new identifier onto the parser's active list. + * + * Here, 'marker' is the token node that appears in the list after the + * expansion of 'identifier'. 
That is, when the list iterator begins
+ * examining 'marker', then it is time to pop this node from the
+ * active stack.
+ */
+static void
+_parser_active_list_push (glcpp_parser_t *parser,
+			   const char *identifier,
+			   token_node_t *marker)
+{
+	active_list_t *node;
+
+	node = ralloc (parser->active, active_list_t);
+	node->identifier = ralloc_strdup (node, identifier);
+	node->marker = marker;
+	node->next = parser->active;
+
+	parser->active = node;
+}
+
+static void
+_parser_active_list_pop (glcpp_parser_t *parser)
+{
+	active_list_t *node = parser->active;
+
+	if (node == NULL) {
+		parser->active = NULL;
+		return;
+	}
+
+	node = parser->active->next;
+	ralloc_free (parser->active);
+
+	parser->active = node;
+}
+
+static int
+_parser_active_list_contains (glcpp_parser_t *parser, const char *identifier)
+{
+	active_list_t *node;
+
+	if (parser->active == NULL)
+		return 0;
+
+	for (node = parser->active; node; node = node->next)
+		if (strcmp (node->identifier, identifier) == 0)
+			return 1;
+
+	return 0;
+}
+
+/* Walk over the token list replacing nodes with their expansion.
+ * Whenever nodes are expanded the walking will walk over the new
+ * nodes, continuing to expand as necessary. The results are placed in
+ * 'list' itself.
+ *
+ * The "mode" argument controls the handling of any DEFINED tokens that
+ * result from expansion as follows:
+ *
+ *	EXPANSION_MODE_IGNORE_DEFINED: Any resulting DEFINED tokens will be
+ *	    left in the final list, unevaluated. This is the correct mode
+ *	    for expanding any list in any context other than a
+ *	    preprocessor conditional, (#if or #elif).
+ *
+ *	EXPANSION_MODE_EVALUATE_DEFINED: Any resulting DEFINED tokens will be
+ *	    evaluated to 0 or 1 tokens depending on whether the following
+ *	    token is the name of a defined macro. If the DEFINED token is
+ *	    not followed by an (optionally parenthesized) identifier, then
+ *	    an error will be generated. This is the correct mode for
+ *	    expanding any list in the context of a preprocessor
+ *	    conditional, (#if or #elif).
+ */
+static void
+_glcpp_parser_expand_token_list (glcpp_parser_t *parser,
+				 token_list_t *list,
+				 expansion_mode_t mode)
+{
+	token_node_t *node_prev;
+	token_node_t *node, *last = NULL;
+	token_list_t *expansion;
+	active_list_t *active_initial = parser->active;
+
+	if (list == NULL)
+		return;
+
+	_token_list_trim_trailing_space (list);
+
+	node_prev = NULL;
+	node = list->head;
+
+	if (mode == EXPANSION_MODE_EVALUATE_DEFINED)
+		_glcpp_parser_evaluate_defined_in_list (parser, list);
+
+	while (node) {
+
+		while (parser->active && parser->active->marker == node)
+			_parser_active_list_pop (parser);
+
+		expansion = _glcpp_parser_expand_node (parser, node, &last, mode);
+		if (expansion) {
+			token_node_t *n;
+
+			if (mode == EXPANSION_MODE_EVALUATE_DEFINED) {
+				_glcpp_parser_evaluate_defined_in_list (parser,
+									expansion);
+			}
+
+			for (n = node; n != last->next; n = n->next)
+				while (parser->active &&
+				       parser->active->marker == n)
+				{
+					_parser_active_list_pop (parser);
+				}
+
+			_parser_active_list_push (parser,
+						  node->token->value.str,
+						  last->next);
+
+			/* Splice expansion into list, supporting a
+			 * simple deletion if the expansion is
+			 * empty.
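+			 *
+			 * (Illustration only, not in the original comment:
+			 * for a list A -> B -> C where B expands to X -> Y,
+			 * the splice below produces A -> X -> Y -> C; if B
+			 * expands to an empty list it produces just A -> C.)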
+			 */
+			if (expansion->head) {
+				if (node_prev)
+					node_prev->next = expansion->head;
+				else
+					list->head = expansion->head;
+				expansion->tail->next = last->next;
+				if (last == list->tail)
+					list->tail = expansion->tail;
+			} else {
+				if (node_prev)
+					node_prev->next = last->next;
+				else
+					list->head = last->next;
+				if (last == list->tail)
+					list->tail = NULL;
+			}
+		} else {
+			node_prev = node;
+		}
+		node = node_prev ? node_prev->next : list->head;
+	}
+
+	/* Remove any lingering effects of this invocation on the
+	 * active list. That is, pop until the list looks like it did
+	 * at the beginning of this function. */
+	while (parser->active && parser->active != active_initial)
+		_parser_active_list_pop (parser);
+
+	list->non_space_tail = list->tail;
+}
+
+void
+_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
+					 token_list_t *list)
+{
+	if (list == NULL)
+		return;
+
+	_glcpp_parser_expand_token_list (parser, list, EXPANSION_MODE_IGNORE_DEFINED);
+
+	_token_list_trim_trailing_space (list);
+
+	_token_list_print (parser, list);
+}
+
+static void
+_check_for_reserved_macro_name (glcpp_parser_t *parser, YYLTYPE *loc,
+				const char *identifier)
+{
+	/* Section 3.3 (Preprocessor) of the GLSL 1.30 spec (and later) and
+	 * the GLSL ES spec (all versions) say:
+	 *
+	 *     "All macro names containing two consecutive underscores ( __ )
+	 *     are reserved for future use as predefined macro names. All
+	 *     macro names prefixed with "GL_" ("GL" followed by a single
+	 *     underscore) are also reserved."
+	 *
+	 * The intention is that names containing __ are reserved for internal
+	 * use by the implementation, and names prefixed with GL_ are reserved
+	 * for use by Khronos. Since every extension adds a name prefixed
+	 * with GL_ (i.e., the name of the extension), that should be an
+	 * error. Names simply containing __ are dangerous to use, but should
+	 * be allowed.
+	 *
+	 * A future version of the GLSL specification will clarify this.
+	 */
+	if (strstr(identifier, "__")) {
+		glcpp_warning(loc, parser,
+			      "Macro names containing \"__\" are reserved "
+			      "for use by the implementation.\n");
+	}
+	if (strncmp(identifier, "GL_", 3) == 0) {
+		glcpp_error (loc, parser, "Macro names starting with \"GL_\" are reserved.\n");
+	}
+}
+
+static int
+_macro_equal (macro_t *a, macro_t *b)
+{
+	if (a->is_function != b->is_function)
+		return 0;
+
+	if (a->is_function) {
+		if (! _string_list_equal (a->parameters, b->parameters))
+			return 0;
+	}
+
+	return _token_list_equal_ignoring_space (a->replacements,
+						 b->replacements);
+}
+
+void
+_define_object_macro (glcpp_parser_t *parser,
+		      YYLTYPE *loc,
+		      const char *identifier,
+		      token_list_t *replacements)
+{
+	macro_t *macro, *previous;
+
+	/* We define pre-defined macros before we've started parsing the
+	 * actual file. So if there's no location defined yet, that's what
+	 * we're doing and we don't want to generate an error for using the
+	 * reserved names.
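+	 *
+	 * (Example added for illustration: the predefined macros
+	 * created through add_builtin_define, such as __VERSION__ or
+	 * GL_ES, take this loc == NULL path, while a shader's own
+	 * "#define GL_foo 1" carries a real location and is rejected
+	 * by _check_for_reserved_macro_name below.)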
*/ + if (loc != NULL) + _check_for_reserved_macro_name(parser, loc, identifier); + + macro = ralloc (parser, macro_t); + + macro->is_function = 0; + macro->parameters = NULL; + macro->identifier = ralloc_strdup (macro, identifier); + macro->replacements = replacements; + ralloc_steal (macro, replacements); + + previous = hash_table_find (parser->defines, identifier); + if (previous) { + if (_macro_equal (macro, previous)) { + ralloc_free (macro); + return; + } + glcpp_error (loc, parser, "Redefinition of macro %s\n", + identifier); + } + + hash_table_insert (parser->defines, macro, identifier); +} + +void +_define_function_macro (glcpp_parser_t *parser, + YYLTYPE *loc, + const char *identifier, + string_list_t *parameters, + token_list_t *replacements) +{ + macro_t *macro, *previous; + const char *dup; + + _check_for_reserved_macro_name(parser, loc, identifier); + + /* Check for any duplicate parameter names. */ + if ((dup = _string_list_has_duplicate (parameters)) != NULL) { + glcpp_error (loc, parser, "Duplicate macro parameter \"%s\"", + dup); + } + + macro = ralloc (parser, macro_t); + ralloc_steal (macro, parameters); + ralloc_steal (macro, replacements); + + macro->is_function = 1; + macro->parameters = parameters; + macro->identifier = ralloc_strdup (macro, identifier); + macro->replacements = replacements; + previous = hash_table_find (parser->defines, identifier); + if (previous) { + if (_macro_equal (macro, previous)) { + ralloc_free (macro); + return; + } + glcpp_error (loc, parser, "Redefinition of macro %s\n", + identifier); + } + + hash_table_insert (parser->defines, macro, identifier); +} + +static int +glcpp_parser_lex (YYSTYPE *yylval, YYLTYPE *yylloc, glcpp_parser_t *parser) +{ + token_node_t *node; + int ret; + + if (parser->lex_from_list == NULL) { + ret = glcpp_lex (yylval, yylloc, parser->scanner); + + /* XXX: This ugly block of code exists for the sole + * purpose of converting a NEWLINE token into a SPACE + * token, but only in the case where we have seen a + * function-like macro name, but have not yet seen its + * closing parenthesis. + * + * There's perhaps a more compact way to do this with + * mid-rule actions in the grammar. + * + * I'm definitely not pleased with the complexity of + * this code here. 
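+	 *
+	 * (Worked example, for illustration: given
+	 * "#define foo(a) bar", the invocation may legally span
+	 * several lines, e.g. "foo" / "(" / "1" / ")". Turning the
+	 * intervening NEWLINE tokens into SPACE tokens while
+	 * paren_count > 0 lets the grammar see the ordinary call
+	 * "foo ( 1 )".)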
+ */ + if (parser->newline_as_space) + { + if (ret == '(') { + parser->paren_count++; + } else if (ret == ')') { + parser->paren_count--; + if (parser->paren_count == 0) + parser->newline_as_space = 0; + } else if (ret == NEWLINE) { + ret = SPACE; + } else if (ret != SPACE) { + if (parser->paren_count == 0) + parser->newline_as_space = 0; + } + } + else if (parser->in_control_line) + { + if (ret == NEWLINE) + parser->in_control_line = 0; + } + else if (ret == DEFINE_TOKEN || + ret == UNDEF || ret == IF || + ret == IFDEF || ret == IFNDEF || + ret == ELIF || ret == ELSE || + ret == ENDIF || ret == HASH_TOKEN) + { + parser->in_control_line = 1; + } + else if (ret == IDENTIFIER) + { + macro_t *macro; + macro = hash_table_find (parser->defines, + yylval->str); + if (macro && macro->is_function) { + parser->newline_as_space = 1; + parser->paren_count = 0; + } + } + + return ret; + } + + node = parser->lex_from_node; + + if (node == NULL) { + ralloc_free (parser->lex_from_list); + parser->lex_from_list = NULL; + return NEWLINE; + } + + *yylval = node->token->value; + ret = node->token->type; + + parser->lex_from_node = node->next; + + return ret; +} + +static void +glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list) +{ + token_node_t *node; + + assert (parser->lex_from_list == NULL); + + /* Copy list, eliminating any space tokens. */ + parser->lex_from_list = _token_list_create (parser); + + for (node = list->head; node; node = node->next) { + if (node->token->type == SPACE) + continue; + _token_list_append (parser->lex_from_list, node->token); + } + + ralloc_free (list); + + parser->lex_from_node = parser->lex_from_list->head; + + /* It's possible the list consisted of nothing but whitespace. */ + if (parser->lex_from_node == NULL) { + ralloc_free (parser->lex_from_list); + parser->lex_from_list = NULL; + } +} + +static void +_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, YYLTYPE *loc, + int condition) +{ + skip_type_t current = SKIP_NO_SKIP; + skip_node_t *node; + + if (parser->skip_stack) + current = parser->skip_stack->type; + + node = ralloc (parser, skip_node_t); + node->loc = *loc; + + if (current == SKIP_NO_SKIP) { + if (condition) + node->type = SKIP_NO_SKIP; + else + node->type = SKIP_TO_ELSE; + } else { + node->type = SKIP_TO_ENDIF; + } + + node->has_else = false; + node->next = parser->skip_stack; + parser->skip_stack = node; +} + +static void +_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, YYLTYPE *loc, + const char *type, int condition) +{ + if (parser->skip_stack == NULL) { + glcpp_error (loc, parser, "#%s without #if\n", type); + return; + } + + if (parser->skip_stack->type == SKIP_TO_ELSE) { + if (condition) + parser->skip_stack->type = SKIP_NO_SKIP; + } else { + parser->skip_stack->type = SKIP_TO_ENDIF; + } +} + +static void +_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser, YYLTYPE *loc) +{ + skip_node_t *node; + + if (parser->skip_stack == NULL) { + glcpp_error (loc, parser, "#endif without #if\n"); + return; + } + + node = parser->skip_stack; + parser->skip_stack = node->next; + ralloc_free (node); +} + +static void +_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t version, + const char *es_identifier, + bool explicitly_set) +{ + const struct gl_extensions *extensions = parser->extensions; + + if (parser->version_resolved) + return; + + parser->version_resolved = true; + + add_builtin_define (parser, "__VERSION__", version); + + parser->is_gles = (version == 100) || + (es_identifier && + 
(strcmp(es_identifier, "es") == 0)); + + /* Add pre-defined macros. */ + if (parser->is_gles) { + add_builtin_define(parser, "GL_ES", 1); + add_builtin_define(parser, "GL_EXT_separate_shader_objects", 1); + add_builtin_define(parser, "GL_EXT_draw_buffers", 1); + + if (extensions != NULL) { + if (extensions->OES_EGL_image_external) + add_builtin_define(parser, "GL_OES_EGL_image_external", 1); + if (extensions->OES_standard_derivatives) + add_builtin_define(parser, "GL_OES_standard_derivatives", 1); + if (extensions->ARB_texture_multisample) + add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1); + if (extensions->ARB_blend_func_extended) + add_builtin_define(parser, "GL_EXT_blend_func_extended", 1); + } + } else { + add_builtin_define(parser, "GL_ARB_draw_buffers", 1); + add_builtin_define(parser, "GL_ARB_enhanced_layouts", 1); + add_builtin_define(parser, "GL_ARB_separate_shader_objects", 1); + add_builtin_define(parser, "GL_ARB_texture_rectangle", 1); + add_builtin_define(parser, "GL_AMD_shader_trinary_minmax", 1); + + + if (extensions != NULL) { + if (extensions->EXT_texture_array) + add_builtin_define(parser, "GL_EXT_texture_array", 1); + + if (extensions->ARB_arrays_of_arrays) + add_builtin_define(parser, "GL_ARB_arrays_of_arrays", 1); + + if (extensions->ARB_fragment_coord_conventions) + add_builtin_define(parser, "GL_ARB_fragment_coord_conventions", + 1); + + if (extensions->ARB_fragment_layer_viewport) + add_builtin_define(parser, "GL_ARB_fragment_layer_viewport", 1); + + if (extensions->ARB_explicit_attrib_location) + add_builtin_define(parser, "GL_ARB_explicit_attrib_location", 1); + + if (extensions->ARB_explicit_uniform_location) + add_builtin_define(parser, "GL_ARB_explicit_uniform_location", 1); + + if (extensions->ARB_shader_texture_lod) + add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1); + + if (extensions->ARB_draw_instanced) + add_builtin_define(parser, "GL_ARB_draw_instanced", 1); + + if (extensions->ARB_conservative_depth) { + add_builtin_define(parser, "GL_AMD_conservative_depth", 1); + add_builtin_define(parser, "GL_ARB_conservative_depth", 1); + } + + if (extensions->ARB_shader_bit_encoding) + add_builtin_define(parser, "GL_ARB_shader_bit_encoding", 1); + + if (extensions->ARB_shader_clock) + add_builtin_define(parser, "GL_ARB_shader_clock", 1); + + if (extensions->ARB_uniform_buffer_object) + add_builtin_define(parser, "GL_ARB_uniform_buffer_object", 1); + + if (extensions->ARB_texture_cube_map_array) + add_builtin_define(parser, "GL_ARB_texture_cube_map_array", 1); + + if (extensions->ARB_shading_language_packing) + add_builtin_define(parser, "GL_ARB_shading_language_packing", 1); + + if (extensions->ARB_texture_multisample) + add_builtin_define(parser, "GL_ARB_texture_multisample", 1); + + if (extensions->ARB_texture_query_levels) + add_builtin_define(parser, "GL_ARB_texture_query_levels", 1); + + if (extensions->ARB_texture_query_lod) + add_builtin_define(parser, "GL_ARB_texture_query_lod", 1); + + if (extensions->ARB_gpu_shader5) + add_builtin_define(parser, "GL_ARB_gpu_shader5", 1); + + if (extensions->ARB_gpu_shader_fp64) + add_builtin_define(parser, "GL_ARB_gpu_shader_fp64", 1); + + if (extensions->ARB_vertex_attrib_64bit) + add_builtin_define(parser, "GL_ARB_vertex_attrib_64bit", 1); + + if (extensions->AMD_vertex_shader_layer) + add_builtin_define(parser, "GL_AMD_vertex_shader_layer", 1); + + if (extensions->AMD_vertex_shader_viewport_index) + add_builtin_define(parser, "GL_AMD_vertex_shader_viewport_index", 1); + + if 
(extensions->ARB_shading_language_420pack) + add_builtin_define(parser, "GL_ARB_shading_language_420pack", 1); + + if (extensions->ARB_sample_shading) + add_builtin_define(parser, "GL_ARB_sample_shading", 1); + + if (extensions->ARB_texture_gather) + add_builtin_define(parser, "GL_ARB_texture_gather", 1); + + if (extensions->ARB_shader_atomic_counters) + add_builtin_define(parser, "GL_ARB_shader_atomic_counters", 1); + + if (extensions->ARB_viewport_array) + add_builtin_define(parser, "GL_ARB_viewport_array", 1); + + if (extensions->ARB_compute_shader) + add_builtin_define(parser, "GL_ARB_compute_shader", 1); + + if (extensions->ARB_shader_image_load_store) + add_builtin_define(parser, "GL_ARB_shader_image_load_store", 1); + + if (extensions->ARB_shader_image_size) + add_builtin_define(parser, "GL_ARB_shader_image_size", 1); + + if (extensions->ARB_shader_texture_image_samples) + add_builtin_define(parser, "GL_ARB_shader_texture_image_samples", 1); + + if (extensions->ARB_derivative_control) + add_builtin_define(parser, "GL_ARB_derivative_control", 1); + + if (extensions->ARB_shader_precision) + add_builtin_define(parser, "GL_ARB_shader_precision", 1); + + if (extensions->ARB_shader_storage_buffer_object) + add_builtin_define(parser, "GL_ARB_shader_storage_buffer_object", 1); + + if (extensions->ARB_tessellation_shader) + add_builtin_define(parser, "GL_ARB_tessellation_shader", 1); + + if (extensions->ARB_shader_subroutine) + add_builtin_define(parser, "GL_ARB_shader_subroutine", 1); + + if (extensions->ARB_shader_draw_parameters) + add_builtin_define(parser, "GL_ARB_shader_draw_parameters", 1); + } + } + + if (extensions != NULL) { + if (extensions->EXT_shader_integer_mix) + add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1); + + if (extensions->EXT_shader_samples_identical) + add_builtin_define(parser, "GL_EXT_shader_samples_identical", 1); + } + + if (version >= 150) + add_builtin_define(parser, "GL_core_profile", 1); + + /* Currently, all ES2/ES3 implementations support highp in the + * fragment shader, so we always define this macro in ES2/ES3. + * If we ever get a driver that doesn't support highp, we'll + * need to add a flag to the gl_context and check that here. + */ + if (version >= 130 || parser->is_gles) + add_builtin_define (parser, "GL_FRAGMENT_PRECISION_HIGH", 1); + + if (explicitly_set) { + ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, + "#version %" PRIiMAX "%s%s", version, + es_identifier ? " " : "", + es_identifier ? es_identifier : ""); + } +} + +/* GLSL version if no version is explicitly specified. */ +#define IMPLICIT_GLSL_VERSION 110 + +/* GLSL ES version if no version is explicitly specified. */ +#define IMPLICIT_GLSL_ES_VERSION 100 + +void +glcpp_parser_resolve_implicit_version(glcpp_parser_t *parser) +{ + int language_version = parser->api == API_OPENGLES2 ? 
+			       IMPLICIT_GLSL_ES_VERSION :
+			       IMPLICIT_GLSL_VERSION;
+
+	_glcpp_parser_handle_version_declaration(parser, language_version,
+						 NULL, false);
+}
diff --git a/src/compiler/glsl/glcpp/glcpp.c b/src/compiler/glsl/glcpp/glcpp.c
new file mode 100644
index 00000000000..c62f4efec9d
--- /dev/null
+++ b/src/compiler/glsl/glcpp/glcpp.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <limits.h>
+#include <getopt.h>
+
+#include "glcpp.h"
+#include "main/mtypes.h"
+#include "main/shaderobj.h"
+#include "util/strtod.h"
+
+extern int glcpp_parser_debug;
+
+void
+_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
+                       struct gl_shader *sh)
+{
+	(void) ctx;
+	*ptr = sh;
+}
+
+/* Read from fp until EOF and return a string of everything read.
+ */
+static char *
+load_text_fp (void *ctx, FILE *fp)
+{
+#define CHUNK 4096
+	char *text = NULL;
+	size_t text_size = 0;
+	size_t total_read = 0;
+	size_t bytes;
+
+	while (1) {
+		if (total_read + CHUNK + 1 > text_size) {
+			text_size = text_size ? text_size * 2 : CHUNK + 1;
+			text = reralloc_size (ctx, text, text_size);
+			if (text == NULL) {
+				fprintf (stderr, "Out of memory\n");
+				return NULL;
+			}
+		}
+		bytes = fread (text + total_read, 1, CHUNK, fp);
+		total_read += bytes;
+
+		if (bytes < CHUNK) {
+			break;
+		}
+	}
+
+	text[total_read] = '\0';
+
+	return text;
+}
+
+static char *
+load_text_file(void *ctx, const char *filename)
+{
+	char *text;
+	FILE *fp;
+
+	if (filename == NULL || strcmp (filename, "-") == 0)
+		return load_text_fp (ctx, stdin);
+
+	fp = fopen (filename, "r");
+	if (fp == NULL) {
+		fprintf (stderr, "Failed to open file %s: %s\n",
+			 filename, strerror (errno));
+		return NULL;
+	}
+
+	text = load_text_fp (ctx, fp);
+
+	fclose(fp);
+
+	return text;
+}
+
+/* Initialize only those things that glcpp cares about.
+ */
+static void
+init_fake_gl_context (struct gl_context *gl_ctx)
+{
+	gl_ctx->API = API_OPENGL_COMPAT;
+	gl_ctx->Const.DisableGLSLLineContinuations = false;
+}
+
+static void
+usage (void)
+{
+	fprintf (stderr,
+		 "Usage: glcpp [OPTIONS] [--] [<filename>]\n"
+		 "\n"
+		 "Pre-process the given filename (stdin if no filename given).\n"
+		 "The following options are supported:\n"
+		 "    --disable-line-continuations      Do not interpret lines ending with a\n"
+		 "                                      backslash ('\\') as a line continuation.\n");
+}
+
+enum {
+	DISABLE_LINE_CONTINUATIONS_OPT = CHAR_MAX + 1
+};
+
+static const struct option
+long_options[] = {
+	{"disable-line-continuations", no_argument, 0, DISABLE_LINE_CONTINUATIONS_OPT },
+	{"debug",                      no_argument, 0, 'd'},
+	{0,                            0,           0, 0 }
+};
+
+int
+main (int argc, char *argv[])
+{
+	char *filename = NULL;
+	void *ctx = ralloc(NULL, void*);
+	char *info_log = ralloc_strdup(ctx, "");
+	const char *shader;
+	int ret;
+	struct gl_context gl_ctx;
+	int c;
+
+	init_fake_gl_context (&gl_ctx);
+
+	while ((c = getopt_long(argc, argv, "d", long_options, NULL)) != -1) {
+		switch (c) {
+		case DISABLE_LINE_CONTINUATIONS_OPT:
+			gl_ctx.Const.DisableGLSLLineContinuations = true;
+			break;
+		case 'd':
+			glcpp_parser_debug = 1;
+			break;
+		default:
+			usage ();
+			exit (1);
+		}
+	}
+
+	if (optind + 1 < argc) {
+		printf ("Unexpected argument: %s\n", argv[optind+1]);
+		usage ();
+		exit (1);
+	}
+	if (optind < argc) {
+		filename = argv[optind];
+	}
+
+	shader = load_text_file (ctx, filename);
+	if (shader == NULL)
+		return 1;
+
+	_mesa_locale_init();
+
+	ret = glcpp_preprocess(ctx, &shader, &info_log, NULL, &gl_ctx);
+
+	printf("%s", shader);
+	fprintf(stderr, "%s", info_log);
+
+	ralloc_free(ctx);
+
+	return ret;
+}
diff --git a/src/compiler/glsl/glcpp/glcpp.h b/src/compiler/glsl/glcpp/glcpp.h
new file mode 100644
index 00000000000..70aa14b6ec0
--- /dev/null
+++ b/src/compiler/glsl/glcpp/glcpp.h
@@ -0,0 +1,251 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef GLCPP_H
+#define GLCPP_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "main/mtypes.h"
+
+#include "util/ralloc.h"
+
+#include "program/hash_table.h"
+
+#define yyscan_t void*
+
+/* Some data types used for parser values.
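+ *
+ * (Sketch added for illustration, not in the original header: for a
+ * call like "foo(a b, c)" the parser builds an argument_list_t with
+ * two argument_node_t entries whose token_list_t contents are the
+ * token sequences "a b" and "c".)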
*/ + +typedef struct expression_value { + intmax_t value; + char *undefined_macro; +} expression_value_t; + + +typedef struct string_node { + const char *str; + struct string_node *next; +} string_node_t; + +typedef struct string_list { + string_node_t *head; + string_node_t *tail; +} string_list_t; + +typedef struct token token_t; +typedef struct token_list token_list_t; + +typedef union YYSTYPE +{ + intmax_t ival; + expression_value_t expression_value; + char *str; + string_list_t *string_list; + token_t *token; + token_list_t *token_list; +} YYSTYPE; + +# define YYSTYPE_IS_TRIVIAL 1 +# define YYSTYPE_IS_DECLARED 1 + +typedef struct YYLTYPE { + int first_line; + int first_column; + int last_line; + int last_column; + unsigned source; +} YYLTYPE; +# define YYLTYPE_IS_DECLARED 1 +# define YYLTYPE_IS_TRIVIAL 1 + +# define YYLLOC_DEFAULT(Current, Rhs, N) \ +do { \ + if (N) \ + { \ + (Current).first_line = YYRHSLOC(Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC(Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC(Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC(Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = (Current).last_line = \ + YYRHSLOC(Rhs, 0).last_line; \ + (Current).first_column = (Current).last_column = \ + YYRHSLOC(Rhs, 0).last_column; \ + } \ + (Current).source = 0; \ +} while (0) + +struct token { + int type; + YYSTYPE value; + YYLTYPE location; +}; + +typedef struct token_node { + token_t *token; + struct token_node *next; +} token_node_t; + +struct token_list { + token_node_t *head; + token_node_t *tail; + token_node_t *non_space_tail; +}; + +typedef struct argument_node { + token_list_t *argument; + struct argument_node *next; +} argument_node_t; + +typedef struct argument_list { + argument_node_t *head; + argument_node_t *tail; +} argument_list_t; + +typedef struct glcpp_parser glcpp_parser_t; + +typedef enum { + TOKEN_CLASS_IDENTIFIER, + TOKEN_CLASS_IDENTIFIER_FINALIZED, + TOKEN_CLASS_FUNC_MACRO, + TOKEN_CLASS_OBJ_MACRO +} token_class_t; + +token_class_t +glcpp_parser_classify_token (glcpp_parser_t *parser, + const char *identifier, + int *parameter_index); + +typedef struct { + int is_function; + string_list_t *parameters; + const char *identifier; + token_list_t *replacements; +} macro_t; + +typedef struct expansion_node { + macro_t *macro; + token_node_t *replacements; + struct expansion_node *next; +} expansion_node_t; + +typedef enum skip_type { + SKIP_NO_SKIP, + SKIP_TO_ELSE, + SKIP_TO_ENDIF +} skip_type_t; + +typedef struct skip_node { + skip_type_t type; + bool has_else; + YYLTYPE loc; /* location of the initial #if/#elif/... 
*/
+	struct skip_node *next;
+} skip_node_t;
+
+typedef struct active_list {
+	const char *identifier;
+	token_node_t *marker;
+	struct active_list *next;
+} active_list_t;
+
+struct glcpp_parser {
+	yyscan_t scanner;
+	struct hash_table *defines;
+	active_list_t *active;
+	int lexing_directive;
+	int space_tokens;
+	int last_token_was_newline;
+	int last_token_was_space;
+	int first_non_space_token_this_line;
+	int newline_as_space;
+	int in_control_line;
+	int paren_count;
+	int commented_newlines;
+	skip_node_t *skip_stack;
+	int skipping;
+	token_list_t *lex_from_list;
+	token_node_t *lex_from_node;
+	char *output;
+	char *info_log;
+	size_t output_length;
+	size_t info_log_length;
+	int error;
+	const struct gl_extensions *extensions;
+	gl_api api;
+	bool version_resolved;
+	bool has_new_line_number;
+	int new_line_number;
+	bool has_new_source_number;
+	int new_source_number;
+	bool is_gles;
+};
+
+struct gl_extensions;
+
+glcpp_parser_t *
+glcpp_parser_create (const struct gl_extensions *extensions, gl_api api);
+
+int
+glcpp_parser_parse (glcpp_parser_t *parser);
+
+void
+glcpp_parser_destroy (glcpp_parser_t *parser);
+
+void
+glcpp_parser_resolve_implicit_version(glcpp_parser_t *parser);
+
+int
+glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log,
+		 const struct gl_extensions *extensions, struct gl_context *g_ctx);
+
+/* Functions for writing to the info log */
+
+void
+glcpp_error (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...);
+
+void
+glcpp_warning (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...);
+
+/* Generated by glcpp-lex.l to glcpp-lex.c */
+
+int
+glcpp_lex_init_extra (glcpp_parser_t *parser, yyscan_t* scanner);
+
+void
+glcpp_lex_set_source_string(glcpp_parser_t *parser, const char *shader);
+
+int
+glcpp_lex (YYSTYPE *lvalp, YYLTYPE *llocp, yyscan_t scanner);
+
+int
+glcpp_lex_destroy (yyscan_t scanner);
+
+/* Generated by glcpp-parse.y to glcpp-parse.c */
+
+int
+yyparse (glcpp_parser_t *parser);
+
+#endif
diff --git a/src/compiler/glsl/glcpp/pp.c b/src/compiler/glsl/glcpp/pp.c
new file mode 100644
index 00000000000..160c6662ff6
--- /dev/null
+++ b/src/compiler/glsl/glcpp/pp.c
@@ -0,0 +1,241 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+#include "glcpp.h"
+
+void
+glcpp_error (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...)
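+/* (Note added for illustration: the "%u:%u(%u)" prefix below renders as
+ * source:line(column), so an error at line 12, column 21 of the main
+ * shader string is reported as "0:12(21): preprocessor error: ...", as
+ * seen in the .expected files of the tests that follow.)
+ */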
+{ + va_list ap; + + parser->error = 1; + ralloc_asprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, + "%u:%u(%u): " + "preprocessor error: ", + locp->source, + locp->first_line, + locp->first_column); + va_start(ap, fmt); + ralloc_vasprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, + fmt, ap); + va_end(ap); + ralloc_asprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, "\n"); +} + +void +glcpp_warning (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...) +{ + va_list ap; + + ralloc_asprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, + "%u:%u(%u): " + "preprocessor warning: ", + locp->source, + locp->first_line, + locp->first_column); + va_start(ap, fmt); + ralloc_vasprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, + fmt, ap); + va_end(ap); + ralloc_asprintf_rewrite_tail(&parser->info_log, + &parser->info_log_length, "\n"); +} + +/* Given str, (that's expected to start with a newline terminator of some + * sort), return a pointer to the first character in str after the newline. + * + * A newline terminator can be any of the following sequences: + * + * "\r\n" + * "\n\r" + * "\n" + * "\r" + * + * And the longest such sequence will be skipped. + */ +static const char * +skip_newline (const char *str) +{ + const char *ret = str; + + if (ret == NULL) + return ret; + + if (*ret == '\0') + return ret; + + if (*ret == '\r') { + ret++; + if (*ret && *ret == '\n') + ret++; + } else if (*ret == '\n') { + ret++; + if (*ret && *ret == '\r') + ret++; + } + + return ret; +} + +/* Remove any line continuation characters in the shader, (whether in + * preprocessing directives or in GLSL code). + */ +static char * +remove_line_continuations(glcpp_parser_t *ctx, const char *shader) +{ + char *clean = ralloc_strdup(ctx, ""); + const char *backslash, *newline, *search_start; + const char *cr, *lf; + char newline_separator[3]; + int collapsed_newlines = 0; + + search_start = shader; + + /* Determine what flavor of newlines this shader is using. GLSL + * provides for 4 different possible ways to separate lines, (using + * one or two characters): + * + * "\n" (line-feed, like Linux, Unix, and new Mac OS) + * "\r" (carriage-return, like old Mac files) + * "\r\n" (carriage-return + line-feed, like DOS files) + * "\n\r" (line-feed + carriage-return, like nothing, really) + * + * This code explicitly supports a shader that uses a mixture of + * newline terminators and will properly handle line continuation + * backslashes followed by any of the above. + * + * But, since we must also insert additional newlines in the output + * (for any collapsed lines) we attempt to maintain consistency by + * examining the first encountered newline terminator, and using the + * same terminator for any newlines we insert. + */ + cr = strchr(search_start, '\r'); + lf = strchr(search_start, '\n'); + + newline_separator[0] = '\n'; + newline_separator[1] = '\0'; + newline_separator[2] = '\0'; + + if (cr == NULL) { + /* Nothing to do. */ + } else if (lf == NULL) { + newline_separator[0] = '\r'; + } else if (lf == cr + 1) { + newline_separator[0] = '\r'; + newline_separator[1] = '\n'; + } else if (cr == lf + 1) { + newline_separator[0] = '\n'; + newline_separator[1] = '\r'; + } + + while (true) { + backslash = strchr(search_start, '\\'); + + /* If we have previously collapsed any line-continuations, + * then we want to insert additional newlines at the next + * occurrence of a newline character to avoid changing any + * line numbers. 
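+		 *
+		 * (Worked example, illustration only: if the physical
+		 * lines "a \" and "b" were collapsed into the single
+		 * logical line "a b", one replacement newline is
+		 * queued and emitted at the next newline terminator,
+		 * so a following line "c" keeps its original line
+		 * number.)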
+ */ + if (collapsed_newlines) { + cr = strchr (search_start, '\r'); + lf = strchr (search_start, '\n'); + if (cr && lf) + newline = cr < lf ? cr : lf; + else if (cr) + newline = cr; + else + newline = lf; + if (newline && + (backslash == NULL || newline < backslash)) + { + ralloc_strncat(&clean, shader, + newline - shader + 1); + while (collapsed_newlines) { + ralloc_strcat(&clean, newline_separator); + collapsed_newlines--; + } + shader = skip_newline (newline); + search_start = shader; + } + } + + search_start = backslash + 1; + + if (backslash == NULL) + break; + + /* At each line continuation, (backslash followed by a + * newline), copy all preceding text to the output, then + * advance the shader pointer to the character after the + * newline. + */ + if (backslash[1] == '\r' || backslash[1] == '\n') + { + collapsed_newlines++; + ralloc_strncat(&clean, shader, backslash - shader); + shader = skip_newline (backslash + 1); + search_start = shader; + } + } + + ralloc_strcat(&clean, shader); + + return clean; +} + +int +glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log, + const struct gl_extensions *extensions, struct gl_context *gl_ctx) +{ + int errors; + glcpp_parser_t *parser = glcpp_parser_create (extensions, gl_ctx->API); + + if (! gl_ctx->Const.DisableGLSLLineContinuations) + *shader = remove_line_continuations(parser, *shader); + + glcpp_lex_set_source_string (parser, *shader); + + glcpp_parser_parse (parser); + + if (parser->skip_stack) + glcpp_error (&parser->skip_stack->loc, parser, "Unterminated #if\n"); + + glcpp_parser_resolve_implicit_version(parser); + + ralloc_strcat(info_log, parser->info_log); + + ralloc_steal(ralloc_ctx, parser->output); + *shader = parser->output; + + errors = parser->error; + glcpp_parser_destroy (parser); + return errors; +} diff --git a/src/compiler/glsl/glcpp/tests/.gitignore b/src/compiler/glsl/glcpp/tests/.gitignore new file mode 100644 index 00000000000..3802c850a3e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/.gitignore @@ -0,0 +1,4 @@ +subtest-cr/ +subtest-lf/ +subtest-cr-lf/ +subtest-lf-cr/ diff --git a/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c b/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c new file mode 100644 index 00000000000..1f2320e6fc1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c @@ -0,0 +1 @@ + this is four tokens with spaces diff --git a/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c.expected b/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c.expected new file mode 100644 index 00000000000..00791910ed5 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/000-content-with-spaces.c.expected @@ -0,0 +1 @@ + this is four tokens with spaces diff --git a/src/compiler/glsl/glcpp/tests/001-define.c b/src/compiler/glsl/glcpp/tests/001-define.c new file mode 100644 index 00000000000..cbf2fee0e75 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/001-define.c @@ -0,0 +1,2 @@ +#define foo 1 +foo diff --git a/src/compiler/glsl/glcpp/tests/001-define.c.expected b/src/compiler/glsl/glcpp/tests/001-define.c.expected new file mode 100644 index 00000000000..a464d9da742 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/001-define.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/src/compiler/glsl/glcpp/tests/002-define-chain.c b/src/compiler/glsl/glcpp/tests/002-define-chain.c new file mode 100644 index 00000000000..87d75c68751 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/002-define-chain.c @@ -0,0 +1,3 @@ +#define foo 1 +#define bar foo +bar diff --git 
a/src/compiler/glsl/glcpp/tests/002-define-chain.c.expected b/src/compiler/glsl/glcpp/tests/002-define-chain.c.expected new file mode 100644 index 00000000000..c6c9ee38a9e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/002-define-chain.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c b/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c new file mode 100644 index 00000000000..a18b724eca0 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c @@ -0,0 +1,3 @@ +#define bar foo +#define foo 1 +bar diff --git a/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c.expected b/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c.expected new file mode 100644 index 00000000000..c6c9ee38a9e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/003-define-chain-reverse.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/src/compiler/glsl/glcpp/tests/004-define-recursive.c b/src/compiler/glsl/glcpp/tests/004-define-recursive.c new file mode 100644 index 00000000000..2ac56ea3dcf --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/004-define-recursive.c @@ -0,0 +1,6 @@ +#define foo bar +#define bar baz +#define baz foo +foo +bar +baz diff --git a/src/compiler/glsl/glcpp/tests/004-define-recursive.c.expected b/src/compiler/glsl/glcpp/tests/004-define-recursive.c.expected new file mode 100644 index 00000000000..2d07687f8ca --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/004-define-recursive.c.expected @@ -0,0 +1,6 @@ + + + +foo +bar +baz diff --git a/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c b/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c new file mode 100644 index 00000000000..f5521df968d --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c @@ -0,0 +1,3 @@ +#define foo 1 +#define bar a foo +bar diff --git a/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c.expected b/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c.expected new file mode 100644 index 00000000000..892975c268c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/005-define-composite-chain.c.expected @@ -0,0 +1,3 @@ + + +a 1 diff --git a/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c b/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c new file mode 100644 index 00000000000..4bb91a1221a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c @@ -0,0 +1,3 @@ +#define bar a foo +#define foo 1 +bar diff --git a/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected b/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected new file mode 100644 index 00000000000..892975c268c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected @@ -0,0 +1,3 @@ + + +a 1 diff --git a/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c b/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c new file mode 100644 index 00000000000..5784565bdf3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c @@ -0,0 +1,6 @@ +#define foo a bar +#define bar b baz +#define baz c foo +foo +bar +baz diff --git a/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c.expected b/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c.expected new file mode 100644 index 00000000000..0b0b477d9df --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/007-define-composite-recursive.c.expected @@ -0,0 +1,6 @@ + + + +a b c foo +b c a 
bar +c a b baz diff --git a/src/compiler/glsl/glcpp/tests/008-define-empty.c b/src/compiler/glsl/glcpp/tests/008-define-empty.c new file mode 100644 index 00000000000..b1bd17ec215 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/008-define-empty.c @@ -0,0 +1,2 @@ +#define foo +foo diff --git a/src/compiler/glsl/glcpp/tests/008-define-empty.c.expected b/src/compiler/glsl/glcpp/tests/008-define-empty.c.expected new file mode 100644 index 00000000000..d148bc8e800 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/008-define-empty.c.expected @@ -0,0 +1,2 @@ + + diff --git a/src/compiler/glsl/glcpp/tests/009-undef.c b/src/compiler/glsl/glcpp/tests/009-undef.c new file mode 100644 index 00000000000..3fc1fb44243 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/009-undef.c @@ -0,0 +1,4 @@ +#define foo 1 +foo +#undef foo +foo diff --git a/src/compiler/glsl/glcpp/tests/009-undef.c.expected b/src/compiler/glsl/glcpp/tests/009-undef.c.expected new file mode 100644 index 00000000000..9c0b35a4518 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/009-undef.c.expected @@ -0,0 +1,4 @@ + +1 + +foo diff --git a/src/compiler/glsl/glcpp/tests/010-undef-re-define.c b/src/compiler/glsl/glcpp/tests/010-undef-re-define.c new file mode 100644 index 00000000000..32ff73798b1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/010-undef-re-define.c @@ -0,0 +1,6 @@ +#define foo 1 +foo +#undef foo +foo +#define foo 2 +foo diff --git a/src/compiler/glsl/glcpp/tests/010-undef-re-define.c.expected b/src/compiler/glsl/glcpp/tests/010-undef-re-define.c.expected new file mode 100644 index 00000000000..5970f49028e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/010-undef-re-define.c.expected @@ -0,0 +1,6 @@ + +1 + +foo + +2 diff --git a/src/compiler/glsl/glcpp/tests/011-define-func-empty.c b/src/compiler/glsl/glcpp/tests/011-define-func-empty.c new file mode 100644 index 00000000000..d9ce13c2284 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/011-define-func-empty.c @@ -0,0 +1,2 @@ +#define foo() +foo() diff --git a/src/compiler/glsl/glcpp/tests/011-define-func-empty.c.expected b/src/compiler/glsl/glcpp/tests/011-define-func-empty.c.expected new file mode 100644 index 00000000000..d148bc8e800 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/011-define-func-empty.c.expected @@ -0,0 +1,2 @@ + + diff --git a/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c b/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c new file mode 100644 index 00000000000..c2bb730b115 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c @@ -0,0 +1,2 @@ +#define foo() bar +foo() diff --git a/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c.expected b/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c.expected new file mode 100644 index 00000000000..9f075f26004 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/012-define-func-no-args.c.expected @@ -0,0 +1,2 @@ + +bar diff --git a/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c b/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c new file mode 100644 index 00000000000..f78fb8b118a --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c @@ -0,0 +1,2 @@ +#define foo(x) 1 +foo(bar) diff --git a/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected b/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected new file mode 100644 index 00000000000..a464d9da742 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected @@ -0,0 +1,2 @@ + +1 diff --git 
a/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c b/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c new file mode 100644 index 00000000000..11feb2624b7 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c @@ -0,0 +1,2 @@ +#define foo(x,y) 1 +foo(bar,baz) diff --git a/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected b/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected new file mode 100644 index 00000000000..a464d9da742 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected @@ -0,0 +1,2 @@ + +1 diff --git a/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c b/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c new file mode 100644 index 00000000000..558da9c617b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c @@ -0,0 +1,4 @@ +#define foo ()1 +foo() +#define bar ()2 +bar() diff --git a/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c.expected b/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c.expected new file mode 100644 index 00000000000..a70321a4c51 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/015-define-object-with-parens.c.expected @@ -0,0 +1,4 @@ + +()1() + +()2() diff --git a/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c b/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c new file mode 100644 index 00000000000..a2e2404c7c1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c @@ -0,0 +1,2 @@ +#define foo(x) ((x)+1) +foo(bar) diff --git a/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c.expected b/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c.expected new file mode 100644 index 00000000000..6bfe04f7381 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/016-define-func-1-arg.c.expected @@ -0,0 +1,2 @@ + +((bar)+1) diff --git a/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c b/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c new file mode 100644 index 00000000000..c7253835278 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c @@ -0,0 +1,2 @@ +#define foo(x,y) ((x)*(y)) +foo(bar,baz) diff --git a/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c.expected b/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c.expected new file mode 100644 index 00000000000..f7a2b8c26cb --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/017-define-func-2-args.c.expected @@ -0,0 +1,2 @@ + +((bar)*(baz)) diff --git a/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c b/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c new file mode 100644 index 00000000000..668130b8f9b --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c @@ -0,0 +1,3 @@ +#define x 0 +#define foo(x) x +foo(1) diff --git a/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected b/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected new file mode 100644 index 00000000000..c6c9ee38a9e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected @@ -0,0 +1,3 @@ + + +1 diff --git a/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c b/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c new file mode 100644 index 00000000000..c4e62b25508 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c @@ -0,0 +1,2 @@ +#define foo(x) (x) +foo(this is more than one word) diff --git 
a/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected b/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected new file mode 100644 index 00000000000..1e89b8cfd0c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected @@ -0,0 +1,2 @@ + +(this is more than one word) diff --git a/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c b/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c new file mode 100644 index 00000000000..3049ad15465 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c @@ -0,0 +1,2 @@ +#define foo(x,y) x,two fish,red fish,y +foo(one fish, blue fish) diff --git a/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected b/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected new file mode 100644 index 00000000000..19f59f5ecb7 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected @@ -0,0 +1,2 @@ + +one fish,two fish,red fish,blue fish diff --git a/src/compiler/glsl/glcpp/tests/021-define-func-compose.c b/src/compiler/glsl/glcpp/tests/021-define-func-compose.c new file mode 100644 index 00000000000..21ddd0e65f9 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/021-define-func-compose.c @@ -0,0 +1,3 @@ +#define bar(x) (1+(x)) +#define foo(y) (2*(y)) +foo(bar(3)) diff --git a/src/compiler/glsl/glcpp/tests/021-define-func-compose.c.expected b/src/compiler/glsl/glcpp/tests/021-define-func-compose.c.expected new file mode 100644 index 00000000000..87f51f0baca --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/021-define-func-compose.c.expected @@ -0,0 +1,3 @@ + + +(2*((1+(3)))) diff --git a/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c b/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c new file mode 100644 index 00000000000..c20d73a4a28 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c @@ -0,0 +1,2 @@ +#define foo(x) (x) +foo(argument(including parens)for the win) diff --git a/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected b/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected new file mode 100644 index 00000000000..1dfc6698bb7 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected @@ -0,0 +1,2 @@ + +(argument(including parens)for the win) diff --git a/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c b/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c new file mode 100644 index 00000000000..7ebfed6516c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c @@ -0,0 +1,8 @@ +#define noargs() 1 +# define onearg(foo) foo + # define twoargs( x , y ) x y + # define threeargs( a , b , c ) a b c +noargs ( ) +onearg ( 2 ) +twoargs ( 3 , 4 ) +threeargs ( 5 , 6 , 7 ) diff --git a/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c.expected b/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c.expected new file mode 100644 index 00000000000..9c58275d0f9 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/023-define-extra-whitespace.c.expected @@ -0,0 +1,8 @@ + + + + +1 +2 +3 4 +5 6 7 diff --git a/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c b/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c new file mode 100644 index 00000000000..e788adce30c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c @@ -0,0 +1,3 @@ +#define foo foo +#define bar foo +bar 
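
(Aside, for illustration only and not part of the patch: test 024 above
exercises the active-list recursion guard from glcpp-parse.y. Expanding
"bar" yields "foo"; expanding that yields "foo" again, but "foo" is by
then on the active list, so the token is demoted to type OTHER and the
output is a literal, un-expanded "foo" rather than an infinite loop.)
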
diff --git a/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected b/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected
new file mode 100644
index 00000000000..15600af546b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected
@@ -0,0 +1,3 @@
+
+
+foo
diff --git a/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c b/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c
new file mode 100644
index 00000000000..b433671d1bf
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c
@@ -0,0 +1,2 @@
+#define foo(bar) bar
+foo bar
diff --git a/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected b/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected
new file mode 100644
index 00000000000..4a59f0520e3
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected
@@ -0,0 +1,2 @@
+
+foo bar
diff --git a/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c b/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c
new file mode 100644
index 00000000000..0d837405309
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c
@@ -0,0 +1,6 @@
+#define foo(a) bar
+
+foo
+(
+1
+)
diff --git a/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected b/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected
new file mode 100644
index 00000000000..5e3c70f2cc5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected
@@ -0,0 +1,3 @@
+
+
+bar
diff --git a/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c b/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c
new file mode 100644
index 00000000000..5ccb52caba5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c
@@ -0,0 +1,3 @@
+#define failure() success
+#define foo failure()
+foo
diff --git a/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected b/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected
new file mode 100644
index 00000000000..94c15f95059
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected
@@ -0,0 +1,3 @@
+
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c b/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c
new file mode 100644
index 00000000000..44962a71876
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c
@@ -0,0 +1,3 @@
+#define success() failure
+#define foo success
+foo
diff --git a/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected b/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected
new file mode 100644
index 00000000000..94c15f95059
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected
@@ -0,0 +1,3 @@
+
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c b/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c
new file mode 100644
index 00000000000..261f7d28fc2
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c
@@ -0,0 +1,3 @@
+#define bar(failure) failure
+#define foo bar(success)
+foo
diff --git a/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected b/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected
new file mode 100644
index 00000000000..94c15f95059
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected
@@ -0,0 +1,3 @@
+
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c b/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c
new file mode 100644
index 00000000000..e56fbefd62d
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c
@@ -0,0 +1,4 @@
+#define baz(failure) failure
+#define bar(failure) failure
+#define foo bar(baz(success))
+foo
diff --git a/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected b/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected
new file mode 100644
index 00000000000..bed826e7831
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected
@@ -0,0 +1,4 @@
+
+
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c b/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c
new file mode 100644
index 00000000000..3f4c8744dff
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c
@@ -0,0 +1,4 @@
+#define baz(failure) failure
+#define bar(failure) failure
+#define foo() bar(baz(success))
+foo()
diff --git a/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected b/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected
new file mode 100644
index 00000000000..bed826e7831
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected
@@ -0,0 +1,4 @@
+
+
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c b/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c
new file mode 100644
index 00000000000..b3ac70f499c
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c
@@ -0,0 +1,2 @@
+#define foo(a) foo(2*(a))
+foo(3)
diff --git a/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c.expected b/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c.expected
new file mode 100644
index 00000000000..983f9417401
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/032-define-func-self-recurse.c.expected
@@ -0,0 +1,2 @@
+
+foo(2*(3))
diff --git a/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c b/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c
new file mode 100644
index 00000000000..f65e48286cf
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c
@@ -0,0 +1,2 @@
+#define foo(a) foo(2*(a))
+foo(foo(3))
diff --git a/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c.expected b/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c.expected
new file mode 100644
index 00000000000..08183623643
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/033-define-func-self-compose.c.expected
@@ -0,0 +1,2 @@
+
+foo(2*(foo(2*(3))))
diff --git a/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c b/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c
new file mode 100644
index 00000000000..209a5f7e07c
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c
@@ -0,0 +1,2 @@
+#define foo(bar) bar
+foo(foo)
diff --git a/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected b/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected
new file mode 100644
index 00000000000..3f808fe665d
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected
@@ -0,0 +1,2 @@
+
+foo
diff --git a/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c b/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c
new file mode 100644
index 00000000000..c307fbe830f
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c
@@ -0,0 +1,2 @@
+#define foo(bar) bar
+foo(1+foo)
diff --git a/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected b/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected
new file mode 100644
index 00000000000..09dfdd64e9b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected
@@ -0,0 +1,2 @@
+
+1+foo
diff --git a/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c b/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c
new file mode 100644
index 00000000000..b21ff336738
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c
@@ -0,0 +1,3 @@
+#define bar success
+#define foo(x) x
+foo(more bar)
diff --git a/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected b/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected
new file mode 100644
index 00000000000..580ed9599c5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected
@@ -0,0 +1,3 @@
+
+
+more success
diff --git a/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c b/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c
new file mode 100644
index 00000000000..b3a2f37f1b9
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c
@@ -0,0 +1,3 @@
+#define expand(x) expand(x once)
+#define foo(x) x
+foo(expand(just))
diff --git a/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected b/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected
new file mode 100644
index 00000000000..e804d7e4f9f
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected
@@ -0,0 +1,3 @@
+
+
+expand(just once)
diff --git a/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c b/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c
new file mode 100644
index 00000000000..1407c7d6e3c
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c
@@ -0,0 +1,2 @@
+#define foo(x) success
+foo(argument (with,embedded , commas) -- tricky)
diff --git a/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c.expected b/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c.expected
new file mode 100644
index 00000000000..6544adb3a25
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/038-func-arg-with-commas.c.expected
@@ -0,0 +1,2 @@
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c b/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c
new file mode 100644
index 00000000000..a7c053bb402
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c
@@ -0,0 +1,24 @@
+/* This works. */
+#define foo(a) (a)
+#define bar two,words
+foo(bar)
+
+/* So does this. */
+#define foo2(a,b) (a separate b)
+#define foo2_wrap(a) foo2(a)
+foo2_wrap(bar)
+
+/* But this generates an error. */
+#define foo_wrap(a) foo(a)
+foo_wrap(bar)
+
+/* Adding parentheses to foo_wrap fixes it. */
+#define foo_wrap_parens(a) foo((a))
+foo_wrap_parens(bar)
+
+/* As does adding parentheses to bar */
+#define bar_parens (two,words)
+foo_wrap(bar_parens)
+foo_wrap_parens(bar_parens)
+
+
diff --git a/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected b/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected
new file mode 100644
index 00000000000..4cc795338b2
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected
@@ -0,0 +1,26 @@
+0:12(21): preprocessor error: Error: macro foo invoked with 2 arguments (expected 1)
+
+
+
+
+(two,words)
+
+
+
+
+(two separate words)
+
+
+
+foo(two,words)
+
+
+
+((two,words))
+
+
+
+((two,words))
+(((two,words)))
+
+
diff --git a/src/compiler/glsl/glcpp/tests/040-token-pasting.c b/src/compiler/glsl/glcpp/tests/040-token-pasting.c
new file mode 100644
index 00000000000..caab3ba7368
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/040-token-pasting.c
@@ -0,0 +1,2 @@
+#define paste(a,b) a ## b
+paste(one , token)
diff --git a/src/compiler/glsl/glcpp/tests/040-token-pasting.c.expected b/src/compiler/glsl/glcpp/tests/040-token-pasting.c.expected
new file mode 100644
index 00000000000..48e836ec3fa
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/040-token-pasting.c.expected
@@ -0,0 +1,2 @@
+
+onetoken
diff --git a/src/compiler/glsl/glcpp/tests/041-if-0.c b/src/compiler/glsl/glcpp/tests/041-if-0.c
new file mode 100644
index 00000000000..2cab677d3e8
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/041-if-0.c
@@ -0,0 +1,5 @@
+success_1
+#if 0
+failure
+#endif
+success_2
diff --git a/src/compiler/glsl/glcpp/tests/041-if-0.c.expected b/src/compiler/glsl/glcpp/tests/041-if-0.c.expected
new file mode 100644
index 00000000000..8b506b32d55
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/041-if-0.c.expected
@@ -0,0 +1,5 @@
+success_1
+
+
+
+success_2
diff --git a/src/compiler/glsl/glcpp/tests/042-if-1.c b/src/compiler/glsl/glcpp/tests/042-if-1.c
new file mode 100644
index 00000000000..874a25cf41b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/042-if-1.c
@@ -0,0 +1,5 @@
+success_1
+#if 1
+success_2
+#endif
+success_3
diff --git a/src/compiler/glsl/glcpp/tests/042-if-1.c.expected b/src/compiler/glsl/glcpp/tests/042-if-1.c.expected
new file mode 100644
index 00000000000..a6ae9465a97
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/042-if-1.c.expected
@@ -0,0 +1,5 @@
+success_1
+
+success_2
+
+success_3
diff --git a/src/compiler/glsl/glcpp/tests/043-if-0-else.c b/src/compiler/glsl/glcpp/tests/043-if-0-else.c
new file mode 100644
index 00000000000..323351f9dbf
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/043-if-0-else.c
@@ -0,0 +1,7 @@
+success_1
+#if 0
+failure
+#else
+success_2
+#endif
+success_3
diff --git a/src/compiler/glsl/glcpp/tests/043-if-0-else.c.expected b/src/compiler/glsl/glcpp/tests/043-if-0-else.c.expected
new file mode 100644
index 00000000000..3d7e6be96c8
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/043-if-0-else.c.expected
@@ -0,0 +1,7 @@
+success_1
+
+
+
+success_2
+
+success_3
diff --git a/src/compiler/glsl/glcpp/tests/044-if-1-else.c b/src/compiler/glsl/glcpp/tests/044-if-1-else.c
new file mode 100644
index 00000000000..28dfc25c6f0
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/044-if-1-else.c
@@ -0,0 +1,7 @@
+success_1
+#if 1
+success_2
+#else
+failure
+#endif
+success_3
diff --git a/src/compiler/glsl/glcpp/tests/044-if-1-else.c.expected b/src/compiler/glsl/glcpp/tests/044-if-1-else.c.expected
new file mode 100644
index 00000000000..4a31e1cfa9e
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/044-if-1-else.c.expected
@@ -0,0 +1,7 @@
+success_1
+
+success_2
+
+
+
+success_3
diff --git a/src/compiler/glsl/glcpp/tests/045-if-0-elif.c b/src/compiler/glsl/glcpp/tests/045-if-0-elif.c
new file mode 100644
index 00000000000..e50f686d461
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/045-if-0-elif.c
@@ -0,0 +1,11 @@
+success_1
+#if 0
+failure_1
+#elif 0
+failure_2
+#elif 1
+success_3
+#elif 1
+failure_3
+#endif
+success_4
diff --git a/src/compiler/glsl/glcpp/tests/045-if-0-elif.c.expected b/src/compiler/glsl/glcpp/tests/045-if-0-elif.c.expected
new file mode 100644
index 00000000000..a9bb1588e4f
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/045-if-0-elif.c.expected
@@ -0,0 +1,11 @@
+success_1
+
+
+
+
+
+success_3
+
+
+
+success_4
diff --git a/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c b/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c
new file mode 100644
index 00000000000..130515a01ea
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c
@@ -0,0 +1,11 @@
+success_1
+#if 1
+success_2
+#elif 0
+failure_1
+#elif 1
+failure_2
+#elif 0
+failure_3
+#endif
+success_3
diff --git a/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c.expected b/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c.expected
new file mode 100644
index 00000000000..a4995713ca5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/046-if-1-elsif.c.expected
@@ -0,0 +1,11 @@
+success_1
+
+success_2
+
+
+
+
+
+
+
+success_3
diff --git a/src/compiler/glsl/glcpp/tests/047-if-elif-else.c b/src/compiler/glsl/glcpp/tests/047-if-elif-else.c
new file mode 100644
index 00000000000..e8f0838a9ed
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/047-if-elif-else.c
@@ -0,0 +1,11 @@
+success_1
+#if 0
+failure_1
+#elif 0
+failure_2
+#elif 0
+failure_3
+#else
+success_2
+#endif
+success_3
diff --git a/src/compiler/glsl/glcpp/tests/047-if-elif-else.c.expected b/src/compiler/glsl/glcpp/tests/047-if-elif-else.c.expected
new file mode 100644
index 00000000000..54d30861197
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/047-if-elif-else.c.expected
@@ -0,0 +1,11 @@
+success_1
+
+
+
+
+
+
+
+success_2
+
+success_3
diff --git a/src/compiler/glsl/glcpp/tests/048-if-nested.c b/src/compiler/glsl/glcpp/tests/048-if-nested.c
new file mode 100644
index 00000000000..fc4679c3be4
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/048-if-nested.c
@@ -0,0 +1,11 @@
+success_1
+#if 0
+failure_1
+#if 1
+failure_2
+#else
+failure_3
+#endif
+failure_4
+#endif
+success_2
diff --git a/src/compiler/glsl/glcpp/tests/048-if-nested.c.expected b/src/compiler/glsl/glcpp/tests/048-if-nested.c.expected
new file mode 100644
index 00000000000..8beb9c32c37
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/048-if-nested.c.expected
@@ -0,0 +1,11 @@
+success_1
+
+
+
+
+
+
+
+
+
+success_2
diff --git a/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c b/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c
new file mode 100644
index 00000000000..833ea03882a
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c
@@ -0,0 +1,5 @@
+#if 1 + 2 * 3 + - (25 % 17 - + 1)
+failure with operator precedence
+#else
+success
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c.expected b/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c.expected
new file mode 100644
index 00000000000..729bdd15f80
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/049-if-expression-precedence.c.expected
@@ -0,0 +1,5 @@
+
+
+
+success
+
diff --git a/src/compiler/glsl/glcpp/tests/050-if-defined.c b/src/compiler/glsl/glcpp/tests/050-if-defined.c
new file mode 100644
index 00000000000..34f0f95140e
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/050-if-defined.c
@@ -0,0 +1,17 @@
+#if defined foo
+failure_1
+#else
+success_1
+#endif
+#define foo
+#if defined foo
+success_2
+#else
+failure_2
+#endif
+#undef foo
+#if defined foo
+failure_3
+#else
+success_3
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/050-if-defined.c.expected b/src/compiler/glsl/glcpp/tests/050-if-defined.c.expected
new file mode 100644
index 00000000000..737eb8d9403
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/050-if-defined.c.expected
@@ -0,0 +1,17 @@
+
+
+
+success_1
+
+
+
+success_2
+
+
+
+
+
+
+
+success_3
+
diff --git a/src/compiler/glsl/glcpp/tests/051-if-relational.c b/src/compiler/glsl/glcpp/tests/051-if-relational.c
new file mode 100644
index 00000000000..c3db488e0de
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/051-if-relational.c
@@ -0,0 +1,35 @@
+#if 3 < 2
+failure_1
+#else
+success_1
+#endif
+
+#if 3 >= 2
+success_2
+#else
+failure_2
+#endif
+
+#if 2 + 3 <= 5
+success_3
+#else
+failure_3
+#endif
+
+#if 3 - 2 == 1
+success_3
+#else
+failure_3
+#endif
+
+#if 1 > 3
+failure_4
+#else
+success_4
+#endif
+
+#if 1 != 5
+success_5
+#else
+failure_5
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/051-if-relational.c.expected b/src/compiler/glsl/glcpp/tests/051-if-relational.c.expected
new file mode 100644
index 00000000000..652fefdd43b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/051-if-relational.c.expected
@@ -0,0 +1,35 @@
+
+
+
+success_1
+
+
+
+success_2
+
+
+
+
+
+success_3
+
+
+
+
+
+success_3
+
+
+
+
+
+
+
+success_4
+
+
+
+success_5
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/052-if-bitwise.c b/src/compiler/glsl/glcpp/tests/052-if-bitwise.c
new file mode 100644
index 00000000000..2d8e45eb61e
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/052-if-bitwise.c
@@ -0,0 +1,20 @@
+#if (0xaaaaaaaa | 0x55555555) != 4294967295
+failure_1
+#else
+success_1
+#endif
+#if (0x12345678 ^ 0xfdecba98) == 4023971040
+success_2
+#else
+failure_2
+#endif
+#if (~ 0xdeadbeef) != -3735928560
+failure_3
+#else
+success_3
+#endif
+#if (0667 & 0733) == 403
+success_4
+#else
+failure_4
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/052-if-bitwise.c.expected b/src/compiler/glsl/glcpp/tests/052-if-bitwise.c.expected
new file mode 100644
index 00000000000..44e52b206e5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/052-if-bitwise.c.expected
@@ -0,0 +1,20 @@
+
+
+
+success_1
+
+
+success_2
+
+
+
+
+
+
+success_3
+
+
+success_4
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c b/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c
new file mode 100644
index 00000000000..d24c54a88d1
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c
@@ -0,0 +1,15 @@
+#if (15 / 2) != 7
+failure_1
+#else
+success_1
+#endif
+#if (1 << 12) == 4096
+success_2
+#else
+failure_2
+#endif
+#if (31762 >> 8) != 124
+failure_3
+#else
+success_3
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c.expected b/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c.expected
new file mode 100644
index 00000000000..7e78e0454e0
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/053-if-divide-and-shift.c.expected
@@ -0,0 +1,15 @@
+
+
+
+success_1
+
+
+success_2
+
+
+
+
+
+
+success_3
+
diff --git a/src/compiler/glsl/glcpp/tests/054-if-with-macros.c b/src/compiler/glsl/glcpp/tests/054-if-with-macros.c
new file mode 100644
index 00000000000..3da79a0d96e
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/054-if-with-macros.c
@@ -0,0 +1,34 @@
+#define one 1
+#define two 2
+#define three 3
+#define five 5
+#if five < two
+failure_1
+#else
+success_1
+#endif
+#if three >= two
+success_2
+#else
+failure_2
+#endif
+#if two + three <= five
+success_3
+#else
+failure_3
+#endif
+#if five - two == three
+success_4
+#else
+failure_4
+#endif
+#if one > three
+failure_5
+#else
+success_5
+#endif
+#if one != five
+success_6
+#else
+failure_6
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/054-if-with-macros.c.expected b/src/compiler/glsl/glcpp/tests/054-if-with-macros.c.expected
new file mode 100644
index 00000000000..70f737c90a9
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/054-if-with-macros.c.expected
@@ -0,0 +1,34 @@
+
+
+
+
+
+
+
+success_1
+
+
+success_2
+
+
+
+
+success_3
+
+
+
+
+success_4
+
+
+
+
+
+
+success_5
+
+
+success_6
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c b/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c
new file mode 100644
index 00000000000..00f2c2346d6
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c
@@ -0,0 +1,3 @@
+#define failure() success
+#define foo failure
+foo()
diff --git a/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected b/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected
new file mode 100644
index 00000000000..94c15f95059
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected
@@ -0,0 +1,3 @@
+
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c b/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c
new file mode 100644
index 00000000000..58701d1f25b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c
@@ -0,0 +1,4 @@
+#define bar with,embedded,commas
+#define function(x) success
+#define foo function
+foo(bar)
diff --git a/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected b/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected
new file mode 100644
index 00000000000..bed826e7831
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected
@@ -0,0 +1,4 @@
+
+
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/057-empty-arguments.c b/src/compiler/glsl/glcpp/tests/057-empty-arguments.c
new file mode 100644
index 00000000000..6140232865d
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/057-empty-arguments.c
@@ -0,0 +1,6 @@
+#define zero() success
+zero()
+#define one(x) success
+one()
+#define two(x,y) success
+two(,)
diff --git a/src/compiler/glsl/glcpp/tests/057-empty-arguments.c.expected b/src/compiler/glsl/glcpp/tests/057-empty-arguments.c.expected
new file mode 100644
index 00000000000..7d97e15e29d
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/057-empty-arguments.c.expected
@@ -0,0 +1,6 @@
+
+success
+
+success
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c b/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c
new file mode 100644
index 00000000000..8ac260c76b6
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c
@@ -0,0 +1,5 @@
+#define paste(x,y) x ## y
+paste(a,b)
+paste(a,)
+paste(,b)
+paste(,)
diff --git a/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected b/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected
new file mode 100644
index 00000000000..e0967a1b951
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected
@@ -0,0 +1,5 @@
+
+ab
+a
+b
+
diff --git a/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c b/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c
new file mode 100644
index 00000000000..37b895a4237
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c
@@ -0,0 +1,4 @@
+#define paste(x,y) x ## y
+paste(1,2)
+paste(1,000)
+paste(identifier,2)
diff --git a/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c.expected b/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c.expected
new file mode 100644
index 00000000000..f1288aa7cb7
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/059-token-pasting-integer.c.expected
@@ -0,0 +1,4 @@
+
+12
+1000
+identifier2
diff --git a/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c b/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c
new file mode 100644
index 00000000000..ed80ea879ce
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c
@@ -0,0 +1,3 @@
+#define double(a) a*2
+#define foo double(
+foo 5)
diff --git a/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected b/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected
new file mode 100644
index 00000000000..3e5501aa6e8
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected
@@ -0,0 +1,3 @@
+
+
+5*2
diff --git a/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c b/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c
new file mode 100644
index 00000000000..6dbfd1f62d1
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c
@@ -0,0 +1,5 @@
+#define foo(x) success
+#define bar foo
+#define baz bar
+#define joe baz
+joe (failure)
diff --git a/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected b/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected
new file mode 100644
index 00000000000..15eb64b97f1
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected
@@ -0,0 +1,5 @@
+
+
+
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c b/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c
new file mode 100644
index 00000000000..d9e439bb890
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c
@@ -0,0 +1,5 @@
+#define foo(a,b)
+#if 0
+foo(bar)
+foo(
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected b/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected
new file mode 100644
index 00000000000..3f2ff2d6cc8
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/063-comments.c b/src/compiler/glsl/glcpp/tests/063-comments.c
new file mode 100644
index 00000000000..e641d2f0f9e
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/063-comments.c
@@ -0,0 +1,20 @@
+/* this is a comment */
+// so is this
+// */
+f = g/**//h;
+/*//*/l();
+m = n//**/o
++ p;
+/* this
+comment spans
+multiple lines and
+contains *** stars
+and slashes / *** /
+and other stuff.
+****/
+more code here
+/* Test that /* nested
+ comments */
+are not treated like comments.
+/*/ this is a comment */
+/*/*/
diff --git a/src/compiler/glsl/glcpp/tests/063-comments.c.expected b/src/compiler/glsl/glcpp/tests/063-comments.c.expected
new file mode 100644
index 00000000000..f6e10ce0377
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/063-comments.c.expected
@@ -0,0 +1,20 @@
+
+
+
+f = g /h;
+ l();
+m = n
++ p;
+
+
+
+
+
+
+
+more code here
+
+
+are not treated like comments.
+
+
diff --git a/src/compiler/glsl/glcpp/tests/064-version.c b/src/compiler/glsl/glcpp/tests/064-version.c
new file mode 100644
index 00000000000..21326481b87
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/064-version.c
@@ -0,0 +1,2 @@
+#version 130
+#define FOO
diff --git a/src/compiler/glsl/glcpp/tests/064-version.c.expected b/src/compiler/glsl/glcpp/tests/064-version.c.expected
new file mode 100644
index 00000000000..4036b1ee374
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/064-version.c.expected
@@ -0,0 +1,2 @@
+#version 130
+
diff --git a/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c b/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c
new file mode 100644
index 00000000000..48aa0f8c3ef
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c
@@ -0,0 +1,17 @@
+#if defined(foo)
+failure_1
+#else
+success_1
+#endif
+#define foo
+#if defined ( foo )
+success_2
+#else
+failure_2
+#endif
+#undef foo
+#if defined (foo)
+failure_3
+#else
+success_3
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c.expected b/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c.expected
new file mode 100644
index 00000000000..737eb8d9403
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/065-if-defined-parens.c.expected
@@ -0,0 +1,17 @@
+
+
+
+success_1
+
+
+
+success_2
+
+
+
+
+
+
+
+success_3
+
diff --git a/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c b/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c
new file mode 100644
index 00000000000..3b0b47349d0
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c
@@ -0,0 +1,3 @@
+#if(1)
+success
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c.expected b/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c.expected
new file mode 100644
index 00000000000..5a28fb3b66c
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/066-if-nospace-expression.c.expected
@@ -0,0 +1,3 @@
+
+success
+
diff --git a/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c b/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c
new file mode 100644
index 00000000000..f46cce4e60a
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c
@@ -0,0 +1,40 @@
+#define D1
+#define D2
+
+#define result success
+
+#ifdef U1
+#ifdef U2
+#undef result
+#define result failure
+#endif
+#endif
+result
+
+#ifndef D1
+#ifndef D2
+#undef result
+#define result failure
+#endif
+#endif
+result
+
+#undef result
+#define result failure
+#ifdef D1
+#ifdef D2
+#undef result
+#define result success
+#endif
+#endif
+result
+
+#undef result
+#define result failure
+#ifndef U1
+#ifndef U2
+#undef result
+#define result success
+#endif
+#endif
+result
diff --git a/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected b/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected
new file mode 100644
index 00000000000..9a5ed2eb2d3
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected
@@ -0,0 +1,40 @@
+
+
+
+
+
+
+
+
+
+
+
+success
+
+
+
+
+
+
+
+success
+
+
+
+
+
+
+
+
+
+success
+
+
+
+
+
+
+
+
+
+success
diff --git a/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c b/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c
new file mode 100644
index 00000000000..699ac5144e5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c
@@ -0,0 +1,11 @@
+#define empty
+<empty<
+<empty=
+>empty>
+>empty=
+=empty=
+!empty=
+&empty&
+|empty|
++empty+
+-empty-
diff --git a/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c.expected b/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c.expected
new file mode 100644
index 00000000000..27582cda5e8
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/068-accidental-pasting.c.expected
@@ -0,0 +1,11 @@
+
+< <
+< =
+> >
+> =
+= =
+! =
+& &
+| |
++ +
+- -
diff --git a/src/compiler/glsl/glcpp/tests/069-repeated-argument.c b/src/compiler/glsl/glcpp/tests/069-repeated-argument.c
new file mode 100644
index 00000000000..2b46ead294c
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/069-repeated-argument.c
@@ -0,0 +1,2 @@
+#define double(x) x x
+double(1)
diff --git a/src/compiler/glsl/glcpp/tests/069-repeated-argument.c.expected b/src/compiler/glsl/glcpp/tests/069-repeated-argument.c.expected
new file mode 100644
index 00000000000..8b4b095e488
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/069-repeated-argument.c.expected
@@ -0,0 +1,2 @@
+
+1 1
diff --git a/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c b/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c
new file mode 100644
index 00000000000..d15a4840b01
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c
@@ -0,0 +1,5 @@
+#if UNDEFINED_MACRO
+Failure
+#else
+Success
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected b/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected
new file mode 100644
index 00000000000..44b93a434fa
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected
@@ -0,0 +1,5 @@
+
+
+
+Success
+
diff --git a/src/compiler/glsl/glcpp/tests/071-punctuator.c b/src/compiler/glsl/glcpp/tests/071-punctuator.c
new file mode 100644
index 00000000000..959d6825988
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/071-punctuator.c
@@ -0,0 +1 @@
+a = b
diff --git a/src/compiler/glsl/glcpp/tests/071-punctuator.c.expected b/src/compiler/glsl/glcpp/tests/071-punctuator.c.expected
new file mode 100644
index 00000000000..959d6825988
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/071-punctuator.c.expected
@@ -0,0 +1 @@
+a = b
diff --git a/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c b/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c
new file mode 100644
index 00000000000..e421e9d5e29
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c
@@ -0,0 +1,2 @@
+#define paste(x) success_ ## x
+paste(1) paste(2) paste(3)
diff --git a/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c.expected b/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c.expected
new file mode 100644
index 00000000000..7b80af7e465
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/072-token-pasting-same-line.c.expected
@@ -0,0 +1,2 @@
+
+success_1 success_2 success_3
diff --git a/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c b/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c
new file mode 100644
index 00000000000..61a48097ca3
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c
@@ -0,0 +1,4 @@
+#ifdef UNDEF
+#if UNDEF > 1
+#endif
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c.expected b/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c.expected
new file mode 100644
index 00000000000..fd40910d9e7
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/073-if-in-ifdef.c.expected
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/074-elif-undef.c b/src/compiler/glsl/glcpp/tests/074-elif-undef.c
new file mode 100644
index 00000000000..67aac8977e0
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/074-elif-undef.c
@@ -0,0 +1,3 @@
+#ifndef UNDEF
+#elif UNDEF < 0
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/074-elif-undef.c.expected b/src/compiler/glsl/glcpp/tests/074-elif-undef.c.expected
new file mode 100644
index 00000000000..b28b04f6431
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/074-elif-undef.c.expected
@@ -0,0 +1,3 @@
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c b/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c
new file mode 100644
index 00000000000..264bc4f10ee
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c
@@ -0,0 +1,4 @@
+#ifndef UNDEF
+#elif UNDEF < 0
+#elif UNDEF == 3
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c.expected b/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c.expected
new file mode 100644
index 00000000000..fd40910d9e7
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/075-elif-elif-undef.c.expected
@@ -0,0 +1,4 @@
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c b/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c
new file mode 100644
index 00000000000..ebd550ed005
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c
@@ -0,0 +1,5 @@
+#ifdef UNDEF
+#if UNDEF == 4
+#elif UNDEF == 5
+#endif
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c.expected b/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c.expected
new file mode 100644
index 00000000000..3f2ff2d6cc8
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/076-elif-undef-nested.c.expected
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/077-else-without-if.c b/src/compiler/glsl/glcpp/tests/077-else-without-if.c
new file mode 100644
index 00000000000..81f00bfe278
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/077-else-without-if.c
@@ -0,0 +1 @@
+#else
diff --git a/src/compiler/glsl/glcpp/tests/077-else-without-if.c.expected b/src/compiler/glsl/glcpp/tests/077-else-without-if.c.expected
new file mode 100644
index 00000000000..69f34047033
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/077-else-without-if.c.expected
@@ -0,0 +1,3 @@
+0:1(1): preprocessor error: #else without #if
+
+
diff --git a/src/compiler/glsl/glcpp/tests/078-elif-without-if.c b/src/compiler/glsl/glcpp/tests/078-elif-without-if.c
new file mode 100644
index 00000000000..60466b3890a
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/078-elif-without-if.c
@@ -0,0 +1 @@
+#elif defined FOO
diff --git a/src/compiler/glsl/glcpp/tests/078-elif-without-if.c.expected b/src/compiler/glsl/glcpp/tests/078-elif-without-if.c.expected
new file mode 100644
index 00000000000..b8e40ecc09b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/078-elif-without-if.c.expected
@@ -0,0 +1,3 @@
+0:1(1): preprocessor error: #elif without #if
+
+
diff --git a/src/compiler/glsl/glcpp/tests/079-endif-without-if.c b/src/compiler/glsl/glcpp/tests/079-endif-without-if.c
new file mode 100644
index 00000000000..69331c3ca9d
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/079-endif-without-if.c
@@ -0,0 +1 @@
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/079-endif-without-if.c.expected b/src/compiler/glsl/glcpp/tests/079-endif-without-if.c.expected
new file mode 100644
index 00000000000..7ae579dd25e
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/079-endif-without-if.c.expected
@@ -0,0 +1,3 @@
+0:1(1): preprocessor error: #endif without #if
+
+
diff --git a/src/compiler/glsl/glcpp/tests/080-if-without-expression.c b/src/compiler/glsl/glcpp/tests/080-if-without-expression.c
new file mode 100644
index 00000000000..a27ba36a366
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/080-if-without-expression.c
@@ -0,0 +1,4 @@
+/* Error message for unskipped #if with no expression. */
+#if
+#endif
+
diff --git a/src/compiler/glsl/glcpp/tests/080-if-without-expression.c.expected b/src/compiler/glsl/glcpp/tests/080-if-without-expression.c.expected
new file mode 100644
index 00000000000..2e4cd7323c5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/080-if-without-expression.c.expected
@@ -0,0 +1,5 @@
+0:2(1): preprocessor error: #if with no expression
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c b/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c
new file mode 100644
index 00000000000..79c78663dd3
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c
@@ -0,0 +1,3 @@
+#if 0
+#elif
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c.expected b/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c.expected
new file mode 100644
index 00000000000..b607b849068
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/081-elif-without-expression.c.expected
@@ -0,0 +1,4 @@
+0:2(1): preprocessor error: #elif with no expression
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/082-invalid-paste.c b/src/compiler/glsl/glcpp/tests/082-invalid-paste.c
new file mode 100644
index 00000000000..8b84d50c3a0
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/082-invalid-paste.c
@@ -0,0 +1,7 @@
+#define PASTE(x,y) x ## y
+PASTE(<,>)
+PASTE(0,abc)
+PASTE(1,=)
+PASTE(2,@)
+PASTE(3,-4)
+PASTE(4,+5.2)
diff --git a/src/compiler/glsl/glcpp/tests/082-invalid-paste.c.expected b/src/compiler/glsl/glcpp/tests/082-invalid-paste.c.expected
new file mode 100644
index 00000000000..b48a2d6d296
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/082-invalid-paste.c.expected
@@ -0,0 +1,19 @@
+0:2(7): preprocessor error:
+Pasting "<" and ">" does not give a valid preprocessing token.
+0:3(7): preprocessor error:
+Pasting "0" and "abc" does not give a valid preprocessing token.
+0:4(7): preprocessor error:
+Pasting "1" and "=" does not give a valid preprocessing token.
+0:5(7): preprocessor error:
+Pasting "2" and "@" does not give a valid preprocessing token.
+0:6(7): preprocessor error:
+Pasting "3" and "-" does not give a valid preprocessing token.
+0:7(7): preprocessor error:
+Pasting "4" and "+" does not give a valid preprocessing token.
+
+<
+0
+1
+2
+34
+45.2
diff --git a/src/compiler/glsl/glcpp/tests/083-unterminated-if.c b/src/compiler/glsl/glcpp/tests/083-unterminated-if.c
new file mode 100644
index 00000000000..91806350927
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/083-unterminated-if.c
@@ -0,0 +1,2 @@
+#if 1
+
diff --git a/src/compiler/glsl/glcpp/tests/083-unterminated-if.c.expected b/src/compiler/glsl/glcpp/tests/083-unterminated-if.c.expected
new file mode 100644
index 00000000000..4659ab6fe67
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/083-unterminated-if.c.expected
@@ -0,0 +1,4 @@
+0:1(6): preprocessor error: Unterminated #if
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c b/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c
new file mode 100644
index 00000000000..0789ba5e525
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c
@@ -0,0 +1,2 @@
+#define FUNC(x) (2*(x))
+FUNC(23
diff --git a/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected b/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected
new file mode 100644
index 00000000000..af49a37369d
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected
@@ -0,0 +1,2 @@
+0:2(8): preprocessor error: syntax error, unexpected $end
+
diff --git a/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c b/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c
new file mode 100644
index 00000000000..91bea600612
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c
@@ -0,0 +1,5 @@
+#define MULT(x,y) ((x)*(y))
+MULT()
+MULT(1)
+MULT(1,2,3)
+
diff --git a/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c.expected b/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c.expected
new file mode 100644
index 00000000000..d23845bfd49
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/085-incorrect-argument-count.c.expected
@@ -0,0 +1,11 @@
+0:2(1): preprocessor error: Error: macro MULT invoked with 1 arguments (expected 2)
+
+0:3(1): preprocessor error: Error: macro MULT invoked with 1 arguments (expected 2)
+
+0:4(1): preprocessor error: Error: macro MULT invoked with 3 arguments (expected 2)
+
+
+MULT()
+MULT(1)
+MULT(1,2,3)
+
diff --git a/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c b/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c
new file mode 100644
index 00000000000..a6b7201f95d
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c
@@ -0,0 +1,3 @@
+#define __BAD reserved
+#define GL_ALSO_BAD() also reserved
+#define THIS__TOO__IS__BAD reserved
diff --git a/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c.expected b/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c.expected
new file mode 100644
index 00000000000..38b089daec3
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/086-reserved-macro-names.c.expected
@@ -0,0 +1,9 @@
+0:1(9): preprocessor warning: Macro names containing "__" are reserved for use by the implementation.
+
+0:2(9): preprocessor error: Macro names starting with "GL_" are reserved.
+
+0:3(9): preprocessor warning: Macro names containing "__" are reserved for use by the implementation.
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/087-if-comments.c b/src/compiler/glsl/glcpp/tests/087-if-comments.c
new file mode 100644
index 00000000000..ce8dc43057f
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/087-if-comments.c
@@ -0,0 +1,5 @@
+#if (1 == 0) // dangerous comment
+fail
+#else
+win
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/087-if-comments.c.expected b/src/compiler/glsl/glcpp/tests/087-if-comments.c.expected
new file mode 100644
index 00000000000..2783a9c14d5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/087-if-comments.c.expected
@@ -0,0 +1,5 @@
+
+
+
+win
+
diff --git a/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c b/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c
new file mode 100644
index 00000000000..422c6546414
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c
@@ -0,0 +1,5 @@
+#define abc 123
+#define abc 123
+
+#define foo(x) ( x ) + 23
+#define foo(x) ( x ) + 23
diff --git a/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected b/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected
new file mode 100644
index 00000000000..3f2ff2d6cc8
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected
@@ -0,0 +1,5 @@
+
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c b/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c
new file mode 100644
index 00000000000..b3d1391e160
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c
@@ -0,0 +1,17 @@
+#define x y
+#define x z
+
+#define abc 123
+#define abc() 123
+
+#define foo() bar
+#define foo(x) bar
+
+#define bar() baz
+#define bar baz
+
+#define biff(a,b) a+b
+#define biff(a,b,c) a+b
+
+#define oper(a,b) a+b
+#define oper(a,b) a*b
diff --git a/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c.expected b/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c.expected
new file mode 100644
index 00000000000..a945161497f
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/089-redefine-macro-error.c.expected
@@ -0,0 +1,29 @@
+0:2(9): preprocessor error: Redefinition of macro x
+
+0:5(9): preprocessor error: Redefinition of macro abc
+
+0:8(9): preprocessor error: Redefinition of macro foo
+
+0:11(9): preprocessor error: Redefinition of macro bar
+
+0:14(9): preprocessor error: Redefinition of macro biff
+
+0:17(9): preprocessor error: Redefinition of macro oper
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/090-hash-error.c b/src/compiler/glsl/glcpp/tests/090-hash-error.c
new file mode 100644
index 00000000000..d19bb7faed8
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/090-hash-error.c
@@ -0,0 +1 @@
+#error human error
diff --git a/src/compiler/glsl/glcpp/tests/090-hash-error.c.expected b/src/compiler/glsl/glcpp/tests/090-hash-error.c.expected
new file mode 100644
index 00000000000..876a6ea9cc5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/090-hash-error.c.expected
@@ -0,0 +1 @@
+0:1(1): preprocessor error: #error human error
diff --git a/src/compiler/glsl/glcpp/tests/091-hash-line.c b/src/compiler/glsl/glcpp/tests/091-hash-line.c
new file mode 100644
index 00000000000..26d70382a89
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/091-hash-line.c
@@ -0,0 +1,14 @@
+#line 0
+#error line 0 error
+#line 25
+#error line 25 error
+#line 0 1
+#error source 1, line 0 error
+#line 30 2
+#error source 2, line 30 error
+#line 45 2 /* A line with a comment */
+#define NINETY 90
+#define TWO 2
+#line NINETY TWO /* A #line line with macro expansion */
+#define FUNCTION_LIKE_MACRO(source, line) source line
+#line FUNCTION_LIKE_MACRO(180,2)
diff --git a/src/compiler/glsl/glcpp/tests/091-hash-line.c.expected b/src/compiler/glsl/glcpp/tests/091-hash-line.c.expected
new file mode 100644
index 00000000000..ac9ab252f1e
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/091-hash-line.c.expected
@@ -0,0 +1,14 @@
+0:0(1): preprocessor error: #error line 0 error
+0:25(1): preprocessor error: #error line 25 error
+1:0(1): preprocessor error: #error source 1, line 0 error
+2:30(1): preprocessor error: #error source 2, line 30 error
+#line 0
+#line 25
+#line 0 1
+#line 30 2
+#line 45 2
+
+
+#line 90 2
+
+#line 180 2
diff --git a/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c b/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c
new file mode 100644
index 00000000000..3c161a5c501
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c
@@ -0,0 +1,5 @@
+#define A
+#define A 1
+
+#define B 1
+#define B
diff --git a/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected b/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected
new file mode 100644
index 00000000000..698294d91f2
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected
@@ -0,0 +1,9 @@
+0:2(9): preprocessor error: Redefinition of macro A
+
+0:5(9): preprocessor error: Redefinition of macro B
+
+
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c b/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c
new file mode 100644
index 00000000000..bf65d4f5271
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c
@@ -0,0 +1,2 @@
+#if (1 / 0)
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c.expected b/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c.expected
new file mode 100644
index 00000000000..a858870b794
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/093-divide-by-zero.c.expected
@@ -0,0 +1,3 @@
+0:1(12): preprocessor error: division by 0 in preprocessor directive
+
+
diff --git a/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c b/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c
new file mode 100644
index 00000000000..04497b17913
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c
@@ -0,0 +1,13 @@
+/* glcpp is generating a division-by-zero error for this case. It's
+ * easy to argue that it should be short-circuiting the evaluation and
+ * not generating the diagnostic (which happens to be what gcc does).
+ * But it doesn't seem like we should force this behavior on our
+ * pre-processor, (and, as always, the GLSL specification of the
+ * pre-processor is too vague on this point).
+ *
+ * If a short-circuit evaluation optimization does get added to the
+ * pre-processor then it would legitimate to update the expected file
+ * for this test.
+*/
+#if 1 || (1 / 0)
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected b/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected
new file mode 100644
index 00000000000..570952b2454
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected
@@ -0,0 +1,14 @@
+0:12(17): preprocessor error: division by 0 in preprocessor directive
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/095-recursive-define.c b/src/compiler/glsl/glcpp/tests/095-recursive-define.c
new file mode 100644
index 00000000000..801d90ce2e3
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/095-recursive-define.c
@@ -0,0 +1,3 @@
+#define A(a, b) B(a, b)
+#define C A(0, C)
+C
diff --git a/src/compiler/glsl/glcpp/tests/095-recursive-define.c.expected b/src/compiler/glsl/glcpp/tests/095-recursive-define.c.expected
new file mode 100644
index 00000000000..493ab091be7
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/095-recursive-define.c.expected
@@ -0,0 +1,3 @@
+
+
+B(0, C)
diff --git a/src/compiler/glsl/glcpp/tests/096-paste-twice.c b/src/compiler/glsl/glcpp/tests/096-paste-twice.c
new file mode 100644
index 00000000000..8da756fcba4
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/096-paste-twice.c
@@ -0,0 +1,3 @@
+#define paste_twice(a,b,c) a ## b ## c
+paste_twice(just, one, token)
+
diff --git a/src/compiler/glsl/glcpp/tests/096-paste-twice.c.expected b/src/compiler/glsl/glcpp/tests/096-paste-twice.c.expected
new file mode 100644
index 00000000000..96c57d2a6ec
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/096-paste-twice.c.expected
@@ -0,0 +1,3 @@
+
+justonetoken
+
diff --git a/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c b/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c
new file mode 100644
index 00000000000..0f46835c29e
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c
@@ -0,0 +1,3 @@
+#define PASTE_MACRO one ## token
+PASTE_MACRO
+
diff --git a/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected b/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected
new file mode 100644
index 00000000000..36f66992539
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected
@@ -0,0 +1,3 @@
+
+onetoken
+
diff --git a/src/compiler/glsl/glcpp/tests/098-elif-undefined.c b/src/compiler/glsl/glcpp/tests/098-elif-undefined.c
new file mode 100644
index 00000000000..1f520d4d432
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/098-elif-undefined.c
@@ -0,0 +1,7 @@
+#if 0
+Not this
+#elif UNDEFINED_MACRO
+Nor this
+#else
+Yes, this.
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/098-elif-undefined.c.expected b/src/compiler/glsl/glcpp/tests/098-elif-undefined.c.expected
new file mode 100644
index 00000000000..c6ef689ce4b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/098-elif-undefined.c.expected
@@ -0,0 +1,7 @@
+
+
+
+
+
+Yes, this.
+
diff --git a/src/compiler/glsl/glcpp/tests/099-c99-example.c b/src/compiler/glsl/glcpp/tests/099-c99-example.c
new file mode 100644
index 00000000000..d1976b1f265
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/099-c99-example.c
@@ -0,0 +1,17 @@
+#define x 3
+#define f(a) f(x * (a))
+#undef x
+#define x 2
+#define g f
+#define z z[0]
+#define h g(~
+#define m(a) a(w)
+#define w 0,1
+#define t(a) a
+#define p() int
+#define q(x) x
+#define r(x,y) x ## y
+f(y+1) + f(f(z)) % t(t(g)(0) + t)(1);
+g(x +(3,4)-w) | h 5) & m
+ (f)^m(m);
+p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,)};
diff --git a/src/compiler/glsl/glcpp/tests/099-c99-example.c.expected b/src/compiler/glsl/glcpp/tests/099-c99-example.c.expected
new file mode 100644
index 00000000000..352bbff48f5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/099-c99-example.c.expected
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1);
+f(2 * (2 +(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1);
+int i[] = { 1, 23, 4, 5, };
diff --git a/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c b/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c
new file mode 100644
index 00000000000..31dbb9a9edc
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c
@@ -0,0 +1,7 @@
+#define one 1
+#define two 2
+
+switch (1) {
+ case one + two:
+ break;
+}
diff --git a/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c.expected b/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c.expected
new file mode 100644
index 00000000000..09f1f417bdd
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/100-macro-with-colon.c.expected
@@ -0,0 +1,7 @@
+
+
+
+switch (1) {
+ case 1 + 2:
+ break;
+}
diff --git a/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c b/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c
new file mode 100644
index 00000000000..e1693805b6a
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c
@@ -0,0 +1,16 @@
+#define object 1
+#define function(x) 1
+
+#if object
+once
+#endif
+#if object
+twice
+#endif
+
+#if function(0)
+once
+#endif
+#if function(0)
+once again
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c.expected b/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c.expected
new file mode 100644
index 00000000000..1e0b30696cb
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/101-macros-used-twice.c.expected
@@ -0,0 +1,16 @@
+
+
+
+
+once
+
+
+twice
+
+
+
+once
+
+
+once again
+
diff --git a/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c b/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c
new file mode 100644
index 00000000000..301779eb948
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c
@@ -0,0 +1,2 @@
+#if 0
+#endif garbage
diff --git a/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c.expected b/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c.expected
new file mode 100644
index 00000000000..d9f3bdc9465
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/102-garbage-after-endif.c.expected
@@ -0,0 +1,2 @@
+0:2(8): preprocessor error: syntax error, unexpected IDENTIFIER, expecting NEWLINE
+
diff --git a/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c b/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c
new file mode 100644
index 00000000000..c460feadf57
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c
@@ -0,0 +1,3 @@
+#if 0
+#else garbage
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c.expected b/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c.expected
new file mode 100644
index 00000000000..b053b399775
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/103-garbage-after-else-0.c.expected
@@ -0,0 +1,4 @@
+0:2(7): preprocessor error: syntax error, unexpected IDENTIFIER, expecting NEWLINE
+0:1(6): preprocessor error: Unterminated #if
+
+
diff --git a/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c b/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c
new file mode 100644
index 00000000000..3fbeec48e4c
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c
@@ -0,0 +1,2 @@
+#line 2
+int foo();
diff --git a/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected b/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected
new file mode 100644
index 00000000000..3fbeec48e4c
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected
@@ -0,0 +1,2 @@
+#line 2
+int foo();
diff --git a/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c b/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c
new file mode 100644
index 00000000000..da156c6a596
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c
@@ -0,0 +1,5 @@
+#define X(x) x
+#line X( \
+ 1 \
+ )
+#line 2
diff --git a/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c.expected b/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c.expected
new file mode 100644
index 00000000000..814cef1b8c5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/105-multiline-hash-line.c.expected
@@ -0,0 +1,5 @@
+
+#line 1
+
+
+#line 2
diff --git a/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c b/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c
new file mode 100644
index 00000000000..929e93e782f
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c
@@ -0,0 +1,6 @@
+#define X(x) x
+#if X( \
+ 1 \
+ )
+int foo();
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c.expected b/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c.expected
new file mode 100644
index 00000000000..1c0cbc970f5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/106-multiline-hash-if.c.expected
@@ -0,0 +1,6 @@
+
+
+
+
+int foo();
+
diff --git a/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c b/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c
new file mode 100644
index 00000000000..8c1c67a4d6b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c
@@ -0,0 +1,7 @@
+#define X(x) x
+#if 0
+#elif X( \
+ 1 \
+ )
+int foo();
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c.expected b/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c.expected
new file mode 100644
index 00000000000..b0601d7ee42
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/107-multiline-hash-elif.c.expected
@@ -0,0 +1,7 @@
+
+
+
+
+
+int foo();
+
diff --git a/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c b/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c
new file mode 100644
index 00000000000..0ce36f2eb1b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c
@@ -0,0 +1 @@
+#version110
diff --git a/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected b/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected
new file mode 100644
index 00000000000..4f4243f947f
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected
@@ -0,0 +1 @@
+0:1(1): preprocessor error: Illegal non-directive after #
diff --git a/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c b/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c
new file mode 100644
index 00000000000..f52966a8e80
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c
@@ -0,0 +1 @@
+#line2
diff --git a/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected b/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected
new file mode 100644
index 00000000000..4f4243f947f
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected
@@ -0,0 +1 @@
+0:1(1): preprocessor error: Illegal non-directive after #
diff --git a/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c b/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c
new file mode 100644
index 00000000000..6d7d0f38d9a
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c
@@ -0,0 +1,3 @@
+#if 1
+#elif110
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected b/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected
new file mode 100644
index 00000000000..4d93de41dd3
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected
@@ -0,0 +1,3 @@
+0:2(1): preprocessor error: Illegal non-directive after #
+
+
diff --git a/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c b/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c
new file mode 100644
index 00000000000..b3413371ec2
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c
@@ -0,0 +1,19 @@
+#if(1)
+success
+#endif
+
+#if+1
+success
+#endif
+
+#if-1
+success
+#endif
+
+#if!1
+success
+#endif
+
+#if~1
+success
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected b/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected
new file mode 100644
index 00000000000..5c005c393a9
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected
@@ -0,0 +1,19 @@
+
+success
+
+
+
+success
+
+
+
+success
+
+
+
+
+
+
+
+success
+
diff --git a/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c b/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c
new file mode 100644
index 00000000000..e8221bc49c3
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c
@@ -0,0 +1,24 @@
+#if 0
+#elif(1)
+success
+#endif
+
+#if 0
+#elif+1
+success
+#endif
+
+#if 0
+#elif-1
+success
+#endif
+
+#if 0
+#elif!1
+success
+#endif
+
+#if 0
+#elif~1
+success
+#endif
diff --git a/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected b/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected
new file mode 100644
index 00000000000..86b37036b65
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected
@@ -0,0 +1,24 @@
+
+
+success
+
+
+
+
+success
+
+
+
+
+success
+
+
+
+
+
+
+
+
+
+success
+
diff --git a/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c b/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c
new file mode 100644
index 00000000000..369c4879260
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c
@@ -0,0 +1,7 @@
+1. Number of dalmations: __LINE__ __FILE__ __LINE__
+2. Nominal visual acuity: __LINE__ __FILE__ / __LINE__ __FILE__
+3. Battle of Thermopylae, as film: __LINE__ __FILE__ __FILE__
+4. HTTP code for "Not Found": __LINE__ __FILE__ __LINE__
+5. Hexadecimal for 20560: __LINE__ __FILE__ __LINE__ __FILE__
+6: Zip code for Nortonville, KS: __LINE__ __LINE__ __FILE__ __LINE__ __FILE__
+7. James Bond, as a number: __FILE__ __FILE__ __LINE__
diff --git a/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c.expected b/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c.expected
new file mode 100644
index 00000000000..55bc788ffd7
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/113-line-and-file-macros.c.expected
@@ -0,0 +1,7 @@
+1. Number of dalmations: 1 0 1
+2. Nominal visual acuity: 2 0 / 2 0
+3. Battle of Thermopylae, as film: 3 0 0
+4. HTTP code for "Not Found": 4 0 4
+5. Hexadecimal for 20560: 5 0 5 0
+6: Zip code for Nortonville, KS: 6 6 0 6 0
+7. James Bond, as a number: 0 0 7
diff --git a/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c b/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c
new file mode 100644
index 00000000000..d80d9c7ef4b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c
@@ -0,0 +1,7 @@
+#define PASTE3(a,b,c) a ## b ## c
+#define PASTE4(a,b,c,d) a ## b ## c ## d
+#define PASTE5(a,b,c,d,e) a ## b ## c ## d ## e
+4. HTTP code for "Not Found": PASTE3(__LINE__, __FILE__ , __LINE__)
+5. Hexadecimal for 20560: PASTE4(__LINE__, __FILE__, __LINE__, __FILE__)
+6: Zip code for Nortonville, KS: PASTE5(__LINE__, __LINE__, __FILE__, __LINE__, __FILE__)
+7. James Bond, as a number: PASTE3(__FILE__, __FILE__, __LINE__)
diff --git a/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c.expected b/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c.expected
new file mode 100644
index 00000000000..aa9711034a6
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/114-paste-integer-tokens.c.expected
@@ -0,0 +1,7 @@
+
+
+
+4. HTTP code for "Not Found": 404
+5. Hexadecimal for 20560: 5050
+6: Zip code for Nortonville, KS: 66060
+7. James Bond, as a number: 007
diff --git a/src/compiler/glsl/glcpp/tests/115-line-continuations.c b/src/compiler/glsl/glcpp/tests/115-line-continuations.c
new file mode 100644
index 00000000000..105590d85ed
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/115-line-continuations.c
@@ -0,0 +1,9 @@
+// This comment continues to the next line, hiding the define \
+#define CONTINUATION_UNSUPPORTED
+
+#ifdef CONTINUATION_UNSUPPORTED
+failure
+#else
+success
+#endif
+
diff --git a/src/compiler/glsl/glcpp/tests/115-line-continuations.c.expected b/src/compiler/glsl/glcpp/tests/115-line-continuations.c.expected
new file mode 100644
index 00000000000..428b5e822c5
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/115-line-continuations.c.expected
@@ -0,0 +1,9 @@
+
+
+
+
+
+
+success
+
+
diff --git a/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c b/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c
new file mode 100644
index 00000000000..83d5ddf681b
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c
@@ -0,0 +1,13 @@
+// glcpp-args: --disable-line-continuations
+
+// This comments ends with a backslash \\
+#define NO_CONTINUATION
+
+#ifdef NO_CONTINUATION
+success
+#else
+failure
+#endif
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c.expected b/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c.expected
new file mode 100644
index 00000000000..5ca78928a61
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/116-disable-line-continuations.c.expected
@@ -0,0 +1,13 @@
+
+
+
+
+
+
+success
+
+
+
+
+
+
diff --git a/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c b/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c
new file mode 100644
index 00000000000..6a6f2829883
--- /dev/null
+++ b/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c
@@ -0,0 +1,12 @@
+/* This test case is the minimal case to replicate the bug reported here:
+ *
+ * https://bugs.freedesktop.org/show_bug.cgi?id=65112
+ *
+ * To trigger the bug, there must be a line-continuation sequence
+ * (backslash newline), then an additional newline character, and
+ * finally another backslash that is not part of a line-continuation
+ * sequence.
+ */ +\ + +/* \ */ diff --git a/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected b/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected new file mode 100644 index 00000000000..8aaa04d28e4 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c b/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c new file mode 100644 index 00000000000..53e80394ab6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c @@ -0,0 +1,4 @@ +#define FOO first/* +*/second + +FOO diff --git a/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c.expected b/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c.expected new file mode 100644 index 00000000000..1fa8135cb13 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/118-comment-becomes-space.c.expected @@ -0,0 +1,4 @@ + + + +first second diff --git a/src/compiler/glsl/glcpp/tests/119-elif-after-else.c b/src/compiler/glsl/glcpp/tests/119-elif-after-else.c new file mode 100644 index 00000000000..9b9e9233bcb --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/119-elif-after-else.c @@ -0,0 +1,6 @@ +#if 0 +#else +int foo; +#elif 0 +int bar; +#endif diff --git a/src/compiler/glsl/glcpp/tests/119-elif-after-else.c.expected b/src/compiler/glsl/glcpp/tests/119-elif-after-else.c.expected new file mode 100644 index 00000000000..636956799cd --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/119-elif-after-else.c.expected @@ -0,0 +1,7 @@ +0:4(1): preprocessor error: #elif after #else + + +int foo; + +int bar; + diff --git a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c new file mode 100644 index 00000000000..49e7696613e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c @@ -0,0 +1,3 @@ +#undef __LINE__ +#undef __FILE__ +#undef __VERSION__ diff --git a/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected new file mode 100644 index 00000000000..3b736df378e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/120-undef-builtin.c.expected @@ -0,0 +1,6 @@ +0:1(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. +0:2(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. +0:3(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. + + + diff --git a/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c b/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c new file mode 100644 index 00000000000..67ebe73e5cf --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c @@ -0,0 +1,2 @@ +/* + */ // diff --git a/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c.expected b/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c.expected new file mode 100644 index 00000000000..8cb7cb9891f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/121-comment-bug-72686.c.expected @@ -0,0 +1,2 @@ + + diff --git a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c new file mode 100644 index 00000000000..ae7ea09f67e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c @@ -0,0 +1,16 @@ +/* Original definitions. 
*/ +#define TWO ( 1+1 ) +#define FOUR (2 + 2) +#define SIX (3 + 3) + +/* Redefinitions with whitespace in same places, but different amounts, (so no + * error). */ +#define TWO ( 1+1 ) +#define FOUR (2 + 2) +#define SIX (3/*comment is whitespace*/+ /* collapsed */ /* to */ /* one */ /* space */ 3) + +/* Redefinitions with whitespace in different places. Each of these should + * trigger an error. */ +#define TWO (1 + 1) +#define FOUR ( 2+2 ) +#define SIX (/*not*/3 + 3/*expected*/) diff --git a/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected new file mode 100644 index 00000000000..602bdef94c2 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/122-redefine-whitespace.c.expected @@ -0,0 +1,22 @@ +0:14(9): preprocessor error: Redefinition of macro TWO + +0:15(9): preprocessor error: Redefinition of macro FOUR + +0:16(9): preprocessor error: Redefinition of macro SIX + + + + + + + + + + + + + + + + + diff --git a/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c b/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c new file mode 100644 index 00000000000..0b341a381f8 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c @@ -0,0 +1,3 @@ +#if 1 +#else garbage +#endif diff --git a/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c.expected b/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c.expected new file mode 100644 index 00000000000..b053b399775 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/123-garbage-after-else-1.c.expected @@ -0,0 +1,4 @@ +0:2(7): preprocessor error: syntax error, unexpected IDENTIFIER, expecting NEWLINE +0:1(6): preprocessor error: Unterminated #if + + diff --git a/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c b/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c new file mode 100644 index 00000000000..947ba1885ec --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c @@ -0,0 +1,37 @@ +#define e THIS_SHOULD_NOT_BE_EXPANDED +#define E NOR_THIS +#define p NOT_THIS_EITHER +#define P AND_SURELY_NOT_THIS +#define OK CRAZY_BUT_TRUE_THIS_NEITHER + +/* This one is actually meant to be expanded */ +#define MUST_EXPAND GO + +/* The following are "preprocessing numbers" and should not trigger macro + * expansion. */ +1e +1OK + +/* These are also "preprocessing numbers", so no expansion */ +123e+OK +.23E+OK +1.3e-OK +12.E-OK +123p+OK +.23P+OK +1.3p-OK +12.P-OK +123..OK +.23.OK.OK + +/* Importantly, just before the MUST_EXPAND in each of these, the preceding + * "preprocessing number" ends and we have an actual expression. So the + * MUST_EXPAND macro must be expanded (who would have though?) in each case. 
*/ +123ef+MUST_EXPAND +.23E3-MUST_EXPAND +1.3e--MUST_EXPAND +12.E-&MUST_EXPAND +123p+OK+MUST_EXPAND +.23P+OK;MUST_EXPAND +1.3p-OK-MUST_EXPAND +12.P-OK&MUST_EXPAND diff --git a/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c.expected b/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c.expected new file mode 100644 index 00000000000..6ec588862d6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/124-preprocessing-numbers.c.expected @@ -0,0 +1,37 @@ + + + + + + + + + + + +1e +1OK + + +123e+OK +.23E+OK +1.3e-OK +12.E-OK +123p+OK +.23P+OK +1.3p-OK +12.P-OK +123..OK +.23.OK.OK + + + + +123ef+GO +.23E3-GO +1.3e--GO +12.E-&GO +123p+OK+GO +.23P+OK;GO +1.3p-OK-GO +12.P-OK&GO diff --git a/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c b/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c new file mode 100644 index 00000000000..4ee29f6d93f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c @@ -0,0 +1,27 @@ +/* For GLSL in OpenGL ES, an undefined macro appearing in an #if or #elif + * expression, (other than as an argument to defined) is an error. + * + * Except in the case of a short-circuiting && or || operator, where the + * specification explicitly mandates that there be no error. + */ +#version 300 es + +/* These yield errors */ +#if NOT_DEFINED +#endif + +#if 0 +#elif ALSO_NOT_DEFINED +#endif + +/* But these yield no errors */ +#if 1 || STILL_NOT_DEFINED +Success +#endif + +#if 0 +#elif 0 && WILL_ANYONE_DEFINE_ANYTHING +#else +More success +#endif + diff --git a/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected b/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected new file mode 100644 index 00000000000..616aa912e2f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected @@ -0,0 +1,29 @@ +0:10(16): preprocessor error: undefined macro NOT_DEFINED in expression (illegal in GLES) +0:14(23): preprocessor error: undefined macro ALSO_NOT_DEFINED in expression (illegal in GLES) + + + + + + +#version 300 es + + + + + + + + + + + +Success + + + + + +More success + + diff --git a/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c b/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c new file mode 100644 index 00000000000..4c0d29000a3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c @@ -0,0 +1,5 @@ +#ifdef MACRO garbage +#endif + +#ifndef MORE garbage +#endif diff --git a/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c.expected b/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c.expected new file mode 100644 index 00000000000..82a06f8a3b3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/126-garbage-after-directive.c.expected @@ -0,0 +1,7 @@ +0:1(14): preprocessor error: extra tokens at end of directive +0:4(14): preprocessor error: extra tokens at end of directive + + + + + diff --git a/src/compiler/glsl/glcpp/tests/127-pragma-empty.c b/src/compiler/glsl/glcpp/tests/127-pragma-empty.c new file mode 100644 index 00000000000..0f9b0b3d38f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/127-pragma-empty.c @@ -0,0 +1,3 @@ +/* It seems an odd (and particularly useless) thing to have an empty pragma, + * but we probably shouldn't trigger an error in this case. 
*/ +#pragma diff --git a/src/compiler/glsl/glcpp/tests/127-pragma-empty.c.expected b/src/compiler/glsl/glcpp/tests/127-pragma-empty.c.expected new file mode 100644 index 00000000000..92371a07c32 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/127-pragma-empty.c.expected @@ -0,0 +1,3 @@ + + + diff --git a/src/compiler/glsl/glcpp/tests/128-space-before-hash.c b/src/compiler/glsl/glcpp/tests/128-space-before-hash.c new file mode 100644 index 00000000000..fba9596baf8 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/128-space-before-hash.c @@ -0,0 +1,21 @@ + /* Any directive can be preceded by a space. */ + #version 300 + #pragma Testing spaces before hash + # + #line 3 + #define FOO + #ifdef FOO + yes + #endif + #if 0 + #elif defined FOO + yes again + #endif + #if 0 + #else + for the third time, yes! + #endif + #undef FOO + #ifndef FOO + yes, of course + #endif diff --git a/src/compiler/glsl/glcpp/tests/128-space-before-hash.c.expected b/src/compiler/glsl/glcpp/tests/128-space-before-hash.c.expected new file mode 100644 index 00000000000..9babb6fb078 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/128-space-before-hash.c.expected @@ -0,0 +1,21 @@ + +#version 300 +#pragma Testing spaces before hash + +#line 3 + + + yes + + + + yes again + + + + for the third time, yes! + + + + yes, of course + diff --git a/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c b/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c new file mode 100644 index 00000000000..a229179f18c --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c @@ -0,0 +1 @@ +#define 123 456 diff --git a/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c.expected b/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c.expected new file mode 100644 index 00000000000..fd0b41347fa --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/129-define-non-identifier.c.expected @@ -0,0 +1,2 @@ +0:1(9): preprocessor error: #define followed by a non-identifier: 123 +0:1(9): preprocessor error: syntax error, unexpected INTEGER_STRING, expecting FUNC_IDENTIFIER or OBJ_IDENTIFIER diff --git a/src/compiler/glsl/glcpp/tests/130-define-comment.c b/src/compiler/glsl/glcpp/tests/130-define-comment.c new file mode 100644 index 00000000000..33312362cc7 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/130-define-comment.c @@ -0,0 +1,2 @@ +#define /*...*/ FUNC( /*...*/ x /*...*/ ) /*...*/ FOO( /*...*/ x /*...*/ ) +FUNC(bar) diff --git a/src/compiler/glsl/glcpp/tests/130-define-comment.c.expected b/src/compiler/glsl/glcpp/tests/130-define-comment.c.expected new file mode 100644 index 00000000000..d789e29d5a8 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/130-define-comment.c.expected @@ -0,0 +1,2 @@ + +FOO( bar ) diff --git a/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c b/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c new file mode 100644 index 00000000000..240292dad01 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c @@ -0,0 +1 @@ +this file ends with no newline \ No newline at end of file diff --git a/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c.expected b/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c.expected new file mode 100644 index 00000000000..57800306322 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/131-eof-without-newline.c.expected @@ -0,0 +1 @@ +this file ends with no newline diff --git a/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c b/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c new file mode 100644 
index 00000000000..6795e35ea00 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c @@ -0,0 +1 @@ +#define \ No newline at end of file diff --git a/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c.expected b/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c.expected new file mode 100644 index 00000000000..341e5e2aa67 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/132-eof-without-newline-define.c.expected @@ -0,0 +1 @@ +0:1(1): preprocessor error: #define without macro name diff --git a/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c b/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c new file mode 100644 index 00000000000..56ec5f722c0 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c @@ -0,0 +1 @@ +This file ends with no newline within a comment /* \ No newline at end of file diff --git a/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected b/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected new file mode 100644 index 00000000000..d186f48761f --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected @@ -0,0 +1,2 @@ +0:1(51): preprocessor error: Unterminated comment +This file ends with no newline within a comment diff --git a/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c b/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c new file mode 100644 index 00000000000..3015f0e886e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c @@ -0,0 +1,22 @@ +/*...*/ # /*...*/ version 300 + /*...*/#/*...*/ extension whatever + /*..*/ # /*..*/ pragma ignored +/**/ # /**/ line 4 + /*...*/# /*...*/ ifdef NOT_DEFINED + /*...*/# /*...*/ else + /*..*/ #/*..*/ endif + /*...*/# /*...*/ ifndef ALSO_NOT_DEFINED + /*...*/# /*...*/ else + /*..*/ #/*..*/ endif +/*...*/ # /*...*/ if 0 + /*...*/#/*...*/ elif 1 + /*..*/ # /*..*/ else + /**/ # /**/ endif + /*...*/# /*...*/ define FOO bar + /*..*/ #/*..*/ define FUNC() baz + /*..*/ # /*..*/ define FUNC2(a,b) b a +FOO +FUNC() +FUNC2(x,y) + + diff --git a/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c.expected b/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c.expected new file mode 100644 index 00000000000..760c960cb62 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/134-hash-comment-directive.c.expected @@ -0,0 +1,22 @@ +#version 300 +#extension whatever +#pragma ignored +#line 4 + + + + + + + + + + + + + +bar +baz +y x + + diff --git a/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c b/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c new file mode 100644 index 00000000000..fd96bd64c74 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c @@ -0,0 +1,2 @@ +#define FOO(a,a) which a? 
+#define BAR(x,y,z,x) so very x diff --git a/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c.expected b/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c.expected new file mode 100644 index 00000000000..bc1a334ed29 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/135-duplicate-parameter.c.expected @@ -0,0 +1,4 @@ +0:1(9): preprocessor error: Duplicate macro parameter "a" +0:2(9): preprocessor error: Duplicate macro parameter "x" + + diff --git a/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c b/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c new file mode 100644 index 00000000000..167d3c8a3cf --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c @@ -0,0 +1,8 @@ +/* The body can include C expressions with ++ and -- */ +a = x++; +b = ++x; +c = x--; +d = --x; +/* But these are not legal in preprocessor expressions. */ +#if x++ > 4 +#endif diff --git a/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected b/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected new file mode 100644 index 00000000000..137921b1695 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected @@ -0,0 +1,8 @@ +0:7(12): preprocessor error: syntax error, unexpected PLUS_PLUS + +a = x++; +b = ++x; +c = x--; +d = --x; + + diff --git a/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c b/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c new file mode 100644 index 00000000000..c8cd47fb57e --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c @@ -0,0 +1,4 @@ +#define FIELD(x) foo.x +#define FIELD_OF(s, x) s.x +FIELD(bar) +FIELD_OF(foo, bar) diff --git a/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c.expected b/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c.expected new file mode 100644 index 00000000000..f9f5be13e01 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/137-expand-macro-after-period.c.expected @@ -0,0 +1,4 @@ + + +foo.bar +foo.bar diff --git a/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c b/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c new file mode 100644 index 00000000000..38967dc57d1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c @@ -0,0 +1,7 @@ +#if 0 +/* + * This multi-line comment needs to be 3 lines to test what's intended. + */ +#else +SUCCESS +#endif diff --git a/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected b/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected new file mode 100644 index 00000000000..0d6ef4d9ad4 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected @@ -0,0 +1,7 @@ + + + + + +SUCCESS + diff --git a/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c b/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c new file mode 100644 index 00000000000..30e128db4c5 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c @@ -0,0 +1,5 @@ +#define +#define +#define /*...*/ +#define //... +Errors expected because no macro name is ever given! 
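Each case above follows the suite's convention: an NNN-name.c source is piped through the standalone glcpp binary and the result is compared byte-for-byte against the matching NNN-name.c.expected file, error messages included; per-test flags ride in a "glcpp-args:" comment inside the source (as in 116-disable-line-continuations.c). A minimal sketch of checking one pair by hand, mirroring what the glcpp-test harness added later in this patch does (the working directory and the ../glcpp binary location are assumptions, not part of the patch):

    #!/bin/sh
    # Sketch: run a single glcpp test case against its golden output.
    # Assumes the current directory is src/compiler/glsl/glcpp/tests and
    # that the glcpp binary was built one level up; adjust paths as needed.
    test=139-define-without-macro-name.c
    # Pick up any per-test flags from a "glcpp-args:" comment in the source.
    args=$(grep 'glcpp-args:' "$test" | sed -e 's,^.*glcpp-args: *,,')
    ../glcpp $args < "$test" > "$test.out" 2>&1
    if cmp -s "$test.expected" "$test.out"; then
        echo "PASS"
    else
        echo "FAIL"
        diff -u "$test.expected" "$test.out"
    fi

The glcpp-test script further down automates exactly this loop over every *.c file in the directory and optionally repeats it under valgrind.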
diff --git a/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c.expected b/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c.expected new file mode 100644 index 00000000000..42b02d1a8a1 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/139-define-without-macro-name.c.expected @@ -0,0 +1,5 @@ +0:1(1): preprocessor error: #define without macro name +0:2(1): preprocessor error: #define without macro name +0:3(1): preprocessor error: #define without macro name +0:4(1): preprocessor error: #define without macro name +Errors expected because no macro name is ever given! diff --git a/src/compiler/glsl/glcpp/tests/140-null-directive.c b/src/compiler/glsl/glcpp/tests/140-null-directive.c new file mode 100644 index 00000000000..1dcb26ef8b9 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/140-null-directive.c @@ -0,0 +1,9 @@ +/* GLSL accepts a null directive. Let's test that in several variations: */ +# + # +/*....*/#/*....*/ + /*..*/ # /*..*/ +#//... + # //... +/*....*/#/**///.. + /*..*/ # /**/ // diff --git a/src/compiler/glsl/glcpp/tests/140-null-directive.c.expected b/src/compiler/glsl/glcpp/tests/140-null-directive.c.expected new file mode 100644 index 00000000000..fa103f60e82 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/140-null-directive.c.expected @@ -0,0 +1,9 @@ + + + + + + + + + diff --git a/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c b/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c new file mode 100644 index 00000000000..a93f3ce35fd --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c @@ -0,0 +1,6 @@ +Line 1 /* Test for a bug where #pragma was throwing off the __LINE__ count. */ +Line __LINE__ /* Line 2 */ +#pragma Line 3 +Line __LINE__ /* Line 4 */ +#pragma Line 5 +Line __LINE__ /* Line 6 */ diff --git a/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected b/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected new file mode 100644 index 00000000000..330731dc800 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected @@ -0,0 +1,6 @@ +Line 1 +Line 2 +#pragma Line 3 +Line 4 +#pragma Line 5 +Line 6 diff --git a/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c b/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c new file mode 100644 index 00000000000..b60c04232a6 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c @@ -0,0 +1,94 @@ +/* Macro using defined with a hard-coded identifier (no parentheses) */ +#define is_foo_defined defined /*...*/ foo +#undef foo +#if is_foo_defined +failure +#else +success +#endif +#define foo +#if is_foo_defined +success +#else +failure +#endif + +/* Macro using defined with a hard-coded identifier within parentheses */ +#define is_foo_defined_parens defined /*...*/ ( /*...*/ foo /*...*/ ) // +#define foo +#if is_foo_defined_parens +success +#else +failure +#endif +#undef foo +#if is_foo_defined_parens +failure +#else +success +#endif + +/* Macro using defined with an argument identifier (no parentheses) */ +#define is_defined(arg) defined /*...*/ arg +#define foo bar +#undef bar +#if is_defined(foo) +failure +#else +success +#endif +#define bar bar +#if is_defined(foo) +success +#else +failure +#endif + +/* Macro using defined with an argument identifier within parentheses */ +#define is_defined_parens(arg) defined /*...*/ ( /*...*/ arg /*...*/ ) // +#define foo bar +#define bar bar +#if is_defined_parens(foo) +success +#else +failure +#endif +#undef bar +#if is_defined_parens(foo) +failure +#else 
+success +#endif + +/* Multiple levels of macro resulting in defined */ +#define X defined A && Y +#define Y defined B && Z +#define Z defined C +#define A +#define B +#define C +#if X +success +#else +failure +#endif +#undef A +#if X +failure +#else +success +#endif +#define A +#undef B +#if X +failure +#else +success +#endif +#define B +#undef C +#if X +failure +#else +success +#endif diff --git a/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c.expected b/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c.expected new file mode 100644 index 00000000000..4eca90bc3db --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/142-defined-within-macro.c.expected @@ -0,0 +1,94 @@ + + + + + + +success + + + +success + + + + + + + + +success + + + + + + + +success + + + + + + + + + +success + + + +success + + + + + + + + + +success + + + + + + + +success + + + + + + + + + + +success + + + + + + + +success + + + + + + +success + + + + + + +success + diff --git a/src/compiler/glsl/glcpp/tests/143-multiple-else.c b/src/compiler/glsl/glcpp/tests/143-multiple-else.c new file mode 100644 index 00000000000..62ad49cf7bb --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/143-multiple-else.c @@ -0,0 +1,6 @@ +#if 0 +#else +int foo; +#else +int bar; +#endif diff --git a/src/compiler/glsl/glcpp/tests/143-multiple-else.c.expected b/src/compiler/glsl/glcpp/tests/143-multiple-else.c.expected new file mode 100644 index 00000000000..00b3328c835 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/143-multiple-else.c.expected @@ -0,0 +1,7 @@ +0:4(1): preprocessor error: multiple #else + + +int foo; + +int bar; + diff --git a/src/compiler/glsl/glcpp/tests/glcpp-test b/src/compiler/glsl/glcpp/tests/glcpp-test new file mode 100755 index 00000000000..3945ee4f6ce --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/glcpp-test @@ -0,0 +1,110 @@ +#!/bin/sh + +if [ ! -z "$srcdir" ]; then + testdir=$srcdir/glsl/glcpp/tests + outdir=`pwd`/glsl/glcpp/tests + glcpp=`pwd`/glsl/glcpp/glcpp +else + testdir=. + outdir=. + glcpp=../glcpp +fi + +trap 'rm $test.valgrind-errors; exit 1' INT QUIT + +usage () +{ + cat < Use tests in the given (default is ".") + --valgrind Run the test suite a second time under valgrind +EOF +} + +test_specific_args () +{ + test="$1" + + tr "\r" "\n" < "$test" | grep 'glcpp-args:' | sed -e 's,^.*glcpp-args: *,,' +} + +# Parse command-line options +for option; do + case "${option}" in + "--help") + usage + exit 0 + ;; + "--valgrind") + do_valgrind=yes + ;; + "--testdir="*) + testdir="${option#--testdir=}" + outdir="${outdir}/${option#--testdir=}" + ;; + *) + echo "Unrecognized option: $option" >&2 + echo >&2 + usage + exit 1 + ;; + esac +done + +total=0 +pass=0 +clean=0 + +mkdir -p $outdir + +echo "====== Testing for correctness ======" +for test in $testdir/*.c; do + out=$outdir/${test##*/}.out + + printf "Testing $test... > $out ($test.expected) " + $glcpp $(test_specific_args $test) < $test > $out 2>&1 + total=$((total+1)) + if cmp $test.expected $out >/dev/null 2>&1; then + echo "PASS" + pass=$((pass+1)) + else + echo "FAIL" + diff -u $test.expected $out + fi +done + +echo "" +echo "$pass/$total tests returned correct results" +echo "" + +if [ "$do_valgrind" = "yes" ]; then + echo "====== Testing for valgrind cleanliness ======" + for test in $testdir/*.c; do + printf "Testing $test with valgrind..." + valgrind --error-exitcode=31 --log-file=$test.valgrind-errors $glcpp $(test_specific_args $test) < $test >/dev/null 2>&1 + if [ "$?" 
= "31" ]; then + echo "ERRORS" + cat $test.valgrind-errors + else + echo "CLEAN" + clean=$((clean+1)) + rm $test.valgrind-errors + fi + done + + echo "" + echo "$pass/$total tests returned correct results" + echo "$clean/$total tests are valgrind-clean" +fi + +if [ "$pass" = "$total" ] && [ "$do_valgrind" != "yes" ] || [ "$pass" = "$total" ]; then + exit 0 +else + exit 1 +fi + diff --git a/src/compiler/glsl/glcpp/tests/glcpp-test-cr-lf b/src/compiler/glsl/glcpp/tests/glcpp-test-cr-lf new file mode 100755 index 00000000000..c75370f48c3 --- /dev/null +++ b/src/compiler/glsl/glcpp/tests/glcpp-test-cr-lf @@ -0,0 +1,141 @@ +#!/bin/sh + +# The build system runs this test from a different working directory, and may +# be in a build directory entirely separate from the source. So if the +# "srcdir" variable is set, we must use it to locate the test files and the +# glcpp-test script. + +if [ ! -z "$srcdir" ]; then + testdir="$srcdir/glsl/glcpp/tests" + glcpp_test="$srcdir/glsl/glcpp/tests/glcpp-test" +else + testdir=. + glcpp_test=./glcpp-test +fi + +total=0 +pass=0 + +# This supports a pipe that doesn't destroy the exit status of first command +# +# http://unix.stackexchange.com/questions/14270/get-exit-status-of-process-thats-piped-to-another +stdintoexitstatus() { + read exitstatus + return $exitstatus +} + +run_test () +{ + cmd="$1" + + total=$((total+1)) + + if [ "$VERBOSE" = "yes" ]; then + if $cmd; then + echo "PASS" + pass=$((pass+1)) + else + echo "FAIL" + fi + else + # This is "$cmd | tail -2" but with the exit status of "$cmd" not "tail -2" + if (((($cmd; echo $? >&3) | tail -2 | head -1 >&4) 3>&1) | stdintoexitstatus) 4>&1; then + echo "PASS" + pass=$((pass+1)) + else + echo "FAIL" + fi + fi +} + +usage () +{ + cat <&2 + echo >&2 + usage + exit 1 + ;; + esac +done + +# All tests depend on the .out files being present. So first do a +# normal run of the test suite, (silently) just to create the .out +# files as a side effect. +rm -rf ./subtest-lf +mkdir subtest-lf +for file in "$testdir"/*.c; do + base=$(basename "$file") + cp "$file" subtest-lf +done + +${glcpp_test} --testdir=subtest-lf >/dev/null 2>&1 + +echo "===== Testing with \\\\r line terminators (old Mac format) =====" + +# Prepare test files with '\r' instead of '\n' +rm -rf ./subtest-cr +mkdir subtest-cr +for file in "$testdir"/*.c; do + base=$(basename "$file") + tr "\n" "\r" < "$file" > subtest-cr/"$base" + cp `pwd`/glsl/glcpp/tests/subtest-lf/"$base".out subtest-cr/"$base".expected +done + +run_test "${glcpp_test} --testdir=subtest-cr" + +echo "===== Testing with \\\\r\\\\n line terminators (DOS format) =====" + +# Prepare test files with '\r\n' instead of '\n' +rm -rf ./subtest-cr-lf +mkdir subtest-cr-lf +for file in "$testdir"/*.c; do + base=$(basename "$file") + sed -e 's/$/ /' < "$file" > subtest-cr-lf/"$base" + cp `pwd`/glsl/glcpp/tests/subtest-lf/"$base".out subtest-cr-lf/"$base".expected +done + +run_test "${glcpp_test} --testdir=subtest-cr-lf" + +echo "===== Testing with \\\\n\\\\r (bizarre, but allowed by GLSL spec.) 
====="
+
+# Prepare test files with '\n\r' instead of '\n'
+rm -rf ./subtest-lf-cr
+mkdir subtest-lf-cr
+for file in "$testdir"/*.c; do
+ base=$(basename "$file")
+ sed -e 's/$/ /' < "$file" | tr "\n\r" "\r\n" > subtest-lf-cr/"$base"
+ cp `pwd`/glsl/glcpp/tests/subtest-lf/"$base".out subtest-lf-cr/"$base".expected
+done
+
+run_test "${glcpp_test} --testdir=subtest-lf-cr"
+
+echo ""
+echo "$pass/$total tests returned correct results"
+echo ""
+
+if [ "$pass" = "$total" ]; then
+ exit 0
+else
+ exit 1
+fi
diff --git a/src/compiler/glsl/glsl_lexer.ll b/src/compiler/glsl/glsl_lexer.ll
new file mode 100644
index 00000000000..e59f93e10ef
--- /dev/null
+++ b/src/compiler/glsl/glsl_lexer.ll
@@ -0,0 +1,635 @@
+%{
+/*
+ * Copyright © 2008, 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <ctype.h>
+#include <limits.h>
+#include "util/strtod.h"
+#include "ast.h"
+#include "glsl_parser_extras.h"
+#include "glsl_parser.h"
+
+static int classify_identifier(struct _mesa_glsl_parse_state *, const char *);
+
+#ifdef _MSC_VER
+#define YY_NO_UNISTD_H
+#endif
+
+#define YY_USER_ACTION \
+ do { \
+ yylloc->first_column = yycolumn + 1; \
+ yylloc->first_line = yylloc->last_line = yylineno + 1; \
+ yycolumn += yyleng; \
+ yylloc->last_column = yycolumn + 1; \
+ } while(0);
+
+#define YY_USER_INIT yylineno = 0; yycolumn = 0; yylloc->source = 0;
+
+/* A macro for handling reserved words and keywords across language versions.
+ *
+ * Certain words start out as identifiers, become reserved words in
+ * later language revisions, and finally become language keywords.
+ * This may happen at different times in desktop GLSL and GLSL ES.
+ *
+ * For example, consider the following lexer rule:
+ * samplerBuffer KEYWORD(130, 0, 140, 0, SAMPLERBUFFER)
+ *
+ * This means that "samplerBuffer" will be treated as:
+ * - a keyword (SAMPLERBUFFER token) ...in GLSL >= 1.40
+ * - a reserved word - error ...in GLSL >= 1.30
+ * - an identifier ...in GLSL < 1.30 or GLSL ES
+ */
+#define KEYWORD(reserved_glsl, reserved_glsl_es, \
+ allowed_glsl, allowed_glsl_es, token) \
+ KEYWORD_WITH_ALT(reserved_glsl, reserved_glsl_es, \
+ allowed_glsl, allowed_glsl_es, false, token)
+
+/**
+ * Like the KEYWORD macro, but the word is also treated as a keyword
+ * if the given boolean expression is true.
+ */ +#define KEYWORD_WITH_ALT(reserved_glsl, reserved_glsl_es, \ + allowed_glsl, allowed_glsl_es, \ + alt_expr, token) \ + do { \ + if (yyextra->is_version(allowed_glsl, allowed_glsl_es) \ + || (alt_expr)) { \ + return token; \ + } else if (yyextra->is_version(reserved_glsl, \ + reserved_glsl_es)) { \ + _mesa_glsl_error(yylloc, yyextra, \ + "illegal use of reserved word `%s'", yytext); \ + return ERROR_TOK; \ + } else { \ + void *mem_ctx = yyextra; \ + yylval->identifier = ralloc_strdup(mem_ctx, yytext); \ + return classify_identifier(yyextra, yytext); \ + } \ + } while (0) + +/** + * A macro for handling keywords that have been present in GLSL since + * its origin, but were changed into reserved words in GLSL 3.00 ES. + */ +#define DEPRECATED_ES_KEYWORD(token) \ + do { \ + if (yyextra->is_version(0, 300)) { \ + _mesa_glsl_error(yylloc, yyextra, \ + "illegal use of reserved word `%s'", yytext); \ + return ERROR_TOK; \ + } else { \ + return token; \ + } \ + } while (0) + +static int +literal_integer(char *text, int len, struct _mesa_glsl_parse_state *state, + YYSTYPE *lval, YYLTYPE *lloc, int base) +{ + bool is_uint = (text[len - 1] == 'u' || + text[len - 1] == 'U'); + const char *digits = text; + + /* Skip "0x" */ + if (base == 16) + digits += 2; + +#ifdef _MSC_VER + unsigned __int64 value = _strtoui64(digits, NULL, base); +#else + unsigned long long value = strtoull(digits, NULL, base); +#endif + + lval->n = (int)value; + + if (value > UINT_MAX) { + /* Note that signed 0xffffffff is valid, not out of range! */ + if (state->is_version(130, 300)) { + _mesa_glsl_error(lloc, state, + "literal value `%s' out of range", text); + } else { + _mesa_glsl_warning(lloc, state, + "literal value `%s' out of range", text); + } + } else if (base == 10 && !is_uint && (unsigned)value > (unsigned)INT_MAX + 1) { + /* Tries to catch unintentionally providing a negative value. + * Note that -2147483648 is parsed as -(2147483648), so we don't + * want to warn for INT_MAX. + */ + _mesa_glsl_warning(lloc, state, + "signed literal value `%s' is interpreted as %d", + text, lval->n); + } + return is_uint ? UINTCONSTANT : INTCONSTANT; +} + +#define LITERAL_INTEGER(base) \ + literal_integer(yytext, yyleng, yyextra, yylval, yylloc, base) + +%} + +%option bison-bridge bison-locations reentrant noyywrap +%option nounput noyy_top_state +%option never-interactive +%option prefix="_mesa_glsl_lexer_" +%option extra-type="struct _mesa_glsl_parse_state *" +%option warn nodefault + + /* Note: When adding any start conditions to this list, you must also + * update the "Internal compiler error" catch-all rule near the end of + * this file. */ +%x PP PRAGMA + +DEC_INT [1-9][0-9]* +HEX_INT 0[xX][0-9a-fA-F]+ +OCT_INT 0[0-7]* +INT ({DEC_INT}|{HEX_INT}|{OCT_INT}) +SPC [ \t]* +SPCP [ \t]+ +HASH ^{SPC}#{SPC} +%% + +[ \r\t]+ ; + + /* Preprocessor tokens. */ +^[ \t]*#[ \t]*$ ; +^[ \t]*#[ \t]*version { BEGIN PP; return VERSION_TOK; } +^[ \t]*#[ \t]*extension { BEGIN PP; return EXTENSION; } +{HASH}line{SPCP}{INT}{SPCP}{INT}{SPC}$ { + /* Eat characters until the first digit is + * encountered + */ + char *ptr = yytext; + while (!isdigit(*ptr)) + ptr++; + + /* Subtract one from the line number because + * yylineno is zero-based instead of + * one-based. + */ + yylineno = strtol(ptr, &ptr, 0) - 1; + + /* From GLSL 3.30 and GLSL ES on, after processing the + * line directive (including its new-line), the implementation + * will behave as if it is compiling at the line number passed + * as argument. 
It was line number + 1 in older specifications. + */ + if (yyextra->is_version(330, 100)) + yylineno--; + + yylloc->source = strtol(ptr, NULL, 0); + } +{HASH}line{SPCP}{INT}{SPC}$ { + /* Eat characters until the first digit is + * encountered + */ + char *ptr = yytext; + while (!isdigit(*ptr)) + ptr++; + + /* Subtract one from the line number because + * yylineno is zero-based instead of + * one-based. + */ + yylineno = strtol(ptr, &ptr, 0) - 1; + + /* From GLSL 3.30 and GLSL ES on, after processing the + * line directive (including its new-line), the implementation + * will behave as if it is compiling at the line number passed + * as argument. It was line number + 1 in older specifications. + */ + if (yyextra->is_version(330, 100)) + yylineno--; + } +^{SPC}#{SPC}pragma{SPCP}debug{SPC}\({SPC}on{SPC}\) { + BEGIN PP; + return PRAGMA_DEBUG_ON; + } +^{SPC}#{SPC}pragma{SPCP}debug{SPC}\({SPC}off{SPC}\) { + BEGIN PP; + return PRAGMA_DEBUG_OFF; + } +^{SPC}#{SPC}pragma{SPCP}optimize{SPC}\({SPC}on{SPC}\) { + BEGIN PP; + return PRAGMA_OPTIMIZE_ON; + } +^{SPC}#{SPC}pragma{SPCP}optimize{SPC}\({SPC}off{SPC}\) { + BEGIN PP; + return PRAGMA_OPTIMIZE_OFF; + } +^{SPC}#{SPC}pragma{SPCP}STDGL{SPCP}invariant{SPC}\({SPC}all{SPC}\) { + BEGIN PP; + return PRAGMA_INVARIANT_ALL; + } +^{SPC}#{SPC}pragma{SPCP} { BEGIN PRAGMA; } + +\n { BEGIN 0; yylineno++; yycolumn = 0; } +. { } + +\/\/[^\n]* { } +[ \t\r]* { } +: return COLON; +[_a-zA-Z][_a-zA-Z0-9]* { + void *mem_ctx = yyextra; + yylval->identifier = ralloc_strdup(mem_ctx, yytext); + return IDENTIFIER; + } +[1-9][0-9]* { + yylval->n = strtol(yytext, NULL, 10); + return INTCONSTANT; + } +\n { BEGIN 0; yylineno++; yycolumn = 0; return EOL; } +. { return yytext[0]; } + +\n { yylineno++; yycolumn = 0; } + +attribute DEPRECATED_ES_KEYWORD(ATTRIBUTE); +const return CONST_TOK; +bool return BOOL_TOK; +float return FLOAT_TOK; +int return INT_TOK; +uint KEYWORD(130, 300, 130, 300, UINT_TOK); + +break return BREAK; +continue return CONTINUE; +do return DO; +while return WHILE; +else return ELSE; +for return FOR; +if return IF; +discard return DISCARD; +return return RETURN; + +bvec2 return BVEC2; +bvec3 return BVEC3; +bvec4 return BVEC4; +ivec2 return IVEC2; +ivec3 return IVEC3; +ivec4 return IVEC4; +uvec2 KEYWORD(130, 300, 130, 300, UVEC2); +uvec3 KEYWORD(130, 300, 130, 300, UVEC3); +uvec4 KEYWORD(130, 300, 130, 300, UVEC4); +vec2 return VEC2; +vec3 return VEC3; +vec4 return VEC4; +mat2 return MAT2X2; +mat3 return MAT3X3; +mat4 return MAT4X4; +mat2x2 KEYWORD(120, 300, 120, 300, MAT2X2); +mat2x3 KEYWORD(120, 300, 120, 300, MAT2X3); +mat2x4 KEYWORD(120, 300, 120, 300, MAT2X4); +mat3x2 KEYWORD(120, 300, 120, 300, MAT3X2); +mat3x3 KEYWORD(120, 300, 120, 300, MAT3X3); +mat3x4 KEYWORD(120, 300, 120, 300, MAT3X4); +mat4x2 KEYWORD(120, 300, 120, 300, MAT4X2); +mat4x3 KEYWORD(120, 300, 120, 300, MAT4X3); +mat4x4 KEYWORD(120, 300, 120, 300, MAT4X4); + +in return IN_TOK; +out return OUT_TOK; +inout return INOUT_TOK; +uniform return UNIFORM; +buffer return BUFFER; +varying DEPRECATED_ES_KEYWORD(VARYING); +centroid KEYWORD(120, 300, 120, 300, CENTROID); +invariant KEYWORD(120, 100, 120, 100, INVARIANT); +flat KEYWORD(130, 100, 130, 300, FLAT); +smooth KEYWORD(130, 300, 130, 300, SMOOTH); +noperspective KEYWORD(130, 300, 130, 0, NOPERSPECTIVE); +patch KEYWORD_WITH_ALT(0, 300, 400, 0, yyextra->ARB_tessellation_shader_enable, PATCH); + +sampler1D DEPRECATED_ES_KEYWORD(SAMPLER1D); +sampler2D return SAMPLER2D; +sampler3D return SAMPLER3D; +samplerCube return SAMPLERCUBE; +sampler1DArray 
KEYWORD(130, 300, 130, 0, SAMPLER1DARRAY); +sampler2DArray KEYWORD(130, 300, 130, 300, SAMPLER2DARRAY); +sampler1DShadow DEPRECATED_ES_KEYWORD(SAMPLER1DSHADOW); +sampler2DShadow return SAMPLER2DSHADOW; +samplerCubeShadow KEYWORD(130, 300, 130, 300, SAMPLERCUBESHADOW); +sampler1DArrayShadow KEYWORD(130, 300, 130, 0, SAMPLER1DARRAYSHADOW); +sampler2DArrayShadow KEYWORD(130, 300, 130, 300, SAMPLER2DARRAYSHADOW); +isampler1D KEYWORD(130, 300, 130, 0, ISAMPLER1D); +isampler2D KEYWORD(130, 300, 130, 300, ISAMPLER2D); +isampler3D KEYWORD(130, 300, 130, 300, ISAMPLER3D); +isamplerCube KEYWORD(130, 300, 130, 300, ISAMPLERCUBE); +isampler1DArray KEYWORD(130, 300, 130, 0, ISAMPLER1DARRAY); +isampler2DArray KEYWORD(130, 300, 130, 300, ISAMPLER2DARRAY); +usampler1D KEYWORD(130, 300, 130, 0, USAMPLER1D); +usampler2D KEYWORD(130, 300, 130, 300, USAMPLER2D); +usampler3D KEYWORD(130, 300, 130, 300, USAMPLER3D); +usamplerCube KEYWORD(130, 300, 130, 300, USAMPLERCUBE); +usampler1DArray KEYWORD(130, 300, 130, 0, USAMPLER1DARRAY); +usampler2DArray KEYWORD(130, 300, 130, 300, USAMPLER2DARRAY); + + /* additional keywords in ARB_texture_multisample, included in GLSL 1.50 */ + /* these are reserved but not defined in GLSL 3.00 */ + /* [iu]sampler2DMS are defined in GLSL ES 3.10 */ +sampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, SAMPLER2DMS); +isampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS); +usampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS); +sampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, SAMPLER2DMSARRAY); +isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, ISAMPLER2DMSARRAY); +usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY); + + /* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */ +samplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY); +isamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY); +usamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY); +samplerCubeArrayShadow KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW); + +samplerExternalOES { + if (yyextra->OES_EGL_image_external_enable) + return SAMPLEREXTERNALOES; + else + return IDENTIFIER; + } + + /* keywords available with ARB_gpu_shader5 */ +precise KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_gpu_shader5_enable, PRECISE); + + /* keywords available with ARB_shader_image_load_store */ +image1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1D); +image2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2D); +image3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE3D); +image2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DRECT); +imageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGECUBE); +imageBuffer 
KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGEBUFFER); +image1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1DARRAY); +image2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2DARRAY); +imageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGECUBEARRAY); +image2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMS); +image2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMSARRAY); +iimage1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1D); +iimage2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2D); +iimage3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE3D); +iimage2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DRECT); +iimageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBE); +iimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGEBUFFER); +iimage1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1DARRAY); +iimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DARRAY); +iimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBEARRAY); +iimage2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMS); +iimage2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMSARRAY); +uimage1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1D); +uimage2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2D); +uimage3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE3D); +uimage2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DRECT); +uimageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBE); +uimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGEBUFFER); +uimage1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1DARRAY); +uimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DARRAY); +uimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBEARRAY); +uimage2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMS); +uimage2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMSARRAY); +image1DShadow KEYWORD(130, 300, 0, 0, IMAGE1DSHADOW); +image2DShadow KEYWORD(130, 300, 0, 0, IMAGE2DSHADOW); +image1DArrayShadow KEYWORD(130, 300, 0, 0, IMAGE1DARRAYSHADOW); +image2DArrayShadow KEYWORD(130, 300, 0, 0, IMAGE2DARRAYSHADOW); + +coherent KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, COHERENT); +volatile KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable || 
yyextra->ARB_shader_storage_buffer_object_enable, VOLATILE); +restrict KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, RESTRICT); +readonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, READONLY); +writeonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, WRITEONLY); + +atomic_uint KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT); + +shared KEYWORD_WITH_ALT(430, 310, 430, 310, yyextra->ARB_compute_shader_enable, SHARED); + +struct return STRUCT; +void return VOID_TOK; + +layout { + if ((yyextra->is_version(140, 300)) + || yyextra->AMD_conservative_depth_enable + || yyextra->ARB_conservative_depth_enable + || yyextra->ARB_explicit_attrib_location_enable + || yyextra->ARB_explicit_uniform_location_enable + || yyextra->has_separate_shader_objects() + || yyextra->ARB_uniform_buffer_object_enable + || yyextra->ARB_fragment_coord_conventions_enable + || yyextra->ARB_shading_language_420pack_enable + || yyextra->ARB_compute_shader_enable + || yyextra->ARB_tessellation_shader_enable) { + return LAYOUT_TOK; + } else { + void *mem_ctx = yyextra; + yylval->identifier = ralloc_strdup(mem_ctx, yytext); + return classify_identifier(yyextra, yytext); + } + } + +\+\+ return INC_OP; +-- return DEC_OP; +\<= return LE_OP; +>= return GE_OP; +== return EQ_OP; +!= return NE_OP; +&& return AND_OP; +\|\| return OR_OP; +"^^" return XOR_OP; +"<<" return LEFT_OP; +">>" return RIGHT_OP; + +\*= return MUL_ASSIGN; +\/= return DIV_ASSIGN; +\+= return ADD_ASSIGN; +\%= return MOD_ASSIGN; +\<\<= return LEFT_ASSIGN; +>>= return RIGHT_ASSIGN; +&= return AND_ASSIGN; +"^=" return XOR_ASSIGN; +\|= return OR_ASSIGN; +-= return SUB_ASSIGN; + +[1-9][0-9]*[uU]? { + return LITERAL_INTEGER(10); + } +0[xX][0-9a-fA-F]+[uU]? { + return LITERAL_INTEGER(16); + } +0[0-7]*[uU]? { + return LITERAL_INTEGER(8); + } + +[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?[fF]? | +\.[0-9]+([eE][+-]?[0-9]+)?[fF]? | +[0-9]+\.([eE][+-]?[0-9]+)?[fF]? | +[0-9]+[eE][+-]?[0-9]+[fF]? { + yylval->real = _mesa_strtof(yytext, NULL); + return FLOATCONSTANT; + } + +[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF) | +\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF) | +[0-9]+\.([eE][+-]?[0-9]+)?(lf|LF) | +[0-9]+[eE][+-]?[0-9]+(lf|LF) { + if (!yyextra->is_version(400, 0) && + !yyextra->ARB_gpu_shader_fp64_enable) + return ERROR_TOK; + yylval->dreal = _mesa_strtod(yytext, NULL); + return DOUBLECONSTANT; + } + +true { + yylval->n = 1; + return BOOLCONSTANT; + } +false { + yylval->n = 0; + return BOOLCONSTANT; + } + + + /* Reserved words in GLSL 1.10. 
*/ +asm KEYWORD(110, 100, 0, 0, ASM); +class KEYWORD(110, 100, 0, 0, CLASS); +union KEYWORD(110, 100, 0, 0, UNION); +enum KEYWORD(110, 100, 0, 0, ENUM); +typedef KEYWORD(110, 100, 0, 0, TYPEDEF); +template KEYWORD(110, 100, 0, 0, TEMPLATE); +this KEYWORD(110, 100, 0, 0, THIS); +packed KEYWORD_WITH_ALT(110, 100, 140, 300, yyextra->ARB_uniform_buffer_object_enable, PACKED_TOK); +goto KEYWORD(110, 100, 0, 0, GOTO); +switch KEYWORD(110, 100, 130, 300, SWITCH); +default KEYWORD(110, 100, 130, 300, DEFAULT); +inline KEYWORD(110, 100, 0, 0, INLINE_TOK); +noinline KEYWORD(110, 100, 0, 0, NOINLINE); +public KEYWORD(110, 100, 0, 0, PUBLIC_TOK); +static KEYWORD(110, 100, 0, 0, STATIC); +extern KEYWORD(110, 100, 0, 0, EXTERN); +external KEYWORD(110, 100, 0, 0, EXTERNAL); +interface KEYWORD(110, 100, 0, 0, INTERFACE); +long KEYWORD(110, 100, 0, 0, LONG_TOK); +short KEYWORD(110, 100, 0, 0, SHORT_TOK); +double KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DOUBLE_TOK); +half KEYWORD(110, 100, 0, 0, HALF); +fixed KEYWORD(110, 100, 0, 0, FIXED_TOK); +unsigned KEYWORD(110, 100, 0, 0, UNSIGNED); +input KEYWORD(110, 100, 0, 0, INPUT_TOK); +output KEYWORD(110, 100, 0, 0, OUTPUT); +hvec2 KEYWORD(110, 100, 0, 0, HVEC2); +hvec3 KEYWORD(110, 100, 0, 0, HVEC3); +hvec4 KEYWORD(110, 100, 0, 0, HVEC4); +dvec2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC2); +dvec3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC3); +dvec4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC4); +dmat2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2); +dmat3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3); +dmat4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4); +dmat2x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2); +dmat2x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X3); +dmat2x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X4); +dmat3x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X2); +dmat3x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3); +dmat3x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X4); +dmat4x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X2); +dmat4x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X3); +dmat4x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4); +fvec2 KEYWORD(110, 100, 0, 0, FVEC2); +fvec3 KEYWORD(110, 100, 0, 0, FVEC3); +fvec4 KEYWORD(110, 100, 0, 0, FVEC4); +sampler2DRect DEPRECATED_ES_KEYWORD(SAMPLER2DRECT); +sampler3DRect KEYWORD(110, 100, 0, 0, SAMPLER3DRECT); +sampler2DRectShadow DEPRECATED_ES_KEYWORD(SAMPLER2DRECTSHADOW); +sizeof KEYWORD(110, 100, 0, 0, SIZEOF); +cast KEYWORD(110, 100, 0, 0, CAST); +namespace KEYWORD(110, 100, 0, 0, NAMESPACE); +using KEYWORD(110, 100, 0, 0, USING); + + /* Additional reserved words in GLSL 1.20. */ +lowp KEYWORD(120, 100, 130, 100, LOWP); +mediump KEYWORD(120, 100, 130, 100, MEDIUMP); +highp KEYWORD(120, 100, 130, 100, HIGHP); +precision KEYWORD(120, 100, 130, 100, PRECISION); + + /* Additional reserved words in GLSL 1.30. 
*/ +case KEYWORD(130, 300, 130, 300, CASE); +common KEYWORD(130, 300, 0, 0, COMMON); +partition KEYWORD(130, 300, 0, 0, PARTITION); +active KEYWORD(130, 300, 0, 0, ACTIVE); +superp KEYWORD(130, 100, 0, 0, SUPERP); +samplerBuffer KEYWORD(130, 300, 140, 0, SAMPLERBUFFER); +filter KEYWORD(130, 300, 0, 0, FILTER); +row_major KEYWORD_WITH_ALT(130, 0, 140, 0, yyextra->ARB_uniform_buffer_object_enable && !yyextra->es_shader, ROW_MAJOR); + + /* Additional reserved words in GLSL 1.40 */ +isampler2DRect KEYWORD(140, 300, 140, 0, ISAMPLER2DRECT); +usampler2DRect KEYWORD(140, 300, 140, 0, USAMPLER2DRECT); +isamplerBuffer KEYWORD(140, 300, 140, 0, ISAMPLERBUFFER); +usamplerBuffer KEYWORD(140, 300, 140, 0, USAMPLERBUFFER); + + /* Additional reserved words in GLSL ES 3.00 */ +resource KEYWORD(0, 300, 0, 0, RESOURCE); +sample KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_gpu_shader5_enable, SAMPLE); +subroutine KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_shader_subroutine_enable, SUBROUTINE); + + +[_a-zA-Z][_a-zA-Z0-9]* { + struct _mesa_glsl_parse_state *state = yyextra; + void *ctx = state; + if (state->es_shader && strlen(yytext) > 1024) { + _mesa_glsl_error(yylloc, state, + "Identifier `%s' exceeds 1024 characters", + yytext); + } else { + yylval->identifier = ralloc_strdup(ctx, yytext); + } + return classify_identifier(state, yytext); + } + +\. { struct _mesa_glsl_parse_state *state = yyextra; + state->is_field = true; + return DOT_TOK; } + +. { return yytext[0]; } + +%% + +int +classify_identifier(struct _mesa_glsl_parse_state *state, const char *name) +{ + if (state->is_field) { + state->is_field = false; + return FIELD_SELECTION; + } + if (state->symbols->get_variable(name) || state->symbols->get_function(name)) + return IDENTIFIER; + else if (state->symbols->get_type(name)) + return TYPE_IDENTIFIER; + else + return NEW_IDENTIFIER; +} + +void +_mesa_glsl_lexer_ctor(struct _mesa_glsl_parse_state *state, const char *string) +{ + yylex_init_extra(state, & state->scanner); + yy_scan_string(string, state->scanner); +} + +void +_mesa_glsl_lexer_dtor(struct _mesa_glsl_parse_state *state) +{ + yylex_destroy(state->scanner); +} diff --git a/src/compiler/glsl/glsl_parser.yy b/src/compiler/glsl/glsl_parser.yy new file mode 100644 index 00000000000..99bd0e61d0e --- /dev/null +++ b/src/compiler/glsl/glsl_parser.yy @@ -0,0 +1,2855 @@ +%{ +/* + * Copyright © 2008, 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef _MSC_VER
+#include <strings.h>
+#endif
+#include <assert.h>
+
+#include "ast.h"
+#include "glsl_parser_extras.h"
+#include "compiler/glsl_types.h"
+#include "main/context.h"
+
+#ifdef _MSC_VER
+#pragma warning( disable : 4065 ) // switch statement contains 'default' but no 'case' labels
+#endif
+
+#undef yyerror
+
+static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg)
+{
+   _mesa_glsl_error(loc, st, "%s", msg);
+}
+
+static int
+_mesa_glsl_lex(YYSTYPE *val, YYLTYPE *loc, _mesa_glsl_parse_state *state)
+{
+   return _mesa_glsl_lexer_lex(val, loc, state->scanner);
+}
+
+static bool match_layout_qualifier(const char *s1, const char *s2,
+                                   _mesa_glsl_parse_state *state)
+{
+   /* From the GLSL 1.50 spec, section 4.3.8 (Layout Qualifiers):
+    *
+    *     "The tokens in any layout-qualifier-id-list ... are not case
+    *     sensitive, unless explicitly noted otherwise."
+    *
+    * The text "unless explicitly noted otherwise" appears to be
+    * vacuous--no desktop GLSL spec (up through GLSL 4.40) notes
+    * otherwise.
+    *
+    * However, the GLSL ES 3.00 spec says, in section 4.3.8 (Layout
+    * Qualifiers):
+    *
+    *     "As for other identifiers, they are case sensitive."
+    *
+    * So we need to do a case-sensitive or a case-insensitive match,
+    * depending on whether we are compiling for GLSL ES.
+    */
+   if (state->es_shader)
+      return strcmp(s1, s2);
+   else
+      return strcasecmp(s1, s2);
+}
+%}
+
+%expect 0
+
+%pure-parser
+%error-verbose
+
+%locations
+%initial-action {
+   @$.first_line = 1;
+   @$.first_column = 1;
+   @$.last_line = 1;
+   @$.last_column = 1;
+   @$.source = 0;
+}
+
+%lex-param   {struct _mesa_glsl_parse_state *state}
+%parse-param {struct _mesa_glsl_parse_state *state}
+
+%union {
+   int n;
+   float real;
+   double dreal;
+   const char *identifier;
+
+   struct ast_type_qualifier type_qualifier;
+
+   ast_node *node;
+   ast_type_specifier *type_specifier;
+   ast_array_specifier *array_specifier;
+   ast_fully_specified_type *fully_specified_type;
+   ast_function *function;
+   ast_parameter_declarator *parameter_declarator;
+   ast_function_definition *function_definition;
+   ast_compound_statement *compound_statement;
+   ast_expression *expression;
+   ast_declarator_list *declarator_list;
+   ast_struct_specifier *struct_specifier;
+   ast_declaration *declaration;
+   ast_switch_body *switch_body;
+   ast_case_label *case_label;
+   ast_case_label_list *case_label_list;
+   ast_case_statement *case_statement;
+   ast_case_statement_list *case_statement_list;
+   ast_interface_block *interface_block;
+   ast_subroutine_list *subroutine_list;
+   struct {
+      ast_node *cond;
+      ast_expression *rest;
+   } for_rest_statement;
+
+   struct {
+      ast_node *then_statement;
+      ast_node *else_statement;
+   } selection_rest_statement;
+}
+
+%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK DOUBLE_TOK
+%token BREAK BUFFER CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT
+%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 DVEC2 DVEC3 DVEC4
+%token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING SAMPLE
+%token NOPERSPECTIVE FLAT SMOOTH
+%token MAT2X2 MAT2X3 MAT2X4
+%token MAT3X2 MAT3X3 MAT3X4
+%token MAT4X2 MAT4X3 MAT4X4
+%token DMAT2X2 DMAT2X3 DMAT2X4
+%token DMAT3X2 DMAT3X3 DMAT3X4
+%token DMAT4X2 DMAT4X3 DMAT4X4
+%token SAMPLER1D SAMPLER2D SAMPLER3D SAMPLERCUBE SAMPLER1DSHADOW SAMPLER2DSHADOW
+%token SAMPLERCUBESHADOW SAMPLER1DARRAY SAMPLER2DARRAY SAMPLER1DARRAYSHADOW
+%token SAMPLER2DARRAYSHADOW SAMPLERCUBEARRAY SAMPLERCUBEARRAYSHADOW
+%token ISAMPLER1D ISAMPLER2D ISAMPLER3D ISAMPLERCUBE
+%token ISAMPLER1DARRAY ISAMPLER2DARRAY ISAMPLERCUBEARRAY
+%token USAMPLER1D USAMPLER2D USAMPLER3D USAMPLERCUBE USAMPLER1DARRAY
+%token USAMPLER2DARRAY USAMPLERCUBEARRAY
+%token SAMPLER2DRECT ISAMPLER2DRECT USAMPLER2DRECT SAMPLER2DRECTSHADOW
+%token SAMPLERBUFFER ISAMPLERBUFFER USAMPLERBUFFER
+%token SAMPLER2DMS ISAMPLER2DMS USAMPLER2DMS
+%token SAMPLER2DMSARRAY ISAMPLER2DMSARRAY USAMPLER2DMSARRAY
+%token SAMPLEREXTERNALOES
+%token IMAGE1D IMAGE2D IMAGE3D IMAGE2DRECT IMAGECUBE IMAGEBUFFER
+%token IMAGE1DARRAY IMAGE2DARRAY IMAGECUBEARRAY IMAGE2DMS IMAGE2DMSARRAY
+%token IIMAGE1D IIMAGE2D IIMAGE3D IIMAGE2DRECT IIMAGECUBE IIMAGEBUFFER
+%token IIMAGE1DARRAY IIMAGE2DARRAY IIMAGECUBEARRAY IIMAGE2DMS IIMAGE2DMSARRAY
+%token UIMAGE1D UIMAGE2D UIMAGE3D UIMAGE2DRECT UIMAGECUBE UIMAGEBUFFER
+%token UIMAGE1DARRAY UIMAGE2DARRAY UIMAGECUBEARRAY UIMAGE2DMS UIMAGE2DMSARRAY
+%token IMAGE1DSHADOW IMAGE2DSHADOW IMAGE1DARRAYSHADOW IMAGE2DARRAYSHADOW
+%token COHERENT VOLATILE RESTRICT READONLY WRITEONLY
+%token ATOMIC_UINT
+%token SHARED
+%token STRUCT VOID_TOK WHILE
+%token <identifier> IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER
+%type <identifier> any_identifier
+%type <interface_block> instance_name_opt
+%type <interface_block> buffer_instance_name_opt
+%token <real> FLOATCONSTANT
+%token <dreal> DOUBLECONSTANT
+%token <n> INTCONSTANT UINTCONSTANT BOOLCONSTANT
+%token <identifier> FIELD_SELECTION
+%token LEFT_OP RIGHT_OP
+%token INC_OP DEC_OP LE_OP GE_OP EQ_OP NE_OP
+%token AND_OP OR_OP XOR_OP MUL_ASSIGN DIV_ASSIGN ADD_ASSIGN
+%token MOD_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN
+%token SUB_ASSIGN
+%token INVARIANT PRECISE
+%token LOWP MEDIUMP HIGHP SUPERP PRECISION
+
+%token VERSION_TOK EXTENSION LINE COLON EOL INTERFACE OUTPUT
+%token PRAGMA_DEBUG_ON PRAGMA_DEBUG_OFF
+%token PRAGMA_OPTIMIZE_ON PRAGMA_OPTIMIZE_OFF
+%token PRAGMA_INVARIANT_ALL
+%token LAYOUT_TOK
+%token DOT_TOK
+   /* Reserved words that are not actually used in the grammar.
+    */
+%token ASM CLASS UNION ENUM TYPEDEF TEMPLATE THIS PACKED_TOK GOTO
+%token INLINE_TOK NOINLINE PUBLIC_TOK STATIC EXTERN EXTERNAL
+%token LONG_TOK SHORT_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK
+%token HVEC2 HVEC3 HVEC4 FVEC2 FVEC3 FVEC4
+%token SAMPLER3DRECT
+%token SIZEOF CAST NAMESPACE USING
+%token RESOURCE PATCH
+%token SUBROUTINE
+
+%token ERROR_TOK
+
+%token COMMON PARTITION ACTIVE FILTER ROW_MAJOR
+
+%type <identifier> variable_identifier
+%type <node> statement
+%type <node> statement_list
+%type <node> simple_statement
+%type <n> precision_qualifier
+%type <type_qualifier> type_qualifier
+%type <type_qualifier> auxiliary_storage_qualifier
+%type <type_qualifier> storage_qualifier
+%type <type_qualifier> interpolation_qualifier
+%type <type_qualifier> layout_qualifier
+%type <type_qualifier> layout_qualifier_id_list layout_qualifier_id
+%type <type_qualifier> interface_block_layout_qualifier
+%type <type_qualifier> memory_qualifier
+%type <type_qualifier> subroutine_qualifier
+%type <subroutine_list> subroutine_type_list
+%type <type_qualifier> interface_qualifier
+%type <type_qualifier> buffer_interface_qualifier
+%type <type_specifier> type_specifier
+%type <type_specifier> type_specifier_nonarray
+%type <array_specifier> array_specifier
+%type <identifier> basic_type_specifier_nonarray
+%type <fully_specified_type> fully_specified_type
+%type <function> function_prototype
+%type <function> function_header
+%type <function> function_header_with_parameters
+%type <function> function_declarator
+%type <parameter_declarator> parameter_declarator
+%type <parameter_declarator> parameter_declaration
+%type <type_qualifier> parameter_qualifier
+%type <type_qualifier> parameter_direction_qualifier
+%type <type_specifier> parameter_type_specifier
+%type <function_definition> function_definition
+%type <compound_statement> compound_statement_no_new_scope
+%type <compound_statement> compound_statement
+%type <node> statement_no_new_scope
+%type <node> expression_statement
+%type <expression> expression
+%type <expression> primary_expression
+%type <expression> assignment_expression
+%type <expression> conditional_expression
+%type <expression> logical_or_expression
+%type <expression> logical_xor_expression
+%type <expression> logical_and_expression
+%type <expression> inclusive_or_expression
+%type <expression> exclusive_or_expression
+%type <expression> and_expression
+%type <expression> equality_expression
+%type <expression> relational_expression
+%type <expression> shift_expression
+%type <expression> additive_expression
+%type <expression> multiplicative_expression
+%type <expression> unary_expression
+%type <expression> constant_expression
+%type <expression> integer_expression
+%type <expression> postfix_expression
+%type <expression> function_call_header_with_parameters
+%type <expression> function_call_header_no_parameters
+%type <expression> function_call_header
+%type <expression> function_call_generic
+%type <expression> function_call_or_method
+%type <expression> function_call
+%type <n> assignment_operator
+%type <n> unary_operator
+%type <expression> function_identifier
+%type <node> external_declaration
+%type <declarator_list> init_declarator_list
+%type <declarator_list> single_declaration
+%type <expression> initializer
+%type <expression> initializer_list
+%type <node> declaration
+%type <node> declaration_statement
+%type <node> jump_statement
+%type <node> interface_block
+%type <interface_block> basic_interface_block
+%type <struct_specifier> struct_specifier
+%type <node> struct_declaration_list
+%type <declarator_list> struct_declaration
+%type <declaration> struct_declarator
+%type <declaration> struct_declarator_list
+%type <declarator_list> member_list
+%type <declarator_list> member_declaration
+%type <node> selection_statement
+%type <selection_rest_statement> selection_rest_statement
+%type <node> switch_statement
+%type <switch_body> switch_body
+%type <case_label_list> case_label_list
+%type <case_label> case_label
+%type <case_statement> case_statement
+%type <case_statement_list> case_statement_list
+%type <node> iteration_statement
+%type <node> condition
+%type <node> conditionopt
+%type <node> for_init_statement
+%type <for_rest_statement> for_rest_statement
+%type <node> layout_defaults
+%type <type_qualifier> layout_uniform_defaults
+%type <type_qualifier> layout_buffer_defaults
+%type <type_qualifier> layout_in_defaults
+%type <type_qualifier> layout_out_defaults
+
+%right THEN ELSE
+%%
+
+translation_unit:
+   version_statement extension_statement_list
+   {
+      _mesa_glsl_initialize_types(state);
+   }
+   external_declaration_list
+   {
+      delete state->symbols;
+      state->symbols = new(ralloc_parent(state)) glsl_symbol_table;
+      if (state->es_shader) {
+         if (state->stage == MESA_SHADER_FRAGMENT) {
+            state->symbols->add_default_precision_qualifier("int", ast_precision_medium);
+         } else {
state->symbols->add_default_precision_qualifier("float", ast_precision_high); + state->symbols->add_default_precision_qualifier("int", ast_precision_high); + } + state->symbols->add_default_precision_qualifier("sampler2D", ast_precision_low); + state->symbols->add_default_precision_qualifier("samplerExternalOES", ast_precision_low); + state->symbols->add_default_precision_qualifier("samplerCube", ast_precision_low); + state->symbols->add_default_precision_qualifier("atomic_uint", ast_precision_high); + } + _mesa_glsl_initialize_types(state); + } + ; + +version_statement: + /* blank - no #version specified: defaults are already set */ + | VERSION_TOK INTCONSTANT EOL + { + state->process_version_directive(&@2, $2, NULL); + if (state->error) { + YYERROR; + } + } + | VERSION_TOK INTCONSTANT any_identifier EOL + { + state->process_version_directive(&@2, $2, $3); + if (state->error) { + YYERROR; + } + } + ; + +pragma_statement: + PRAGMA_DEBUG_ON EOL + | PRAGMA_DEBUG_OFF EOL + | PRAGMA_OPTIMIZE_ON EOL + | PRAGMA_OPTIMIZE_OFF EOL + | PRAGMA_INVARIANT_ALL EOL + { + /* Pragma invariant(all) cannot be used in a fragment shader. + * + * Page 27 of the GLSL 1.20 spec, Page 53 of the GLSL ES 3.00 spec: + * + * "It is an error to use this pragma in a fragment shader." + */ + if (state->is_version(120, 300) && + state->stage == MESA_SHADER_FRAGMENT) { + _mesa_glsl_error(& @1, state, + "pragma `invariant(all)' cannot be used " + "in a fragment shader."); + } else if (!state->is_version(120, 100)) { + _mesa_glsl_warning(& @1, state, + "pragma `invariant(all)' not supported in %s " + "(GLSL ES 1.00 or GLSL 1.20 required)", + state->get_version_string()); + } else { + state->all_invariant = true; + } + } + ; + +extension_statement_list: + + | extension_statement_list extension_statement + ; + +any_identifier: + IDENTIFIER + | TYPE_IDENTIFIER + | NEW_IDENTIFIER + ; + +extension_statement: + EXTENSION any_identifier COLON any_identifier EOL + { + if (!_mesa_glsl_process_extension($2, & @2, $4, & @4, state)) { + YYERROR; + } + } + ; + +external_declaration_list: + external_declaration + { + /* FINISHME: The NULL test is required because pragmas are set to + * FINISHME: NULL. (See production rule for external_declaration.) + */ + if ($1 != NULL) + state->translation_unit.push_tail(& $1->link); + } + | external_declaration_list external_declaration + { + /* FINISHME: The NULL test is required because pragmas are set to + * FINISHME: NULL. (See production rule for external_declaration.) 
+ */ + if ($2 != NULL) + state->translation_unit.push_tail(& $2->link); + } + | external_declaration_list extension_statement { + if (!state->allow_extension_directive_midshader) { + _mesa_glsl_error(& @2, state, + "#extension directive is not allowed " + "in the middle of a shader"); + YYERROR; + } + } + ; + +variable_identifier: + IDENTIFIER + | NEW_IDENTIFIER + ; + +primary_expression: + variable_identifier + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_identifier, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.identifier = $1; + } + | INTCONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_int_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.int_constant = $1; + } + | UINTCONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_uint_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.uint_constant = $1; + } + | FLOATCONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_float_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.float_constant = $1; + } + | DOUBLECONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_double_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.double_constant = $1; + } + | BOOLCONSTANT + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_bool_constant, NULL, NULL, NULL); + $$->set_location(@1); + $$->primary_expression.bool_constant = $1; + } + | '(' expression ')' + { + $$ = $2; + } + ; + +postfix_expression: + primary_expression + | postfix_expression '[' integer_expression ']' + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_array_index, $1, $3, NULL); + $$->set_location_range(@1, @4); + } + | function_call + { + $$ = $1; + } + | postfix_expression DOT_TOK FIELD_SELECTION + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_field_selection, $1, NULL, NULL); + $$->set_location_range(@1, @3); + $$->primary_expression.identifier = $3; + } + | postfix_expression INC_OP + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_post_inc, $1, NULL, NULL); + $$->set_location_range(@1, @2); + } + | postfix_expression DEC_OP + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_post_dec, $1, NULL, NULL); + $$->set_location_range(@1, @2); + } + ; + +integer_expression: + expression + ; + +function_call: + function_call_or_method + ; + +function_call_or_method: + function_call_generic + ; + +function_call_generic: + function_call_header_with_parameters ')' + | function_call_header_no_parameters ')' + ; + +function_call_header_no_parameters: + function_call_header VOID_TOK + | function_call_header + ; + +function_call_header_with_parameters: + function_call_header assignment_expression + { + $$ = $1; + $$->set_location(@1); + $$->expressions.push_tail(& $2->link); + } + | function_call_header_with_parameters ',' assignment_expression + { + $$ = $1; + $$->set_location(@1); + $$->expressions.push_tail(& $3->link); + } + ; + + // Grammar Note: Constructors look like functions, but lexical + // analysis recognized most of them as keywords. They are now + // recognized through "type_specifier". 
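A minimal C++ sketch of the argument-accumulation pattern encoded by the function_call_header_with_parameters rules above: each reduction hands back the same header node ($$ = $1) with one more argument appended to its list. Here std::vector stands in for Mesa's intrusive exec_list, and Expr/push_argument are illustrative names, not the real AST API:

   #include <cassert>
   #include <memory>
   #include <string>
   #include <utility>
   #include <vector>

   // Illustrative stand-in: Mesa chains ast_expression nodes on an
   // intrusive exec_list; a vector of owned nodes models the same shape.
   struct Expr {
      std::string callee;                       // function or constructor name
      std::vector<std::unique_ptr<Expr>> args;  // plays the role of exec_list
      explicit Expr(std::string name) : callee(std::move(name)) {}
   };

   // Mirrors the grammar actions: return the same header node after
   // pushing one more argument onto its list ($$ = $1 in the .yy file).
   Expr *push_argument(Expr *header, std::unique_ptr<Expr> arg)
   {
      header->args.push_back(std::move(arg));
      return header;
   }

   int main()
   {
      // "vec4(x, 1.0)" and "foo(x, 1.0)" produce the same node shape;
      // only the classification of the callee (type vs. identifier) differs.
      Expr call("vec4");
      push_argument(&call, std::make_unique<Expr>("x"));
      push_argument(&call, std::make_unique<Expr>("1.0"));
      assert(call.args.size() == 2);
      return 0;
   }

Because constructors and user functions converge on one node shape, the grammar notes above can treat constructors as ordinary calls reached through "type_specifier".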
+function_call_header: + function_identifier '(' + ; + +function_identifier: + type_specifier + { + void *ctx = state; + $$ = new(ctx) ast_function_expression($1); + $$->set_location(@1); + } + | postfix_expression + { + void *ctx = state; + $$ = new(ctx) ast_function_expression($1); + $$->set_location(@1); + } + ; + + // Grammar Note: Constructors look like methods, but lexical + // analysis recognized most of them as keywords. They are now + // recognized through "type_specifier". + + // Grammar Note: No traditional style type casts. +unary_expression: + postfix_expression + | INC_OP unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_pre_inc, $2, NULL, NULL); + $$->set_location(@1); + } + | DEC_OP unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_pre_dec, $2, NULL, NULL); + $$->set_location(@1); + } + | unary_operator unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression($1, $2, NULL, NULL); + $$->set_location_range(@1, @2); + } + ; + + // Grammar Note: No '*' or '&' unary ops. Pointers are not supported. +unary_operator: + '+' { $$ = ast_plus; } + | '-' { $$ = ast_neg; } + | '!' { $$ = ast_logic_not; } + | '~' { $$ = ast_bit_not; } + ; + +multiplicative_expression: + unary_expression + | multiplicative_expression '*' unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_mul, $1, $3); + $$->set_location_range(@1, @3); + } + | multiplicative_expression '/' unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_div, $1, $3); + $$->set_location_range(@1, @3); + } + | multiplicative_expression '%' unary_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_mod, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +additive_expression: + multiplicative_expression + | additive_expression '+' multiplicative_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_add, $1, $3); + $$->set_location_range(@1, @3); + } + | additive_expression '-' multiplicative_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_sub, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +shift_expression: + additive_expression + | shift_expression LEFT_OP additive_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_lshift, $1, $3); + $$->set_location_range(@1, @3); + } + | shift_expression RIGHT_OP additive_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_rshift, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +relational_expression: + shift_expression + | relational_expression '<' shift_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_less, $1, $3); + $$->set_location_range(@1, @3); + } + | relational_expression '>' shift_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_greater, $1, $3); + $$->set_location_range(@1, @3); + } + | relational_expression LE_OP shift_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_lequal, $1, $3); + $$->set_location_range(@1, @3); + } + | relational_expression GE_OP shift_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_gequal, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +equality_expression: + relational_expression + | equality_expression EQ_OP relational_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_equal, $1, $3); + $$->set_location_range(@1, @3); + } + | equality_expression NE_OP relational_expression + { + 
void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_nequal, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +and_expression: + equality_expression + | and_expression '&' equality_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_bit_and, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +exclusive_or_expression: + and_expression + | exclusive_or_expression '^' and_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_bit_xor, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +inclusive_or_expression: + exclusive_or_expression + | inclusive_or_expression '|' exclusive_or_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_bit_or, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +logical_and_expression: + inclusive_or_expression + | logical_and_expression AND_OP inclusive_or_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_logic_and, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +logical_xor_expression: + logical_and_expression + | logical_xor_expression XOR_OP logical_and_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_logic_xor, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +logical_or_expression: + logical_xor_expression + | logical_or_expression OR_OP logical_xor_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression_bin(ast_logic_or, $1, $3); + $$->set_location_range(@1, @3); + } + ; + +conditional_expression: + logical_or_expression + | logical_or_expression '?' expression ':' assignment_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression(ast_conditional, $1, $3, $5); + $$->set_location_range(@1, @5); + } + ; + +assignment_expression: + conditional_expression + | unary_expression assignment_operator assignment_expression + { + void *ctx = state; + $$ = new(ctx) ast_expression($2, $1, $3, NULL); + $$->set_location_range(@1, @3); + } + ; + +assignment_operator: + '=' { $$ = ast_assign; } + | MUL_ASSIGN { $$ = ast_mul_assign; } + | DIV_ASSIGN { $$ = ast_div_assign; } + | MOD_ASSIGN { $$ = ast_mod_assign; } + | ADD_ASSIGN { $$ = ast_add_assign; } + | SUB_ASSIGN { $$ = ast_sub_assign; } + | LEFT_ASSIGN { $$ = ast_ls_assign; } + | RIGHT_ASSIGN { $$ = ast_rs_assign; } + | AND_ASSIGN { $$ = ast_and_assign; } + | XOR_ASSIGN { $$ = ast_xor_assign; } + | OR_ASSIGN { $$ = ast_or_assign; } + ; + +expression: + assignment_expression + { + $$ = $1; + } + | expression ',' assignment_expression + { + void *ctx = state; + if ($1->oper != ast_sequence) { + $$ = new(ctx) ast_expression(ast_sequence, NULL, NULL, NULL); + $$->set_location_range(@1, @3); + $$->expressions.push_tail(& $1->link); + } else { + $$ = $1; + } + + $$->expressions.push_tail(& $3->link); + } + ; + +constant_expression: + conditional_expression + ; + +declaration: + function_prototype ';' + { + state->symbols->pop_scope(); + $$ = $1; + } + | init_declarator_list ';' + { + $$ = $1; + } + | PRECISION precision_qualifier type_specifier ';' + { + $3->default_precision = $2; + $$ = $3; + } + | interface_block + { + $$ = $1; + } + ; + +function_prototype: + function_declarator ')' + ; + +function_declarator: + function_header + | function_header_with_parameters + ; + +function_header_with_parameters: + function_header parameter_declaration + { + $$ = $1; + $$->parameters.push_tail(& $2->link); + } + | function_header_with_parameters ',' parameter_declaration + { + $$ = $1; + $$->parameters.push_tail(& $3->link); + } + ; + +function_header: + 
fully_specified_type variable_identifier '(' + { + void *ctx = state; + $$ = new(ctx) ast_function(); + $$->set_location(@2); + $$->return_type = $1; + $$->identifier = $2; + + if ($1->qualifier.flags.q.subroutine) { + /* add type for IDENTIFIER search */ + state->symbols->add_type($2, glsl_type::get_subroutine_instance($2)); + } else + state->symbols->add_function(new(state) ir_function($2)); + state->symbols->push_scope(); + } + ; + +parameter_declarator: + type_specifier any_identifier + { + void *ctx = state; + $$ = new(ctx) ast_parameter_declarator(); + $$->set_location_range(@1, @2); + $$->type = new(ctx) ast_fully_specified_type(); + $$->type->set_location(@1); + $$->type->specifier = $1; + $$->identifier = $2; + } + | type_specifier any_identifier array_specifier + { + void *ctx = state; + $$ = new(ctx) ast_parameter_declarator(); + $$->set_location_range(@1, @3); + $$->type = new(ctx) ast_fully_specified_type(); + $$->type->set_location(@1); + $$->type->specifier = $1; + $$->identifier = $2; + $$->array_specifier = $3; + } + ; + +parameter_declaration: + parameter_qualifier parameter_declarator + { + $$ = $2; + $$->type->qualifier = $1; + } + | parameter_qualifier parameter_type_specifier + { + void *ctx = state; + $$ = new(ctx) ast_parameter_declarator(); + $$->set_location(@2); + $$->type = new(ctx) ast_fully_specified_type(); + $$->type->set_location_range(@1, @2); + $$->type->qualifier = $1; + $$->type->specifier = $2; + } + ; + +parameter_qualifier: + /* empty */ + { + memset(& $$, 0, sizeof($$)); + } + | CONST_TOK parameter_qualifier + { + if ($2.flags.q.constant) + _mesa_glsl_error(&@1, state, "duplicate const qualifier"); + + $$ = $2; + $$.flags.q.constant = 1; + } + | PRECISE parameter_qualifier + { + if ($2.flags.q.precise) + _mesa_glsl_error(&@1, state, "duplicate precise qualifier"); + + $$ = $2; + $$.flags.q.precise = 1; + } + | parameter_direction_qualifier parameter_qualifier + { + if (($1.flags.q.in || $1.flags.q.out) && ($2.flags.q.in || $2.flags.q.out)) + _mesa_glsl_error(&@1, state, "duplicate in/out/inout qualifier"); + + if (!state->has_420pack_or_es31() && $2.flags.q.constant) + _mesa_glsl_error(&@1, state, "in/out/inout must come after const " + "or precise"); + + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | precision_qualifier parameter_qualifier + { + if ($2.precision != ast_precision_none) + _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); + + if (!state->has_420pack_or_es31() && + $2.flags.i != 0) + _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); + + $$ = $2; + $$.precision = $1; + } + | memory_qualifier parameter_qualifier + { + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + +parameter_direction_qualifier: + IN_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.in = 1; + } + | OUT_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.out = 1; + } + | INOUT_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.in = 1; + $$.flags.q.out = 1; + } + ; + +parameter_type_specifier: + type_specifier + ; + +init_declarator_list: + single_declaration + | init_declarator_list ',' any_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($3, NULL, NULL); + decl->set_location(@3); + + $$ = $1; + $$->declarations.push_tail(&decl->link); + state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); + } + | init_declarator_list ',' any_identifier array_specifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($3, $4, NULL); + 
decl->set_location_range(@3, @4); + + $$ = $1; + $$->declarations.push_tail(&decl->link); + state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); + } + | init_declarator_list ',' any_identifier array_specifier '=' initializer + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($3, $4, $6); + decl->set_location_range(@3, @4); + + $$ = $1; + $$->declarations.push_tail(&decl->link); + state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); + } + | init_declarator_list ',' any_identifier '=' initializer + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($3, NULL, $5); + decl->set_location(@3); + + $$ = $1; + $$->declarations.push_tail(&decl->link); + state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); + } + ; + + // Grammar Note: No 'enum', or 'typedef'. +single_declaration: + fully_specified_type + { + void *ctx = state; + /* Empty declaration list is valid. */ + $$ = new(ctx) ast_declarator_list($1); + $$->set_location(@1); + } + | fully_specified_type any_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, NULL, NULL); + decl->set_location(@2); + + $$ = new(ctx) ast_declarator_list($1); + $$->set_location_range(@1, @2); + $$->declarations.push_tail(&decl->link); + } + | fully_specified_type any_identifier array_specifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, $3, NULL); + decl->set_location_range(@2, @3); + + $$ = new(ctx) ast_declarator_list($1); + $$->set_location_range(@1, @3); + $$->declarations.push_tail(&decl->link); + } + | fully_specified_type any_identifier array_specifier '=' initializer + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, $3, $5); + decl->set_location_range(@2, @3); + + $$ = new(ctx) ast_declarator_list($1); + $$->set_location_range(@1, @3); + $$->declarations.push_tail(&decl->link); + } + | fully_specified_type any_identifier '=' initializer + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, NULL, $4); + decl->set_location(@2); + + $$ = new(ctx) ast_declarator_list($1); + $$->set_location_range(@1, @2); + $$->declarations.push_tail(&decl->link); + } + | INVARIANT variable_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, NULL, NULL); + decl->set_location(@2); + + $$ = new(ctx) ast_declarator_list(NULL); + $$->set_location_range(@1, @2); + $$->invariant = true; + + $$->declarations.push_tail(&decl->link); + } + | PRECISE variable_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($2, NULL, NULL); + decl->set_location(@2); + + $$ = new(ctx) ast_declarator_list(NULL); + $$->set_location_range(@1, @2); + $$->precise = true; + + $$->declarations.push_tail(&decl->link); + } + ; + +fully_specified_type: + type_specifier + { + void *ctx = state; + $$ = new(ctx) ast_fully_specified_type(); + $$->set_location(@1); + $$->specifier = $1; + } + | type_qualifier type_specifier + { + void *ctx = state; + $$ = new(ctx) ast_fully_specified_type(); + $$->set_location_range(@1, @2); + $$->qualifier = $1; + $$->specifier = $2; + if ($$->specifier->structure != NULL && + $$->specifier->structure->is_declaration) { + $$->specifier->structure->layout = &$$->qualifier; + } + } + ; + +layout_qualifier: + LAYOUT_TOK '(' layout_qualifier_id_list ')' + { + $$ = $3; + } + ; + +layout_qualifier_id_list: + layout_qualifier_id + | layout_qualifier_id_list ',' 
layout_qualifier_id + { + $$ = $1; + if (!$$.merge_qualifier(& @3, state, $3, true)) { + YYERROR; + } + } + ; + +layout_qualifier_id: + any_identifier + { + memset(& $$, 0, sizeof($$)); + + /* Layout qualifiers for ARB_fragment_coord_conventions. */ + if (!$$.flags.i && (state->ARB_fragment_coord_conventions_enable || + state->is_version(150, 0))) { + if (match_layout_qualifier($1, "origin_upper_left", state) == 0) { + $$.flags.q.origin_upper_left = 1; + } else if (match_layout_qualifier($1, "pixel_center_integer", + state) == 0) { + $$.flags.q.pixel_center_integer = 1; + } + + if ($$.flags.i && state->ARB_fragment_coord_conventions_warn) { + _mesa_glsl_warning(& @1, state, + "GL_ARB_fragment_coord_conventions layout " + "identifier `%s' used", $1); + } + } + + /* Layout qualifiers for AMD/ARB_conservative_depth. */ + if (!$$.flags.i && + (state->AMD_conservative_depth_enable || + state->ARB_conservative_depth_enable || + state->is_version(420, 0))) { + if (match_layout_qualifier($1, "depth_any", state) == 0) { + $$.flags.q.depth_any = 1; + } else if (match_layout_qualifier($1, "depth_greater", state) == 0) { + $$.flags.q.depth_greater = 1; + } else if (match_layout_qualifier($1, "depth_less", state) == 0) { + $$.flags.q.depth_less = 1; + } else if (match_layout_qualifier($1, "depth_unchanged", + state) == 0) { + $$.flags.q.depth_unchanged = 1; + } + + if ($$.flags.i && state->AMD_conservative_depth_warn) { + _mesa_glsl_warning(& @1, state, + "GL_AMD_conservative_depth " + "layout qualifier `%s' is used", $1); + } + if ($$.flags.i && state->ARB_conservative_depth_warn) { + _mesa_glsl_warning(& @1, state, + "GL_ARB_conservative_depth " + "layout qualifier `%s' is used", $1); + } + } + + /* See also interface_block_layout_qualifier. */ + if (!$$.flags.i && state->has_uniform_buffer_objects()) { + if (match_layout_qualifier($1, "std140", state) == 0) { + $$.flags.q.std140 = 1; + } else if (match_layout_qualifier($1, "shared", state) == 0) { + $$.flags.q.shared = 1; + } else if (match_layout_qualifier($1, "std430", state) == 0) { + $$.flags.q.std430 = 1; + } else if (match_layout_qualifier($1, "column_major", state) == 0) { + $$.flags.q.column_major = 1; + /* "row_major" is a reserved word in GLSL 1.30+. Its token is parsed + * below in the interface_block_layout_qualifier rule. + * + * It is not a reserved word in GLSL ES 3.00, so it's handled here as + * an identifier. + * + * Also, this takes care of alternate capitalizations of + * "row_major" (which is necessary because layout qualifiers + * are case-insensitive in desktop GLSL). + */ + } else if (match_layout_qualifier($1, "row_major", state) == 0) { + $$.flags.q.row_major = 1; + /* "packed" is a reserved word in GLSL, and its token is + * parsed below in the interface_block_layout_qualifier rule. + * However, we must take care of alternate capitalizations of + * "packed", because layout qualifiers are case-insensitive + * in desktop GLSL. + */ + } else if (match_layout_qualifier($1, "packed", state) == 0) { + $$.flags.q.packed = 1; + } + + if ($$.flags.i && state->ARB_uniform_buffer_object_warn) { + _mesa_glsl_warning(& @1, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "layout qualifier `%s' is used", $1); + } + } + + /* Layout qualifiers for GLSL 1.50 geometry shaders. 
*/ + if (!$$.flags.i) { + static const struct { + const char *s; + GLenum e; + } map[] = { + { "points", GL_POINTS }, + { "lines", GL_LINES }, + { "lines_adjacency", GL_LINES_ADJACENCY }, + { "line_strip", GL_LINE_STRIP }, + { "triangles", GL_TRIANGLES }, + { "triangles_adjacency", GL_TRIANGLES_ADJACENCY }, + { "triangle_strip", GL_TRIANGLE_STRIP }, + }; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { + if (match_layout_qualifier($1, map[i].s, state) == 0) { + $$.flags.q.prim_type = 1; + $$.prim_type = map[i].e; + break; + } + } + + if ($$.flags.i && !state->has_geometry_shader()) { + _mesa_glsl_error(& @1, state, "#version 150 layout " + "qualifier `%s' used", $1); + } + } + + /* Layout qualifiers for ARB_shader_image_load_store. */ + if (state->ARB_shader_image_load_store_enable || + state->is_version(420, 310)) { + if (!$$.flags.i) { + static const struct { + const char *name; + GLenum format; + glsl_base_type base_type; + /** Minimum desktop GLSL version required for the image + * format. Use 130 if already present in the original + * ARB extension. + */ + unsigned required_glsl; + /** Minimum GLSL ES version required for the image format. */ + unsigned required_essl; + } map[] = { + { "rgba32f", GL_RGBA32F, GLSL_TYPE_FLOAT, 130, 310 }, + { "rgba16f", GL_RGBA16F, GLSL_TYPE_FLOAT, 130, 310 }, + { "rg32f", GL_RG32F, GLSL_TYPE_FLOAT, 130, 0 }, + { "rg16f", GL_RG16F, GLSL_TYPE_FLOAT, 130, 0 }, + { "r11f_g11f_b10f", GL_R11F_G11F_B10F, GLSL_TYPE_FLOAT, 130, 0 }, + { "r32f", GL_R32F, GLSL_TYPE_FLOAT, 130, 310 }, + { "r16f", GL_R16F, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba32ui", GL_RGBA32UI, GLSL_TYPE_UINT, 130, 310 }, + { "rgba16ui", GL_RGBA16UI, GLSL_TYPE_UINT, 130, 310 }, + { "rgb10_a2ui", GL_RGB10_A2UI, GLSL_TYPE_UINT, 130, 0 }, + { "rgba8ui", GL_RGBA8UI, GLSL_TYPE_UINT, 130, 310 }, + { "rg32ui", GL_RG32UI, GLSL_TYPE_UINT, 130, 0 }, + { "rg16ui", GL_RG16UI, GLSL_TYPE_UINT, 130, 0 }, + { "rg8ui", GL_RG8UI, GLSL_TYPE_UINT, 130, 0 }, + { "r32ui", GL_R32UI, GLSL_TYPE_UINT, 130, 310 }, + { "r16ui", GL_R16UI, GLSL_TYPE_UINT, 130, 0 }, + { "r8ui", GL_R8UI, GLSL_TYPE_UINT, 130, 0 }, + { "rgba32i", GL_RGBA32I, GLSL_TYPE_INT, 130, 310 }, + { "rgba16i", GL_RGBA16I, GLSL_TYPE_INT, 130, 310 }, + { "rgba8i", GL_RGBA8I, GLSL_TYPE_INT, 130, 310 }, + { "rg32i", GL_RG32I, GLSL_TYPE_INT, 130, 0 }, + { "rg16i", GL_RG16I, GLSL_TYPE_INT, 130, 0 }, + { "rg8i", GL_RG8I, GLSL_TYPE_INT, 130, 0 }, + { "r32i", GL_R32I, GLSL_TYPE_INT, 130, 310 }, + { "r16i", GL_R16I, GLSL_TYPE_INT, 130, 0 }, + { "r8i", GL_R8I, GLSL_TYPE_INT, 130, 0 }, + { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT, 130, 310 }, + { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 0 }, + { "rg8", GL_RG8, GLSL_TYPE_FLOAT, 130, 0 }, + { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 0 }, + { "r8", GL_R8, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT, 130, 310 }, + { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, + { "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0 } + }; + + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { + if (state->is_version(map[i].required_glsl, + map[i].required_essl) && + match_layout_qualifier($1, map[i].name, state) == 0) { + $$.flags.q.explicit_image_format = 1; + $$.image_format = map[i].format; + $$.image_base_type = 
map[i].base_type; + break; + } + } + } + + if (!$$.flags.i && + match_layout_qualifier($1, "early_fragment_tests", state) == 0) { + /* From section 4.4.1.3 of the GLSL 4.50 specification + * (Fragment Shader Inputs): + * + * "Fragment shaders also allow the following layout + * qualifier on in only (not with variable declarations) + * layout-qualifier-id + * early_fragment_tests + * [...]" + */ + if (state->stage != MESA_SHADER_FRAGMENT) { + _mesa_glsl_error(& @1, state, + "early_fragment_tests layout qualifier only " + "valid in fragment shaders"); + } + + $$.flags.q.early_fragment_tests = 1; + } + } + + /* Layout qualifiers for tessellation evaluation shaders. */ + if (!$$.flags.i) { + struct { + const char *s; + GLenum e; + } map[] = { + /* triangles already parsed by gs-specific code */ + { "quads", GL_QUADS }, + { "isolines", GL_ISOLINES }, + }; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { + if (match_layout_qualifier($1, map[i].s, state) == 0) { + $$.flags.q.prim_type = 1; + $$.prim_type = map[i].e; + break; + } + } + + if ($$.flags.i && + !state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "primitive mode qualifier `%s' requires " + "GLSL 4.00 or ARB_tessellation_shader", $1); + } + } + if (!$$.flags.i) { + struct { + const char *s; + GLenum e; + } map[] = { + { "equal_spacing", GL_EQUAL }, + { "fractional_odd_spacing", GL_FRACTIONAL_ODD }, + { "fractional_even_spacing", GL_FRACTIONAL_EVEN }, + }; + for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { + if (match_layout_qualifier($1, map[i].s, state) == 0) { + $$.flags.q.vertex_spacing = 1; + $$.vertex_spacing = map[i].e; + break; + } + } + + if ($$.flags.i && + !state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "vertex spacing qualifier `%s' requires " + "GLSL 4.00 or ARB_tessellation_shader", $1); + } + } + if (!$$.flags.i) { + if (match_layout_qualifier($1, "cw", state) == 0) { + $$.flags.q.ordering = 1; + $$.ordering = GL_CW; + } else if (match_layout_qualifier($1, "ccw", state) == 0) { + $$.flags.q.ordering = 1; + $$.ordering = GL_CCW; + } + + if ($$.flags.i && + !state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "ordering qualifier `%s' requires " + "GLSL 4.00 or ARB_tessellation_shader", $1); + } + } + if (!$$.flags.i) { + if (match_layout_qualifier($1, "point_mode", state) == 0) { + $$.flags.q.point_mode = 1; + $$.point_mode = true; + } + + if ($$.flags.i && + !state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "qualifier `point_mode' requires " + "GLSL 4.00 or ARB_tessellation_shader"); + } + } + + if (!$$.flags.i) { + _mesa_glsl_error(& @1, state, "unrecognized layout identifier " + "`%s'", $1); + YYERROR; + } + } + | any_identifier '=' constant_expression + { + memset(& $$, 0, sizeof($$)); + void *ctx = state; + + if ($3->oper != ast_int_constant && + $3->oper != ast_uint_constant && + !state->has_enhanced_layouts()) { + _mesa_glsl_error(& @1, state, + "compile-time constant expressions require " + "GLSL 4.40 or ARB_enhanced_layouts"); + } + + if (match_layout_qualifier("location", $1, state) == 0) { + $$.flags.q.explicit_location = 1; + + if ($$.flags.q.attribute == 1 && + state->ARB_explicit_attrib_location_warn) { + _mesa_glsl_warning(& @1, state, + "GL_ARB_explicit_attrib_location layout " + "identifier `%s' used", $1); + } + $$.location = $3; + } + + if (match_layout_qualifier("index", $1, 
state) == 0) { + if (state->es_shader && !state->EXT_blend_func_extended_enable) { + _mesa_glsl_error(& @3, state, "index layout qualifier requires EXT_blend_func_extended"); + YYERROR; + } + + $$.flags.q.explicit_index = 1; + $$.index = $3; + } + + if ((state->has_420pack_or_es31() || + state->has_atomic_counters() || + state->has_shader_storage_buffer_objects()) && + match_layout_qualifier("binding", $1, state) == 0) { + $$.flags.q.explicit_binding = 1; + $$.binding = $3; + } + + if (state->has_atomic_counters() && + match_layout_qualifier("offset", $1, state) == 0) { + $$.flags.q.explicit_offset = 1; + $$.offset = $3; + } + + if (match_layout_qualifier("max_vertices", $1, state) == 0) { + $$.flags.q.max_vertices = 1; + $$.max_vertices = new(ctx) ast_layout_expression(@1, $3); + if (!state->has_geometry_shader()) { + _mesa_glsl_error(& @3, state, + "#version 150 max_vertices qualifier " + "specified", $3); + } + } + + if (state->stage == MESA_SHADER_GEOMETRY) { + if (match_layout_qualifier("stream", $1, state) == 0 && + state->check_explicit_attrib_stream_allowed(& @3)) { + $$.flags.q.stream = 1; + $$.flags.q.explicit_stream = 1; + $$.stream = $3; + } + } + + static const char * const local_size_qualifiers[3] = { + "local_size_x", + "local_size_y", + "local_size_z", + }; + for (int i = 0; i < 3; i++) { + if (match_layout_qualifier(local_size_qualifiers[i], $1, + state) == 0) { + if (!state->has_compute_shader()) { + _mesa_glsl_error(& @3, state, + "%s qualifier requires GLSL 4.30 or " + "GLSL ES 3.10 or ARB_compute_shader", + local_size_qualifiers[i]); + YYERROR; + } else { + $$.flags.q.local_size |= (1 << i); + $$.local_size[i] = new(ctx) ast_layout_expression(@1, $3); + } + break; + } + } + + if (match_layout_qualifier("invocations", $1, state) == 0) { + $$.flags.q.invocations = 1; + $$.invocations = new(ctx) ast_layout_expression(@1, $3); + if (!state->is_version(400, 0) && + !state->ARB_gpu_shader5_enable) { + _mesa_glsl_error(& @3, state, + "GL_ARB_gpu_shader5 invocations " + "qualifier specified", $3); + } + } + + /* Layout qualifiers for tessellation control shaders. */ + if (match_layout_qualifier("vertices", $1, state) == 0) { + $$.flags.q.vertices = 1; + $$.vertices = new(ctx) ast_layout_expression(@1, $3); + if (!state->ARB_tessellation_shader_enable && + !state->is_version(400, 0)) { + _mesa_glsl_error(& @1, state, + "vertices qualifier requires GLSL 4.00 or " + "ARB_tessellation_shader"); + } + } + + /* If the identifier didn't match any known layout identifiers, + * emit an error. + */ + if (!$$.flags.i) { + _mesa_glsl_error(& @1, state, "unrecognized layout identifier " + "`%s'", $1); + YYERROR; + } + } + | interface_block_layout_qualifier + { + $$ = $1; + /* Layout qualifiers for ARB_uniform_buffer_object. */ + if ($$.flags.q.uniform && !state->has_uniform_buffer_objects()) { + _mesa_glsl_error(& @1, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "layout qualifier `%s' is used", $1); + } else if ($$.flags.q.uniform && state->ARB_uniform_buffer_object_warn) { + _mesa_glsl_warning(& @1, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "layout qualifier `%s' is used", $1); + } + } + ; + +/* This is a separate language rule because we parse these as tokens + * (due to them being reserved keywords) instead of identifiers like + * most qualifiers. See the any_identifier path of + * layout_qualifier_id for the others. 
+ * + * Note that since layout qualifiers are case-insensitive in desktop + * GLSL, all of these qualifiers need to be handled as identifiers as + * well (by the any_identifier path of layout_qualifier_id). + */ +interface_block_layout_qualifier: + ROW_MAJOR + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.row_major = 1; + } + | PACKED_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.packed = 1; + } + | SHARED + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.shared = 1; + } + ; + +subroutine_qualifier: + SUBROUTINE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.subroutine = 1; + } + | SUBROUTINE '(' subroutine_type_list ')' + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.subroutine_def = 1; + $$.subroutine_list = $3; + } + ; + +subroutine_type_list: + any_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($1, NULL, NULL); + decl->set_location(@1); + + $$ = new(ctx) ast_subroutine_list(); + $$->declarations.push_tail(&decl->link); + } + | subroutine_type_list ',' any_identifier + { + void *ctx = state; + ast_declaration *decl = new(ctx) ast_declaration($3, NULL, NULL); + decl->set_location(@3); + + $$ = $1; + $$->declarations.push_tail(&decl->link); + } + ; + +interpolation_qualifier: + SMOOTH + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.smooth = 1; + } + | FLAT + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.flat = 1; + } + | NOPERSPECTIVE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.noperspective = 1; + } + ; + +type_qualifier: + /* Single qualifiers */ + INVARIANT + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.invariant = 1; + } + | PRECISE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.precise = 1; + } + | auxiliary_storage_qualifier + | storage_qualifier + | interpolation_qualifier + | layout_qualifier + | memory_qualifier + | subroutine_qualifier + | precision_qualifier + { + memset(&$$, 0, sizeof($$)); + $$.precision = $1; + } + + /* Multiple qualifiers: + * In GLSL 4.20, these can be specified in any order. In earlier versions, + * they appear in this order (see GLSL 1.50 section 4.7 & comments below): + * + * invariant interpolation auxiliary storage precision ...or... + * layout storage precision + * + * Each qualifier's rule ensures that the accumulated qualifiers on the right + * side don't contain any that must appear on the left hand side. + * For example, when processing a storage qualifier, we check that there are + * no auxiliary, interpolation, layout, invariant, or precise qualifiers to the right. + */ + | PRECISE type_qualifier + { + if ($2.flags.q.precise) + _mesa_glsl_error(&@1, state, "duplicate \"precise\" qualifier"); + + $$ = $2; + $$.flags.q.precise = 1; + } + | INVARIANT type_qualifier + { + if ($2.flags.q.invariant) + _mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier"); + + if (!state->has_420pack_or_es31() && $2.flags.q.precise) + _mesa_glsl_error(&@1, state, + "\"invariant\" must come after \"precise\""); + + $$ = $2; + $$.flags.q.invariant = 1; + + /* GLSL ES 3.00 spec, section 4.6.1 "The Invariant Qualifier": + * + * "Only variables output from a shader can be candidates for invariance. + * This includes user-defined output variables and the built-in output + * variables. As only outputs can be declared as invariant, an invariant + * output from one shader stage will still match an input of a subsequent + * stage without the input being declared as invariant." 
+ */ + if (state->es_shader && state->language_version >= 300 && $$.flags.q.in) + _mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs"); + } + | interpolation_qualifier type_qualifier + { + /* Section 4.3 of the GLSL 1.40 specification states: + * "...qualified with one of these interpolation qualifiers" + * + * GLSL 1.30 claims to allow "one or more", but insists that: + * "These interpolation qualifiers may only precede the qualifiers in, + * centroid in, out, or centroid out in a declaration." + * + * ...which means that e.g. smooth can't precede smooth, so there can be + * only one after all, and the 1.40 text is a clarification, not a change. + */ + if ($2.has_interpolation()) + _mesa_glsl_error(&@1, state, "duplicate interpolation qualifier"); + + if (!state->has_420pack_or_es31() && + ($2.flags.q.precise || $2.flags.q.invariant)) { + _mesa_glsl_error(&@1, state, "interpolation qualifiers must come " + "after \"precise\" or \"invariant\""); + } + + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | layout_qualifier type_qualifier + { + /* In the absence of ARB_shading_language_420pack, layout qualifiers may + * appear no later than auxiliary storage qualifiers. There is no + * particularly clear spec language mandating this, but in all examples + * the layout qualifier precedes the storage qualifier. + * + * We allow combinations of layout with interpolation, invariant or + * precise qualifiers since these are useful in ARB_separate_shader_objects. + * There is no clear spec guidance on this either. + */ + if (!state->has_420pack_or_es31() && $2.has_layout()) + _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); + + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | subroutine_qualifier type_qualifier + { + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | auxiliary_storage_qualifier type_qualifier + { + if ($2.has_auxiliary_storage()) { + _mesa_glsl_error(&@1, state, + "duplicate auxiliary storage qualifier (centroid or sample)"); + } + + if (!state->has_420pack_or_es31() && + ($2.flags.q.precise || $2.flags.q.invariant || + $2.has_interpolation() || $2.has_layout())) { + _mesa_glsl_error(&@1, state, "auxiliary storage qualifiers must come " + "just before storage qualifiers"); + } + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | storage_qualifier type_qualifier + { + /* Section 4.3 of the GLSL 1.20 specification states: + * "Variable declarations may have a storage qualifier specified..." + * 1.30 clarifies this to "may have one storage qualifier". 
+ */ + if ($2.has_storage()) + _mesa_glsl_error(&@1, state, "duplicate storage qualifier"); + + if (!state->has_420pack_or_es31() && + ($2.flags.q.precise || $2.flags.q.invariant || $2.has_interpolation() || + $2.has_layout() || $2.has_auxiliary_storage())) { + _mesa_glsl_error(&@1, state, "storage qualifiers must come after " + "precise, invariant, interpolation, layout and auxiliary " + "storage qualifiers"); + } + + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + | precision_qualifier type_qualifier + { + if ($2.precision != ast_precision_none) + _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); + + if (!(state->has_420pack_or_es31()) && + $2.flags.i != 0) + _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); + + $$ = $2; + $$.precision = $1; + } + | memory_qualifier type_qualifier + { + $$ = $1; + $$.merge_qualifier(&@1, state, $2, false); + } + ; + +auxiliary_storage_qualifier: + CENTROID + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.centroid = 1; + } + | SAMPLE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.sample = 1; + } + | PATCH + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.patch = 1; + } + +storage_qualifier: + CONST_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.constant = 1; + } + | ATTRIBUTE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.attribute = 1; + } + | VARYING + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.varying = 1; + } + | IN_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.in = 1; + } + | OUT_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.out = 1; + + if (state->stage == MESA_SHADER_GEOMETRY && + state->has_explicit_attrib_stream()) { + /* Section 4.3.8.2 (Output Layout Qualifiers) of the GLSL 4.00 + * spec says: + * + * "If the block or variable is declared with the stream + * identifier, it is associated with the specified stream; + * otherwise, it is associated with the current default stream." 
+ */ + $$.flags.q.stream = 1; + $$.flags.q.explicit_stream = 0; + $$.stream = state->out_qualifier->stream; + } + } + | UNIFORM + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.uniform = 1; + } + | BUFFER + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.buffer = 1; + } + | SHARED + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.shared_storage = 1; + } + ; + +memory_qualifier: + COHERENT + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.coherent = 1; + } + | VOLATILE + { + memset(& $$, 0, sizeof($$)); + $$.flags.q._volatile = 1; + } + | RESTRICT + { + STATIC_ASSERT(sizeof($$.flags.q) <= sizeof($$.flags.i)); + memset(& $$, 0, sizeof($$)); + $$.flags.q.restrict_flag = 1; + } + | READONLY + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.read_only = 1; + } + | WRITEONLY + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.write_only = 1; + } + ; + +array_specifier: + '[' ']' + { + void *ctx = state; + $$ = new(ctx) ast_array_specifier(@1, new(ctx) ast_expression( + ast_unsized_array_dim, NULL, + NULL, NULL)); + $$->set_location_range(@1, @2); + } + | '[' constant_expression ']' + { + void *ctx = state; + $$ = new(ctx) ast_array_specifier(@1, $2); + $$->set_location_range(@1, @3); + } + | array_specifier '[' ']' + { + void *ctx = state; + $$ = $1; + + if (state->check_arrays_of_arrays_allowed(& @1)) { + $$->add_dimension(new(ctx) ast_expression(ast_unsized_array_dim, NULL, + NULL, NULL)); + } + } + | array_specifier '[' constant_expression ']' + { + $$ = $1; + + if (state->check_arrays_of_arrays_allowed(& @1)) { + $$->add_dimension($3); + } + } + ; + +type_specifier: + type_specifier_nonarray + | type_specifier_nonarray array_specifier + { + $$ = $1; + $$->array_specifier = $2; + } + ; + +type_specifier_nonarray: + basic_type_specifier_nonarray + { + void *ctx = state; + $$ = new(ctx) ast_type_specifier($1); + $$->set_location(@1); + } + | struct_specifier + { + void *ctx = state; + $$ = new(ctx) ast_type_specifier($1); + $$->set_location(@1); + } + | TYPE_IDENTIFIER + { + void *ctx = state; + $$ = new(ctx) ast_type_specifier($1); + $$->set_location(@1); + } + ; + +basic_type_specifier_nonarray: + VOID_TOK { $$ = "void"; } + | FLOAT_TOK { $$ = "float"; } + | DOUBLE_TOK { $$ = "double"; } + | INT_TOK { $$ = "int"; } + | UINT_TOK { $$ = "uint"; } + | BOOL_TOK { $$ = "bool"; } + | VEC2 { $$ = "vec2"; } + | VEC3 { $$ = "vec3"; } + | VEC4 { $$ = "vec4"; } + | BVEC2 { $$ = "bvec2"; } + | BVEC3 { $$ = "bvec3"; } + | BVEC4 { $$ = "bvec4"; } + | IVEC2 { $$ = "ivec2"; } + | IVEC3 { $$ = "ivec3"; } + | IVEC4 { $$ = "ivec4"; } + | UVEC2 { $$ = "uvec2"; } + | UVEC3 { $$ = "uvec3"; } + | UVEC4 { $$ = "uvec4"; } + | DVEC2 { $$ = "dvec2"; } + | DVEC3 { $$ = "dvec3"; } + | DVEC4 { $$ = "dvec4"; } + | MAT2X2 { $$ = "mat2"; } + | MAT2X3 { $$ = "mat2x3"; } + | MAT2X4 { $$ = "mat2x4"; } + | MAT3X2 { $$ = "mat3x2"; } + | MAT3X3 { $$ = "mat3"; } + | MAT3X4 { $$ = "mat3x4"; } + | MAT4X2 { $$ = "mat4x2"; } + | MAT4X3 { $$ = "mat4x3"; } + | MAT4X4 { $$ = "mat4"; } + | DMAT2X2 { $$ = "dmat2"; } + | DMAT2X3 { $$ = "dmat2x3"; } + | DMAT2X4 { $$ = "dmat2x4"; } + | DMAT3X2 { $$ = "dmat3x2"; } + | DMAT3X3 { $$ = "dmat3"; } + | DMAT3X4 { $$ = "dmat3x4"; } + | DMAT4X2 { $$ = "dmat4x2"; } + | DMAT4X3 { $$ = "dmat4x3"; } + | DMAT4X4 { $$ = "dmat4"; } + | SAMPLER1D { $$ = "sampler1D"; } + | SAMPLER2D { $$ = "sampler2D"; } + | SAMPLER2DRECT { $$ = "sampler2DRect"; } + | SAMPLER3D { $$ = "sampler3D"; } + | SAMPLERCUBE { $$ = "samplerCube"; } + | SAMPLEREXTERNALOES { $$ = "samplerExternalOES"; } + | SAMPLER1DSHADOW { $$ = "sampler1DShadow"; } + 
| SAMPLER2DSHADOW { $$ = "sampler2DShadow"; } + | SAMPLER2DRECTSHADOW { $$ = "sampler2DRectShadow"; } + | SAMPLERCUBESHADOW { $$ = "samplerCubeShadow"; } + | SAMPLER1DARRAY { $$ = "sampler1DArray"; } + | SAMPLER2DARRAY { $$ = "sampler2DArray"; } + | SAMPLER1DARRAYSHADOW { $$ = "sampler1DArrayShadow"; } + | SAMPLER2DARRAYSHADOW { $$ = "sampler2DArrayShadow"; } + | SAMPLERBUFFER { $$ = "samplerBuffer"; } + | SAMPLERCUBEARRAY { $$ = "samplerCubeArray"; } + | SAMPLERCUBEARRAYSHADOW { $$ = "samplerCubeArrayShadow"; } + | ISAMPLER1D { $$ = "isampler1D"; } + | ISAMPLER2D { $$ = "isampler2D"; } + | ISAMPLER2DRECT { $$ = "isampler2DRect"; } + | ISAMPLER3D { $$ = "isampler3D"; } + | ISAMPLERCUBE { $$ = "isamplerCube"; } + | ISAMPLER1DARRAY { $$ = "isampler1DArray"; } + | ISAMPLER2DARRAY { $$ = "isampler2DArray"; } + | ISAMPLERBUFFER { $$ = "isamplerBuffer"; } + | ISAMPLERCUBEARRAY { $$ = "isamplerCubeArray"; } + | USAMPLER1D { $$ = "usampler1D"; } + | USAMPLER2D { $$ = "usampler2D"; } + | USAMPLER2DRECT { $$ = "usampler2DRect"; } + | USAMPLER3D { $$ = "usampler3D"; } + | USAMPLERCUBE { $$ = "usamplerCube"; } + | USAMPLER1DARRAY { $$ = "usampler1DArray"; } + | USAMPLER2DARRAY { $$ = "usampler2DArray"; } + | USAMPLERBUFFER { $$ = "usamplerBuffer"; } + | USAMPLERCUBEARRAY { $$ = "usamplerCubeArray"; } + | SAMPLER2DMS { $$ = "sampler2DMS"; } + | ISAMPLER2DMS { $$ = "isampler2DMS"; } + | USAMPLER2DMS { $$ = "usampler2DMS"; } + | SAMPLER2DMSARRAY { $$ = "sampler2DMSArray"; } + | ISAMPLER2DMSARRAY { $$ = "isampler2DMSArray"; } + | USAMPLER2DMSARRAY { $$ = "usampler2DMSArray"; } + | IMAGE1D { $$ = "image1D"; } + | IMAGE2D { $$ = "image2D"; } + | IMAGE3D { $$ = "image3D"; } + | IMAGE2DRECT { $$ = "image2DRect"; } + | IMAGECUBE { $$ = "imageCube"; } + | IMAGEBUFFER { $$ = "imageBuffer"; } + | IMAGE1DARRAY { $$ = "image1DArray"; } + | IMAGE2DARRAY { $$ = "image2DArray"; } + | IMAGECUBEARRAY { $$ = "imageCubeArray"; } + | IMAGE2DMS { $$ = "image2DMS"; } + | IMAGE2DMSARRAY { $$ = "image2DMSArray"; } + | IIMAGE1D { $$ = "iimage1D"; } + | IIMAGE2D { $$ = "iimage2D"; } + | IIMAGE3D { $$ = "iimage3D"; } + | IIMAGE2DRECT { $$ = "iimage2DRect"; } + | IIMAGECUBE { $$ = "iimageCube"; } + | IIMAGEBUFFER { $$ = "iimageBuffer"; } + | IIMAGE1DARRAY { $$ = "iimage1DArray"; } + | IIMAGE2DARRAY { $$ = "iimage2DArray"; } + | IIMAGECUBEARRAY { $$ = "iimageCubeArray"; } + | IIMAGE2DMS { $$ = "iimage2DMS"; } + | IIMAGE2DMSARRAY { $$ = "iimage2DMSArray"; } + | UIMAGE1D { $$ = "uimage1D"; } + | UIMAGE2D { $$ = "uimage2D"; } + | UIMAGE3D { $$ = "uimage3D"; } + | UIMAGE2DRECT { $$ = "uimage2DRect"; } + | UIMAGECUBE { $$ = "uimageCube"; } + | UIMAGEBUFFER { $$ = "uimageBuffer"; } + | UIMAGE1DARRAY { $$ = "uimage1DArray"; } + | UIMAGE2DARRAY { $$ = "uimage2DArray"; } + | UIMAGECUBEARRAY { $$ = "uimageCubeArray"; } + | UIMAGE2DMS { $$ = "uimage2DMS"; } + | UIMAGE2DMSARRAY { $$ = "uimage2DMSArray"; } + | ATOMIC_UINT { $$ = "atomic_uint"; } + ; + +precision_qualifier: + HIGHP + { + state->check_precision_qualifiers_allowed(&@1); + $$ = ast_precision_high; + } + | MEDIUMP + { + state->check_precision_qualifiers_allowed(&@1); + $$ = ast_precision_medium; + } + | LOWP + { + state->check_precision_qualifiers_allowed(&@1); + $$ = ast_precision_low; + } + ; + +struct_specifier: + STRUCT any_identifier '{' struct_declaration_list '}' + { + void *ctx = state; + $$ = new(ctx) ast_struct_specifier($2, $4); + $$->set_location_range(@2, @5); + state->symbols->add_type($2, glsl_type::void_type); + } + | STRUCT '{' struct_declaration_list '}' + { + void 
*ctx = state; + $$ = new(ctx) ast_struct_specifier(NULL, $3); + $$->set_location_range(@2, @4); + } + ; + +struct_declaration_list: + struct_declaration + { + $$ = $1; + $1->link.self_link(); + } + | struct_declaration_list struct_declaration + { + $$ = $1; + $$->link.insert_before(& $2->link); + } + ; + +struct_declaration: + fully_specified_type struct_declarator_list ';' + { + void *ctx = state; + ast_fully_specified_type *const type = $1; + type->set_location(@1); + + if (type->qualifier.flags.i != 0) + _mesa_glsl_error(&@1, state, + "only precision qualifiers may be applied to " + "structure members"); + + $$ = new(ctx) ast_declarator_list(type); + $$->set_location(@2); + + $$->declarations.push_degenerate_list_at_head(& $2->link); + } + ; + +struct_declarator_list: + struct_declarator + { + $$ = $1; + $1->link.self_link(); + } + | struct_declarator_list ',' struct_declarator + { + $$ = $1; + $$->link.insert_before(& $3->link); + } + ; + +struct_declarator: + any_identifier + { + void *ctx = state; + $$ = new(ctx) ast_declaration($1, NULL, NULL); + $$->set_location(@1); + } + | any_identifier array_specifier + { + void *ctx = state; + $$ = new(ctx) ast_declaration($1, $2, NULL); + $$->set_location_range(@1, @2); + } + ; + +initializer: + assignment_expression + | '{' initializer_list '}' + { + $$ = $2; + } + | '{' initializer_list ',' '}' + { + $$ = $2; + } + ; + +initializer_list: + initializer + { + void *ctx = state; + $$ = new(ctx) ast_aggregate_initializer(); + $$->set_location(@1); + $$->expressions.push_tail(& $1->link); + } + | initializer_list ',' initializer + { + $1->expressions.push_tail(& $3->link); + } + ; + +declaration_statement: + declaration + ; + + // Grammar Note: labeled statements for SWITCH only; 'goto' is not + // supported. 
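The list-building actions above (link.self_link(), link.insert_before(), declarations.push_degenerate_list_at_head()) share one idiom: each left-recursive rule threads its elements into a headless circular list, and the enclosing rule then splices the whole chain into a real list in constant time. Below is a minimal, self-contained sketch of that idiom; it is not part of the patch, and the simplified node/list types are stand-ins for Mesa's exec_node/exec_list, which add separate head/tail sentinels and many more helpers.

#include <cstdio>

/* Simplified stand-ins for exec_node/exec_list; the splicing idiom is
 * the same as in the grammar actions above. */
struct node {
   node *next, *prev;
   int value;

   /* One-element circular list, as in '$1->link.self_link()'. */
   void self_link() { next = prev = this; }

   /* Insert n before this node; inserting before the head of a circular
    * list appends, as in '$$->link.insert_before(& $3->link)'. */
   void insert_before(node *n) {
      n->prev = prev;
      n->next = this;
      prev->next = n;
      prev = n;
   }
};

struct list {
   node sentinel;
   list() { sentinel.self_link(); }

   /* Splice a headless circular chain into the list in O(1), as in
    * '$$->declarations.push_degenerate_list_at_head(& $2->link)'. */
   void push_degenerate_list_at_head(node *n) {
      node *last = n->prev;          /* tail of the circular chain */
      n->prev = &sentinel;
      last->next = sentinel.next;
      sentinel.next->prev = last;
      sentinel.next = n;
   }
};

int main()
{
   node a{nullptr, nullptr, 1}, b{nullptr, nullptr, 2}, c{nullptr, nullptr, 3};
   a.self_link();                    /* struct_declarator               */
   a.insert_before(&b);              /* struct_declarator_list ',' ...  */
   a.insert_before(&c);

   list decls;
   decls.push_degenerate_list_at_head(&a);   /* struct_declaration      */
   for (node *p = decls.sentinel.next; p != &decls.sentinel; p = p->next)
      printf("%d\n", p->value);              /* prints 1, 2, 3          */
   return 0;
}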
+statement:
+   compound_statement { $$ = (ast_node *) $1; }
+   | simple_statement
+   ;
+
+simple_statement:
+   declaration_statement
+   | expression_statement
+   | selection_statement
+   | switch_statement
+   | iteration_statement
+   | jump_statement
+   ;
+
+compound_statement:
+   '{' '}'
+   {
+      void *ctx = state;
+      $$ = new(ctx) ast_compound_statement(true, NULL);
+      $$->set_location_range(@1, @2);
+   }
+   | '{'
+   {
+      state->symbols->push_scope();
+   }
+   statement_list '}'
+   {
+      void *ctx = state;
+      $$ = new(ctx) ast_compound_statement(true, $3);
+      $$->set_location_range(@1, @4);
+      state->symbols->pop_scope();
+   }
+   ;
+
+statement_no_new_scope:
+   compound_statement_no_new_scope { $$ = (ast_node *) $1; }
+   | simple_statement
+   ;
+
+compound_statement_no_new_scope:
+   '{' '}'
+   {
+      void *ctx = state;
+      $$ = new(ctx) ast_compound_statement(false, NULL);
+      $$->set_location_range(@1, @2);
+   }
+   | '{' statement_list '}'
+   {
+      void *ctx = state;
+      $$ = new(ctx) ast_compound_statement(false, $2);
+      $$->set_location_range(@1, @3);
+   }
+   ;
+
+statement_list:
+   statement
+   {
+      if ($1 == NULL) {
+         _mesa_glsl_error(& @1, state, "<nil> statement");
+         assert($1 != NULL);
+      }
+
+      $$ = $1;
+      $$->link.self_link();
+   }
+   | statement_list statement
+   {
+      if ($2 == NULL) {
+         _mesa_glsl_error(& @2, state, "<nil> statement");
+         assert($2 != NULL);
+      }
+      $$ = $1;
+      $$->link.insert_before(& $2->link);
+   }
+   ;
+
+expression_statement:
+   ';'
+   {
+      void *ctx = state;
+      $$ = new(ctx) ast_expression_statement(NULL);
+      $$->set_location(@1);
+   }
+   | expression ';'
+   {
+      void *ctx = state;
+      $$ = new(ctx) ast_expression_statement($1);
+      $$->set_location(@1);
+   }
+   ;
+
+selection_statement:
+   IF '(' expression ')' selection_rest_statement
+   {
+      $$ = new(state) ast_selection_statement($3, $5.then_statement,
+                                              $5.else_statement);
+      $$->set_location_range(@1, @5);
+   }
+   ;
+
+selection_rest_statement:
+   statement ELSE statement
+   {
+      $$.then_statement = $1;
+      $$.else_statement = $3;
+   }
+   | statement %prec THEN
+   {
+      $$.then_statement = $1;
+      $$.else_statement = NULL;
+   }
+   ;
+
+condition:
+   expression
+   {
+      $$ = (ast_node *) $1;
+   }
+   | fully_specified_type any_identifier '=' initializer
+   {
+      void *ctx = state;
+      ast_declaration *decl = new(ctx) ast_declaration($2, NULL, $4);
+      ast_declarator_list *declarator = new(ctx) ast_declarator_list($1);
+      decl->set_location_range(@2, @4);
+      declarator->set_location(@1);
+
+      declarator->declarations.push_tail(&decl->link);
+      $$ = declarator;
+   }
+   ;
+
+/*
+ * switch_statement grammar is based on the syntax described in the body
+ * of the GLSL spec, not in its appendix!!!
+ */ +switch_statement: + SWITCH '(' expression ')' switch_body + { + $$ = new(state) ast_switch_statement($3, $5); + $$->set_location_range(@1, @5); + } + ; + +switch_body: + '{' '}' + { + $$ = new(state) ast_switch_body(NULL); + $$->set_location_range(@1, @2); + } + | '{' case_statement_list '}' + { + $$ = new(state) ast_switch_body($2); + $$->set_location_range(@1, @3); + } + ; + +case_label: + CASE expression ':' + { + $$ = new(state) ast_case_label($2); + $$->set_location(@2); + } + | DEFAULT ':' + { + $$ = new(state) ast_case_label(NULL); + $$->set_location(@2); + } + ; + +case_label_list: + case_label + { + ast_case_label_list *labels = new(state) ast_case_label_list(); + + labels->labels.push_tail(& $1->link); + $$ = labels; + $$->set_location(@1); + } + | case_label_list case_label + { + $$ = $1; + $$->labels.push_tail(& $2->link); + } + ; + +case_statement: + case_label_list statement + { + ast_case_statement *stmts = new(state) ast_case_statement($1); + stmts->set_location(@2); + + stmts->stmts.push_tail(& $2->link); + $$ = stmts; + } + | case_statement statement + { + $$ = $1; + $$->stmts.push_tail(& $2->link); + } + ; + +case_statement_list: + case_statement + { + ast_case_statement_list *cases= new(state) ast_case_statement_list(); + cases->set_location(@1); + + cases->cases.push_tail(& $1->link); + $$ = cases; + } + | case_statement_list case_statement + { + $$ = $1; + $$->cases.push_tail(& $2->link); + } + ; + +iteration_statement: + WHILE '(' condition ')' statement_no_new_scope + { + void *ctx = state; + $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_while, + NULL, $3, NULL, $5); + $$->set_location_range(@1, @4); + } + | DO statement WHILE '(' expression ')' ';' + { + void *ctx = state; + $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_do_while, + NULL, $5, NULL, $2); + $$->set_location_range(@1, @6); + } + | FOR '(' for_init_statement for_rest_statement ')' statement_no_new_scope + { + void *ctx = state; + $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_for, + $3, $4.cond, $4.rest, $6); + $$->set_location_range(@1, @6); + } + ; + +for_init_statement: + expression_statement + | declaration_statement + ; + +conditionopt: + condition + | /* empty */ + { + $$ = NULL; + } + ; + +for_rest_statement: + conditionopt ';' + { + $$.cond = $1; + $$.rest = NULL; + } + | conditionopt ';' expression + { + $$.cond = $1; + $$.rest = $3; + } + ; + + // Grammar Note: No 'goto'. Gotos are not supported. +jump_statement: + CONTINUE ';' + { + void *ctx = state; + $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_continue, NULL); + $$->set_location(@1); + } + | BREAK ';' + { + void *ctx = state; + $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_break, NULL); + $$->set_location(@1); + } + | RETURN ';' + { + void *ctx = state; + $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_return, NULL); + $$->set_location(@1); + } + | RETURN expression ';' + { + void *ctx = state; + $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_return, $2); + $$->set_location_range(@1, @2); + } + | DISCARD ';' // Fragment shader only. 
+ { + void *ctx = state; + $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_discard, NULL); + $$->set_location(@1); + } + ; + +external_declaration: + function_definition { $$ = $1; } + | declaration { $$ = $1; } + | pragma_statement { $$ = NULL; } + | layout_defaults { $$ = $1; } + ; + +function_definition: + function_prototype compound_statement_no_new_scope + { + void *ctx = state; + $$ = new(ctx) ast_function_definition(); + $$->set_location_range(@1, @2); + $$->prototype = $1; + $$->body = $2; + + state->symbols->pop_scope(); + } + ; + +/* layout_qualifieropt is packed into this rule */ +interface_block: + basic_interface_block + { + $$ = $1; + } + | layout_qualifier interface_block + { + ast_interface_block *block = (ast_interface_block *) $2; + + if (!state->has_420pack_or_es31() && block->layout.has_layout() && + !block->layout.is_default_qualifier) { + _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); + YYERROR; + } + + if (!block->layout.merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + + block->layout.is_default_qualifier = false; + + $$ = block; + } + | memory_qualifier interface_block + { + ast_interface_block *block = (ast_interface_block *)$2; + + if (!block->layout.flags.q.buffer) { + _mesa_glsl_error(& @1, state, + "memory qualifiers can only be used in the " + "declaration of shader storage blocks"); + } + if (!block->layout.merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + $$ = block; + } + ; + +basic_interface_block: + interface_qualifier NEW_IDENTIFIER '{' member_list '}' instance_name_opt ';' + { + ast_interface_block *const block = $6; + + block->block_name = $2; + block->declarations.push_degenerate_list_at_head(& $4->link); + + _mesa_ast_process_interface_block(& @1, state, block, $1); + + $$ = block; + } + | buffer_interface_qualifier NEW_IDENTIFIER '{' member_list '}' buffer_instance_name_opt ';' + { + ast_interface_block *const block = $6; + + block->block_name = $2; + block->declarations.push_degenerate_list_at_head(& $4->link); + + _mesa_ast_process_interface_block(& @1, state, block, $1); + + $$ = block; + } + ; + +interface_qualifier: + IN_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.in = 1; + } + | OUT_TOK + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.out = 1; + } + | UNIFORM + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.uniform = 1; + } + ; + +buffer_interface_qualifier: + BUFFER + { + memset(& $$, 0, sizeof($$)); + $$.flags.q.buffer = 1; + } + ; + +instance_name_opt: + /* empty */ + { + $$ = new(state) ast_interface_block(*state->default_uniform_qualifier, + NULL, NULL); + } + | NEW_IDENTIFIER + { + $$ = new(state) ast_interface_block(*state->default_uniform_qualifier, + $1, NULL); + $$->set_location(@1); + } + | NEW_IDENTIFIER array_specifier + { + $$ = new(state) ast_interface_block(*state->default_uniform_qualifier, + $1, $2); + $$->set_location_range(@1, @2); + } + ; + +buffer_instance_name_opt: + /* empty */ + { + $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier, + NULL, NULL); + } + | NEW_IDENTIFIER + { + $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier, + $1, NULL); + $$->set_location(@1); + } + | NEW_IDENTIFIER array_specifier + { + $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier, + $1, $2); + $$->set_location_range(@1, @2); + } + ; + +member_list: + member_declaration + { + $$ = $1; + $1->link.self_link(); + } + | member_declaration member_list + { + $$ = $1; + $2->link.insert_before(& $$->link); + } + ; + 
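Every qualifier production above first clears the whole semantic value with memset(& $$, 0, sizeof($$)) and then sets one named bit. This works because ast_type_qualifier keeps its bits in a union of a bitfield struct (flags.q) and a plain integer (flags.i): actions set qualifiers by name, while merge_qualifier() and the interface-type mask later in this patch operate on the integer view, and the STATIC_ASSERT in the RESTRICT action guards that the bitfield fits in the integer. A cut-down sketch of the pattern follows; it is not part of the patch and uses a hypothetical three-bit qualifier set instead of Mesa's full one.

#include <cstdint>
#include <cstdio>
#include <cstring>

/* Stand-in for ast_type_qualifier's flags member: the same storage is
 * viewed either as named bits (q) or as one integer (i). */
struct type_qualifier {
   union {
      struct {
         unsigned in:1;
         unsigned out:1;
         unsigned uniform:1;
      } q;
      uint64_t i;
   } flags;
};

/* The grammar's STATIC_ASSERT guards exactly this: every named bit must
 * fit inside the integer view, or whole-word tests would miss bits. */
static_assert(sizeof(((type_qualifier *) 0)->flags.q) <=
              sizeof(((type_qualifier *) 0)->flags.i),
              "bitfield must fit in the integer view");

int main()
{
   type_qualifier a, b;

   memset(&a, 0, sizeof(a));   /* as in each production's action */
   a.flags.q.in = 1;

   memset(&b, 0, sizeof(b));
   b.flags.q.uniform = 1;

   /* Whole-word operations on flags.i, the way merge_qualifier() and
    * the interface-block mask computation use them. */
   if ((a.flags.i & b.flags.i) != 0)
      printf("duplicate qualifier\n");

   a.flags.i |= b.flags.i;     /* merge b's qualifiers into a */
   printf("in=%u uniform=%u\n",
          (unsigned) a.flags.q.in, (unsigned) a.flags.q.uniform);
   return 0;
}

Reading flags.q after writing flags.i is union type-punning; the real ast_type_qualifier relies on the same behavior on the compilers Mesa targets.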
+member_declaration: + fully_specified_type struct_declarator_list ';' + { + void *ctx = state; + ast_fully_specified_type *type = $1; + type->set_location(@1); + + if (type->qualifier.flags.q.attribute) { + _mesa_glsl_error(& @1, state, + "keyword 'attribute' cannot be used with " + "interface block member"); + } else if (type->qualifier.flags.q.varying) { + _mesa_glsl_error(& @1, state, + "keyword 'varying' cannot be used with " + "interface block member"); + } + + $$ = new(ctx) ast_declarator_list(type); + $$->set_location(@2); + + $$->declarations.push_degenerate_list_at_head(& $2->link); + } + ; + +layout_uniform_defaults: + layout_qualifier layout_uniform_defaults + { + $$ = NULL; + if (!state->has_420pack_or_es31()) { + _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); + YYERROR; + } else { + if (!state->default_uniform_qualifier-> + merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + } + } + | layout_qualifier UNIFORM ';' + { + if (!state->default_uniform_qualifier-> + merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + $$ = NULL; + } + ; + +layout_buffer_defaults: + layout_qualifier layout_buffer_defaults + { + $$ = NULL; + if (!state->has_420pack_or_es31()) { + _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); + YYERROR; + } else { + if (!state->default_shader_storage_qualifier-> + merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + } + } + | layout_qualifier BUFFER ';' + { + if (!state->default_shader_storage_qualifier-> + merge_qualifier(& @1, state, $1, false)) { + YYERROR; + } + + /* From the GLSL 4.50 spec, section 4.4.5: + * + * "It is a compile-time error to specify the binding identifier for + * the global scope or for block member declarations." + */ + if (state->default_shader_storage_qualifier->flags.q.explicit_binding) { + _mesa_glsl_error(& @1, state, + "binding qualifier cannot be set for default layout"); + } + + $$ = NULL; + } + ; + +layout_in_defaults: + layout_qualifier layout_in_defaults + { + $$ = NULL; + if (!state->has_420pack_or_es31()) { + _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); + YYERROR; + } else { + if (!state->in_qualifier-> + merge_in_qualifier(& @1, state, $1, $$, false)) { + YYERROR; + } + } + } + | layout_qualifier IN_TOK ';' + { + $$ = NULL; + if (!state->in_qualifier-> + merge_in_qualifier(& @1, state, $1, $$, true)) { + YYERROR; + } + } + ; + +layout_out_defaults: + layout_qualifier layout_out_defaults + { + $$ = NULL; + if (!state->has_420pack_or_es31()) { + _mesa_glsl_error(&@1, state, "duplicate layout(...) 
qualifiers"); + YYERROR; + } else { + if (!state->out_qualifier-> + merge_out_qualifier(& @1, state, $1, $$, false)) { + YYERROR; + } + } + } + | layout_qualifier OUT_TOK ';' + { + $$ = NULL; + if (!state->out_qualifier-> + merge_out_qualifier(& @1, state, $1, $$, true)) + YYERROR; + } + ; + +layout_defaults: + layout_uniform_defaults + | layout_buffer_defaults + | layout_in_defaults + | layout_out_defaults + ; diff --git a/src/compiler/glsl/glsl_parser_extras.cpp b/src/compiler/glsl/glsl_parser_extras.cpp new file mode 100644 index 00000000000..603895497d1 --- /dev/null +++ b/src/compiler/glsl/glsl_parser_extras.cpp @@ -0,0 +1,1952 @@ +/* + * Copyright © 2008, 2009 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include +#include +#include +#include + +#include "main/core.h" /* for struct gl_context */ +#include "main/context.h" +#include "main/shaderobj.h" +#include "util/u_atomic.h" /* for p_atomic_cmpxchg */ +#include "util/ralloc.h" +#include "ast.h" +#include "glsl_parser_extras.h" +#include "glsl_parser.h" +#include "ir_optimization.h" +#include "loop_analysis.h" + +/** + * Format a short human-readable description of the given GLSL version. + */ +const char * +glsl_compute_version_string(void *mem_ctx, bool is_es, unsigned version) +{ + return ralloc_asprintf(mem_ctx, "GLSL%s %d.%02d", is_es ? " ES" : "", + version / 100, version % 100); +} + + +static const unsigned known_desktop_glsl_versions[] = + { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440, 450 }; + + +_mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, + gl_shader_stage stage, + void *mem_ctx) + : ctx(_ctx), cs_input_local_size_specified(false), cs_input_local_size(), + switch_state() +{ + assert(stage < MESA_SHADER_STAGES); + this->stage = stage; + + this->scanner = NULL; + this->translation_unit.make_empty(); + this->symbols = new(mem_ctx) glsl_symbol_table; + + this->info_log = ralloc_strdup(mem_ctx, ""); + this->error = false; + this->loop_nesting_ast = NULL; + + this->struct_specifier_depth = 0; + + this->uses_builtin_functions = false; + + /* Set default language version and extensions */ + this->language_version = 110; + this->forced_language_version = ctx->Const.ForceGLSLVersion; + this->es_shader = false; + this->ARB_texture_rectangle_enable = true; + + /* OpenGL ES 2.0 has different defaults from desktop GL. 
*/ + if (ctx->API == API_OPENGLES2) { + this->language_version = 100; + this->es_shader = true; + this->ARB_texture_rectangle_enable = false; + } + + this->extensions = &ctx->Extensions; + + this->Const.MaxLights = ctx->Const.MaxLights; + this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes; + this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits; + this->Const.MaxTextureCoords = ctx->Const.MaxTextureCoordUnits; + this->Const.MaxVertexAttribs = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs; + this->Const.MaxVertexUniformComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents; + this->Const.MaxVertexTextureImageUnits = ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits; + this->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxCombinedTextureImageUnits; + this->Const.MaxTextureImageUnits = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + this->Const.MaxFragmentUniformComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents; + this->Const.MinProgramTexelOffset = ctx->Const.MinProgramTexelOffset; + this->Const.MaxProgramTexelOffset = ctx->Const.MaxProgramTexelOffset; + + this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers; + + this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers; + + /* 1.50 constants */ + this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents; + this->Const.MaxGeometryOutputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents; + this->Const.MaxFragmentInputComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents; + this->Const.MaxGeometryTextureImageUnits = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits; + this->Const.MaxGeometryOutputVertices = ctx->Const.MaxGeometryOutputVertices; + this->Const.MaxGeometryTotalOutputComponents = ctx->Const.MaxGeometryTotalOutputComponents; + this->Const.MaxGeometryUniformComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents; + + this->Const.MaxVertexAtomicCounters = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters; + this->Const.MaxTessControlAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicCounters; + this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters; + this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters; + this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters; + this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters; + this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings; + this->Const.MaxVertexAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers; + this->Const.MaxTessControlAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers; + this->Const.MaxTessEvaluationAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers; + this->Const.MaxGeometryAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers; + this->Const.MaxFragmentAtomicCounterBuffers = + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; + this->Const.MaxCombinedAtomicCounterBuffers = + ctx->Const.MaxCombinedAtomicBuffers; + this->Const.MaxAtomicCounterBufferSize = + ctx->Const.MaxAtomicBufferSize; + + /* Compute shader constants */ + for 
(unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupCount); i++) + this->Const.MaxComputeWorkGroupCount[i] = ctx->Const.MaxComputeWorkGroupCount[i]; + for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupSize); i++) + this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i]; + + this->Const.MaxImageUnits = ctx->Const.MaxImageUnits; + this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources; + this->Const.MaxImageSamples = ctx->Const.MaxImageSamples; + this->Const.MaxVertexImageUniforms = ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms; + this->Const.MaxTessControlImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms; + this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms; + this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms; + this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms; + this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms; + + /* ARB_viewport_array */ + this->Const.MaxViewports = ctx->Const.MaxViewports; + + /* tessellation shader constants */ + this->Const.MaxPatchVertices = ctx->Const.MaxPatchVertices; + this->Const.MaxTessGenLevel = ctx->Const.MaxTessGenLevel; + this->Const.MaxTessControlInputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents; + this->Const.MaxTessControlOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents; + this->Const.MaxTessControlTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits; + this->Const.MaxTessEvaluationInputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents; + this->Const.MaxTessEvaluationOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents; + this->Const.MaxTessEvaluationTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits; + this->Const.MaxTessPatchComponents = ctx->Const.MaxTessPatchComponents; + this->Const.MaxTessControlTotalOutputComponents = ctx->Const.MaxTessControlTotalOutputComponents; + this->Const.MaxTessControlUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents; + this->Const.MaxTessEvaluationUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents; + + this->current_function = NULL; + this->toplevel_ir = NULL; + this->found_return = false; + this->all_invariant = false; + this->user_structures = NULL; + this->num_user_structures = 0; + this->num_subroutines = 0; + this->subroutines = NULL; + this->num_subroutine_types = 0; + this->subroutine_types = NULL; + + /* supported_versions should be large enough to support the known desktop + * GLSL versions plus 3 GLES versions (ES 1.00, ES 3.00, and ES 3.10)) + */ + STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 3) == + ARRAY_SIZE(this->supported_versions)); + + /* Populate the list of supported GLSL versions */ + /* FINISHME: Once the OpenGL 3.0 'forward compatible' context or + * the OpenGL 3.2 Core context is supported, this logic will need + * change. Older versions of GLSL are no longer supported + * outside the compatibility contexts of 3.x. 
+ */ + this->num_supported_versions = 0; + if (_mesa_is_desktop_gl(ctx)) { + for (unsigned i = 0; i < ARRAY_SIZE(known_desktop_glsl_versions); i++) { + if (known_desktop_glsl_versions[i] <= ctx->Const.GLSLVersion) { + this->supported_versions[this->num_supported_versions].ver + = known_desktop_glsl_versions[i]; + this->supported_versions[this->num_supported_versions].es = false; + this->num_supported_versions++; + } + } + } + if (ctx->API == API_OPENGLES2 || ctx->Extensions.ARB_ES2_compatibility) { + this->supported_versions[this->num_supported_versions].ver = 100; + this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } + if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) { + this->supported_versions[this->num_supported_versions].ver = 300; + this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } + if (_mesa_is_gles31(ctx)) { + this->supported_versions[this->num_supported_versions].ver = 310; + this->supported_versions[this->num_supported_versions].es = true; + this->num_supported_versions++; + } + + /* Create a string for use in error messages to tell the user which GLSL + * versions are supported. + */ + char *supported = ralloc_strdup(this, ""); + for (unsigned i = 0; i < this->num_supported_versions; i++) { + unsigned ver = this->supported_versions[i].ver; + const char *const prefix = (i == 0) + ? "" + : ((i == this->num_supported_versions - 1) ? ", and " : ", "); + const char *const suffix = (this->supported_versions[i].es) ? " ES" : ""; + + ralloc_asprintf_append(& supported, "%s%u.%02u%s", + prefix, + ver / 100, ver % 100, + suffix); + } + + this->supported_version_string = supported; + + if (ctx->Const.ForceGLSLExtensionsWarn) + _mesa_glsl_process_extension("all", NULL, "warn", NULL, this); + + this->default_uniform_qualifier = new(this) ast_type_qualifier(); + this->default_uniform_qualifier->flags.q.shared = 1; + this->default_uniform_qualifier->flags.q.column_major = 1; + this->default_uniform_qualifier->is_default_qualifier = true; + + this->default_shader_storage_qualifier = new(this) ast_type_qualifier(); + this->default_shader_storage_qualifier->flags.q.shared = 1; + this->default_shader_storage_qualifier->flags.q.column_major = 1; + this->default_shader_storage_qualifier->is_default_qualifier = true; + + this->fs_uses_gl_fragcoord = false; + this->fs_redeclares_gl_fragcoord = false; + this->fs_origin_upper_left = false; + this->fs_pixel_center_integer = false; + this->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = false; + + this->gs_input_prim_type_specified = false; + this->tcs_output_vertices_specified = false; + this->gs_input_size = 0; + this->in_qualifier = new(this) ast_type_qualifier(); + this->out_qualifier = new(this) ast_type_qualifier(); + this->fs_early_fragment_tests = false; + memset(this->atomic_counter_offsets, 0, + sizeof(this->atomic_counter_offsets)); + this->allow_extension_directive_midshader = + ctx->Const.AllowGLSLExtensionDirectiveMidShader; +} + +/** + * Determine whether the current GLSL version is sufficiently high to support + * a certain feature, and generate an error message if it isn't. + * + * \param required_glsl_version and \c required_glsl_es_version are + * interpreted as they are in _mesa_glsl_parse_state::is_version(). + * + * \param locp is the parser location where the error should be reported. 
+ * + * \param fmt (and additional arguments) constitute a printf-style error + * message to report if the version check fails. Information about the + * current and required GLSL versions will be appended. So, for example, if + * the GLSL version being compiled is 1.20, and check_version(130, 300, locp, + * "foo unsupported") is called, the error message will be "foo unsupported in + * GLSL 1.20 (GLSL 1.30 or GLSL 3.00 ES required)". + */ +bool +_mesa_glsl_parse_state::check_version(unsigned required_glsl_version, + unsigned required_glsl_es_version, + YYLTYPE *locp, const char *fmt, ...) +{ + if (this->is_version(required_glsl_version, required_glsl_es_version)) + return true; + + va_list args; + va_start(args, fmt); + char *problem = ralloc_vasprintf(this, fmt, args); + va_end(args); + const char *glsl_version_string + = glsl_compute_version_string(this, false, required_glsl_version); + const char *glsl_es_version_string + = glsl_compute_version_string(this, true, required_glsl_es_version); + const char *requirement_string = ""; + if (required_glsl_version && required_glsl_es_version) { + requirement_string = ralloc_asprintf(this, " (%s or %s required)", + glsl_version_string, + glsl_es_version_string); + } else if (required_glsl_version) { + requirement_string = ralloc_asprintf(this, " (%s required)", + glsl_version_string); + } else if (required_glsl_es_version) { + requirement_string = ralloc_asprintf(this, " (%s required)", + glsl_es_version_string); + } + _mesa_glsl_error(locp, this, "%s in %s%s", + problem, this->get_version_string(), + requirement_string); + + return false; +} + +/** + * Process a GLSL #version directive. + * + * \param version is the integer that follows the #version token. + * + * \param ident is a string identifier that follows the integer, if any is + * present. Otherwise NULL. + */ +void +_mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version, + const char *ident) +{ + bool es_token_present = false; + if (ident) { + if (strcmp(ident, "es") == 0) { + es_token_present = true; + } else if (version >= 150) { + if (strcmp(ident, "core") == 0) { + /* Accept the token. There's no need to record that this is + * a core profile shader since that's the only profile we support. + */ + } else if (strcmp(ident, "compatibility") == 0) { + _mesa_glsl_error(locp, this, + "the compatibility profile is not supported"); + } else { + _mesa_glsl_error(locp, this, + "\"%s\" is not a valid shading language profile; " + "if present, it must be \"core\"", ident); + } + } else { + _mesa_glsl_error(locp, this, + "illegal text following version number"); + } + } + + this->es_shader = es_token_present; + if (version == 100) { + if (es_token_present) { + _mesa_glsl_error(locp, this, + "GLSL 1.00 ES should be selected using " + "`#version 100'"); + } else { + this->es_shader = true; + } + } + + if (this->es_shader) { + this->ARB_texture_rectangle_enable = false; + } + + if (this->forced_language_version) + this->language_version = this->forced_language_version; + else + this->language_version = version; + + bool supported = false; + for (unsigned i = 0; i < this->num_supported_versions; i++) { + if (this->supported_versions[i].ver == this->language_version + && this->supported_versions[i].es == this->es_shader) { + supported = true; + break; + } + } + + if (!supported) { + _mesa_glsl_error(locp, this, "%s is not supported. 
" + "Supported versions are: %s", + this->get_version_string(), + this->supported_version_string); + + /* On exit, the language_version must be set to a valid value. + * Later calls to _mesa_glsl_initialize_types will misbehave if + * the version is invalid. + */ + switch (this->ctx->API) { + case API_OPENGL_COMPAT: + case API_OPENGL_CORE: + this->language_version = this->ctx->Const.GLSLVersion; + break; + + case API_OPENGLES: + assert(!"Should not get here."); + /* FALLTHROUGH */ + + case API_OPENGLES2: + this->language_version = 100; + break; + } + } +} + + +/* This helper function will append the given message to the shader's + info log and report it via GL_ARB_debug_output. Per that extension, + 'type' is one of the enum values classifying the message, and + 'id' is the implementation-defined ID of the given message. */ +static void +_mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state, + GLenum type, const char *fmt, va_list ap) +{ + bool error = (type == MESA_DEBUG_TYPE_ERROR); + GLuint msg_id = 0; + + assert(state->info_log != NULL); + + /* Get the offset that the new message will be written to. */ + int msg_offset = strlen(state->info_log); + + ralloc_asprintf_append(&state->info_log, "%u:%u(%u): %s: ", + locp->source, + locp->first_line, + locp->first_column, + error ? "error" : "warning"); + ralloc_vasprintf_append(&state->info_log, fmt, ap); + + const char *const msg = &state->info_log[msg_offset]; + struct gl_context *ctx = state->ctx; + + /* Report the error via GL_ARB_debug_output. */ + _mesa_shader_debug(ctx, type, &msg_id, msg); + + ralloc_strcat(&state->info_log, "\n"); +} + +void +_mesa_glsl_error(YYLTYPE *locp, _mesa_glsl_parse_state *state, + const char *fmt, ...) +{ + va_list ap; + + state->error = true; + + va_start(ap, fmt); + _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_ERROR, fmt, ap); + va_end(ap); +} + + +void +_mesa_glsl_warning(const YYLTYPE *locp, _mesa_glsl_parse_state *state, + const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_OTHER, fmt, ap); + va_end(ap); +} + + +/** + * Enum representing the possible behaviors that can be specified in + * an #extension directive. + */ +enum ext_behavior { + extension_disable, + extension_enable, + extension_require, + extension_warn +}; + +/** + * Element type for _mesa_glsl_supported_extensions + */ +struct _mesa_glsl_extension { + /** + * Name of the extension when referred to in a GLSL extension + * statement + */ + const char *name; + + /** True if this extension is available to desktop GL shaders */ + bool avail_in_GL; + + /** True if this extension is available to GLES shaders */ + bool avail_in_ES; + + /** + * Flag in the gl_extensions struct indicating whether this + * extension is supported by the driver, or + * &gl_extensions::dummy_true if supported by all drivers. + * + * Note: the type (GLboolean gl_extensions::*) is a "pointer to + * member" type, the type-safe alternative to the "offsetof" macro. + * In a nutshell: + * + * - foo bar::* p declares p to be an "offset" to a field of type + * foo that exists within struct bar + * - &bar::baz computes the "offset" of field baz within struct bar + * - x.*p accesses the field of x that exists at "offset" p + * - x->*p is equivalent to (*x).*p + */ + const GLboolean gl_extensions::* supported_flag; + + /** + * Flag in the _mesa_glsl_parse_state struct that should be set + * when this extension is enabled. 
+ * + * See note in _mesa_glsl_extension::supported_flag about "pointer + * to member" types. + */ + bool _mesa_glsl_parse_state::* enable_flag; + + /** + * Flag in the _mesa_glsl_parse_state struct that should be set + * when the shader requests "warn" behavior for this extension. + * + * See note in _mesa_glsl_extension::supported_flag about "pointer + * to member" types. + */ + bool _mesa_glsl_parse_state::* warn_flag; + + + bool compatible_with_state(const _mesa_glsl_parse_state *state) const; + void set_flags(_mesa_glsl_parse_state *state, ext_behavior behavior) const; +}; + +#define EXT(NAME, GL, ES, SUPPORTED_FLAG) \ + { "GL_" #NAME, GL, ES, &gl_extensions::SUPPORTED_FLAG, \ + &_mesa_glsl_parse_state::NAME##_enable, \ + &_mesa_glsl_parse_state::NAME##_warn } + +/** + * Table of extensions that can be enabled/disabled within a shader, + * and the conditions under which they are supported. + */ +static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { + /* API availability */ + /* name GL ES supported flag */ + + /* ARB extensions go here, sorted alphabetically. + */ + EXT(ARB_arrays_of_arrays, true, false, ARB_arrays_of_arrays), + EXT(ARB_compute_shader, true, false, ARB_compute_shader), + EXT(ARB_conservative_depth, true, false, ARB_conservative_depth), + EXT(ARB_derivative_control, true, false, ARB_derivative_control), + EXT(ARB_draw_buffers, true, false, dummy_true), + EXT(ARB_draw_instanced, true, false, ARB_draw_instanced), + EXT(ARB_enhanced_layouts, true, false, ARB_enhanced_layouts), + EXT(ARB_explicit_attrib_location, true, false, ARB_explicit_attrib_location), + EXT(ARB_explicit_uniform_location, true, false, ARB_explicit_uniform_location), + EXT(ARB_fragment_coord_conventions, true, false, ARB_fragment_coord_conventions), + EXT(ARB_fragment_layer_viewport, true, false, ARB_fragment_layer_viewport), + EXT(ARB_gpu_shader5, true, false, ARB_gpu_shader5), + EXT(ARB_gpu_shader_fp64, true, false, ARB_gpu_shader_fp64), + EXT(ARB_sample_shading, true, false, ARB_sample_shading), + EXT(ARB_separate_shader_objects, true, false, dummy_true), + EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters), + EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding), + EXT(ARB_shader_clock, true, false, ARB_shader_clock), + EXT(ARB_shader_draw_parameters, true, false, ARB_shader_draw_parameters), + EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store), + EXT(ARB_shader_image_size, true, false, ARB_shader_image_size), + EXT(ARB_shader_precision, true, false, ARB_shader_precision), + EXT(ARB_shader_stencil_export, true, false, ARB_shader_stencil_export), + EXT(ARB_shader_storage_buffer_object, true, true, ARB_shader_storage_buffer_object), + EXT(ARB_shader_subroutine, true, false, ARB_shader_subroutine), + EXT(ARB_shader_texture_image_samples, true, false, ARB_shader_texture_image_samples), + EXT(ARB_shader_texture_lod, true, false, ARB_shader_texture_lod), + EXT(ARB_shading_language_420pack, true, false, ARB_shading_language_420pack), + EXT(ARB_shading_language_packing, true, false, ARB_shading_language_packing), + EXT(ARB_tessellation_shader, true, false, ARB_tessellation_shader), + EXT(ARB_texture_cube_map_array, true, false, ARB_texture_cube_map_array), + EXT(ARB_texture_gather, true, false, ARB_texture_gather), + EXT(ARB_texture_multisample, true, false, ARB_texture_multisample), + EXT(ARB_texture_query_levels, true, false, ARB_texture_query_levels), + EXT(ARB_texture_query_lod, true, false, ARB_texture_query_lod), + 
EXT(ARB_texture_rectangle, true, false, dummy_true), + EXT(ARB_uniform_buffer_object, true, false, ARB_uniform_buffer_object), + EXT(ARB_vertex_attrib_64bit, true, false, ARB_vertex_attrib_64bit), + EXT(ARB_viewport_array, true, false, ARB_viewport_array), + + /* KHR extensions go here, sorted alphabetically. + */ + + /* OES extensions go here, sorted alphabetically. + */ + EXT(OES_EGL_image_external, false, true, OES_EGL_image_external), + EXT(OES_geometry_shader, false, true, OES_geometry_shader), + EXT(OES_standard_derivatives, false, true, OES_standard_derivatives), + EXT(OES_texture_3D, false, true, dummy_true), + EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample), + + /* All other extensions go here, sorted alphabetically. + */ + EXT(AMD_conservative_depth, true, false, ARB_conservative_depth), + EXT(AMD_shader_stencil_export, true, false, ARB_shader_stencil_export), + EXT(AMD_shader_trinary_minmax, true, false, dummy_true), + EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer), + EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index), + EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended), + EXT(EXT_draw_buffers, false, true, dummy_true), + EXT(EXT_separate_shader_objects, false, true, dummy_true), + EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix), + EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical), + EXT(EXT_texture_array, true, false, EXT_texture_array), +}; + +#undef EXT + + +/** + * Determine whether a given extension is compatible with the target, + * API, and extension information in the current parser state. + */ +bool _mesa_glsl_extension::compatible_with_state(const _mesa_glsl_parse_state * + state) const +{ + /* Check that this extension matches whether we are compiling + * for desktop GL or GLES. + */ + if (state->es_shader) { + if (!this->avail_in_ES) return false; + } else { + if (!this->avail_in_GL) return false; + } + + /* Check that this extension is supported by the OpenGL + * implementation. + * + * Note: the ->* operator indexes into state->extensions by the + * offset this->supported_flag. See + * _mesa_glsl_extension::supported_flag for more info. + */ + return state->extensions->*(this->supported_flag); +} + +/** + * Set the appropriate flags in the parser state to establish the + * given behavior for this extension. + */ +void _mesa_glsl_extension::set_flags(_mesa_glsl_parse_state *state, + ext_behavior behavior) const +{ + /* Note: the ->* operator indexes into state by the + * offsets this->enable_flag and this->warn_flag. See + * _mesa_glsl_extension::supported_flag for more info. + */ + state->*(this->enable_flag) = (behavior != extension_disable); + state->*(this->warn_flag) = (behavior == extension_warn); +} + +/** + * Find an extension by name in _mesa_glsl_supported_extensions. If + * the name is not found, return NULL. 
+ */
+static const _mesa_glsl_extension *find_extension(const char *name)
+{
+   for (unsigned i = 0; i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) {
+      if (strcmp(name, _mesa_glsl_supported_extensions[i].name) == 0) {
+         return &_mesa_glsl_supported_extensions[i];
+      }
+   }
+   return NULL;
+}
+
+
+bool
+_mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp,
+                             const char *behavior_string, YYLTYPE *behavior_locp,
+                             _mesa_glsl_parse_state *state)
+{
+   ext_behavior behavior;
+   if (strcmp(behavior_string, "warn") == 0) {
+      behavior = extension_warn;
+   } else if (strcmp(behavior_string, "require") == 0) {
+      behavior = extension_require;
+   } else if (strcmp(behavior_string, "enable") == 0) {
+      behavior = extension_enable;
+   } else if (strcmp(behavior_string, "disable") == 0) {
+      behavior = extension_disable;
+   } else {
+      _mesa_glsl_error(behavior_locp, state,
+                       "unknown extension behavior `%s'",
+                       behavior_string);
+      return false;
+   }
+
+   if (strcmp(name, "all") == 0) {
+      if ((behavior == extension_enable) || (behavior == extension_require)) {
+         _mesa_glsl_error(name_locp, state, "cannot %s all extensions",
+                          (behavior == extension_enable)
+                          ? "enable" : "require");
+         return false;
+      } else {
+         for (unsigned i = 0;
+              i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) {
+            const _mesa_glsl_extension *extension
+               = &_mesa_glsl_supported_extensions[i];
+            if (extension->compatible_with_state(state)) {
+               _mesa_glsl_supported_extensions[i].set_flags(state, behavior);
+            }
+         }
+      }
+   } else {
+      const _mesa_glsl_extension *extension = find_extension(name);
+      if (extension && extension->compatible_with_state(state)) {
+         extension->set_flags(state, behavior);
+      } else {
+         static const char fmt[] = "extension `%s' unsupported in %s shader";
+
+         if (behavior == extension_require) {
+            _mesa_glsl_error(name_locp, state, fmt,
+                             name, _mesa_shader_stage_to_string(state->stage));
+            return false;
+         } else {
+            _mesa_glsl_warning(name_locp, state, fmt,
+                               name, _mesa_shader_stage_to_string(state->stage));
+         }
+      }
+   }
+
+   return true;
+}
+
+
+/**
+ * Recurses through <type> and <expr> if <expr> is an aggregate initializer
+ * and sets <expr>'s <constructor_type> field to <type>. Gives later functions
+ * (process_array_constructor, et al) sufficient information to do type
+ * checking.
+ *
+ * Operates on assignments involving an aggregate initializer. E.g.,
+ *
+ * vec4 pos = {1.0, -1.0, 0.0, 1.0};
+ *
+ * or more ridiculously,
+ *
+ * struct S {
+ *    vec4 v[2];
+ * };
+ *
+ * struct {
+ *    S a[2], b;
+ *    int c;
+ * } aggregate = {
+ *    {
+ *       {
+ *          {
+ *             {1.0, 2.0, 3.0, 4.0}, // a[0].v[0]
+ *             {5.0, 6.0, 7.0, 8.0}  // a[0].v[1]
+ *          } // a[0].v
+ *       }, // a[0]
+ *       {
+ *          {
+ *             {1.0, 2.0, 3.0, 4.0}, // a[1].v[0]
+ *             {5.0, 6.0, 7.0, 8.0}  // a[1].v[1]
+ *          } // a[1].v
+ *       } // a[1]
+ *    }, // a
+ *    {
+ *       {
+ *          {1.0, 2.0, 3.0, 4.0}, // b.v[0]
+ *          {5.0, 6.0, 7.0, 8.0}  // b.v[1]
+ *       } // b.v
+ *    }, // b
+ *    4 // c
+ * };
+ *
+ * This pass is necessary because the right-hand side of <type> e = { ... }
+ * doesn't contain sufficient information to determine if the types match.
+ */
+void
+_mesa_ast_set_aggregate_type(const glsl_type *type,
+                             ast_expression *expr)
+{
+   ast_aggregate_initializer *ai = (ast_aggregate_initializer *)expr;
+   ai->constructor_type = type;
+
+   /* If the aggregate is an array, recursively set its elements' types. */
+   if (type->is_array()) {
+      /* Each array element has the type type->fields.array.
+       *
+       * E.g., if <type> is struct S[2] we want to set each element's type to
+       * struct S.
+ */ + for (exec_node *expr_node = ai->expressions.head; + !expr_node->is_tail_sentinel(); + expr_node = expr_node->next) { + ast_expression *expr = exec_node_data(ast_expression, expr_node, + link); + + if (expr->oper == ast_aggregate) + _mesa_ast_set_aggregate_type(type->fields.array, expr); + } + + /* If the aggregate is a struct, recursively set its fields' types. */ + } else if (type->is_record()) { + exec_node *expr_node = ai->expressions.head; + + /* Iterate through the struct's fields. */ + for (unsigned i = 0; !expr_node->is_tail_sentinel() && i < type->length; + i++, expr_node = expr_node->next) { + ast_expression *expr = exec_node_data(ast_expression, expr_node, + link); + + if (expr->oper == ast_aggregate) { + _mesa_ast_set_aggregate_type(type->fields.structure[i].type, expr); + } + } + /* If the aggregate is a matrix, set its columns' types. */ + } else if (type->is_matrix()) { + for (exec_node *expr_node = ai->expressions.head; + !expr_node->is_tail_sentinel(); + expr_node = expr_node->next) { + ast_expression *expr = exec_node_data(ast_expression, expr_node, + link); + + if (expr->oper == ast_aggregate) + _mesa_ast_set_aggregate_type(type->column_type(), expr); + } + } +} + +void +_mesa_ast_process_interface_block(YYLTYPE *locp, + _mesa_glsl_parse_state *state, + ast_interface_block *const block, + const struct ast_type_qualifier &q) +{ + if (q.flags.q.buffer) { + if (!state->has_shader_storage_buffer_objects()) { + _mesa_glsl_error(locp, state, + "#version 430 / GL_ARB_shader_storage_buffer_object " + "required for defining shader storage blocks"); + } else if (state->ARB_shader_storage_buffer_object_warn) { + _mesa_glsl_warning(locp, state, + "#version 430 / GL_ARB_shader_storage_buffer_object " + "required for defining shader storage blocks"); + } + } else if (q.flags.q.uniform) { + if (!state->has_uniform_buffer_objects()) { + _mesa_glsl_error(locp, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "required for defining uniform blocks"); + } else if (state->ARB_uniform_buffer_object_warn) { + _mesa_glsl_warning(locp, state, + "#version 140 / GL_ARB_uniform_buffer_object " + "required for defining uniform blocks"); + } + } else { + if (state->es_shader || state->language_version < 150) { + _mesa_glsl_error(locp, state, + "#version 150 required for using " + "interface blocks"); + } + } + + /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"): + * "It is illegal to have an input block in a vertex shader + * or an output block in a fragment shader" + */ + if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) { + _mesa_glsl_error(locp, state, + "`in' interface block is not allowed for " + "a vertex shader"); + } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) { + _mesa_glsl_error(locp, state, + "`out' interface block is not allowed for " + "a fragment shader"); + } + + /* Since block arrays require names, and both features are added in + * the same language versions, we don't have to explicitly + * version-check both things. + */ + if (block->instance_name != NULL) { + state->check_version(150, 300, locp, "interface blocks with " + "an instance name are not allowed"); + } + + uint64_t interface_type_mask; + struct ast_type_qualifier temp_type_qualifier; + + /* Get a bitmask containing only the in/out/uniform/buffer + * flags, allowing us to ignore other irrelevant flags like + * interpolation qualifiers. 
+ */ + temp_type_qualifier.flags.i = 0; + temp_type_qualifier.flags.q.uniform = true; + temp_type_qualifier.flags.q.in = true; + temp_type_qualifier.flags.q.out = true; + temp_type_qualifier.flags.q.buffer = true; + interface_type_mask = temp_type_qualifier.flags.i; + + /* Get the block's interface qualifier. The interface_qualifier + * production rule guarantees that only one bit will be set (and + * it will be in/out/uniform). + */ + uint64_t block_interface_qualifier = q.flags.i; + + block->layout.flags.i |= block_interface_qualifier; + + if (state->stage == MESA_SHADER_GEOMETRY && + state->has_explicit_attrib_stream()) { + /* Assign global layout's stream value. */ + block->layout.flags.q.stream = 1; + block->layout.flags.q.explicit_stream = 0; + block->layout.stream = state->out_qualifier->stream; + } + + foreach_list_typed (ast_declarator_list, member, link, &block->declarations) { + ast_type_qualifier& qualifier = member->type->qualifier; + if ((qualifier.flags.i & interface_type_mask) == 0) { + /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks): + * "If no optional qualifier is used in a member declaration, the + * qualifier of the variable is just in, out, or uniform as declared + * by interface-qualifier." + */ + qualifier.flags.i |= block_interface_qualifier; + } else if ((qualifier.flags.i & interface_type_mask) != + block_interface_qualifier) { + /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks): + * "If optional qualifiers are used, they can include interpolation + * and storage qualifiers and they must declare an input, output, + * or uniform variable consistent with the interface qualifier of + * the block." + */ + _mesa_glsl_error(locp, state, + "uniform/in/out qualifier on " + "interface block member does not match " + "the interface block"); + } + + /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks": + * + * "GLSL ES 3.0 does not support interface blocks for shader inputs or + * outputs." + * + * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":. + * + * "Only variables output from a shader can be candidates for + * invariance." 
+ * + * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks": + * + * "If optional qualifiers are used, they can include interpolation + * qualifiers, auxiliary storage qualifiers, and storage qualifiers + * and they must declare an input, output, or uniform member + * consistent with the interface qualifier of the block" + */ + if (qualifier.flags.q.invariant) + _mesa_glsl_error(locp, state, + "invariant qualifiers cannot be used " + "with interface blocks members"); + } +} + +void +_mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q) +{ + if (q->flags.q.subroutine) + printf("subroutine "); + + if (q->flags.q.subroutine_def) { + printf("subroutine ("); + q->subroutine_list->print(); + printf(")"); + } + + if (q->flags.q.constant) + printf("const "); + + if (q->flags.q.invariant) + printf("invariant "); + + if (q->flags.q.attribute) + printf("attribute "); + + if (q->flags.q.varying) + printf("varying "); + + if (q->flags.q.in && q->flags.q.out) + printf("inout "); + else { + if (q->flags.q.in) + printf("in "); + + if (q->flags.q.out) + printf("out "); + } + + if (q->flags.q.centroid) + printf("centroid "); + if (q->flags.q.sample) + printf("sample "); + if (q->flags.q.patch) + printf("patch "); + if (q->flags.q.uniform) + printf("uniform "); + if (q->flags.q.buffer) + printf("buffer "); + if (q->flags.q.smooth) + printf("smooth "); + if (q->flags.q.flat) + printf("flat "); + if (q->flags.q.noperspective) + printf("noperspective "); +} + + +void +ast_node::print(void) const +{ + printf("unhandled node "); +} + + +ast_node::ast_node(void) +{ + this->location.source = 0; + this->location.first_line = 0; + this->location.first_column = 0; + this->location.last_line = 0; + this->location.last_column = 0; +} + + +static void +ast_opt_array_dimensions_print(const ast_array_specifier *array_specifier) +{ + if (array_specifier) + array_specifier->print(); +} + + +void +ast_compound_statement::print(void) const +{ + printf("{\n"); + + foreach_list_typed(ast_node, ast, link, &this->statements) { + ast->print(); + } + + printf("}\n"); +} + + +ast_compound_statement::ast_compound_statement(int new_scope, + ast_node *statements) +{ + this->new_scope = new_scope; + + if (statements != NULL) { + this->statements.push_degenerate_list_at_head(&statements->link); + } +} + + +void +ast_expression::print(void) const +{ + switch (oper) { + case ast_assign: + case ast_mul_assign: + case ast_div_assign: + case ast_mod_assign: + case ast_add_assign: + case ast_sub_assign: + case ast_ls_assign: + case ast_rs_assign: + case ast_and_assign: + case ast_xor_assign: + case ast_or_assign: + subexpressions[0]->print(); + printf("%s ", operator_string(oper)); + subexpressions[1]->print(); + break; + + case ast_field_selection: + subexpressions[0]->print(); + printf(". %s ", primary_expression.identifier); + break; + + case ast_plus: + case ast_neg: + case ast_bit_not: + case ast_logic_not: + case ast_pre_inc: + case ast_pre_dec: + printf("%s ", operator_string(oper)); + subexpressions[0]->print(); + break; + + case ast_post_inc: + case ast_post_dec: + subexpressions[0]->print(); + printf("%s ", operator_string(oper)); + break; + + case ast_conditional: + subexpressions[0]->print(); + printf("? 
"); + subexpressions[1]->print(); + printf(": "); + subexpressions[2]->print(); + break; + + case ast_array_index: + subexpressions[0]->print(); + printf("[ "); + subexpressions[1]->print(); + printf("] "); + break; + + case ast_function_call: { + subexpressions[0]->print(); + printf("( "); + + foreach_list_typed (ast_node, ast, link, &this->expressions) { + if (&ast->link != this->expressions.get_head()) + printf(", "); + + ast->print(); + } + + printf(") "); + break; + } + + case ast_identifier: + printf("%s ", primary_expression.identifier); + break; + + case ast_int_constant: + printf("%d ", primary_expression.int_constant); + break; + + case ast_uint_constant: + printf("%u ", primary_expression.uint_constant); + break; + + case ast_float_constant: + printf("%f ", primary_expression.float_constant); + break; + + case ast_double_constant: + printf("%f ", primary_expression.double_constant); + break; + + case ast_bool_constant: + printf("%s ", + primary_expression.bool_constant + ? "true" : "false"); + break; + + case ast_sequence: { + printf("( "); + foreach_list_typed (ast_node, ast, link, & this->expressions) { + if (&ast->link != this->expressions.get_head()) + printf(", "); + + ast->print(); + } + printf(") "); + break; + } + + case ast_aggregate: { + printf("{ "); + foreach_list_typed (ast_node, ast, link, & this->expressions) { + if (&ast->link != this->expressions.get_head()) + printf(", "); + + ast->print(); + } + printf("} "); + break; + } + + default: + assert(0); + break; + } +} + +ast_expression::ast_expression(int oper, + ast_expression *ex0, + ast_expression *ex1, + ast_expression *ex2) : + primary_expression() +{ + this->oper = ast_operators(oper); + this->subexpressions[0] = ex0; + this->subexpressions[1] = ex1; + this->subexpressions[2] = ex2; + this->non_lvalue_description = NULL; +} + + +void +ast_expression_statement::print(void) const +{ + if (expression) + expression->print(); + + printf("; "); +} + + +ast_expression_statement::ast_expression_statement(ast_expression *ex) : + expression(ex) +{ + /* empty */ +} + + +void +ast_function::print(void) const +{ + return_type->print(); + printf(" %s (", identifier); + + foreach_list_typed(ast_node, ast, link, & this->parameters) { + ast->print(); + } + + printf(")"); +} + + +ast_function::ast_function(void) + : return_type(NULL), identifier(NULL), is_definition(false), + signature(NULL) +{ + /* empty */ +} + + +void +ast_fully_specified_type::print(void) const +{ + _mesa_ast_type_qualifier_print(& qualifier); + specifier->print(); +} + + +void +ast_parameter_declarator::print(void) const +{ + type->print(); + if (identifier) + printf("%s ", identifier); + ast_opt_array_dimensions_print(array_specifier); +} + + +void +ast_function_definition::print(void) const +{ + prototype->print(); + body->print(); +} + + +void +ast_declaration::print(void) const +{ + printf("%s ", identifier); + ast_opt_array_dimensions_print(array_specifier); + + if (initializer) { + printf("= "); + initializer->print(); + } +} + + +ast_declaration::ast_declaration(const char *identifier, + ast_array_specifier *array_specifier, + ast_expression *initializer) +{ + this->identifier = identifier; + this->array_specifier = array_specifier; + this->initializer = initializer; +} + + +void +ast_declarator_list::print(void) const +{ + assert(type || invariant); + + if (type) + type->print(); + else if (invariant) + printf("invariant "); + else + printf("precise "); + + foreach_list_typed (ast_node, ast, link, & this->declarations) { + if (&ast->link != 
this->declarations.get_head()) + printf(", "); + + ast->print(); + } + + printf("; "); +} + + +ast_declarator_list::ast_declarator_list(ast_fully_specified_type *type) +{ + this->type = type; + this->invariant = false; + this->precise = false; +} + +void +ast_jump_statement::print(void) const +{ + switch (mode) { + case ast_continue: + printf("continue; "); + break; + case ast_break: + printf("break; "); + break; + case ast_return: + printf("return "); + if (opt_return_value) + opt_return_value->print(); + + printf("; "); + break; + case ast_discard: + printf("discard; "); + break; + } +} + + +ast_jump_statement::ast_jump_statement(int mode, ast_expression *return_value) + : opt_return_value(NULL) +{ + this->mode = ast_jump_modes(mode); + + if (mode == ast_return) + opt_return_value = return_value; +} + + +void +ast_selection_statement::print(void) const +{ + printf("if ( "); + condition->print(); + printf(") "); + + then_statement->print(); + + if (else_statement) { + printf("else "); + else_statement->print(); + } +} + + +ast_selection_statement::ast_selection_statement(ast_expression *condition, + ast_node *then_statement, + ast_node *else_statement) +{ + this->condition = condition; + this->then_statement = then_statement; + this->else_statement = else_statement; +} + + +void +ast_switch_statement::print(void) const +{ + printf("switch ( "); + test_expression->print(); + printf(") "); + + body->print(); +} + + +ast_switch_statement::ast_switch_statement(ast_expression *test_expression, + ast_node *body) +{ + this->test_expression = test_expression; + this->body = body; +} + + +void +ast_switch_body::print(void) const +{ + printf("{\n"); + if (stmts != NULL) { + stmts->print(); + } + printf("}\n"); +} + + +ast_switch_body::ast_switch_body(ast_case_statement_list *stmts) +{ + this->stmts = stmts; +} + + +void ast_case_label::print(void) const +{ + if (test_value != NULL) { + printf("case "); + test_value->print(); + printf(": "); + } else { + printf("default: "); + } +} + + +ast_case_label::ast_case_label(ast_expression *test_value) +{ + this->test_value = test_value; +} + + +void ast_case_label_list::print(void) const +{ + foreach_list_typed(ast_node, ast, link, & this->labels) { + ast->print(); + } + printf("\n"); +} + + +ast_case_label_list::ast_case_label_list(void) +{ +} + + +void ast_case_statement::print(void) const +{ + labels->print(); + foreach_list_typed(ast_node, ast, link, & this->stmts) { + ast->print(); + printf("\n"); + } +} + + +ast_case_statement::ast_case_statement(ast_case_label_list *labels) +{ + this->labels = labels; +} + + +void ast_case_statement_list::print(void) const +{ + foreach_list_typed(ast_node, ast, link, & this->cases) { + ast->print(); + } +} + + +ast_case_statement_list::ast_case_statement_list(void) +{ +} + + +void +ast_iteration_statement::print(void) const +{ + switch (mode) { + case ast_for: + printf("for( "); + if (init_statement) + init_statement->print(); + printf("; "); + + if (condition) + condition->print(); + printf("; "); + + if (rest_expression) + rest_expression->print(); + printf(") "); + + body->print(); + break; + + case ast_while: + printf("while ( "); + if (condition) + condition->print(); + printf(") "); + body->print(); + break; + + case ast_do_while: + printf("do "); + body->print(); + printf("while ( "); + if (condition) + condition->print(); + printf("); "); + break; + } +} + + +ast_iteration_statement::ast_iteration_statement(int mode, + ast_node *init, + ast_node *condition, + ast_expression *rest_expression, + ast_node 
*body) +{ + this->mode = ast_iteration_modes(mode); + this->init_statement = init; + this->condition = condition; + this->rest_expression = rest_expression; + this->body = body; +} + + +void +ast_struct_specifier::print(void) const +{ + printf("struct %s { ", name); + foreach_list_typed(ast_node, ast, link, &this->declarations) { + ast->print(); + } + printf("} "); +} + + +ast_struct_specifier::ast_struct_specifier(const char *identifier, + ast_declarator_list *declarator_list) +{ + if (identifier == NULL) { + static mtx_t mutex = _MTX_INITIALIZER_NP; + static unsigned anon_count = 1; + unsigned count; + + mtx_lock(&mutex); + count = anon_count++; + mtx_unlock(&mutex); + + identifier = ralloc_asprintf(this, "#anon_struct_%04x", count); + } + name = identifier; + this->declarations.push_degenerate_list_at_head(&declarator_list->link); + is_declaration = true; +} + +void ast_subroutine_list::print(void) const +{ + foreach_list_typed (ast_node, ast, link, & this->declarations) { + if (&ast->link != this->declarations.get_head()) + printf(", "); + ast->print(); + } +} + +static void +set_shader_inout_layout(struct gl_shader *shader, + struct _mesa_glsl_parse_state *state) +{ + /* Should have been prevented by the parser. */ + if (shader->Stage == MESA_SHADER_TESS_CTRL) { + assert(!state->in_qualifier->flags.i); + } else if (shader->Stage == MESA_SHADER_TESS_EVAL) { + assert(!state->out_qualifier->flags.i); + } else if (shader->Stage != MESA_SHADER_GEOMETRY) { + assert(!state->in_qualifier->flags.i); + assert(!state->out_qualifier->flags.i); + } + + if (shader->Stage != MESA_SHADER_COMPUTE) { + /* Should have been prevented by the parser. */ + assert(!state->cs_input_local_size_specified); + } + + if (shader->Stage != MESA_SHADER_FRAGMENT) { + /* Should have been prevented by the parser. 
*/ + assert(!state->fs_uses_gl_fragcoord); + assert(!state->fs_redeclares_gl_fragcoord); + assert(!state->fs_pixel_center_integer); + assert(!state->fs_origin_upper_left); + assert(!state->fs_early_fragment_tests); + } + + switch (shader->Stage) { + case MESA_SHADER_TESS_CTRL: + shader->TessCtrl.VerticesOut = 0; + if (state->tcs_output_vertices_specified) { + unsigned vertices; + if (state->out_qualifier->vertices-> + process_qualifier_constant(state, "vertices", &vertices, + false)) { + + YYLTYPE loc = state->out_qualifier->vertices->get_location(); + if (vertices > state->Const.MaxPatchVertices) { + _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " + "GL_MAX_PATCH_VERTICES", vertices); + } + shader->TessCtrl.VerticesOut = vertices; + } + } + break; + case MESA_SHADER_TESS_EVAL: + shader->TessEval.PrimitiveMode = PRIM_UNKNOWN; + if (state->in_qualifier->flags.q.prim_type) + shader->TessEval.PrimitiveMode = state->in_qualifier->prim_type; + + shader->TessEval.Spacing = 0; + if (state->in_qualifier->flags.q.vertex_spacing) + shader->TessEval.Spacing = state->in_qualifier->vertex_spacing; + + shader->TessEval.VertexOrder = 0; + if (state->in_qualifier->flags.q.ordering) + shader->TessEval.VertexOrder = state->in_qualifier->ordering; + + shader->TessEval.PointMode = -1; + if (state->in_qualifier->flags.q.point_mode) + shader->TessEval.PointMode = state->in_qualifier->point_mode; + break; + case MESA_SHADER_GEOMETRY: + shader->Geom.VerticesOut = 0; + if (state->out_qualifier->flags.q.max_vertices) { + unsigned qual_max_vertices; + if (state->out_qualifier->max_vertices-> + process_qualifier_constant(state, "max_vertices", + &qual_max_vertices, true)) { + shader->Geom.VerticesOut = qual_max_vertices; + } + } + + if (state->gs_input_prim_type_specified) { + shader->Geom.InputType = state->in_qualifier->prim_type; + } else { + shader->Geom.InputType = PRIM_UNKNOWN; + } + + if (state->out_qualifier->flags.q.prim_type) { + shader->Geom.OutputType = state->out_qualifier->prim_type; + } else { + shader->Geom.OutputType = PRIM_UNKNOWN; + } + + shader->Geom.Invocations = 0; + if (state->in_qualifier->flags.q.invocations) { + unsigned invocations; + if (state->in_qualifier->invocations-> + process_qualifier_constant(state, "invocations", + &invocations, false)) { + + YYLTYPE loc = state->in_qualifier->invocations->get_location(); + if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) { + _mesa_glsl_error(&loc, state, + "invocations (%d) exceeds " + "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", + invocations); + } + shader->Geom.Invocations = invocations; + } + } + break; + + case MESA_SHADER_COMPUTE: + if (state->cs_input_local_size_specified) { + for (int i = 0; i < 3; i++) + shader->Comp.LocalSize[i] = state->cs_input_local_size[i]; + } else { + for (int i = 0; i < 3; i++) + shader->Comp.LocalSize[i] = 0; + } + break; + + case MESA_SHADER_FRAGMENT: + shader->redeclares_gl_fragcoord = state->fs_redeclares_gl_fragcoord; + shader->uses_gl_fragcoord = state->fs_uses_gl_fragcoord; + shader->pixel_center_integer = state->fs_pixel_center_integer; + shader->origin_upper_left = state->fs_origin_upper_left; + shader->ARB_fragment_coord_conventions_enable = + state->ARB_fragment_coord_conventions_enable; + shader->EarlyFragmentTests = state->fs_early_fragment_tests; + break; + + default: + /* Nothing to do. 
*/ + break; + } +} + +extern "C" { + +void +_mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, + bool dump_ast, bool dump_hir) +{ + struct _mesa_glsl_parse_state *state = + new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader); + const char *source = shader->Source; + + if (ctx->Const.GenerateTemporaryNames) + (void) p_atomic_cmpxchg(&ir_variable::temporaries_allocate_names, + false, true); + + state->error = glcpp_preprocess(state, &source, &state->info_log, + &ctx->Extensions, ctx); + + if (!state->error) { + _mesa_glsl_lexer_ctor(state, source); + _mesa_glsl_parse(state); + _mesa_glsl_lexer_dtor(state); + } + + if (dump_ast) { + foreach_list_typed(ast_node, ast, link, &state->translation_unit) { + ast->print(); + } + printf("\n\n"); + } + + ralloc_free(shader->ir); + shader->ir = new(shader) exec_list; + if (!state->error && !state->translation_unit.is_empty()) + _mesa_ast_to_hir(shader->ir, state); + + if (!state->error) { + validate_ir_tree(shader->ir); + + /* Print out the unoptimized IR. */ + if (dump_hir) { + _mesa_print_ir(stdout, shader->ir, state); + } + } + + + if (!state->error && !shader->ir->is_empty()) { + struct gl_shader_compiler_options *options = + &ctx->Const.ShaderCompilerOptions[shader->Stage]; + + lower_subroutine(shader->ir, state); + /* Do some optimization at compile time to reduce shader IR size + * and reduce later work if the same shader is linked multiple times + */ + while (do_common_optimization(shader->ir, false, false, options, + ctx->Const.NativeIntegers)) + ; + + validate_ir_tree(shader->ir); + + enum ir_variable_mode other; + switch (shader->Stage) { + case MESA_SHADER_VERTEX: + other = ir_var_shader_in; + break; + case MESA_SHADER_FRAGMENT: + other = ir_var_shader_out; + break; + default: + /* Something invalid to ensure optimize_dead_builtin_uniforms + * doesn't remove anything other than uniforms or constants. + */ + other = ir_var_mode_count; + break; + } + + optimize_dead_builtin_variables(shader->ir, other); + + validate_ir_tree(shader->ir); + } + + if (shader->InfoLog) + ralloc_free(shader->InfoLog); + + if (!state->error) + set_shader_inout_layout(shader, state); + + shader->symbols = new(shader->ir) glsl_symbol_table; + shader->CompileStatus = !state->error; + shader->InfoLog = state->info_log; + shader->Version = state->language_version; + shader->IsES = state->es_shader; + shader->uses_builtin_functions = state->uses_builtin_functions; + + /* Retain any live IR, but trash the rest. */ + reparent_ir(shader->ir, shader->ir); + + /* Destroy the symbol table. Create a new symbol table that contains only + * the variables and functions that still exist in the IR. The symbol + * table will be used later during linking. + * + * There must NOT be any freed objects still referenced by the symbol + * table. That could cause the linker to dereference freed memory. + * + * We don't have to worry about types or interface-types here because those + * are fly-weights that are looked up by glsl_type. 
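+    *
+    * (Illustrative note: glsl_type instances are shared singletons, so
+    * every shader that uses vec4 sees the same object, and type pointers
+    * kept here cannot dangle the way a freed ir_variable could.)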
+ */ + foreach_in_list (ir_instruction, ir, shader->ir) { + switch (ir->ir_type) { + case ir_type_function: + shader->symbols->add_function((ir_function *) ir); + break; + case ir_type_variable: { + ir_variable *const var = (ir_variable *) ir; + + if (var->data.mode != ir_var_temporary) + shader->symbols->add_variable(var); + break; + } + default: + break; + } + } + + _mesa_glsl_initialize_derived_variables(shader); + + delete state->symbols; + ralloc_free(state); +} + +} /* extern "C" */ +/** + * Do the set of common optimizations passes + * + * \param ir List of instructions to be optimized + * \param linked Is the shader linked? This enables + * optimizations passes that remove code at + * global scope and could cause linking to + * fail. + * \param uniform_locations_assigned Have locations already been assigned for + * uniforms? This prevents the declarations + * of unused uniforms from being removed. + * The setting of this flag only matters if + * \c linked is \c true. + * \param max_unroll_iterations Maximum number of loop iterations to be + * unrolled. Setting to 0 disables loop + * unrolling. + * \param options The driver's preferred shader options. + */ +bool +do_common_optimization(exec_list *ir, bool linked, + bool uniform_locations_assigned, + const struct gl_shader_compiler_options *options, + bool native_integers) +{ + GLboolean progress = GL_FALSE; + + progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress; + + if (linked) { + progress = do_function_inlining(ir) || progress; + progress = do_dead_functions(ir) || progress; + progress = do_structure_splitting(ir) || progress; + } + progress = do_if_simplification(ir) || progress; + progress = opt_flatten_nested_if_blocks(ir) || progress; + progress = opt_conditional_discard(ir) || progress; + progress = do_copy_propagation(ir) || progress; + progress = do_copy_propagation_elements(ir) || progress; + + if (options->OptimizeForAOS && !linked) + progress = opt_flip_matrices(ir) || progress; + + if (linked && options->OptimizeForAOS) { + progress = do_vectorize(ir) || progress; + } + + if (linked) + progress = do_dead_code(ir, uniform_locations_assigned) || progress; + else + progress = do_dead_code_unlinked(ir) || progress; + progress = do_dead_code_local(ir) || progress; + progress = do_tree_grafting(ir) || progress; + progress = do_constant_propagation(ir) || progress; + if (linked) + progress = do_constant_variable(ir) || progress; + else + progress = do_constant_variable_unlinked(ir) || progress; + progress = do_constant_folding(ir) || progress; + progress = do_minmax_prune(ir) || progress; + progress = do_rebalance_tree(ir) || progress; + progress = do_algebraic(ir, native_integers, options) || progress; + progress = do_lower_jumps(ir) || progress; + progress = do_vec_index_to_swizzle(ir) || progress; + progress = lower_vector_insert(ir, false) || progress; + progress = do_swizzle_swizzle(ir) || progress; + progress = do_noop_swizzle(ir) || progress; + + progress = optimize_split_arrays(ir, linked) || progress; + progress = optimize_redundant_jumps(ir) || progress; + + loop_state *ls = analyze_loop_variables(ir); + if (ls->loop_found) { + progress = set_loop_controls(ir, ls) || progress; + progress = unroll_loops(ir, ls, options) || progress; + } + delete ls; + + return progress; +} + +extern "C" { + +/** + * To be called at GL teardown time, this frees compiler datastructures. + * + * After calling this, any previously compiled shaders and shader + * programs would be invalid. 
So this should happen at approximately
+ * program exit.
+ */
+void
+_mesa_destroy_shader_compiler(void)
+{
+   _mesa_destroy_shader_compiler_caches();
+
+   _mesa_glsl_release_types();
+}
+
+/**
+ * Releases compiler caches to trade off performance for memory.
+ *
+ * Intended to be used with glReleaseShaderCompiler().
+ */
+void
+_mesa_destroy_shader_compiler_caches(void)
+{
+   _mesa_glsl_release_builtin_functions();
+}
+
+}
diff --git a/src/compiler/glsl/glsl_parser_extras.h b/src/compiler/glsl/glsl_parser_extras.h
new file mode 100644
index 00000000000..3f88e01d599
--- /dev/null
+++ b/src/compiler/glsl/glsl_parser_extras.h
@@ -0,0 +1,752 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef GLSL_PARSER_EXTRAS_H
+#define GLSL_PARSER_EXTRAS_H
+
+/*
+ * Most of the definitions here only apply to C++
+ */
+#ifdef __cplusplus
+
+
+#include <stdlib.h>
+#include "glsl_symbol_table.h"
+
+struct gl_context;
+
+struct glsl_switch_state {
+   /** Temporary variables needed for switch statement. */
+   ir_variable *test_var;
+   ir_variable *is_fallthru_var;
+   class ast_switch_statement *switch_nesting_ast;
+
+   /** Used to detect if 'continue' was called inside a switch. */
+   ir_variable *continue_inside;
+
+   /** Used to set condition if 'default' label should be chosen. */
+   ir_variable *run_default;
+
+   /** Table of constant values already used in case labels */
+   struct hash_table *labels_ht;
+   class ast_case_label *previous_default;
+
+   bool is_switch_innermost; // if switch stmt is closest to break, ...
+};
+
+const char *
+glsl_compute_version_string(void *mem_ctx, bool is_es, unsigned version);
+
+typedef struct YYLTYPE {
+   int first_line;
+   int first_column;
+   int last_line;
+   int last_column;
+   unsigned source;
+} YYLTYPE;
+# define YYLTYPE_IS_DECLARED 1
+# define YYLTYPE_IS_TRIVIAL 1
+
+extern void _mesa_glsl_error(YYLTYPE *locp, _mesa_glsl_parse_state *state,
+                             const char *fmt, ...);
+
+
+struct _mesa_glsl_parse_state {
+   _mesa_glsl_parse_state(struct gl_context *_ctx, gl_shader_stage stage,
+                          void *mem_ctx);
+
+   DECLARE_RALLOC_CXX_OPERATORS(_mesa_glsl_parse_state);
+
+   /**
+    * Generate a string representing the GLSL version currently being compiled
+    * (useful for error messages).
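+    *
+    * For example (illustrative): a "#version 150" shader yields
+    * "GLSL 1.50", and a "#version 300 es" shader yields "GLSL ES 3.00".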
+ */ + const char *get_version_string() + { + return glsl_compute_version_string(this, this->es_shader, + this->language_version); + } + + /** + * Determine whether the current GLSL version is sufficiently high to + * support a certain feature. + * + * \param required_glsl_version is the desktop GLSL version that is + * required to support the feature, or 0 if no version of desktop GLSL + * supports the feature. + * + * \param required_glsl_es_version is the GLSL ES version that is required + * to support the feature, or 0 if no version of GLSL ES supports the + * feature. + */ + bool is_version(unsigned required_glsl_version, + unsigned required_glsl_es_version) const + { + unsigned required_version = this->es_shader ? + required_glsl_es_version : required_glsl_version; + unsigned this_version = this->forced_language_version + ? this->forced_language_version : this->language_version; + return required_version != 0 + && this_version >= required_version; + } + + bool check_version(unsigned required_glsl_version, + unsigned required_glsl_es_version, + YYLTYPE *locp, const char *fmt, ...) PRINTFLIKE(5, 6); + + bool check_arrays_of_arrays_allowed(YYLTYPE *locp) + { + if (!(ARB_arrays_of_arrays_enable || is_version(430, 310))) { + const char *const requirement = this->es_shader + ? "GLSL ES 3.10" + : "GL_ARB_arrays_of_arrays or GLSL 4.30"; + _mesa_glsl_error(locp, this, + "%s required for defining arrays of arrays.", + requirement); + return false; + } + return true; + } + + bool check_precision_qualifiers_allowed(YYLTYPE *locp) + { + return check_version(130, 100, locp, + "precision qualifiers are forbidden"); + } + + bool check_bitwise_operations_allowed(YYLTYPE *locp) + { + return check_version(130, 300, locp, "bit-wise operations are forbidden"); + } + + bool check_explicit_attrib_stream_allowed(YYLTYPE *locp) + { + if (!this->has_explicit_attrib_stream()) { + const char *const requirement = "GL_ARB_gpu_shader5 extension or GLSL 4.00"; + + _mesa_glsl_error(locp, this, "explicit stream requires %s", + requirement); + return false; + } + + return true; + } + + bool check_explicit_attrib_location_allowed(YYLTYPE *locp, + const ir_variable *var) + { + if (!this->has_explicit_attrib_location()) { + const char *const requirement = this->es_shader + ? "GLSL ES 3.00" + : "GL_ARB_explicit_attrib_location extension or GLSL 3.30"; + + _mesa_glsl_error(locp, this, "%s explicit location requires %s", + mode_string(var), requirement); + return false; + } + + return true; + } + + bool check_separate_shader_objects_allowed(YYLTYPE *locp, + const ir_variable *var) + { + if (!this->has_separate_shader_objects()) { + const char *const requirement = this->es_shader + ? "GL_EXT_separate_shader_objects extension or GLSL ES 3.10" + : "GL_ARB_separate_shader_objects extension or GLSL 4.20"; + + _mesa_glsl_error(locp, this, "%s explicit location requires %s", + mode_string(var), requirement); + return false; + } + + return true; + } + + bool check_explicit_uniform_location_allowed(YYLTYPE *locp, + const ir_variable *) + { + if (!this->has_explicit_attrib_location() || + !this->has_explicit_uniform_location()) { + const char *const requirement = this->es_shader + ? 
"GLSL ES 3.10" + : "GL_ARB_explicit_uniform_location and either " + "GL_ARB_explicit_attrib_location or GLSL 3.30."; + + _mesa_glsl_error(locp, this, + "uniform explicit location requires %s", + requirement); + return false; + } + + return true; + } + + bool has_atomic_counters() const + { + return ARB_shader_atomic_counters_enable || is_version(420, 310); + } + + bool has_enhanced_layouts() const + { + return ARB_enhanced_layouts_enable || is_version(440, 0); + } + + bool has_explicit_attrib_stream() const + { + return ARB_gpu_shader5_enable || is_version(400, 0); + } + + bool has_explicit_attrib_location() const + { + return ARB_explicit_attrib_location_enable || is_version(330, 300); + } + + bool has_explicit_uniform_location() const + { + return ARB_explicit_uniform_location_enable || is_version(430, 310); + } + + bool has_uniform_buffer_objects() const + { + return ARB_uniform_buffer_object_enable || is_version(140, 300); + } + + bool has_shader_storage_buffer_objects() const + { + return ARB_shader_storage_buffer_object_enable || is_version(430, 310); + } + + bool has_separate_shader_objects() const + { + return ARB_separate_shader_objects_enable || is_version(410, 310) + || EXT_separate_shader_objects_enable; + } + + bool has_double() const + { + return ARB_gpu_shader_fp64_enable || is_version(400, 0); + } + + bool has_420pack() const + { + return ARB_shading_language_420pack_enable || is_version(420, 0); + } + + bool has_420pack_or_es31() const + { + return ARB_shading_language_420pack_enable || is_version(420, 310); + } + + bool has_compute_shader() const + { + return ARB_compute_shader_enable || is_version(430, 310); + } + + bool has_geometry_shader() const + { + return OES_geometry_shader_enable || is_version(150, 320); + } + + void process_version_directive(YYLTYPE *locp, int version, + const char *ident); + + struct gl_context *const ctx; + void *scanner; + exec_list translation_unit; + glsl_symbol_table *symbols; + + unsigned num_supported_versions; + struct { + unsigned ver; + bool es; + } supported_versions[15]; + + bool es_shader; + unsigned language_version; + unsigned forced_language_version; + gl_shader_stage stage; + + /** + * Number of nested struct_specifier levels + * + * Outside a struct_specifier, this is zero. + */ + unsigned struct_specifier_depth; + + /** + * Default uniform layout qualifiers tracked during parsing. + * Currently affects uniform blocks and uniform buffer variables in + * those blocks. + */ + struct ast_type_qualifier *default_uniform_qualifier; + + /** + * Default shader storage layout qualifiers tracked during parsing. + * Currently affects shader storage blocks and shader storage buffer + * variables in those blocks. + */ + struct ast_type_qualifier *default_shader_storage_qualifier; + + /** + * Variables to track different cases if a fragment shader redeclares + * built-in variable gl_FragCoord. + * + * Note: These values are computed at ast_to_hir time rather than at parse + * time. + */ + bool fs_redeclares_gl_fragcoord; + bool fs_origin_upper_left; + bool fs_pixel_center_integer; + bool fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; + + /** + * True if a geometry shader input primitive type or tessellation control + * output vertices were specified using a layout directive. + * + * Note: these values are computed at ast_to_hir time rather than at parse + * time. 
+ */ + bool gs_input_prim_type_specified; + bool tcs_output_vertices_specified; + + /** + * Input layout qualifiers from GLSL 1.50 (geometry shader controls), + * and GLSL 4.00 (tessellation evaluation shader) + */ + struct ast_type_qualifier *in_qualifier; + + /** + * True if a compute shader input local size was specified using a layout + * directive. + * + * Note: this value is computed at ast_to_hir time rather than at parse + * time. + */ + bool cs_input_local_size_specified; + + /** + * If cs_input_local_size_specified is true, the local size that was + * specified. Otherwise ignored. + */ + unsigned cs_input_local_size[3]; + + /** + * Output layout qualifiers from GLSL 1.50 (geometry shader controls), + * and GLSL 4.00 (tessellation control shader). + */ + struct ast_type_qualifier *out_qualifier; + + /** + * Printable list of GLSL versions supported by the current context + * + * \note + * This string should probably be generated per-context instead of per + * invokation of the compiler. This should be changed when the method of + * tracking supported GLSL versions changes. + */ + const char *supported_version_string; + + /** + * Implementation defined limits that affect built-in variables, etc. + * + * \sa struct gl_constants (in mtypes.h) + */ + struct { + /* 1.10 */ + unsigned MaxLights; + unsigned MaxClipPlanes; + unsigned MaxTextureUnits; + unsigned MaxTextureCoords; + unsigned MaxVertexAttribs; + unsigned MaxVertexUniformComponents; + unsigned MaxVertexTextureImageUnits; + unsigned MaxCombinedTextureImageUnits; + unsigned MaxTextureImageUnits; + unsigned MaxFragmentUniformComponents; + + /* ARB_draw_buffers */ + unsigned MaxDrawBuffers; + + /* ARB_blend_func_extended */ + unsigned MaxDualSourceDrawBuffers; + + /* 3.00 ES */ + int MinProgramTexelOffset; + int MaxProgramTexelOffset; + + /* 1.50 */ + unsigned MaxVertexOutputComponents; + unsigned MaxGeometryInputComponents; + unsigned MaxGeometryOutputComponents; + unsigned MaxFragmentInputComponents; + unsigned MaxGeometryTextureImageUnits; + unsigned MaxGeometryOutputVertices; + unsigned MaxGeometryTotalOutputComponents; + unsigned MaxGeometryUniformComponents; + + /* ARB_shader_atomic_counters */ + unsigned MaxVertexAtomicCounters; + unsigned MaxTessControlAtomicCounters; + unsigned MaxTessEvaluationAtomicCounters; + unsigned MaxGeometryAtomicCounters; + unsigned MaxFragmentAtomicCounters; + unsigned MaxCombinedAtomicCounters; + unsigned MaxAtomicBufferBindings; + + /* These are also atomic counter related, but they weren't added to + * until atomic counters were added to core in GLSL 4.20 and GLSL ES + * 3.10. 
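+       *
+       * (Illustrative mapping: MaxVertexAtomicCounterBuffers backs the
+       * GLSL built-in constant gl_MaxVertexAtomicCounterBuffers, and
+       * likewise for the other limits in this group.)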
+ */ + unsigned MaxVertexAtomicCounterBuffers; + unsigned MaxTessControlAtomicCounterBuffers; + unsigned MaxTessEvaluationAtomicCounterBuffers; + unsigned MaxGeometryAtomicCounterBuffers; + unsigned MaxFragmentAtomicCounterBuffers; + unsigned MaxCombinedAtomicCounterBuffers; + unsigned MaxAtomicCounterBufferSize; + + /* ARB_compute_shader */ + unsigned MaxComputeWorkGroupCount[3]; + unsigned MaxComputeWorkGroupSize[3]; + + /* ARB_shader_image_load_store */ + unsigned MaxImageUnits; + unsigned MaxCombinedShaderOutputResources; + unsigned MaxImageSamples; + unsigned MaxVertexImageUniforms; + unsigned MaxTessControlImageUniforms; + unsigned MaxTessEvaluationImageUniforms; + unsigned MaxGeometryImageUniforms; + unsigned MaxFragmentImageUniforms; + unsigned MaxCombinedImageUniforms; + + /* ARB_viewport_array */ + unsigned MaxViewports; + + /* ARB_tessellation_shader */ + unsigned MaxPatchVertices; + unsigned MaxTessGenLevel; + unsigned MaxTessControlInputComponents; + unsigned MaxTessControlOutputComponents; + unsigned MaxTessControlTextureImageUnits; + unsigned MaxTessEvaluationInputComponents; + unsigned MaxTessEvaluationOutputComponents; + unsigned MaxTessEvaluationTextureImageUnits; + unsigned MaxTessPatchComponents; + unsigned MaxTessControlTotalOutputComponents; + unsigned MaxTessControlUniformComponents; + unsigned MaxTessEvaluationUniformComponents; + } Const; + + /** + * During AST to IR conversion, pointer to current IR function + * + * Will be \c NULL whenever the AST to IR conversion is not inside a + * function definition. + */ + class ir_function_signature *current_function; + + /** + * During AST to IR conversion, pointer to the toplevel IR + * instruction list being generated. + */ + exec_list *toplevel_ir; + + /** Have we found a return statement in this function? */ + bool found_return; + + /** Was there an error during compilation? */ + bool error; + + /** + * Are all shader inputs / outputs invariant? + * + * This is set when the 'STDGL invariant(all)' pragma is used. + */ + bool all_invariant; + + /** Loop or switch statement containing the current instructions. */ + class ast_iteration_statement *loop_nesting_ast; + + struct glsl_switch_state switch_state; + + /** List of structures defined in user code. */ + const glsl_type **user_structures; + unsigned num_user_structures; + + char *info_log; + + /** + * \name Enable bits for GLSL extensions + */ + /*@{*/ + /* ARB extensions go here, sorted alphabetically. 
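+    *
+    * As a sketch of the mechanism (illustrative): the directive
+    * "#extension GL_ARB_compute_shader : enable" sets
+    * ARB_compute_shader_enable, while the "warn" behavior also sets the
+    * matching _warn flag so uses of the extension are reported.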
+ */ + bool ARB_arrays_of_arrays_enable; + bool ARB_arrays_of_arrays_warn; + bool ARB_compute_shader_enable; + bool ARB_compute_shader_warn; + bool ARB_conservative_depth_enable; + bool ARB_conservative_depth_warn; + bool ARB_derivative_control_enable; + bool ARB_derivative_control_warn; + bool ARB_draw_buffers_enable; + bool ARB_draw_buffers_warn; + bool ARB_draw_instanced_enable; + bool ARB_draw_instanced_warn; + bool ARB_enhanced_layouts_enable; + bool ARB_enhanced_layouts_warn; + bool ARB_explicit_attrib_location_enable; + bool ARB_explicit_attrib_location_warn; + bool ARB_explicit_uniform_location_enable; + bool ARB_explicit_uniform_location_warn; + bool ARB_fragment_coord_conventions_enable; + bool ARB_fragment_coord_conventions_warn; + bool ARB_fragment_layer_viewport_enable; + bool ARB_fragment_layer_viewport_warn; + bool ARB_gpu_shader5_enable; + bool ARB_gpu_shader5_warn; + bool ARB_gpu_shader_fp64_enable; + bool ARB_gpu_shader_fp64_warn; + bool ARB_sample_shading_enable; + bool ARB_sample_shading_warn; + bool ARB_separate_shader_objects_enable; + bool ARB_separate_shader_objects_warn; + bool ARB_shader_atomic_counters_enable; + bool ARB_shader_atomic_counters_warn; + bool ARB_shader_bit_encoding_enable; + bool ARB_shader_bit_encoding_warn; + bool ARB_shader_clock_enable; + bool ARB_shader_clock_warn; + bool ARB_shader_draw_parameters_enable; + bool ARB_shader_draw_parameters_warn; + bool ARB_shader_image_load_store_enable; + bool ARB_shader_image_load_store_warn; + bool ARB_shader_image_size_enable; + bool ARB_shader_image_size_warn; + bool ARB_shader_precision_enable; + bool ARB_shader_precision_warn; + bool ARB_shader_stencil_export_enable; + bool ARB_shader_stencil_export_warn; + bool ARB_shader_storage_buffer_object_enable; + bool ARB_shader_storage_buffer_object_warn; + bool ARB_shader_subroutine_enable; + bool ARB_shader_subroutine_warn; + bool ARB_shader_texture_image_samples_enable; + bool ARB_shader_texture_image_samples_warn; + bool ARB_shader_texture_lod_enable; + bool ARB_shader_texture_lod_warn; + bool ARB_shading_language_420pack_enable; + bool ARB_shading_language_420pack_warn; + bool ARB_shading_language_packing_enable; + bool ARB_shading_language_packing_warn; + bool ARB_tessellation_shader_enable; + bool ARB_tessellation_shader_warn; + bool ARB_texture_cube_map_array_enable; + bool ARB_texture_cube_map_array_warn; + bool ARB_texture_gather_enable; + bool ARB_texture_gather_warn; + bool ARB_texture_multisample_enable; + bool ARB_texture_multisample_warn; + bool ARB_texture_query_levels_enable; + bool ARB_texture_query_levels_warn; + bool ARB_texture_query_lod_enable; + bool ARB_texture_query_lod_warn; + bool ARB_texture_rectangle_enable; + bool ARB_texture_rectangle_warn; + bool ARB_uniform_buffer_object_enable; + bool ARB_uniform_buffer_object_warn; + bool ARB_vertex_attrib_64bit_enable; + bool ARB_vertex_attrib_64bit_warn; + bool ARB_viewport_array_enable; + bool ARB_viewport_array_warn; + + /* KHR extensions go here, sorted alphabetically. + */ + + /* OES extensions go here, sorted alphabetically. + */ + bool OES_EGL_image_external_enable; + bool OES_EGL_image_external_warn; + bool OES_geometry_shader_enable; + bool OES_geometry_shader_warn; + bool OES_standard_derivatives_enable; + bool OES_standard_derivatives_warn; + bool OES_texture_3D_enable; + bool OES_texture_3D_warn; + bool OES_texture_storage_multisample_2d_array_enable; + bool OES_texture_storage_multisample_2d_array_warn; + + /* All other extensions go here, sorted alphabetically. 
+ */ + bool AMD_conservative_depth_enable; + bool AMD_conservative_depth_warn; + bool AMD_shader_stencil_export_enable; + bool AMD_shader_stencil_export_warn; + bool AMD_shader_trinary_minmax_enable; + bool AMD_shader_trinary_minmax_warn; + bool AMD_vertex_shader_layer_enable; + bool AMD_vertex_shader_layer_warn; + bool AMD_vertex_shader_viewport_index_enable; + bool AMD_vertex_shader_viewport_index_warn; + bool EXT_blend_func_extended_enable; + bool EXT_blend_func_extended_warn; + bool EXT_draw_buffers_enable; + bool EXT_draw_buffers_warn; + bool EXT_separate_shader_objects_enable; + bool EXT_separate_shader_objects_warn; + bool EXT_shader_integer_mix_enable; + bool EXT_shader_integer_mix_warn; + bool EXT_shader_samples_identical_enable; + bool EXT_shader_samples_identical_warn; + bool EXT_texture_array_enable; + bool EXT_texture_array_warn; + /*@}*/ + + /** Extensions supported by the OpenGL implementation. */ + const struct gl_extensions *extensions; + + bool uses_builtin_functions; + bool fs_uses_gl_fragcoord; + + /** + * For geometry shaders, size of the most recently seen input declaration + * that was a sized array, or 0 if no sized input array declarations have + * been seen. + * + * Unused for other shader types. + */ + unsigned gs_input_size; + + bool fs_early_fragment_tests; + + /** + * For tessellation control shaders, size of the most recently seen output + * declaration that was a sized array, or 0 if no sized output array + * declarations have been seen. + * + * Unused for other shader types. + */ + unsigned tcs_output_size; + + /** Atomic counter offsets by binding */ + unsigned atomic_counter_offsets[MAX_COMBINED_ATOMIC_BUFFERS]; + + bool allow_extension_directive_midshader; + + /** + * Known subroutine type declarations. + */ + int num_subroutine_types; + ir_function **subroutine_types; + + /** + * Functions that are associated with + * subroutine types. + */ + int num_subroutines; + ir_function **subroutines; + + /** + * field selection temporary parser storage - + * did the parser just parse a dot. + */ + bool is_field; +}; + +# define YYLLOC_DEFAULT(Current, Rhs, N) \ +do { \ + if (N) \ + { \ + (Current).first_line = YYRHSLOC(Rhs, 1).first_line; \ + (Current).first_column = YYRHSLOC(Rhs, 1).first_column; \ + (Current).last_line = YYRHSLOC(Rhs, N).last_line; \ + (Current).last_column = YYRHSLOC(Rhs, N).last_column; \ + } \ + else \ + { \ + (Current).first_line = (Current).last_line = \ + YYRHSLOC(Rhs, 0).last_line; \ + (Current).first_column = (Current).last_column = \ + YYRHSLOC(Rhs, 0).last_column; \ + } \ + (Current).source = 0; \ +} while (0) + +/** + * Emit a warning to the shader log + * + * \sa _mesa_glsl_error + */ +extern void _mesa_glsl_warning(const YYLTYPE *locp, + _mesa_glsl_parse_state *state, + const char *fmt, ...); + +extern void _mesa_glsl_lexer_ctor(struct _mesa_glsl_parse_state *state, + const char *string); + +extern void _mesa_glsl_lexer_dtor(struct _mesa_glsl_parse_state *state); + +union YYSTYPE; +extern int _mesa_glsl_lexer_lex(union YYSTYPE *yylval, YYLTYPE *yylloc, + void *scanner); + +extern int _mesa_glsl_parse(struct _mesa_glsl_parse_state *); + +/** + * Process elements of the #extension directive + * + * \return + * If \c name and \c behavior are valid, \c true is returned. Otherwise + * \c false is returned. 
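+ *
+ * For example (illustrative): "#extension GL_EXT_texture_array : enable"
+ * reaches this function with \c name "GL_EXT_texture_array" and
+ * \c behavior "enable".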
+ */ +extern bool _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp, + const char *behavior, + YYLTYPE *behavior_locp, + _mesa_glsl_parse_state *state); + +#endif /* __cplusplus */ + + +/* + * These definitions apply to C and C++ + */ +#ifdef __cplusplus +extern "C" { +#endif + +extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log, + const struct gl_extensions *extensions, struct gl_context *gl_ctx); + +extern void _mesa_destroy_shader_compiler(void); +extern void _mesa_destroy_shader_compiler_caches(void); + +#ifdef __cplusplus +} +#endif + + +#endif /* GLSL_PARSER_EXTRAS_H */ diff --git a/src/compiler/glsl/glsl_symbol_table.cpp b/src/compiler/glsl/glsl_symbol_table.cpp new file mode 100644 index 00000000000..6c682acf560 --- /dev/null +++ b/src/compiler/glsl/glsl_symbol_table.cpp @@ -0,0 +1,280 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "glsl_symbol_table.h" +#include "ast.h" + +class symbol_table_entry { +public: + DECLARE_RALLOC_CXX_OPERATORS(symbol_table_entry); + + bool add_interface(const glsl_type *i, enum ir_variable_mode mode) + { + const glsl_type **dest; + + switch (mode) { + case ir_var_uniform: + dest = &ibu; + break; + case ir_var_shader_storage: + dest = &iss; + break; + case ir_var_shader_in: + dest = &ibi; + break; + case ir_var_shader_out: + dest = &ibo; + break; + default: + assert(!"Unsupported interface variable mode!"); + return false; + } + + if (*dest != NULL) { + return false; + } else { + *dest = i; + return true; + } + } + + const glsl_type *get_interface(enum ir_variable_mode mode) + { + switch (mode) { + case ir_var_uniform: + return ibu; + case ir_var_shader_storage: + return iss; + case ir_var_shader_in: + return ibi; + case ir_var_shader_out: + return ibo; + default: + assert(!"Unsupported interface variable mode!"); + return NULL; + } + } + + symbol_table_entry(ir_variable *v) : + v(v), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) {} + symbol_table_entry(ir_function *f) : + v(0), f(f), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) {} + symbol_table_entry(const glsl_type *t) : + v(0), f(0), t(t), ibu(0), iss(0), ibi(0), ibo(0), a(0) {} + symbol_table_entry(const glsl_type *t, enum ir_variable_mode mode) : + v(0), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) + { + assert(t->is_interface()); + add_interface(t, mode); + } + symbol_table_entry(const class ast_type_specifier *a): + v(0), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(a) {} + + ir_variable *v; + ir_function *f; + const glsl_type *t; + const glsl_type *ibu; + const glsl_type *iss; + const glsl_type *ibi; + const glsl_type *ibo; + const class ast_type_specifier *a; +}; + +glsl_symbol_table::glsl_symbol_table() +{ + this->separate_function_namespace = false; + this->table = _mesa_symbol_table_ctor(); + this->mem_ctx = ralloc_context(NULL); +} + +glsl_symbol_table::~glsl_symbol_table() +{ + _mesa_symbol_table_dtor(table); + ralloc_free(mem_ctx); +} + +void glsl_symbol_table::push_scope() +{ + _mesa_symbol_table_push_scope(table); +} + +void glsl_symbol_table::pop_scope() +{ + _mesa_symbol_table_pop_scope(table); +} + +bool glsl_symbol_table::name_declared_this_scope(const char *name) +{ + return _mesa_symbol_table_symbol_scope(table, -1, name) == 0; +} + +bool glsl_symbol_table::add_variable(ir_variable *v) +{ + assert(v->data.mode != ir_var_temporary); + + if (this->separate_function_namespace) { + /* In 1.10, functions and variables have separate namespaces. */ + symbol_table_entry *existing = get_entry(v->name); + if (name_declared_this_scope(v->name)) { + /* If there's already an existing function (not a constructor!) in + * the current scope, just update the existing entry to include 'v'. + */ + if (existing->v == NULL && existing->t == NULL) { + existing->v = v; + return true; + } + } else { + /* If not declared at this scope, add a new entry. But if an existing + * entry includes a function, propagate that to this block - otherwise + * the new variable declaration would shadow the function. 
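+       *
+       * For example (illustrative GLSL 1.10): after "float f(float x);",
+       * an inner-scope "float f;" gets a fresh entry that still carries
+       * the function, so a call such as f(1.0) keeps resolving.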
+ */ + symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(v); + if (existing != NULL) + entry->f = existing->f; + int added = _mesa_symbol_table_add_symbol(table, -1, v->name, entry); + assert(added == 0); + (void)added; + return true; + } + return false; + } + + /* 1.20+ rules: */ + symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(v); + return _mesa_symbol_table_add_symbol(table, -1, v->name, entry) == 0; +} + +bool glsl_symbol_table::add_type(const char *name, const glsl_type *t) +{ + symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(t); + return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0; +} + +bool glsl_symbol_table::add_interface(const char *name, const glsl_type *i, + enum ir_variable_mode mode) +{ + assert(i->is_interface()); + symbol_table_entry *entry = get_entry(name); + if (entry == NULL) { + symbol_table_entry *entry = + new(mem_ctx) symbol_table_entry(i, mode); + bool add_interface_symbol_result = + _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0; + assert(add_interface_symbol_result); + return add_interface_symbol_result; + } else { + return entry->add_interface(i, mode); + } +} + +bool glsl_symbol_table::add_function(ir_function *f) +{ + if (this->separate_function_namespace && name_declared_this_scope(f->name)) { + /* In 1.10, functions and variables have separate namespaces. */ + symbol_table_entry *existing = get_entry(f->name); + if ((existing->f == NULL) && (existing->t == NULL)) { + existing->f = f; + return true; + } + } + symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f); + return _mesa_symbol_table_add_symbol(table, -1, f->name, entry) == 0; +} + +bool glsl_symbol_table::add_default_precision_qualifier(const char *type_name, + int precision) +{ + char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name); + + ast_type_specifier *default_specifier = new(mem_ctx) ast_type_specifier(name); + default_specifier->default_precision = precision; + + symbol_table_entry *entry = + new(mem_ctx) symbol_table_entry(default_specifier); + + return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0; +} + +void glsl_symbol_table::add_global_function(ir_function *f) +{ + symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f); + int added = _mesa_symbol_table_add_global_symbol(table, -1, f->name, entry); + assert(added == 0); + (void)added; +} + +ir_variable *glsl_symbol_table::get_variable(const char *name) +{ + symbol_table_entry *entry = get_entry(name); + return entry != NULL ? entry->v : NULL; +} + +const glsl_type *glsl_symbol_table::get_type(const char *name) +{ + symbol_table_entry *entry = get_entry(name); + return entry != NULL ? entry->t : NULL; +} + +const glsl_type *glsl_symbol_table::get_interface(const char *name, + enum ir_variable_mode mode) +{ + symbol_table_entry *entry = get_entry(name); + return entry != NULL ? entry->get_interface(mode) : NULL; +} + +ir_function *glsl_symbol_table::get_function(const char *name) +{ + symbol_table_entry *entry = get_entry(name); + return entry != NULL ? 
entry->f : NULL;
+}
+
+int glsl_symbol_table::get_default_precision_qualifier(const char *type_name)
+{
+   char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name);
+   symbol_table_entry *entry = get_entry(name);
+   if (!entry)
+      return ast_precision_none;
+   return entry->a->default_precision;
+}
+
+symbol_table_entry *glsl_symbol_table::get_entry(const char *name)
+{
+   return (symbol_table_entry *)
+      _mesa_symbol_table_find_symbol(table, -1, name);
+}
+
+void
+glsl_symbol_table::disable_variable(const char *name)
+{
+   /* Ideally we would remove the variable's entry from the symbol table, but
+    * that would be difficult. Fortunately, since this is only used for
+    * built-in variables, it won't be possible for the shader to re-introduce
+    * the variable later, so all we really need to do is to make sure that
+    * further attempts to access it using get_variable() will return NULL.
+    */
+   symbol_table_entry *entry = get_entry(name);
+   if (entry != NULL) {
+      entry->v = NULL;
+   }
+}
diff --git a/src/compiler/glsl/glsl_symbol_table.h b/src/compiler/glsl/glsl_symbol_table.h
new file mode 100644
index 00000000000..5d654e5e6a7
--- /dev/null
+++ b/src/compiler/glsl/glsl_symbol_table.h
@@ -0,0 +1,110 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef GLSL_SYMBOL_TABLE
+#define GLSL_SYMBOL_TABLE
+
+#include <new>
+
+extern "C" {
+#include "program/symbol_table.h"
+}
+#include "ir.h"
+
+class symbol_table_entry;
+struct glsl_type;
+
+/**
+ * Facade class for _mesa_symbol_table
+ *
+ * Wraps the existing \c _mesa_symbol_table data structure to enforce
+ * type safety and some symbol table invariants.
+ */
+struct glsl_symbol_table {
+   DECLARE_RALLOC_CXX_OPERATORS(glsl_symbol_table)
+
+   glsl_symbol_table();
+   ~glsl_symbol_table();
+
+   /* In 1.10, functions and variables have separate namespaces. */
+   bool separate_function_namespace;
+
+   void push_scope();
+   void pop_scope();
+
+   /**
+    * Determine whether a name was declared at the current scope
+    */
+   bool name_declared_this_scope(const char *name);
+
+   /**
+    * \name Methods to add symbols to the table
+    *
+    * There is some temptation to rename all these functions to \c add_symbol
+    * or similar. However, this breaks symmetry with the getter functions and
+    * reduces the clarity of the intention of code that uses these methods.
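+    *
+    * A usage sketch (illustrative): after add_type("S", t), a later
+    * get_type("S") returns \c t; each adder below pairs with a getter in
+    * the same way.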
+ */ + /*@{*/ + bool add_variable(ir_variable *v); + bool add_type(const char *name, const glsl_type *t); + bool add_function(ir_function *f); + bool add_interface(const char *name, const glsl_type *i, + enum ir_variable_mode mode); + bool add_default_precision_qualifier(const char *type_name, int precision); + /*@}*/ + + /** + * Add an function at global scope without checking for scoping conflicts. + */ + void add_global_function(ir_function *f); + + /** + * \name Methods to get symbols from the table + */ + /*@{*/ + ir_variable *get_variable(const char *name); + const glsl_type *get_type(const char *name); + ir_function *get_function(const char *name); + const glsl_type *get_interface(const char *name, + enum ir_variable_mode mode); + int get_default_precision_qualifier(const char *type_name); + /*@}*/ + + /** + * Disable a previously-added variable so that it no longer appears to be + * in the symbol table. This is necessary when gl_PerVertex is redeclared, + * to ensure that previously-available built-in variables are no longer + * available. + */ + void disable_variable(const char *name); + +private: + symbol_table_entry *get_entry(const char *name); + + struct _mesa_symbol_table *table; + void *mem_ctx; +}; + +#endif /* GLSL_SYMBOL_TABLE */ diff --git a/src/compiler/glsl/hir_field_selection.cpp b/src/compiler/glsl/hir_field_selection.cpp new file mode 100644 index 00000000000..eab08ad8235 --- /dev/null +++ b/src/compiler/glsl/hir_field_selection.cpp @@ -0,0 +1,81 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" +#include "glsl_parser_extras.h" +#include "ast.h" +#include "compiler/glsl_types.h" + +ir_rvalue * +_mesa_ast_field_selection_to_hir(const ast_expression *expr, + exec_list *instructions, + struct _mesa_glsl_parse_state *state) +{ + void *ctx = state; + ir_rvalue *result = NULL; + ir_rvalue *op; + + op = expr->subexpressions[0]->hir(instructions, state); + + /* There are two kinds of field selection. There is the selection of a + * specific field from a structure, and there is the selection of a + * swizzle / mask from a vector. Which is which is determined entirely + * by the base type of the thing to which the field selection operator is + * being applied. 
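+    *
+    * For example (illustrative GLSL): given "struct S { float f; } s;",
+    * the expression s.f is a structure field selection, while for
+    * "vec4 v;" the expression v.xyz is a swizzle.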
+ */ + YYLTYPE loc = expr->get_location(); + if (op->type->is_error()) { + /* silently propagate the error */ + } else if (op->type->base_type == GLSL_TYPE_STRUCT + || op->type->base_type == GLSL_TYPE_INTERFACE) { + result = new(ctx) ir_dereference_record(op, + expr->primary_expression.identifier); + + if (result->type->is_error()) { + _mesa_glsl_error(& loc, state, "cannot access field `%s' of " + "structure", + expr->primary_expression.identifier); + } + } else if (op->type->is_vector() || + (state->has_420pack() && op->type->is_scalar())) { + ir_swizzle *swiz = ir_swizzle::create(op, + expr->primary_expression.identifier, + op->type->vector_elements); + if (swiz != NULL) { + result = swiz; + } else { + /* FINISHME: Logging of error messages should be moved into + * FINISHME: ir_swizzle::create. This allows the generation of more + * FINISHME: specific error messages. + */ + _mesa_glsl_error(& loc, state, "invalid swizzle / mask `%s'", + expr->primary_expression.identifier); + } + } else { + _mesa_glsl_error(& loc, state, "cannot access field `%s' of " + "non-structure / non-vector", + expr->primary_expression.identifier); + } + + return result ? result : ir_rvalue::error_value(ctx); +} diff --git a/src/compiler/glsl/ir.cpp b/src/compiler/glsl/ir.cpp new file mode 100644 index 00000000000..de9d314bae4 --- /dev/null +++ b/src/compiler/glsl/ir.cpp @@ -0,0 +1,2039 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+#include <string.h>
+#include "main/core.h" /* for MAX2 */
+#include "ir.h"
+#include "compiler/glsl_types.h"
+
+ir_rvalue::ir_rvalue(enum ir_node_type t)
+   : ir_instruction(t)
+{
+   this->type = glsl_type::error_type;
+}
+
+bool ir_rvalue::is_zero() const
+{
+   return false;
+}
+
+bool ir_rvalue::is_one() const
+{
+   return false;
+}
+
+bool ir_rvalue::is_negative_one() const
+{
+   return false;
+}
+
+/**
+ * Modify the swizzle mask to move one component to another
+ *
+ * \param m     IR swizzle to be modified
+ * \param from  Component in the RHS that is to be swizzled
+ * \param to    Desired swizzle location of \c from
+ */
+static void
+update_rhs_swizzle(ir_swizzle_mask &m, unsigned from, unsigned to)
+{
+   switch (to) {
+   case 0: m.x = from; break;
+   case 1: m.y = from; break;
+   case 2: m.z = from; break;
+   case 3: m.w = from; break;
+   default: assert(!"Should not get here.");
+   }
+}
+
+void
+ir_assignment::set_lhs(ir_rvalue *lhs)
+{
+   void *mem_ctx = this;
+   bool swizzled = false;
+
+   while (lhs != NULL) {
+      ir_swizzle *swiz = lhs->as_swizzle();
+
+      if (swiz == NULL)
+         break;
+
+      unsigned write_mask = 0;
+      ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 };
+
+      for (unsigned i = 0; i < swiz->mask.num_components; i++) {
+         unsigned c = 0;
+
+         switch (i) {
+         case 0: c = swiz->mask.x; break;
+         case 1: c = swiz->mask.y; break;
+         case 2: c = swiz->mask.z; break;
+         case 3: c = swiz->mask.w; break;
+         default: assert(!"Should not get here.");
+         }
+
+         write_mask |= (((this->write_mask >> i) & 1) << c);
+         update_rhs_swizzle(rhs_swiz, i, c);
+         rhs_swiz.num_components = swiz->val->type->vector_elements;
+      }
+
+      this->write_mask = write_mask;
+      lhs = swiz->val;
+
+      this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz);
+      swizzled = true;
+   }
+
+   if (swizzled) {
+      /* Now, RHS channels line up with the LHS writemask. Collapse it
+       * to just the channels that will be written.
+       */
+      ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 };
+      int rhs_chan = 0;
+      for (int i = 0; i < 4; i++) {
+         if (write_mask & (1 << i))
+            update_rhs_swizzle(rhs_swiz, i, rhs_chan++);
+      }
+      rhs_swiz.num_components = rhs_chan;
+      this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz);
+   }
+
+   assert((lhs == NULL) || lhs->as_dereference());
+
+   this->lhs = (ir_dereference *) lhs;
+}
+
+ir_variable *
+ir_assignment::whole_variable_written()
+{
+   ir_variable *v = this->lhs->whole_variable_referenced();
+
+   if (v == NULL)
+      return NULL;
+
+   if (v->type->is_scalar())
+      return v;
+
+   if (v->type->is_vector()) {
+      const unsigned mask = (1U << v->type->vector_elements) - 1;
+
+      if (mask != this->write_mask)
+         return NULL;
+   }
+
+   /* Either all the vector components are assigned or the variable is some
+    * composite type (and the whole thing is assigned).
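+    *
+    * For example (illustrative): a full write to a vec4 has write_mask
+    * 0xf and the variable is returned, while "v.xy = ..." has write_mask
+    * 0x3 and NULL is returned instead.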
+ */ + return v; +} + +ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, + ir_rvalue *condition, unsigned write_mask) + : ir_instruction(ir_type_assignment) +{ + this->condition = condition; + this->rhs = rhs; + this->lhs = lhs; + this->write_mask = write_mask; + + if (lhs->type->is_scalar() || lhs->type->is_vector()) { + int lhs_components = 0; + for (int i = 0; i < 4; i++) { + if (write_mask & (1 << i)) + lhs_components++; + } + + assert(lhs_components == this->rhs->type->vector_elements); + } +} + +ir_assignment::ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, + ir_rvalue *condition) + : ir_instruction(ir_type_assignment) +{ + this->condition = condition; + this->rhs = rhs; + + /* If the RHS is a vector type, assume that all components of the vector + * type are being written to the LHS. The write mask comes from the RHS + * because we can have a case where the LHS is a vec4 and the RHS is a + * vec3. In that case, the assignment is: + * + * (assign (...) (xyz) (var_ref lhs) (var_ref rhs)) + */ + if (rhs->type->is_vector()) + this->write_mask = (1U << rhs->type->vector_elements) - 1; + else if (rhs->type->is_scalar()) + this->write_mask = 1; + else + this->write_mask = 0; + + this->set_lhs(lhs); +} + +ir_expression::ir_expression(int op, const struct glsl_type *type, + ir_rvalue *op0, ir_rvalue *op1, + ir_rvalue *op2, ir_rvalue *op3) + : ir_rvalue(ir_type_expression) +{ + this->type = type; + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = op1; + this->operands[2] = op2; + this->operands[3] = op3; +#ifndef NDEBUG + int num_operands = get_num_operands(this->operation); + for (int i = num_operands; i < 4; i++) { + assert(this->operands[i] == NULL); + } +#endif +} + +ir_expression::ir_expression(int op, ir_rvalue *op0) + : ir_rvalue(ir_type_expression) +{ + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = NULL; + this->operands[2] = NULL; + this->operands[3] = NULL; + + assert(op <= ir_last_unop); + + switch (this->operation) { + case ir_unop_bit_not: + case ir_unop_logic_not: + case ir_unop_neg: + case ir_unop_abs: + case ir_unop_sign: + case ir_unop_rcp: + case ir_unop_rsq: + case ir_unop_sqrt: + case ir_unop_exp: + case ir_unop_log: + case ir_unop_exp2: + case ir_unop_log2: + case ir_unop_trunc: + case ir_unop_ceil: + case ir_unop_floor: + case ir_unop_fract: + case ir_unop_round_even: + case ir_unop_sin: + case ir_unop_cos: + case ir_unop_dFdx: + case ir_unop_dFdx_coarse: + case ir_unop_dFdx_fine: + case ir_unop_dFdy: + case ir_unop_dFdy_coarse: + case ir_unop_dFdy_fine: + case ir_unop_bitfield_reverse: + case ir_unop_interpolate_at_centroid: + case ir_unop_saturate: + this->type = op0->type; + break; + + case ir_unop_f2i: + case ir_unop_b2i: + case ir_unop_u2i: + case ir_unop_d2i: + case ir_unop_bitcast_f2i: + case ir_unop_bit_count: + case ir_unop_find_msb: + case ir_unop_find_lsb: + case ir_unop_subroutine_to_int: + this->type = glsl_type::get_instance(GLSL_TYPE_INT, + op0->type->vector_elements, 1); + break; + + case ir_unop_b2f: + case ir_unop_i2f: + case ir_unop_u2f: + case ir_unop_d2f: + case ir_unop_bitcast_i2f: + case ir_unop_bitcast_u2f: + this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + op0->type->vector_elements, 1); + break; + + case ir_unop_f2b: + case ir_unop_i2b: + case ir_unop_d2b: + this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, + op0->type->vector_elements, 1); + break; + + case ir_unop_f2d: + case ir_unop_i2d: + case ir_unop_u2d: + this->type = 
glsl_type::get_instance(GLSL_TYPE_DOUBLE, + op0->type->vector_elements, 1); + break; + + case ir_unop_i2u: + case ir_unop_f2u: + case ir_unop_d2u: + case ir_unop_bitcast_f2u: + this->type = glsl_type::get_instance(GLSL_TYPE_UINT, + op0->type->vector_elements, 1); + break; + + case ir_unop_noise: + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: + this->type = glsl_type::float_type; + break; + + case ir_unop_unpack_double_2x32: + this->type = glsl_type::uvec2_type; + break; + + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_unorm_4x8: + case ir_unop_pack_half_2x16: + this->type = glsl_type::uint_type; + break; + + case ir_unop_pack_double_2x32: + this->type = glsl_type::double_type; + break; + + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: + this->type = glsl_type::vec2_type; + break; + + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + this->type = glsl_type::vec4_type; + break; + + case ir_unop_frexp_sig: + this->type = op0->type; + break; + case ir_unop_frexp_exp: + this->type = glsl_type::get_instance(GLSL_TYPE_INT, + op0->type->vector_elements, 1); + break; + + case ir_unop_get_buffer_size: + case ir_unop_ssbo_unsized_array_length: + this->type = glsl_type::int_type; + break; + + default: + assert(!"not reached: missing automatic type setup for ir_expression"); + this->type = op0->type; + break; + } +} + +ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) + : ir_rvalue(ir_type_expression) +{ + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = op1; + this->operands[2] = NULL; + this->operands[3] = NULL; + + assert(op > ir_last_unop); + + switch (this->operation) { + case ir_binop_all_equal: + case ir_binop_any_nequal: + this->type = glsl_type::bool_type; + break; + + case ir_binop_add: + case ir_binop_sub: + case ir_binop_min: + case ir_binop_max: + case ir_binop_pow: + case ir_binop_mul: + case ir_binop_div: + case ir_binop_mod: + if (op0->type->is_scalar()) { + this->type = op1->type; + } else if (op1->type->is_scalar()) { + this->type = op0->type; + } else { + if (this->operation == ir_binop_mul) { + this->type = glsl_type::get_mul_type(op0->type, op1->type); + } else { + assert(op0->type == op1->type); + this->type = op0->type; + } + } + break; + + case ir_binop_logic_and: + case ir_binop_logic_xor: + case ir_binop_logic_or: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + assert(!op0->type->is_matrix()); + assert(!op1->type->is_matrix()); + if (op0->type->is_scalar()) { + this->type = op1->type; + } else if (op1->type->is_scalar()) { + this->type = op0->type; + } else { + assert(op0->type->vector_elements == op1->type->vector_elements); + this->type = op0->type; + } + break; + + case ir_binop_equal: + case ir_binop_nequal: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_less: + case ir_binop_greater: + assert(op0->type == op1->type); + this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, + op0->type->vector_elements, 1); + break; + + case ir_binop_dot: + this->type = op0->type->get_base_type(); + break; + + case ir_binop_pack_half_2x16_split: + this->type = glsl_type::uint_type; + break; + + case ir_binop_imul_high: + case ir_binop_carry: + case ir_binop_borrow: + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_ldexp: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: + 
this->type = op0->type; + break; + + case ir_binop_vector_extract: + this->type = op0->type->get_scalar_type(); + break; + + default: + assert(!"not reached: missing automatic type setup for ir_expression"); + this->type = glsl_type::float_type; + } +} + +ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, + ir_rvalue *op2) + : ir_rvalue(ir_type_expression) +{ + this->operation = ir_expression_operation(op); + this->operands[0] = op0; + this->operands[1] = op1; + this->operands[2] = op2; + this->operands[3] = NULL; + + assert(op > ir_last_binop && op <= ir_last_triop); + + switch (this->operation) { + case ir_triop_fma: + case ir_triop_lrp: + case ir_triop_bitfield_extract: + case ir_triop_vector_insert: + this->type = op0->type; + break; + + case ir_triop_csel: + this->type = op1->type; + break; + + default: + assert(!"not reached: missing automatic type setup for ir_expression"); + this->type = glsl_type::float_type; + } +} + +unsigned int +ir_expression::get_num_operands(ir_expression_operation op) +{ + assert(op <= ir_last_opcode); + + if (op <= ir_last_unop) + return 1; + + if (op <= ir_last_binop) + return 2; + + if (op <= ir_last_triop) + return 3; + + if (op <= ir_last_quadop) + return 4; + + assert(false); + return 0; +} + +static const char *const operator_strs[] = { + "~", + "!", + "neg", + "abs", + "sign", + "rcp", + "rsq", + "sqrt", + "exp", + "log", + "exp2", + "log2", + "f2i", + "f2u", + "i2f", + "f2b", + "b2f", + "i2b", + "b2i", + "u2f", + "i2u", + "u2i", + "d2f", + "f2d", + "d2i", + "i2d", + "d2u", + "u2d", + "d2b", + "bitcast_i2f", + "bitcast_f2i", + "bitcast_u2f", + "bitcast_f2u", + "trunc", + "ceil", + "floor", + "fract", + "round_even", + "sin", + "cos", + "dFdx", + "dFdxCoarse", + "dFdxFine", + "dFdy", + "dFdyCoarse", + "dFdyFine", + "packSnorm2x16", + "packSnorm4x8", + "packUnorm2x16", + "packUnorm4x8", + "packHalf2x16", + "unpackSnorm2x16", + "unpackSnorm4x8", + "unpackUnorm2x16", + "unpackUnorm4x8", + "unpackHalf2x16", + "unpackHalf2x16_split_x", + "unpackHalf2x16_split_y", + "bitfield_reverse", + "bit_count", + "find_msb", + "find_lsb", + "sat", + "packDouble2x32", + "unpackDouble2x32", + "frexp_sig", + "frexp_exp", + "noise", + "subroutine_to_int", + "interpolate_at_centroid", + "get_buffer_size", + "ssbo_unsized_array_length", + "+", + "-", + "*", + "imul_high", + "/", + "carry", + "borrow", + "%", + "<", + ">", + "<=", + ">=", + "==", + "!=", + "all_equal", + "any_nequal", + "<<", + ">>", + "&", + "^", + "|", + "&&", + "^^", + "||", + "dot", + "min", + "max", + "pow", + "packHalf2x16_split", + "ubo_load", + "ldexp", + "vector_extract", + "interpolate_at_offset", + "interpolate_at_sample", + "fma", + "lrp", + "csel", + "bitfield_extract", + "vector_insert", + "bitfield_insert", + "vector", +}; + +const char *ir_expression::operator_string(ir_expression_operation op) +{ + assert((unsigned int) op < ARRAY_SIZE(operator_strs)); + assert(ARRAY_SIZE(operator_strs) == (ir_quadop_vector + 1)); + return operator_strs[op]; +} + +const char *ir_expression::operator_string() +{ + return operator_string(this->operation); +} + +const char* +depth_layout_string(ir_depth_layout layout) +{ + switch(layout) { + case ir_depth_layout_none: return ""; + case ir_depth_layout_any: return "depth_any"; + case ir_depth_layout_greater: return "depth_greater"; + case ir_depth_layout_less: return "depth_less"; + case ir_depth_layout_unchanged: return "depth_unchanged"; + + default: + assert(0); + return ""; + } +} + +ir_expression_operation 
+ir_expression::get_operator(const char *str) +{ + const int operator_count = sizeof(operator_strs) / sizeof(operator_strs[0]); + for (int op = 0; op < operator_count; op++) { + if (strcmp(str, operator_strs[op]) == 0) + return (ir_expression_operation) op; + } + return (ir_expression_operation) -1; +} + +ir_variable * +ir_expression::variable_referenced() const +{ + switch (operation) { + case ir_binop_vector_extract: + case ir_triop_vector_insert: + /* We get these for things like a[0] where a is a vector type. In these + * cases we want variable_referenced() to return the actual vector + * variable this is wrapping. + */ + return operands[0]->variable_referenced(); + default: + return ir_rvalue::variable_referenced(); + } +} + +ir_constant::ir_constant() + : ir_rvalue(ir_type_constant) +{ +} + +ir_constant::ir_constant(const struct glsl_type *type, + const ir_constant_data *data) + : ir_rvalue(ir_type_constant) +{ + assert((type->base_type >= GLSL_TYPE_UINT) + && (type->base_type <= GLSL_TYPE_BOOL)); + + this->type = type; + memcpy(& this->value, data, sizeof(this->value)); +} + +ir_constant::ir_constant(float f, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.f[i] = f; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.f[i] = 0; + } +} + +ir_constant::ir_constant(double d, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.d[i] = d; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.d[i] = 0.0; + } +} + +ir_constant::ir_constant(unsigned int u, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_UINT, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.u[i] = u; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.u[i] = 0; + } +} + +ir_constant::ir_constant(int integer, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_INT, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.i[i] = integer; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.i[i] = 0; + } +} + +ir_constant::ir_constant(bool b, unsigned vector_elements) + : ir_rvalue(ir_type_constant) +{ + assert(vector_elements <= 4); + this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, vector_elements, 1); + for (unsigned i = 0; i < vector_elements; i++) { + this->value.b[i] = b; + } + for (unsigned i = vector_elements; i < 16; i++) { + this->value.b[i] = false; + } +} + +ir_constant::ir_constant(const ir_constant *c, unsigned i) + : ir_rvalue(ir_type_constant) +{ + this->type = c->type->get_base_type(); + + switch (this->type->base_type) { + case GLSL_TYPE_UINT: this->value.u[0] = c->value.u[i]; break; + case GLSL_TYPE_INT: this->value.i[0] = c->value.i[i]; break; + case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break; + case GLSL_TYPE_BOOL: this->value.b[0] = c->value.b[i]; break; + case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break; + default: assert(!"Should not get here."); break; + } +} + +ir_constant::ir_constant(const 
struct glsl_type *type, exec_list *value_list) + : ir_rvalue(ir_type_constant) +{ + this->type = type; + + assert(type->is_scalar() || type->is_vector() || type->is_matrix() + || type->is_record() || type->is_array()); + + if (type->is_array()) { + this->array_elements = ralloc_array(this, ir_constant *, type->length); + unsigned i = 0; + foreach_in_list(ir_constant, value, value_list) { + assert(value->as_constant() != NULL); + + this->array_elements[i++] = value; + } + return; + } + + /* If the constant is a record, the types of each of the entries in + * value_list must be a 1-for-1 match with the structure components. Each + * entry must also be a constant. Just move the nodes from the value_list + * to the list in the ir_constant. + */ + /* FINISHME: Should there be some type checking and / or assertions here? */ + /* FINISHME: Should the new constant take ownership of the nodes from + * FINISHME: value_list, or should it make copies? + */ + if (type->is_record()) { + value_list->move_nodes_to(& this->components); + return; + } + + for (unsigned i = 0; i < 16; i++) { + this->value.u[i] = 0; + } + + ir_constant *value = (ir_constant *) (value_list->head); + + /* Constructors with exactly one scalar argument are special for vectors + * and matrices. For vectors, the scalar value is replicated to fill all + * the components. For matrices, the scalar fills the components of the + * diagonal while the rest is filled with 0. + */ + if (value->type->is_scalar() && value->next->is_tail_sentinel()) { + if (type->is_matrix()) { + /* Matrix - fill diagonal (rest is already set to 0) */ + assert(type->base_type == GLSL_TYPE_FLOAT || + type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned i = 0; i < type->matrix_columns; i++) { + if (type->base_type == GLSL_TYPE_FLOAT) + this->value.f[i * type->vector_elements + i] = + value->value.f[0]; + else + this->value.d[i * type->vector_elements + i] = + value->value.d[0]; + } + } else { + /* Vector or scalar - fill all components */ + switch (type->base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + for (unsigned i = 0; i < type->components(); i++) + this->value.u[i] = value->value.u[0]; + break; + case GLSL_TYPE_FLOAT: + for (unsigned i = 0; i < type->components(); i++) + this->value.f[i] = value->value.f[0]; + break; + case GLSL_TYPE_DOUBLE: + for (unsigned i = 0; i < type->components(); i++) + this->value.d[i] = value->value.d[0]; + break; + case GLSL_TYPE_BOOL: + for (unsigned i = 0; i < type->components(); i++) + this->value.b[i] = value->value.b[0]; + break; + default: + assert(!"Should not get here."); + break; + } + } + return; + } + + if (type->is_matrix() && value->type->is_matrix()) { + assert(value->next->is_tail_sentinel()); + + /* From section 5.4.2 of the GLSL 1.20 spec: + * "If a matrix is constructed from a matrix, then each component + * (column i, row j) in the result that has a corresponding component + * (column i, row j) in the argument will be initialized from there." + */ + unsigned cols = MIN2(type->matrix_columns, value->type->matrix_columns); + unsigned rows = MIN2(type->vector_elements, value->type->vector_elements); + for (unsigned i = 0; i < cols; i++) { + for (unsigned j = 0; j < rows; j++) { + const unsigned src = i * value->type->vector_elements + j; + const unsigned dst = i * type->vector_elements + j; + this->value.f[dst] = value->value.f[src]; + } + } + + /* "All other components will be initialized to the identity matrix." 
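+       * For example, constructing a mat3 from a mat2 copies the 2x2 corner
+       * from the argument and leaves the remaining diagonal element at 1.0,
+       * with zeros elsewhere in the new row and column.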
*/ + for (unsigned i = cols; i < type->matrix_columns; i++) + this->value.f[i * type->vector_elements + i] = 1.0; + + return; + } + + /* Use each component from each entry in the value_list to initialize one + * component of the constant being constructed. + */ + for (unsigned i = 0; i < type->components(); /* empty */) { + assert(value->as_constant() != NULL); + assert(!value->is_tail_sentinel()); + + for (unsigned j = 0; j < value->type->components(); j++) { + switch (type->base_type) { + case GLSL_TYPE_UINT: + this->value.u[i] = value->get_uint_component(j); + break; + case GLSL_TYPE_INT: + this->value.i[i] = value->get_int_component(j); + break; + case GLSL_TYPE_FLOAT: + this->value.f[i] = value->get_float_component(j); + break; + case GLSL_TYPE_BOOL: + this->value.b[i] = value->get_bool_component(j); + break; + case GLSL_TYPE_DOUBLE: + this->value.d[i] = value->get_double_component(j); + break; + default: + /* FINISHME: What to do? Exceptions are not the answer. + */ + break; + } + + i++; + if (i >= type->components()) + break; + } + + value = (ir_constant *) value->next; + } +} + +ir_constant * +ir_constant::zero(void *mem_ctx, const glsl_type *type) +{ + assert(type->is_scalar() || type->is_vector() || type->is_matrix() + || type->is_record() || type->is_array()); + + ir_constant *c = new(mem_ctx) ir_constant; + c->type = type; + memset(&c->value, 0, sizeof(c->value)); + + if (type->is_array()) { + c->array_elements = ralloc_array(c, ir_constant *, type->length); + + for (unsigned i = 0; i < type->length; i++) + c->array_elements[i] = ir_constant::zero(c, type->fields.array); + } + + if (type->is_record()) { + for (unsigned i = 0; i < type->length; i++) { + ir_constant *comp = ir_constant::zero(mem_ctx, type->fields.structure[i].type); + c->components.push_tail(comp); + } + } + + return c; +} + +bool +ir_constant::get_bool_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return this->value.u[i] != 0; + case GLSL_TYPE_INT: return this->value.i[i] != 0; + case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0; + case GLSL_TYPE_BOOL: return this->value.b[i]; + case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return false; +} + +float +ir_constant::get_float_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return (float) this->value.u[i]; + case GLSL_TYPE_INT: return (float) this->value.i[i]; + case GLSL_TYPE_FLOAT: return this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0f : 0.0f; + case GLSL_TYPE_DOUBLE: return (float) this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return 0.0; +} + +double +ir_constant::get_double_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return (double) this->value.u[i]; + case GLSL_TYPE_INT: return (double) this->value.i[i]; + case GLSL_TYPE_FLOAT: return (double) this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0 : 0.0; + case GLSL_TYPE_DOUBLE: return this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. 
+ */ + return 0.0; +} + +int +ir_constant::get_int_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return this->value.u[i]; + case GLSL_TYPE_INT: return this->value.i[i]; + case GLSL_TYPE_FLOAT: return (int) this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0; + case GLSL_TYPE_DOUBLE: return (int) this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return 0; +} + +unsigned +ir_constant::get_uint_component(unsigned i) const +{ + switch (this->type->base_type) { + case GLSL_TYPE_UINT: return this->value.u[i]; + case GLSL_TYPE_INT: return this->value.i[i]; + case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i]; + case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0; + case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i]; + default: assert(!"Should not get here."); break; + } + + /* Must return something to make the compiler happy. This is clearly an + * error case. + */ + return 0; +} + +ir_constant * +ir_constant::get_array_element(unsigned i) const +{ + assert(this->type->is_array()); + + /* From page 35 (page 41 of the PDF) of the GLSL 1.20 spec: + * + * "Behavior is undefined if a shader subscripts an array with an index + * less than 0 or greater than or equal to the size the array was + * declared with." + * + * Most out-of-bounds accesses are removed before things could get this far. + * There are cases where non-constant array index values can get constant + * folded. + */ + if (int(i) < 0) + i = 0; + else if (i >= this->type->length) + i = this->type->length - 1; + + return array_elements[i]; +} + +ir_constant * +ir_constant::get_record_field(const char *name) +{ + int idx = this->type->field_index(name); + + if (idx < 0) + return NULL; + + if (this->components.is_empty()) + return NULL; + + exec_node *node = this->components.head; + for (int i = 0; i < idx; i++) { + node = node->next; + + /* If the end of the list is encountered before the element matching the + * requested field is found, return NULL. 
+ */
+      if (node->is_tail_sentinel())
+         return NULL;
+   }
+
+   return (ir_constant *) node;
+}
+
+void
+ir_constant::copy_offset(ir_constant *src, int offset)
+{
+   switch (this->type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_BOOL: {
+      unsigned int size = src->type->components();
+      assert (size <= this->type->components() - offset);
+      for (unsigned int i=0; i<size; i++) {
+         switch (this->type->base_type) {
+         case GLSL_TYPE_UINT:
+            value.u[i+offset] = src->get_uint_component(i);
+            break;
+         case GLSL_TYPE_INT:
+            value.i[i+offset] = src->get_int_component(i);
+            break;
+         case GLSL_TYPE_FLOAT:
+            value.f[i+offset] = src->get_float_component(i);
+            break;
+         case GLSL_TYPE_BOOL:
+            value.b[i+offset] = src->get_bool_component(i);
+            break;
+         case GLSL_TYPE_DOUBLE:
+            value.d[i+offset] = src->get_double_component(i);
+            break;
+         default: // Shut up the compiler
+            break;
+         }
+      }
+      break;
+   }
+
+   case GLSL_TYPE_STRUCT: {
+      assert (src->type == this->type);
+      this->components.make_empty();
+      foreach_in_list(ir_constant, orig, &src->components) {
+         this->components.push_tail(orig->clone(this, NULL));
+      }
+      break;
+   }
+
+   case GLSL_TYPE_ARRAY: {
+      assert (src->type == this->type);
+      for (unsigned i = 0; i < this->type->length; i++) {
+         this->array_elements[i] = src->array_elements[i]->clone(this, NULL);
+      }
+      break;
+   }
+
+   default:
+      assert(!"Should not get here.");
+      break;
+   }
+}
+
+void
+ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask)
+{
+   assert (!type->is_array() && !type->is_record());
+
+   if (!type->is_vector() && !type->is_matrix()) {
+      offset = 0;
+      mask = 1;
+   }
+
+   int id = 0;
+   for (int i=0; i<4; i++) {
+      if (mask & (1 << i)) {
+         switch (this->type->base_type) {
+         case GLSL_TYPE_UINT:
+            value.u[i+offset] = src->get_uint_component(id++);
+            break;
+         case GLSL_TYPE_INT:
+            value.i[i+offset] = src->get_int_component(id++);
+            break;
+         case GLSL_TYPE_FLOAT:
+            value.f[i+offset] = src->get_float_component(id++);
+            break;
+         case GLSL_TYPE_BOOL:
+            value.b[i+offset] = src->get_bool_component(id++);
+            break;
+         case GLSL_TYPE_DOUBLE:
+            value.d[i+offset] = src->get_double_component(id++);
+            break;
+         default:
+            assert(!"Should not get here.");
+            return;
+         }
+      }
+   }
+}
+
+bool
+ir_constant::has_value(const ir_constant *c) const
+{
+   if (this->type != c->type)
+      return false;
+
+   if (this->type->is_array()) {
+      for (unsigned i = 0; i < this->type->length; i++) {
+         if (!this->array_elements[i]->has_value(c->array_elements[i]))
+            return false;
+      }
+      return true;
+   }
+
+   if (this->type->base_type == GLSL_TYPE_STRUCT) {
+      const exec_node *a_node = this->components.head;
+      const exec_node *b_node = c->components.head;
+
+      while (!a_node->is_tail_sentinel()) {
+         assert(!b_node->is_tail_sentinel());
+
+         const ir_constant *const a_field = (ir_constant *) a_node;
+         const ir_constant *const b_field = (ir_constant *) b_node;
+
+         if (!a_field->has_value(b_field))
+            return false;
+
+         a_node = a_node->next;
+         b_node = b_node->next;
+      }
+
+      return true;
+   }
+
+   for (unsigned i = 0; i < this->type->components(); i++) {
+      switch (this->type->base_type) {
+      case GLSL_TYPE_UINT:
+         if (this->value.u[i] != c->value.u[i])
+            return false;
+         break;
+      case GLSL_TYPE_INT:
+         if (this->value.i[i] != c->value.i[i])
+            return false;
+         break;
+      case GLSL_TYPE_FLOAT:
+         if (this->value.f[i] != c->value.f[i])
+            return false;
+         break;
+      case GLSL_TYPE_BOOL:
+         if (this->value.b[i] != c->value.b[i])
+            return false;
+         break;
+      case GLSL_TYPE_DOUBLE:
+         if (this->value.d[i]
!= c->value.d[i]) + return false; + break; + default: + assert(!"Should not get here."); + return false; + } + } + + return true; +} + +bool +ir_constant::is_value(float f, int i) const +{ + if (!this->type->is_scalar() && !this->type->is_vector()) + return false; + + /* Only accept boolean values for 0/1. */ + if (int(bool(i)) != i && this->type->is_boolean()) + return false; + + for (unsigned c = 0; c < this->type->vector_elements; c++) { + switch (this->type->base_type) { + case GLSL_TYPE_FLOAT: + if (this->value.f[c] != f) + return false; + break; + case GLSL_TYPE_INT: + if (this->value.i[c] != i) + return false; + break; + case GLSL_TYPE_UINT: + if (this->value.u[c] != unsigned(i)) + return false; + break; + case GLSL_TYPE_BOOL: + if (this->value.b[c] != bool(i)) + return false; + break; + case GLSL_TYPE_DOUBLE: + if (this->value.d[c] != double(f)) + return false; + break; + default: + /* The only other base types are structures, arrays, and samplers. + * Samplers cannot be constants, and the others should have been + * filtered out above. + */ + assert(!"Should not get here."); + return false; + } + } + + return true; +} + +bool +ir_constant::is_zero() const +{ + return is_value(0.0, 0); +} + +bool +ir_constant::is_one() const +{ + return is_value(1.0, 1); +} + +bool +ir_constant::is_negative_one() const +{ + return is_value(-1.0, -1); +} + +bool +ir_constant::is_uint16_constant() const +{ + if (!type->is_integer()) + return false; + + return value.u[0] < (1 << 16); +} + +ir_loop::ir_loop() + : ir_instruction(ir_type_loop) +{ +} + + +ir_dereference_variable::ir_dereference_variable(ir_variable *var) + : ir_dereference(ir_type_dereference_variable) +{ + assert(var != NULL); + + this->var = var; + this->type = var->type; +} + + +ir_dereference_array::ir_dereference_array(ir_rvalue *value, + ir_rvalue *array_index) + : ir_dereference(ir_type_dereference_array) +{ + this->array_index = array_index; + this->set_array(value); +} + + +ir_dereference_array::ir_dereference_array(ir_variable *var, + ir_rvalue *array_index) + : ir_dereference(ir_type_dereference_array) +{ + void *ctx = ralloc_parent(var); + + this->array_index = array_index; + this->set_array(new(ctx) ir_dereference_variable(var)); +} + + +void +ir_dereference_array::set_array(ir_rvalue *value) +{ + assert(value != NULL); + + this->array = value; + + const glsl_type *const vt = this->array->type; + + if (vt->is_array()) { + type = vt->fields.array; + } else if (vt->is_matrix()) { + type = vt->column_type(); + } else if (vt->is_vector()) { + type = vt->get_base_type(); + } +} + + +ir_dereference_record::ir_dereference_record(ir_rvalue *value, + const char *field) + : ir_dereference(ir_type_dereference_record) +{ + assert(value != NULL); + + this->record = value; + this->field = ralloc_strdup(this, field); + this->type = this->record->type->field_type(field); +} + + +ir_dereference_record::ir_dereference_record(ir_variable *var, + const char *field) + : ir_dereference(ir_type_dereference_record) +{ + void *ctx = ralloc_parent(var); + + this->record = new(ctx) ir_dereference_variable(var); + this->field = ralloc_strdup(this, field); + this->type = this->record->type->field_type(field); +} + +bool +ir_dereference::is_lvalue() const +{ + ir_variable *var = this->variable_referenced(); + + /* Every l-value derference chain eventually ends in a variable. 
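+    * For example, the chain a[i].s.xy ultimately refers to the variable a,
+    * and the chain is only an l-value if a itself is writable.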
+ */ + if ((var == NULL) || var->data.read_only) + return false; + + /* From section 4.1.7 of the GLSL 4.40 spec: + * + * "Opaque variables cannot be treated as l-values; hence cannot + * be used as out or inout function parameters, nor can they be + * assigned into." + */ + if (this->type->contains_opaque()) + return false; + + return true; +} + + +static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" }; + +const char *ir_texture::opcode_string() +{ + assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs)); + return tex_opcode_strs[op]; +} + +ir_texture_opcode +ir_texture::get_opcode(const char *str) +{ + const int count = sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]); + for (int op = 0; op < count; op++) { + if (strcmp(str, tex_opcode_strs[op]) == 0) + return (ir_texture_opcode) op; + } + return (ir_texture_opcode) -1; +} + + +void +ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type) +{ + assert(sampler != NULL); + assert(type != NULL); + this->sampler = sampler; + this->type = type; + + if (this->op == ir_txs || this->op == ir_query_levels || + this->op == ir_texture_samples) { + assert(type->base_type == GLSL_TYPE_INT); + } else if (this->op == ir_lod) { + assert(type->vector_elements == 2); + assert(type->base_type == GLSL_TYPE_FLOAT); + } else if (this->op == ir_samples_identical) { + assert(type == glsl_type::bool_type); + assert(sampler->type->base_type == GLSL_TYPE_SAMPLER); + assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS); + } else { + assert(sampler->type->sampler_type == (int) type->base_type); + if (sampler->type->sampler_shadow) + assert(type->vector_elements == 4 || type->vector_elements == 1); + else + assert(type->vector_elements == 4); + } +} + + +void +ir_swizzle::init_mask(const unsigned *comp, unsigned count) +{ + assert((count >= 1) && (count <= 4)); + + memset(&this->mask, 0, sizeof(this->mask)); + this->mask.num_components = count; + + unsigned dup_mask = 0; + switch (count) { + case 4: + assert(comp[3] <= 3); + dup_mask |= (1U << comp[3]) + & ((1U << comp[0]) | (1U << comp[1]) | (1U << comp[2])); + this->mask.w = comp[3]; + + case 3: + assert(comp[2] <= 3); + dup_mask |= (1U << comp[2]) + & ((1U << comp[0]) | (1U << comp[1])); + this->mask.z = comp[2]; + + case 2: + assert(comp[1] <= 3); + dup_mask |= (1U << comp[1]) + & ((1U << comp[0])); + this->mask.y = comp[1]; + + case 1: + assert(comp[0] <= 3); + this->mask.x = comp[0]; + } + + this->mask.has_duplicates = dup_mask != 0; + + /* Based on the number of elements in the swizzle and the base type + * (i.e., float, int, unsigned, or bool) of the vector being swizzled, + * generate the type of the resulting value. 
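+    * For example, swizzling a vec4 with .xz produces a two-component float
+    * vector, and swizzling an ivec4 the same way produces an ivec2.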
+ */ + type = glsl_type::get_instance(val->type->base_type, mask.num_components, 1); +} + +ir_swizzle::ir_swizzle(ir_rvalue *val, unsigned x, unsigned y, unsigned z, + unsigned w, unsigned count) + : ir_rvalue(ir_type_swizzle), val(val) +{ + const unsigned components[4] = { x, y, z, w }; + this->init_mask(components, count); +} + +ir_swizzle::ir_swizzle(ir_rvalue *val, const unsigned *comp, + unsigned count) + : ir_rvalue(ir_type_swizzle), val(val) +{ + this->init_mask(comp, count); +} + +ir_swizzle::ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask) + : ir_rvalue(ir_type_swizzle) +{ + this->val = val; + this->mask = mask; + this->type = glsl_type::get_instance(val->type->base_type, + mask.num_components, 1); +} + +#define X 1 +#define R 5 +#define S 9 +#define I 13 + +ir_swizzle * +ir_swizzle::create(ir_rvalue *val, const char *str, unsigned vector_length) +{ + void *ctx = ralloc_parent(val); + + /* For each possible swizzle character, this table encodes the value in + * \c idx_map that represents the 0th element of the vector. For invalid + * swizzle characters (e.g., 'k'), a special value is used that will allow + * detection of errors. + */ + static const unsigned char base_idx[26] = { + /* a b c d e f g h i j k l m */ + R, R, I, I, I, I, R, I, I, I, I, I, I, + /* n o p q r s t u v w x y z */ + I, I, S, S, R, S, S, I, I, X, X, X, X + }; + + /* Each valid swizzle character has an entry in the previous table. This + * table encodes the base index encoded in the previous table plus the actual + * index of the swizzle character. When processing swizzles, the first + * character in the string is indexed in the previous table. Each character + * in the string is indexed in this table, and the value found there has the + * value form the first table subtracted. The result must be on the range + * [0,3]. + * + * For example, the string "wzyx" will get X from the first table. Each of + * the charcaters will get X+3, X+2, X+1, and X+0 from this table. After + * subtraction, the swizzle values are { 3, 2, 1, 0 }. + * + * The string "wzrg" will get X from the first table. Each of the characters + * will get X+3, X+2, R+0, and R+1 from this table. After subtraction, the + * swizzle values are { 3, 2, 4, 5 }. Since 4 and 5 are outside the range + * [0,3], the error is detected. + */ + static const unsigned char idx_map[26] = { + /* a b c d e f g h i j k l m */ + R+3, R+2, 0, 0, 0, 0, R+1, 0, 0, 0, 0, 0, 0, + /* n o p q r s t u v w x y z */ + 0, 0, S+2, S+3, R+0, S+0, S+1, 0, 0, X+3, X+0, X+1, X+2 + }; + + int swiz_idx[4] = { 0, 0, 0, 0 }; + unsigned i; + + + /* Validate the first character in the swizzle string and look up the base + * index value as described above. + */ + if ((str[0] < 'a') || (str[0] > 'z')) + return NULL; + + const unsigned base = base_idx[str[0] - 'a']; + + + for (i = 0; (i < 4) && (str[i] != '\0'); i++) { + /* Validate the next character, and, as described above, convert it to a + * swizzle index. 
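+    * Each letter maps to an offset within its naming set ('x', 'r', and 's'
+    * all map to component 0); any result outside [0, vector_length) is
+    * rejected below.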
+ */ + if ((str[i] < 'a') || (str[i] > 'z')) + return NULL; + + swiz_idx[i] = idx_map[str[i] - 'a'] - base; + if ((swiz_idx[i] < 0) || (swiz_idx[i] >= (int) vector_length)) + return NULL; + } + + if (str[i] != '\0') + return NULL; + + return new(ctx) ir_swizzle(val, swiz_idx[0], swiz_idx[1], swiz_idx[2], + swiz_idx[3], i); +} + +#undef X +#undef R +#undef S +#undef I + +ir_variable * +ir_swizzle::variable_referenced() const +{ + return this->val->variable_referenced(); +} + + +bool ir_variable::temporaries_allocate_names = false; + +const char ir_variable::tmp_name[] = "compiler_temp"; + +ir_variable::ir_variable(const struct glsl_type *type, const char *name, + ir_variable_mode mode) + : ir_instruction(ir_type_variable) +{ + this->type = type; + + if (mode == ir_var_temporary && !ir_variable::temporaries_allocate_names) + name = NULL; + + /* The ir_variable clone method may call this constructor with name set to + * tmp_name. + */ + assert(name != NULL + || mode == ir_var_temporary + || mode == ir_var_function_in + || mode == ir_var_function_out + || mode == ir_var_function_inout); + assert(name != ir_variable::tmp_name + || mode == ir_var_temporary); + if (mode == ir_var_temporary + && (name == NULL || name == ir_variable::tmp_name)) { + this->name = ir_variable::tmp_name; + } else { + this->name = ralloc_strdup(this, name); + } + + this->u.max_ifc_array_access = NULL; + + this->data.explicit_location = false; + this->data.has_initializer = false; + this->data.location = -1; + this->data.location_frac = 0; + this->data.binding = 0; + this->data.warn_extension_index = 0; + this->constant_value = NULL; + this->constant_initializer = NULL; + this->data.origin_upper_left = false; + this->data.pixel_center_integer = false; + this->data.depth_layout = ir_depth_layout_none; + this->data.used = false; + this->data.always_active_io = false; + this->data.read_only = false; + this->data.centroid = false; + this->data.sample = false; + this->data.patch = false; + this->data.invariant = false; + this->data.how_declared = ir_var_declared_normally; + this->data.mode = mode; + this->data.interpolation = INTERP_QUALIFIER_NONE; + this->data.max_array_access = 0; + this->data.offset = 0; + this->data.precision = GLSL_PRECISION_NONE; + this->data.image_read_only = false; + this->data.image_write_only = false; + this->data.image_coherent = false; + this->data.image_volatile = false; + this->data.image_restrict = false; + this->data.from_ssbo_unsized_array = false; + + if (type != NULL) { + if (type->base_type == GLSL_TYPE_SAMPLER) + this->data.read_only = true; + + if (type->is_interface()) + this->init_interface_type(type); + else if (type->without_array()->is_interface()) + this->init_interface_type(type->without_array()); + } +} + + +const char * +interpolation_string(unsigned interpolation) +{ + switch (interpolation) { + case INTERP_QUALIFIER_NONE: return "no"; + case INTERP_QUALIFIER_SMOOTH: return "smooth"; + case INTERP_QUALIFIER_FLAT: return "flat"; + case INTERP_QUALIFIER_NOPERSPECTIVE: return "noperspective"; + } + + assert(!"Should not get here."); + return ""; +} + + +glsl_interp_qualifier +ir_variable::determine_interpolation_mode(bool flat_shade) +{ + if (this->data.interpolation != INTERP_QUALIFIER_NONE) + return (glsl_interp_qualifier) this->data.interpolation; + int location = this->data.location; + bool is_gl_Color = + location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1; + if (flat_shade && is_gl_Color) + return INTERP_QUALIFIER_FLAT; + else + return INTERP_QUALIFIER_SMOOTH; +} 
+ +const char *const ir_variable::warn_extension_table[] = { + "", + "GL_ARB_shader_stencil_export", + "GL_AMD_shader_stencil_export", +}; + +void +ir_variable::enable_extension_warning(const char *extension) +{ + for (unsigned i = 0; i < ARRAY_SIZE(warn_extension_table); i++) { + if (strcmp(warn_extension_table[i], extension) == 0) { + this->data.warn_extension_index = i; + return; + } + } + + assert(!"Should not get here."); + this->data.warn_extension_index = 0; +} + +const char * +ir_variable::get_extension_warning() const +{ + return this->data.warn_extension_index == 0 + ? NULL : warn_extension_table[this->data.warn_extension_index]; +} + +ir_function_signature::ir_function_signature(const glsl_type *return_type, + builtin_available_predicate b) + : ir_instruction(ir_type_function_signature), + return_type(return_type), is_defined(false), is_intrinsic(false), + builtin_avail(b), _function(NULL) +{ + this->origin = NULL; +} + + +bool +ir_function_signature::is_builtin() const +{ + return builtin_avail != NULL; +} + + +bool +ir_function_signature::is_builtin_available(const _mesa_glsl_parse_state *state) const +{ + /* We can't call the predicate without a state pointer, so just say that + * the signature is available. At compile time, we need the filtering, + * but also receive a valid state pointer. At link time, we're resolving + * imported built-in prototypes to their definitions, which will always + * be an exact match. So we can skip the filtering. + */ + if (state == NULL) + return true; + + assert(builtin_avail != NULL); + return builtin_avail(state); +} + + +static bool +modes_match(unsigned a, unsigned b) +{ + if (a == b) + return true; + + /* Accept "in" vs. "const in" */ + if ((a == ir_var_const_in && b == ir_var_function_in) || + (b == ir_var_const_in && a == ir_var_function_in)) + return true; + + return false; +} + + +const char * +ir_function_signature::qualifiers_match(exec_list *params) +{ + /* check that the qualifiers match. */ + foreach_two_lists(a_node, &this->parameters, b_node, params) { + ir_variable *a = (ir_variable *) a_node; + ir_variable *b = (ir_variable *) b_node; + + if (a->data.read_only != b->data.read_only || + !modes_match(a->data.mode, b->data.mode) || + a->data.interpolation != b->data.interpolation || + a->data.centroid != b->data.centroid || + a->data.sample != b->data.sample || + a->data.patch != b->data.patch || + a->data.image_read_only != b->data.image_read_only || + a->data.image_write_only != b->data.image_write_only || + a->data.image_coherent != b->data.image_coherent || + a->data.image_volatile != b->data.image_volatile || + a->data.image_restrict != b->data.image_restrict) { + + /* parameter a's qualifiers don't match */ + return a->name; + } + } + return NULL; +} + + +void +ir_function_signature::replace_parameters(exec_list *new_params) +{ + /* Destroy all of the previous parameter information. If the previous + * parameter information comes from the function prototype, it may either + * specify incorrect parameter names or not have names at all. 
+ */
+   new_params->move_nodes_to(&parameters);
+}
+
+
+ir_function::ir_function(const char *name)
+   : ir_instruction(ir_type_function)
+{
+   this->subroutine_index = -1;
+   this->name = ralloc_strdup(this, name);
+}
+
+
+bool
+ir_function::has_user_signature()
+{
+   foreach_in_list(ir_function_signature, sig, &this->signatures) {
+      if (!sig->is_builtin())
+         return true;
+   }
+   return false;
+}
+
+
+ir_rvalue *
+ir_rvalue::error_value(void *mem_ctx)
+{
+   ir_rvalue *v = new(mem_ctx) ir_rvalue(ir_type_unset);
+
+   v->type = glsl_type::error_type;
+   return v;
+}
+
+
+void
+visit_exec_list(exec_list *list, ir_visitor *visitor)
+{
+   foreach_in_list_safe(ir_instruction, node, list) {
+      node->accept(visitor);
+   }
+}
+
+
+static void
+steal_memory(ir_instruction *ir, void *new_ctx)
+{
+   ir_variable *var = ir->as_variable();
+   ir_function *fn = ir->as_function();
+   ir_constant *constant = ir->as_constant();
+   if (var != NULL && var->constant_value != NULL)
+      steal_memory(var->constant_value, ir);
+
+   if (var != NULL && var->constant_initializer != NULL)
+      steal_memory(var->constant_initializer, ir);
+
+   if (fn != NULL && fn->subroutine_types)
+      ralloc_steal(new_ctx, fn->subroutine_types);
+
+   /* The components of aggregate constants are not visited by the normal
+    * visitor, so steal their values by hand.
+    */
+   if (constant != NULL) {
+      if (constant->type->is_record()) {
+         foreach_in_list(ir_constant, field, &constant->components) {
+            steal_memory(field, ir);
+         }
+      } else if (constant->type->is_array()) {
+         for (unsigned int i = 0; i < constant->type->length; i++) {
+            steal_memory(constant->array_elements[i], ir);
+         }
+      }
+   }
+
+   ralloc_steal(new_ctx, ir);
+}
+
+
+void
+reparent_ir(exec_list *list, void *mem_ctx)
+{
+   foreach_in_list(ir_instruction, node, list) {
+      visit_tree(node, steal_memory, mem_ctx);
+   }
+}
+
+
+static ir_rvalue *
+try_min_one(ir_rvalue *ir)
+{
+   ir_expression *expr = ir->as_expression();
+
+   if (!expr || expr->operation != ir_binop_min)
+      return NULL;
+
+   if (expr->operands[0]->is_one())
+      return expr->operands[1];
+
+   if (expr->operands[1]->is_one())
+      return expr->operands[0];
+
+   return NULL;
+}
+
+static ir_rvalue *
+try_max_zero(ir_rvalue *ir)
+{
+   ir_expression *expr = ir->as_expression();
+
+   if (!expr || expr->operation != ir_binop_max)
+      return NULL;
+
+   if (expr->operands[0]->is_zero())
+      return expr->operands[1];
+
+   if (expr->operands[1]->is_zero())
+      return expr->operands[0];
+
+   return NULL;
+}
+
+ir_rvalue *
+ir_rvalue::as_rvalue_to_saturate()
+{
+   ir_expression *expr = this->as_expression();
+
+   if (!expr)
+      return NULL;
+
+   ir_rvalue *max_zero = try_max_zero(expr);
+   if (max_zero) {
+      return try_min_one(max_zero);
+   } else {
+      ir_rvalue *min_one = try_min_one(expr);
+      if (min_one) {
+         return try_max_zero(min_one);
+      }
+   }
+
+   return NULL;
+}
+
+
+unsigned
+vertices_per_prim(GLenum prim)
+{
+   switch (prim) {
+   case GL_POINTS:
+      return 1;
+   case GL_LINES:
+      return 2;
+   case GL_TRIANGLES:
+      return 3;
+   case GL_LINES_ADJACENCY:
+      return 4;
+   case GL_TRIANGLES_ADJACENCY:
+      return 6;
+   default:
+      assert(!"Bad primitive");
+      return 3;
+   }
+}
+
+/**
+ * Generate a string describing the mode of a variable
+ */
+const char *
+mode_string(const ir_variable *var)
+{
+   switch (var->data.mode) {
+   case ir_var_auto:
+      return (var->data.read_only) ? "global constant" : "global variable";
+
+   case ir_var_uniform:
+      return "uniform";
+
+   case ir_var_shader_storage:
+      return "buffer";
+
+   case ir_var_shader_in:
+      return "shader input";
+
+   case ir_var_shader_out:
+      return "shader output";
+
+   case ir_var_function_in:
+   case ir_var_const_in:
+      return "function input";
+
+   case ir_var_function_out:
+      return "function output";
+
+   case ir_var_function_inout:
+      return "function inout";
+
+   case ir_var_system_value:
+      return "shader input";
+
+   case ir_var_temporary:
+      return "compiler temporary";
+
+   case ir_var_mode_count:
+      break;
+   }
+
+   assert(!"Should not get here.");
+   return "invalid variable";
+}
diff --git a/src/compiler/glsl/ir.h b/src/compiler/glsl/ir.h
new file mode 100644
index 00000000000..bd7b5506343
--- /dev/null
+++ b/src/compiler/glsl/ir.h
@@ -0,0 +1,2632 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef IR_H
+#define IR_H
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "util/ralloc.h"
+#include "compiler/glsl_types.h"
+#include "list.h"
+#include "ir_visitor.h"
+#include "ir_hierarchical_visitor.h"
+#include "main/mtypes.h"
+
+#ifdef __cplusplus
+
+/**
+ * \defgroup IR Intermediate representation nodes
+ *
+ * @{
+ */
+
+/**
+ * Class tags
+ *
+ * Each concrete class derived from \c ir_instruction has a value in this
+ * enumerant.  The value for the type is stored in \c ir_instruction::ir_type
+ * by the constructor.  While using type tags is not very C++, it is extremely
+ * convenient.  For example, during debugging you can simply inspect
+ * \c ir_instruction::ir_type to find out the actual type of the object.
+ *
+ * In addition, it is possible to use a switch-statement based on
+ * \c ir_instruction::ir_type to select different behavior for different object
+ * types.  For functions that have only slight differences for several object
+ * types, this allows writing very straightforward, readable code.
+ */ +enum ir_node_type { + ir_type_dereference_array, + ir_type_dereference_record, + ir_type_dereference_variable, + ir_type_constant, + ir_type_expression, + ir_type_swizzle, + ir_type_texture, + ir_type_variable, + ir_type_assignment, + ir_type_call, + ir_type_function, + ir_type_function_signature, + ir_type_if, + ir_type_loop, + ir_type_loop_jump, + ir_type_return, + ir_type_discard, + ir_type_emit_vertex, + ir_type_end_primitive, + ir_type_barrier, + ir_type_max, /**< maximum ir_type enum number, for validation */ + ir_type_unset = ir_type_max +}; + + +/** + * Base class of all IR instructions + */ +class ir_instruction : public exec_node { +public: + enum ir_node_type ir_type; + + /** + * GCC 4.7+ and clang warn when deleting an ir_instruction unless + * there's a virtual destructor present. Because we almost + * universally use ralloc for our memory management of + * ir_instructions, the destructor doesn't need to do any work. + */ + virtual ~ir_instruction() + { + } + + /** ir_print_visitor helper for debugging. */ + void print(void) const; + void fprint(FILE *f) const; + + virtual void accept(ir_visitor *) = 0; + virtual ir_visitor_status accept(ir_hierarchical_visitor *) = 0; + virtual ir_instruction *clone(void *mem_ctx, + struct hash_table *ht) const = 0; + + bool is_rvalue() const + { + return ir_type == ir_type_dereference_array || + ir_type == ir_type_dereference_record || + ir_type == ir_type_dereference_variable || + ir_type == ir_type_constant || + ir_type == ir_type_expression || + ir_type == ir_type_swizzle || + ir_type == ir_type_texture; + } + + bool is_dereference() const + { + return ir_type == ir_type_dereference_array || + ir_type == ir_type_dereference_record || + ir_type == ir_type_dereference_variable; + } + + bool is_jump() const + { + return ir_type == ir_type_loop_jump || + ir_type == ir_type_return || + ir_type == ir_type_discard; + } + + /** + * \name IR instruction downcast functions + * + * These functions either cast the object to a derived class or return + * \c NULL if the object's type does not match the specified derived class. + * Additional downcast functions will be added as needed. + */ + /*@{*/ + #define AS_BASE(TYPE) \ + class ir_##TYPE *as_##TYPE() \ + { \ + assume(this != NULL); \ + return is_##TYPE() ? (ir_##TYPE *) this : NULL; \ + } \ + const class ir_##TYPE *as_##TYPE() const \ + { \ + assume(this != NULL); \ + return is_##TYPE() ? (ir_##TYPE *) this : NULL; \ + } + + AS_BASE(rvalue) + AS_BASE(dereference) + AS_BASE(jump) + #undef AS_BASE + + #define AS_CHILD(TYPE) \ + class ir_##TYPE * as_##TYPE() \ + { \ + assume(this != NULL); \ + return ir_type == ir_type_##TYPE ? (ir_##TYPE *) this : NULL; \ + } \ + const class ir_##TYPE * as_##TYPE() const \ + { \ + assume(this != NULL); \ + return ir_type == ir_type_##TYPE ? (const ir_##TYPE *) this : NULL; \ + } + AS_CHILD(variable) + AS_CHILD(function) + AS_CHILD(dereference_array) + AS_CHILD(dereference_variable) + AS_CHILD(dereference_record) + AS_CHILD(expression) + AS_CHILD(loop) + AS_CHILD(assignment) + AS_CHILD(call) + AS_CHILD(return) + AS_CHILD(if) + AS_CHILD(swizzle) + AS_CHILD(texture) + AS_CHILD(constant) + AS_CHILD(discard) + #undef AS_CHILD + /*@}*/ + + /** + * IR equality method: Return true if the referenced instruction would + * return the same value as this one. + * + * This intended to be used for CSE and algebraic optimizations, on rvalues + * in particular. No support for other instruction types (assignments, + * jumps, calls, etc.) is planned. 
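+    * For example, two expression trees computing (x + y) whose operands
+    * themselves compare equal are reported equal by this method.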
+ */ + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + +protected: + ir_instruction(enum ir_node_type t) + : ir_type(t) + { + } + +private: + ir_instruction() + { + assert(!"Should not get here."); + } +}; + + +/** + * The base class for all "values"/expression trees. + */ +class ir_rvalue : public ir_instruction { +public: + const struct glsl_type *type; + + virtual ir_rvalue *clone(void *mem_ctx, struct hash_table *) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + ir_rvalue *as_rvalue_to_saturate(); + + virtual bool is_lvalue() const + { + return false; + } + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const + { + return NULL; + } + + + /** + * If an r-value is a reference to a whole variable, get that variable + * + * \return + * Pointer to a variable that is completely dereferenced by the r-value. If + * the r-value is not a dereference or the dereference does not access the + * entire variable (i.e., it's just one array element, struct field), \c NULL + * is returned. + */ + virtual ir_variable *whole_variable_referenced() + { + return NULL; + } + + /** + * Determine if an r-value has the value zero + * + * The base implementation of this function always returns \c false. The + * \c ir_constant class over-rides this function to return \c true \b only + * for vector and scalar types that have all elements set to the value + * zero (or \c false for booleans). + * + * \sa ir_constant::has_value, ir_rvalue::is_one, ir_rvalue::is_negative_one + */ + virtual bool is_zero() const; + + /** + * Determine if an r-value has the value one + * + * The base implementation of this function always returns \c false. The + * \c ir_constant class over-rides this function to return \c true \b only + * for vector and scalar types that have all elements set to the value + * one (or \c true for booleans). + * + * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_negative_one + */ + virtual bool is_one() const; + + /** + * Determine if an r-value has the value negative one + * + * The base implementation of this function always returns \c false. The + * \c ir_constant class over-rides this function to return \c true \b only + * for vector and scalar types that have all elements set to the value + * negative one. For boolean types, the result is always \c false. + * + * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_one + */ + virtual bool is_negative_one() const; + + /** + * Determine if an r-value is an unsigned integer constant which can be + * stored in 16 bits. + * + * \sa ir_constant::is_uint16_constant. + */ + virtual bool is_uint16_constant() const { return false; } + + /** + * Return a generic value of error_type. + * + * Allocation will be performed with 'mem_ctx' as ralloc owner. + */ + static ir_rvalue *error_value(void *mem_ctx); + +protected: + ir_rvalue(enum ir_node_type t); +}; + + +/** + * Variable storage classes + */ +enum ir_variable_mode { + ir_var_auto = 0, /**< Function local variables and globals. */ + ir_var_uniform, /**< Variable declared as a uniform. */ + ir_var_shader_storage, /**< Variable declared as an ssbo. */ + ir_var_shader_shared, /**< Variable declared as shared. 
*/ + ir_var_shader_in, + ir_var_shader_out, + ir_var_function_in, + ir_var_function_out, + ir_var_function_inout, + ir_var_const_in, /**< "in" param that must be a constant expression */ + ir_var_system_value, /**< Ex: front-face, instance-id, etc. */ + ir_var_temporary, /**< Temporary variable generated during compilation. */ + ir_var_mode_count /**< Number of variable modes */ +}; + +/** + * Enum keeping track of how a variable was declared. For error checking of + * the gl_PerVertex redeclaration rules. + */ +enum ir_var_declaration_type { + /** + * Normal declaration (for most variables, this means an explicit + * declaration. Exception: temporaries are always implicitly declared, but + * they still use ir_var_declared_normally). + * + * Note: an ir_variable that represents a named interface block uses + * ir_var_declared_normally. + */ + ir_var_declared_normally = 0, + + /** + * Variable was explicitly declared (or re-declared) in an unnamed + * interface block. + */ + ir_var_declared_in_block, + + /** + * Variable is an implicitly declared built-in that has not been explicitly + * re-declared by the shader. + */ + ir_var_declared_implicitly, + + /** + * Variable is implicitly generated by the compiler and should not be + * visible via the API. + */ + ir_var_hidden, +}; + +/** + * \brief Layout qualifiers for gl_FragDepth. + * + * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared + * with a layout qualifier. + */ +enum ir_depth_layout { + ir_depth_layout_none, /**< No depth layout is specified. */ + ir_depth_layout_any, + ir_depth_layout_greater, + ir_depth_layout_less, + ir_depth_layout_unchanged +}; + +/** + * \brief Convert depth layout qualifier to string. + */ +const char* +depth_layout_string(ir_depth_layout layout); + +/** + * Description of built-in state associated with a uniform + * + * \sa ir_variable::state_slots + */ +struct ir_state_slot { + int tokens[5]; + int swizzle; +}; + + +/** + * Get the string value for an interpolation qualifier + * + * \return The string that would be used in a shader to specify \c + * mode will be returned. + * + * This function is used to generate error messages of the form "shader + * uses %s interpolation qualifier", so in the case where there is no + * interpolation qualifier, it returns "no". + * + * This function should only be used on a shader input or output variable. + */ +const char *interpolation_string(unsigned interpolation); + + +class ir_variable : public ir_instruction { +public: + ir_variable(const struct glsl_type *, const char *, ir_variable_mode); + + virtual ir_variable *clone(void *mem_ctx, struct hash_table *ht) const; + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + + /** + * Determine how this variable should be interpolated based on its + * interpolation qualifier (if present), whether it is gl_Color or + * gl_SecondaryColor, and whether flatshading is enabled in the current GL + * state. + * + * The return value will always be either INTERP_QUALIFIER_SMOOTH, + * INTERP_QUALIFIER_NOPERSPECTIVE, or INTERP_QUALIFIER_FLAT. + */ + glsl_interp_qualifier determine_interpolation_mode(bool flat_shade); + + /** + * Determine whether or not a variable is part of a uniform or + * shader storage block. 
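+    * For example, a member of "uniform Block { vec4 v; };" satisfies this,
+    * while a plain "uniform vec4 v;" outside any interface block does not.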
+    */
+   inline bool is_in_buffer_block() const
+   {
+      return (this->data.mode == ir_var_uniform ||
+              this->data.mode == ir_var_shader_storage) &&
+             this->interface_type != NULL;
+   }
+
+   /**
+    * Determine whether or not a variable is part of a shader storage block.
+    */
+   inline bool is_in_shader_storage_block() const
+   {
+      return this->data.mode == ir_var_shader_storage &&
+             this->interface_type != NULL;
+   }
+
+   /**
+    * Determine whether or not a variable is the declaration of an interface
+    * block
+    *
+    * For the first declaration below, there will be an \c ir_variable named
+    * "instance" whose type and whose interface_type will be the same
+    * \c glsl_type. For the second declaration, there will be an \c ir_variable
+    * named "f" whose type is float and whose interface_type is B2.
+    *
+    * "instance" is an interface instance variable, but "f" is not.
+    *
+    * uniform B1 {
+    *    float f;
+    * } instance;
+    *
+    * uniform B2 {
+    *    float f;
+    * };
+    */
+   inline bool is_interface_instance() const
+   {
+      return this->type->without_array() == this->interface_type;
+   }
+
+   /**
+    * Set this->interface_type on a newly created variable.
+    */
+   void init_interface_type(const struct glsl_type *type)
+   {
+      assert(this->interface_type == NULL);
+      this->interface_type = type;
+      if (this->is_interface_instance()) {
+         this->u.max_ifc_array_access =
+            rzalloc_array(this, unsigned, type->length);
+      }
+   }
+
+   /**
+    * Change this->interface_type on a variable that previously had a
+    * different, but compatible, interface_type. This is used during linking
+    * to set the size of arrays in interface blocks.
+    */
+   void change_interface_type(const struct glsl_type *type)
+   {
+      if (this->u.max_ifc_array_access != NULL) {
+         /* max_ifc_array_access has already been allocated, so make sure the
+          * new interface has the same number of fields as the old one.
+          */
+         assert(this->interface_type->length == type->length);
+      }
+      this->interface_type = type;
+   }
+
+   /**
+    * Change this->interface_type on a variable that previously had a
+    * different, and incompatible, interface_type. This is used during
+    * compilation to handle redeclaration of the built-in gl_PerVertex
+    * interface block.
+    */
+   void reinit_interface_type(const struct glsl_type *type)
+   {
+      if (this->u.max_ifc_array_access != NULL) {
+#ifndef NDEBUG
+         /* Redeclaring gl_PerVertex is only allowed if none of the built-ins
+          * it defines have been accessed yet; so it's safe to throw away the
+          * old max_ifc_array_access pointer, since all of its values are
+          * zero.
+          */
+         for (unsigned i = 0; i < this->interface_type->length; i++)
+            assert(this->u.max_ifc_array_access[i] == 0);
+#endif
+         ralloc_free(this->u.max_ifc_array_access);
+         this->u.max_ifc_array_access = NULL;
+      }
+      this->interface_type = NULL;
+      init_interface_type(type);
+   }
+
+   const glsl_type *get_interface_type() const
+   {
+      return this->interface_type;
+   }
+
+   /**
+    * Get the max_ifc_array_access pointer
+    *
+    * A "set" function is not needed because the array is dynamically allocated
+    * as necessary.
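+    *
+    * A usage sketch (assuming \c var is an interface instance variable,
+    * \c i is a member index, and \c element is the array element accessed):
+    *
+    *    unsigned *access = var->get_max_ifc_array_access();
+    *    if (access[i] < element)
+    *       access[i] = element;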
+    */
+   inline unsigned *get_max_ifc_array_access()
+   {
+      assert(this->data._num_state_slots == 0);
+      return this->u.max_ifc_array_access;
+   }
+
+   inline unsigned get_num_state_slots() const
+   {
+      assert(!this->is_interface_instance()
+             || this->data._num_state_slots == 0);
+      return this->data._num_state_slots;
+   }
+
+   inline void set_num_state_slots(unsigned n)
+   {
+      assert(!this->is_interface_instance()
+             || n == 0);
+      this->data._num_state_slots = n;
+   }
+
+   inline ir_state_slot *get_state_slots()
+   {
+      return this->is_interface_instance() ? NULL : this->u.state_slots;
+   }
+
+   inline const ir_state_slot *get_state_slots() const
+   {
+      return this->is_interface_instance() ? NULL : this->u.state_slots;
+   }
+
+   inline ir_state_slot *allocate_state_slots(unsigned n)
+   {
+      assert(!this->is_interface_instance());
+
+      this->u.state_slots = ralloc_array(this, ir_state_slot, n);
+      this->data._num_state_slots = 0;
+
+      if (this->u.state_slots != NULL)
+         this->data._num_state_slots = n;
+
+      return this->u.state_slots;
+   }
+
+   inline bool is_name_ralloced() const
+   {
+      return this->name != ir_variable::tmp_name;
+   }
+
+   /**
+    * Enable emitting extension warnings for this variable
+    */
+   void enable_extension_warning(const char *extension);
+
+   /**
+    * Get the extension warning string for this variable
+    *
+    * If warnings are not enabled, \c NULL is returned.
+    */
+   const char *get_extension_warning() const;
+
+   /**
+    * Declared type of the variable
+    */
+   const struct glsl_type *type;
+
+   /**
+    * Declared name of the variable
+    */
+   const char *name;
+
+   struct ir_variable_data {
+
+      /**
+       * Is the variable read-only?
+       *
+       * This is set for variables declared as \c const, shader inputs,
+       * and uniforms.
+       */
+      unsigned read_only:1;
+      unsigned centroid:1;
+      unsigned sample:1;
+      unsigned patch:1;
+      unsigned invariant:1;
+      unsigned precise:1;
+
+      /**
+       * Has this variable been used for reading or writing?
+       *
+       * Several GLSL semantic checks require knowledge of whether or not a
+       * variable has been used. For example, it is an error to redeclare a
+       * variable as invariant after it has been used.
+       *
+       * This is only maintained in the ast_to_hir.cpp path, not in
+       * Mesa's fixed function or ARB program paths.
+       */
+      unsigned used:1;
+
+      /**
+       * Has this variable been statically assigned?
+       *
+       * This answers whether the variable was assigned in any path of
+       * the shader during ast_to_hir. This doesn't answer whether it is
+       * still written after dead code removal, nor is it maintained in
+       * non-ast_to_hir.cpp (GLSL parsing) paths.
+       */
+      unsigned assigned:1;
+
+      /**
+       * When separate shader programs are enabled, only input/outputs between
+       * the stages of a multi-stage separate program can be safely removed
+       * from the shader interface. Other input/outputs must remain active.
+       */
+      unsigned always_active_io:1;
+
+      /**
+       * Enum indicating how the variable was declared. See
+       * ir_var_declaration_type.
+       *
+       * This is used to detect certain kinds of illegal variable redeclarations.
+       */
+      unsigned how_declared:2;
+
+      /**
+       * Storage class of the variable.
+       *
+       * \sa ir_variable_mode
+       */
+      unsigned mode:4;
+
+      /**
+       * Interpolation mode for shader inputs / outputs
+       *
+       * \sa ir_variable_interpolation
+       */
+      unsigned interpolation:2;
+
+      /**
+       * \name ARB_fragment_coord_conventions
+       * @{
+       */
+      unsigned origin_upper_left:1;
+      unsigned pixel_center_integer:1;
+      /*@}*/
+
+      /**
+       * Was the location explicitly set in the shader?
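+       * (e.g., by a declaration such as \c "layout(location = 2) in vec4 pos;",
+       * where the variable name is illustrative)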
+       *
+       * If the location is explicitly set in the shader, it \b cannot be changed
+       * by the linker or by the API (e.g., calls to \c glBindAttribLocation have
+       * no effect).
+       */
+      unsigned explicit_location:1;
+      unsigned explicit_index:1;
+
+      /**
+       * Was an initial binding explicitly set in the shader?
+       *
+       * If so, constant_value contains an integer ir_constant representing the
+       * initial binding point.
+       */
+      unsigned explicit_binding:1;
+
+      /**
+       * Does this variable have an initializer?
+       *
+       * This is used by the linker to cross-validate initializers of global
+       * variables.
+       */
+      unsigned has_initializer:1;
+
+      /**
+       * Is this variable a generic output or input that has not yet been matched
+       * up to a variable in another stage of the pipeline?
+       *
+       * This is used by the linker as scratch storage while assigning locations
+       * to generic inputs and outputs.
+       */
+      unsigned is_unmatched_generic_inout:1;
+
+      /**
+       * If non-zero, then this variable may be packed along with other variables
+       * into a single varying slot, so this offset should be applied when
+       * accessing components. For example, an offset of 1 means that the x
+       * component of this variable is actually stored in component y of the
+       * location specified by \c location.
+       */
+      unsigned location_frac:2;
+
+      /**
+       * Layout of the matrix. Uses glsl_matrix_layout values.
+       */
+      unsigned matrix_layout:2;
+
+      /**
+       * Non-zero if this variable was created by lowering a named interface
+       * block which was not an array.
+       *
+       * Note that this variable and \c from_named_ifc_block_array will never
+       * both be non-zero.
+       */
+      unsigned from_named_ifc_block_nonarray:1;
+
+      /**
+       * Non-zero if this variable was created by lowering a named interface
+       * block which was an array.
+       *
+       * Note that this variable and \c from_named_ifc_block_nonarray will never
+       * both be non-zero.
+       */
+      unsigned from_named_ifc_block_array:1;
+
+      /**
+       * Non-zero if the variable must be a shader input. This is useful for
+       * constraints on function parameters.
+       */
+      unsigned must_be_shader_input:1;
+
+      /**
+       * Output index for dual source blending.
+       *
+       * \note
+       * The GLSL spec only allows the values 0 or 1 for the index in \b dual
+       * source blending.
+       */
+      unsigned index:1;
+
+      /**
+       * Precision qualifier.
+       *
+       * In desktop GLSL we do not care about precision qualifiers at all; in
+       * fact, the spec says that precision qualifiers are ignored.
+       *
+       * To make things easy, we make it so that this field is always
+       * GLSL_PRECISION_NONE on desktop shaders. This way all the variables
+       * have the same precision value and the checks we add in the compiler
+       * for this field will never break a desktop shader compile.
+       */
+      unsigned precision:2;
+
+      /**
+       * \brief Layout qualifier for gl_FragDepth.
+       *
+       * This is not equal to \c ir_depth_layout_none if and only if this
+       * variable is \c gl_FragDepth and a layout qualifier is specified.
+       */
+      ir_depth_layout depth_layout:3;
+
+      /**
+       * ARB_shader_image_load_store qualifiers.
+       */
+      unsigned image_read_only:1; /**< "readonly" qualifier. */
+      unsigned image_write_only:1; /**< "writeonly" qualifier. */
+      unsigned image_coherent:1;
+      unsigned image_volatile:1;
+      unsigned image_restrict:1;
+
+      /**
+       * ARB_shader_storage_buffer_object
+       */
+      unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. */
+
+      /**
+       * Emit a warning if this variable is accessed.
+       */
+   private:
+      uint8_t warn_extension_index;
+
+   public:
+      /** Image internal format if specified explicitly, otherwise GL_NONE.
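+       * (For instance, a variable declared with \c "layout(rgba32f)" would
+       * carry \c GL_RGBA32F here.)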
+       */
+      uint16_t image_format;
+
+   private:
+      /**
+       * Number of state slots used
+       *
+       * \note
+       * This could be stored in as few as 7 bits, if necessary. If it is made
+       * smaller, add an assertion to \c ir_variable::allocate_state_slots to
+       * be safe.
+       */
+      uint16_t _num_state_slots;
+
+   public:
+      /**
+       * Initial binding point for a sampler, atomic, or UBO.
+       *
+       * For array types, this represents the binding point for the first element.
+       */
+      int16_t binding;
+
+      /**
+       * Storage location of the base of this variable
+       *
+       * The precise meaning of this field depends on the nature of the variable.
+       *
+       *   - Vertex shader input: one of the values from \c gl_vert_attrib.
+       *   - Vertex shader output: one of the values from \c gl_varying_slot.
+       *   - Geometry shader input: one of the values from \c gl_varying_slot.
+       *   - Geometry shader output: one of the values from \c gl_varying_slot.
+       *   - Fragment shader input: one of the values from \c gl_varying_slot.
+       *   - Fragment shader output: one of the values from \c gl_frag_result.
+       *   - Uniforms: Per-stage uniform slot number for default uniform block.
+       *   - Uniforms: Index within the uniform block definition for UBO members.
+       *   - Non-UBO Uniforms: explicit location until linking then reused to
+       *     store uniform slot number.
+       *   - Other: This field is not currently used.
+       *
+       * If the variable is a uniform, shader input, or shader output, and the
+       * slot has not been assigned, the value will be -1.
+       */
+      int location;
+
+      /**
+       * Vertex stream output identifier.
+       */
+      unsigned stream;
+
+      /**
+       * Location an atomic counter is stored at.
+       */
+      unsigned offset;
+
+      /**
+       * Highest element accessed with a constant expression array index
+       *
+       * Not used for non-array variables.
+       */
+      unsigned max_array_access;
+
+      /**
+       * Allow (only) ir_variable direct access to private members.
+       */
+      friend class ir_variable;
+   } data;
+
+   /**
+    * Value assigned in the initializer of a variable declared "const"
+    */
+   ir_constant *constant_value;
+
+   /**
+    * Constant expression assigned in the initializer of the variable
+    *
+    * \warning
+    * This field and \c ::constant_value are distinct. Even if the two fields
+    * refer to constants with the same value, they must point to separate
+    * objects.
+    */
+   ir_constant *constant_initializer;
+
+private:
+   static const char *const warn_extension_table[];
+
+   union {
+      /**
+       * For variables which satisfy the is_interface_instance() predicate,
+       * this points to an array of integers such that if the ith member of
+       * the interface block is an array, max_ifc_array_access[i] is the
+       * maximum array element of that member that has been accessed. If the
+       * ith member of the interface block is not an array,
+       * max_ifc_array_access[i] is unused.
+       *
+       * For variables whose type is not an interface block, this pointer is
+       * NULL.
+       */
+      unsigned *max_ifc_array_access;
+
+      /**
+       * Built-in state that backs this uniform
+       *
+       * Once set at variable creation, \c state_slots must remain invariant.
+       *
+       * If the variable is not a uniform, \c _num_state_slots will be zero
+       * and \c state_slots will be \c NULL.
+       */
+      ir_state_slot *state_slots;
+   } u;
+
+   /**
+    * For variables that are in an interface block or are an instance of an
+    * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block.
+    *
+    * \sa ir_variable::location
+    */
+   const glsl_type *interface_type;
+
+   /**
+    * Name used for anonymous compiler temporaries
+    */
+   static const char tmp_name[];
+
+public:
+   /**
+    * Should the constructor keep names for ir_var_temporary variables?
+    *
+    * When this global is false, names passed to the constructor for
+    * \c ir_var_temporary variables will be dropped. Instead, the variable will
+    * be named "compiler_temp". This name will be in static storage.
+    *
+    * \warning
+    * \b NEVER change the mode of an \c ir_var_temporary.
+    *
+    * \warning
+    * This variable is \b not thread-safe. It is global, \b not
+    * per-context. It begins life false. A context can, at some point, make
+    * it true. From that point on, it will be true forever. This should be
+    * okay since it will only be set true while debugging.
+    */
+   static bool temporaries_allocate_names;
+};
+
+/**
+ * A function that returns whether a built-in function is available in the
+ * current shading language (based on version, ES or desktop, and extensions).
+ */
+typedef bool (*builtin_available_predicate)(const _mesa_glsl_parse_state *);
+
+/*@{*/
+/**
+ * The representation of a function instance; may be the full definition or
+ * simply a prototype.
+ */
+class ir_function_signature : public ir_instruction {
+   /* An ir_function_signature will be part of the list of signatures in
+    * an ir_function.
+    */
+public:
+   ir_function_signature(const glsl_type *return_type,
+                         builtin_available_predicate builtin_avail = NULL);
+
+   virtual ir_function_signature *clone(void *mem_ctx,
+                                        struct hash_table *ht) const;
+   ir_function_signature *clone_prototype(void *mem_ctx,
+                                          struct hash_table *ht) const;
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   /**
+    * Attempt to evaluate this function as a constant expression,
+    * given a list of the actual parameters and the variable context.
+    * Returns NULL for non-built-ins.
+    */
+   ir_constant *constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context);
+
+   /**
+    * Get the name of the function for which this is a signature
+    */
+   const char *function_name() const;
+
+   /**
+    * Get a handle to the function for which this is a signature
+    *
+    * There is no setter function; this function returns a \c const pointer,
+    * and \c ir_function_signature::_function is private for a reason. The
+    * only way to make a connection between a function and function signature
+    * is via \c ir_function::add_signature. This helps ensure that certain
+    * invariants (i.e., a function signature is in the list of signatures for
+    * its \c _function) are met.
+    *
+    * \sa ir_function::add_signature
+    */
+   inline const class ir_function *function() const
+   {
+      return this->_function;
+   }
+
+   /**
+    * Check whether the qualifiers match between this signature's parameters
+    * and the supplied parameter list. If not, returns the name of the first
+    * parameter with mismatched qualifiers (for use in error messages).
+    */
+   const char *qualifiers_match(exec_list *params);
+
+   /**
+    * Replace the current parameter list with the given one. This is useful
+    * if the current information came from a prototype, and either has invalid
+    * or missing parameter names.
+    */
+   void replace_parameters(exec_list *new_params);
+
+   /**
+    * Function return type.
+    *
+    * \note This discards the optional precision qualifier.
+    */
+   const struct glsl_type *return_type;
+
+   /**
+    * List of ir_variable of function parameters.
+    *
+    * This represents the storage. The parameters passed in a particular
+    * call will be in ir_call::actual_parameters.
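+    *
+    * A usage sketch (assuming \c sig is an \c ir_function_signature *):
+    *
+    *    foreach_in_list(ir_variable, param, &sig->parameters) {
+    *       // inspect param->type, param->data.mode, etc.
+    *    }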
+    */
+   struct exec_list parameters;
+
+   /** Whether or not this function has a body (which may be empty). */
+   unsigned is_defined:1;
+
+   /** Whether or not this function signature is a built-in. */
+   bool is_builtin() const;
+
+   /**
+    * Whether or not this function is an intrinsic to be implemented
+    * by the driver.
+    */
+   bool is_intrinsic;
+
+   /** Whether or not a built-in is available for this shader. */
+   bool is_builtin_available(const _mesa_glsl_parse_state *state) const;
+
+   /** Body of instructions in the function. */
+   struct exec_list body;
+
+private:
+   /**
+    * A function pointer to a predicate that answers whether a built-in
+    * function is available in the current shader. NULL if not a built-in.
+    */
+   builtin_available_predicate builtin_avail;
+
+   /** Function of which this signature is one overload. */
+   class ir_function *_function;
+
+   /** Function signature of which this one is a prototype clone */
+   const ir_function_signature *origin;
+
+   friend class ir_function;
+
+   /**
+    * Helper function to run a list of instructions for constant
+    * expression evaluation.
+    *
+    * The hash table represents the values of the visible variables.
+    * There are no scoping issues because the table is indexed on
+    * ir_variable pointers, not variable names.
+    *
+    * Returns false if the expression is not constant, true otherwise,
+    * and the value in *result if result is non-NULL.
+    */
+   bool constant_expression_evaluate_expression_list(const struct exec_list &body,
+                                                     struct hash_table *variable_context,
+                                                     ir_constant **result);
+};
+
+
+/**
+ * Header for tracking multiple overloaded functions with the same name.
+ * Contains a list of ir_function_signatures representing each of the
+ * actual functions.
+ */
+class ir_function : public ir_instruction {
+public:
+   ir_function(const char *name);
+
+   virtual ir_function *clone(void *mem_ctx, struct hash_table *ht) const;
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   void add_signature(ir_function_signature *sig)
+   {
+      sig->_function = this;
+      this->signatures.push_tail(sig);
+   }
+
+   /**
+    * Find a signature that matches a set of actual parameters, taking implicit
+    * conversions into account. Also flags whether the match was exact.
+    */
+   ir_function_signature *matching_signature(_mesa_glsl_parse_state *state,
+                                             const exec_list *actual_param,
+                                             bool allow_builtins,
+                                             bool *match_is_exact);
+
+   /**
+    * Find a signature that matches a set of actual parameters, taking implicit
+    * conversions into account.
+    */
+   ir_function_signature *matching_signature(_mesa_glsl_parse_state *state,
+                                             const exec_list *actual_param,
+                                             bool allow_builtins);
+
+   /**
+    * Find a signature that exactly matches a set of actual parameters without
+    * any implicit type conversions.
+    */
+   ir_function_signature *exact_matching_signature(_mesa_glsl_parse_state *state,
+                                                   const exec_list *actual_ps);
+
+   /**
+    * Name of the function.
+    */
+   const char *name;
+
+   /** Whether or not this function has a signature that isn't a built-in. */
+   bool has_user_signature();
+
+   /**
+    * List of ir_function_signature for each overloaded function with this name.
+    */
+   struct exec_list signatures;
+
+   /**
+    * Is this function a subroutine type declaration?
+    * e.g. subroutine void type1(float arg1);
+    */
+   bool is_subroutine;
+
+   /**
+    * Is this function associated with a subroutine type?
+    * e.g. subroutine (type1, type2) function_name { function_body };
+    * would have num_subroutine_types 2,
+    * and pointers to the type1 and type2 types.
+    */
+   int num_subroutine_types;
+   const struct glsl_type **subroutine_types;
+
+   int subroutine_index;
+};
+
+inline const char *ir_function_signature::function_name() const
+{
+   return this->_function->name;
+}
+/*@}*/
+
+
+/**
+ * IR instruction representing high-level if-statements
+ */
+class ir_if : public ir_instruction {
+public:
+   ir_if(ir_rvalue *condition)
+      : ir_instruction(ir_type_if), condition(condition)
+   {
+   }
+
+   virtual ir_if *clone(void *mem_ctx, struct hash_table *ht) const;
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   ir_rvalue *condition;
+   /** List of ir_instruction for the body of the then branch */
+   exec_list then_instructions;
+   /** List of ir_instruction for the body of the else branch */
+   exec_list else_instructions;
+};
+
+
+/**
+ * IR instruction representing a high-level loop structure.
+ */
+class ir_loop : public ir_instruction {
+public:
+   ir_loop();
+
+   virtual ir_loop *clone(void *mem_ctx, struct hash_table *ht) const;
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   /** List of ir_instruction that make up the body of the loop. */
+   exec_list body_instructions;
+};
+
+
+class ir_assignment : public ir_instruction {
+public:
+   ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, ir_rvalue *condition = NULL);
+
+   /**
+    * Construct an assignment with an explicit write mask
+    *
+    * \note
+    * Since a write mask is supplied, the LHS must already be a bare
+    * \c ir_dereference. There cannot be any swizzles in the LHS.
+    */
+   ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, ir_rvalue *condition,
+                 unsigned write_mask);
+
+   virtual ir_assignment *clone(void *mem_ctx, struct hash_table *ht) const;
+
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   /**
+    * Get a whole variable written by an assignment
+    *
+    * If the LHS of the assignment writes a whole variable, the variable is
+    * returned. Otherwise \c NULL is returned. Examples of whole-variable
+    * assignment are:
+    *
+    *  - Assigning to a scalar
+    *  - Assigning to all components of a vector
+    *  - Whole array (or matrix) assignment
+    *  - Whole structure assignment
+    */
+   ir_variable *whole_variable_written();
+
+   /**
+    * Set the LHS of an assignment
+    */
+   void set_lhs(ir_rvalue *lhs);
+
+   /**
+    * Left-hand side of the assignment.
+    *
+    * This should be treated as read only. If you need to set the LHS of an
+    * assignment, use \c ir_assignment::set_lhs.
+    */
+   ir_dereference *lhs;
+
+   /**
+    * Value being assigned
+    */
+   ir_rvalue *rhs;
+
+   /**
+    * Optional condition for the assignment.
+    */
+   ir_rvalue *condition;
+
+
+   /**
+    * Component mask written
+    *
+    * For non-vector types in the LHS, this field will be zero. For vector
+    * types, a bit will be set for each component that is written. Note that
+    * for \c vec2 and \c vec3 types only the lower bits will ever be set.
+    *
+    * A partially-set write mask means that each enabled channel gets
+    * the value from a consecutive channel of the rhs. For example,
+    * to write just .xyw of gl_FragColor with color:
+    *
+    *     (assign (constant bool (1)) (xyw)
+    *         (var_ref gl_FragColor)
+    *         (swiz xyw (var_ref color)))
+    */
+   unsigned write_mask:4;
+};
+
+/* Update ir_expression::get_num_operands() and operator_strs when
+ * updating this list.
+ */
+enum ir_expression_operation {
+   ir_unop_bit_not,
+   ir_unop_logic_not,
+   ir_unop_neg,
+   ir_unop_abs,
+   ir_unop_sign,
+   ir_unop_rcp,
+   ir_unop_rsq,
+   ir_unop_sqrt,
+   ir_unop_exp,         /**< e raised to the power of gentype */
+   ir_unop_log,         /**< Natural log on gentype */
+   ir_unop_exp2,
+   ir_unop_log2,
+   ir_unop_f2i,         /**< Float-to-integer conversion. */
+   ir_unop_f2u,         /**< Float-to-unsigned conversion. */
+   ir_unop_i2f,         /**< Integer-to-float conversion. */
+   ir_unop_f2b,         /**< Float-to-boolean conversion */
+   ir_unop_b2f,         /**< Boolean-to-float conversion */
+   ir_unop_i2b,         /**< int-to-boolean conversion */
+   ir_unop_b2i,         /**< Boolean-to-int conversion */
+   ir_unop_u2f,         /**< Unsigned-to-float conversion. */
+   ir_unop_i2u,         /**< Integer-to-unsigned conversion. */
+   ir_unop_u2i,         /**< Unsigned-to-integer conversion. */
+   ir_unop_d2f,         /**< Double-to-float conversion. */
+   ir_unop_f2d,         /**< Float-to-double conversion. */
+   ir_unop_d2i,         /**< Double-to-integer conversion. */
+   ir_unop_i2d,         /**< Integer-to-double conversion. */
+   ir_unop_d2u,         /**< Double-to-unsigned conversion. */
+   ir_unop_u2d,         /**< Unsigned-to-double conversion. */
+   ir_unop_d2b,         /**< Double-to-boolean conversion. */
+   ir_unop_bitcast_i2f, /**< Bit-identical int-to-float "conversion" */
+   ir_unop_bitcast_f2i, /**< Bit-identical float-to-int "conversion" */
+   ir_unop_bitcast_u2f, /**< Bit-identical uint-to-float "conversion" */
+   ir_unop_bitcast_f2u, /**< Bit-identical float-to-uint "conversion" */
+
+   /**
+    * \name Unary floating-point rounding operations.
+    */
+   /*@{*/
+   ir_unop_trunc,
+   ir_unop_ceil,
+   ir_unop_floor,
+   ir_unop_fract,
+   ir_unop_round_even,
+   /*@}*/
+
+   /**
+    * \name Trigonometric operations.
+    */
+   /*@{*/
+   ir_unop_sin,
+   ir_unop_cos,
+   /*@}*/
+
+   /**
+    * \name Partial derivatives.
+    */
+   /*@{*/
+   ir_unop_dFdx,
+   ir_unop_dFdx_coarse,
+   ir_unop_dFdx_fine,
+   ir_unop_dFdy,
+   ir_unop_dFdy_coarse,
+   ir_unop_dFdy_fine,
+   /*@}*/
+
+   /**
+    * \name Floating point pack and unpack operations.
+    */
+   /*@{*/
+   ir_unop_pack_snorm_2x16,
+   ir_unop_pack_snorm_4x8,
+   ir_unop_pack_unorm_2x16,
+   ir_unop_pack_unorm_4x8,
+   ir_unop_pack_half_2x16,
+   ir_unop_unpack_snorm_2x16,
+   ir_unop_unpack_snorm_4x8,
+   ir_unop_unpack_unorm_2x16,
+   ir_unop_unpack_unorm_4x8,
+   ir_unop_unpack_half_2x16,
+   /*@}*/
+
+   /**
+    * \name Lowered floating point unpacking operations.
+    *
+    * \see lower_packing_builtins_visitor::split_unpack_half_2x16
+    */
+   /*@{*/
+   ir_unop_unpack_half_2x16_split_x,
+   ir_unop_unpack_half_2x16_split_y,
+   /*@}*/
+
+   /**
+    * \name Bit operations, part of ARB_gpu_shader5.
+    */
+   /*@{*/
+   ir_unop_bitfield_reverse,
+   ir_unop_bit_count,
+   ir_unop_find_msb,
+   ir_unop_find_lsb,
+   /*@}*/
+
+   ir_unop_saturate,
+
+   /**
+    * \name Double packing, part of ARB_gpu_shader_fp64.
+    */
+   /*@{*/
+   ir_unop_pack_double_2x32,
+   ir_unop_unpack_double_2x32,
+   /*@}*/
+
+   ir_unop_frexp_sig,
+   ir_unop_frexp_exp,
+
+   ir_unop_noise,
+
+   ir_unop_subroutine_to_int,
+   /**
+    * Interpolate fs input at centroid
+    *
+    * operand0 is the fs input.
+    */
+   ir_unop_interpolate_at_centroid,
+
+   /**
+    * Ask the driver for the total size of a buffer block.
+    *
+    * operand0 is the ir_constant buffer block index in the linked shader.
+ */ + ir_unop_get_buffer_size, + + /** + * Calculate length of an unsized array inside a buffer block. + * This opcode is going to be replaced in a lowering pass inside + * the linker. + * + * operand0 is the unsized array's ir_value for the calculation + * of its length. + */ + ir_unop_ssbo_unsized_array_length, + + /** + * A sentinel marking the last of the unary operations. + */ + ir_last_unop = ir_unop_ssbo_unsized_array_length, + + ir_binop_add, + ir_binop_sub, + ir_binop_mul, /**< Floating-point or low 32-bit integer multiply. */ + ir_binop_imul_high, /**< Calculates the high 32-bits of a 64-bit multiply. */ + ir_binop_div, + + /** + * Returns the carry resulting from the addition of the two arguments. + */ + /*@{*/ + ir_binop_carry, + /*@}*/ + + /** + * Returns the borrow resulting from the subtraction of the second argument + * from the first argument. + */ + /*@{*/ + ir_binop_borrow, + /*@}*/ + + /** + * Takes one of two combinations of arguments: + * + * - mod(vecN, vecN) + * - mod(vecN, float) + * + * Does not take integer types. + */ + ir_binop_mod, + + /** + * \name Binary comparison operators which return a boolean vector. + * The type of both operands must be equal. + */ + /*@{*/ + ir_binop_less, + ir_binop_greater, + ir_binop_lequal, + ir_binop_gequal, + ir_binop_equal, + ir_binop_nequal, + /** + * Returns single boolean for whether all components of operands[0] + * equal the components of operands[1]. + */ + ir_binop_all_equal, + /** + * Returns single boolean for whether any component of operands[0] + * is not equal to the corresponding component of operands[1]. + */ + ir_binop_any_nequal, + /*@}*/ + + /** + * \name Bit-wise binary operations. + */ + /*@{*/ + ir_binop_lshift, + ir_binop_rshift, + ir_binop_bit_and, + ir_binop_bit_xor, + ir_binop_bit_or, + /*@}*/ + + ir_binop_logic_and, + ir_binop_logic_xor, + ir_binop_logic_or, + + ir_binop_dot, + ir_binop_min, + ir_binop_max, + + ir_binop_pow, + + /** + * \name Lowered floating point packing operations. + * + * \see lower_packing_builtins_visitor::split_pack_half_2x16 + */ + /*@{*/ + ir_binop_pack_half_2x16_split, + /*@}*/ + + /** + * Load a value the size of a given GLSL type from a uniform block. + * + * operand0 is the ir_constant uniform block index in the linked shader. + * operand1 is a byte offset within the uniform block. + */ + ir_binop_ubo_load, + + /** + * \name Multiplies a number by two to a power, part of ARB_gpu_shader5. + */ + /*@{*/ + ir_binop_ldexp, + /*@}*/ + + /** + * Extract a scalar from a vector + * + * operand0 is the vector + * operand1 is the index of the field to read from operand0 + */ + ir_binop_vector_extract, + + /** + * Interpolate fs input at offset + * + * operand0 is the fs input + * operand1 is the offset from the pixel center + */ + ir_binop_interpolate_at_offset, + + /** + * Interpolate fs input at sample position + * + * operand0 is the fs input + * operand1 is the sample ID + */ + ir_binop_interpolate_at_sample, + + /** + * A sentinel marking the last of the binary operations. + */ + ir_last_binop = ir_binop_interpolate_at_sample, + + /** + * \name Fused floating-point multiply-add, part of ARB_gpu_shader5. + */ + /*@{*/ + ir_triop_fma, + /*@}*/ + + ir_triop_lrp, + + /** + * \name Conditional Select + * + * A vector conditional select instruction (like ?:, but operating per- + * component on vectors). 
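+    *
+    * For example, csel(b, v0, v1) produces a vector whose i-th component
+    * is b[i] ? v0[i] : v1[i].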
+    */
+   /*@{*/
+   ir_triop_csel,
+   /*@}*/
+
+   ir_triop_bitfield_extract,
+
+   /**
+    * Generate a value with one field of a vector changed
+    *
+    * operand0 is the vector
+    * operand1 is the value to write into the vector result
+    * operand2 is the index in operand0 to be modified
+    */
+   ir_triop_vector_insert,
+
+   /**
+    * A sentinel marking the last of the ternary operations.
+    */
+   ir_last_triop = ir_triop_vector_insert,
+
+   ir_quadop_bitfield_insert,
+
+   ir_quadop_vector,
+
+   /**
+    * A sentinel marking the last of the quaternary operations.
+    */
+   ir_last_quadop = ir_quadop_vector,
+
+   /**
+    * A sentinel marking the last of all operations.
+    */
+   ir_last_opcode = ir_quadop_vector
+};
+
+class ir_expression : public ir_rvalue {
+public:
+   ir_expression(int op, const struct glsl_type *type,
+                 ir_rvalue *op0, ir_rvalue *op1 = NULL,
+                 ir_rvalue *op2 = NULL, ir_rvalue *op3 = NULL);
+
+   /**
+    * Constructor for unary operation expressions
+    */
+   ir_expression(int op, ir_rvalue *);
+
+   /**
+    * Constructor for binary operation expressions
+    */
+   ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1);
+
+   /**
+    * Constructor for ternary operation expressions
+    */
+   ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, ir_rvalue *op2);
+
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
+
+   virtual ir_expression *clone(void *mem_ctx, struct hash_table *ht) const;
+
+   /**
+    * Attempt to constant-fold the expression
+    *
+    * The "variable_context" hash table links ir_variable * to ir_constant *
+    * that represent the variables' values. \c NULL represents an empty
+    * context.
+    *
+    * If the expression cannot be constant folded, this method will return
+    * \c NULL.
+    */
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+   /**
+    * Determine the number of operands used by an expression operation
+    */
+   static unsigned int get_num_operands(ir_expression_operation);
+
+   /**
+    * Determine the number of operands used by this expression instance
+    */
+   unsigned int get_num_operands() const
+   {
+      return (this->operation == ir_quadop_vector)
+         ? this->type->vector_elements : get_num_operands(operation);
+   }
+
+   /**
+    * Return whether the expression operates on vectors horizontally.
+    */
+   bool is_horizontal() const
+   {
+      return operation == ir_binop_all_equal ||
+             operation == ir_binop_any_nequal ||
+             operation == ir_binop_dot ||
+             operation == ir_binop_vector_extract ||
+             operation == ir_triop_vector_insert ||
+             operation == ir_quadop_vector;
+   }
+
+   /**
+    * Return a string representing this expression's operator.
+    */
+   const char *operator_string();
+
+   /**
+    * Return a string representing the given expression operator.
+    */
+   static const char *operator_string(ir_expression_operation);
+
+
+   /**
+    * Do a reverse-lookup to translate the given string into an operator.
+    */
+   static ir_expression_operation get_operator(const char *);
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   virtual ir_variable *variable_referenced() const;
+
+   ir_expression_operation operation;
+   ir_rvalue *operands[4];
+};
+
+
+/**
+ * HIR instruction representing a high-level function call, containing a list
+ * of parameters and returning a value in the supplied temporary.
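+ *
+ * A construction sketch (\c mem_ctx, \c sig, \c result_var, and \c arg_var
+ * are assumed to exist already):
+ *
+ *    exec_list args;
+ *    args.push_tail(new(mem_ctx) ir_dereference_variable(arg_var));
+ *
+ *    ir_dereference_variable *ret =
+ *       new(mem_ctx) ir_dereference_variable(result_var);
+ *    ir_call *call = new(mem_ctx) ir_call(sig, ret, &args);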
+ */
+class ir_call : public ir_instruction {
+public:
+   ir_call(ir_function_signature *callee,
+           ir_dereference_variable *return_deref,
+           exec_list *actual_parameters)
+      : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(NULL), array_idx(NULL)
+   {
+      assert(callee->return_type != NULL);
+      actual_parameters->move_nodes_to(& this->actual_parameters);
+      this->use_builtin = callee->is_builtin();
+   }
+
+   ir_call(ir_function_signature *callee,
+           ir_dereference_variable *return_deref,
+           exec_list *actual_parameters,
+           ir_variable *var, ir_rvalue *array_idx)
+      : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(var), array_idx(array_idx)
+   {
+      assert(callee->return_type != NULL);
+      actual_parameters->move_nodes_to(& this->actual_parameters);
+      this->use_builtin = callee->is_builtin();
+   }
+
+   virtual ir_call *clone(void *mem_ctx, struct hash_table *ht) const;
+
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   /**
+    * Get the name of the function being called.
+    */
+   const char *callee_name() const
+   {
+      return callee->function_name();
+   }
+
+   /**
+    * Generates an inline version of the function before \c ir,
+    * storing the return value in return_deref.
+    */
+   void generate_inline(ir_instruction *ir);
+
+   /**
+    * Storage for the function's return value.
+    * This must be NULL if the return type is void.
+    */
+   ir_dereference_variable *return_deref;
+
+   /**
+    * The specific function signature being called.
+    */
+   ir_function_signature *callee;
+
+   /* List of ir_rvalue of parameters passed in this call. */
+   exec_list actual_parameters;
+
+   /** Should this call only bind to a built-in function? */
+   bool use_builtin;
+
+   /*
+    * ARB_shader_subroutine support -
+    * the subroutine uniform variable and array index
+    * rvalue to be used in the lowering pass later.
+    */
+   ir_variable *sub_var;
+   ir_rvalue *array_idx;
+};
+
+
+/**
+ * \name Jump-like IR instructions.
+ *
+ * These include \c break, \c continue, \c return, and \c discard.
+ */
+/*@{*/
+class ir_jump : public ir_instruction {
+protected:
+   ir_jump(enum ir_node_type t)
+      : ir_instruction(t)
+   {
+   }
+};
+
+class ir_return : public ir_jump {
+public:
+   ir_return()
+      : ir_jump(ir_type_return), value(NULL)
+   {
+   }
+
+   ir_return(ir_rvalue *value)
+      : ir_jump(ir_type_return), value(value)
+   {
+   }
+
+   virtual ir_return *clone(void *mem_ctx, struct hash_table *) const;
+
+   ir_rvalue *get_value() const
+   {
+      return value;
+   }
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   ir_rvalue *value;
+};
+
+
+/**
+ * Jump instructions used inside loops
+ *
+ * These include \c break and \c continue. The \c break within a loop is
+ * different from the \c break within a switch-statement.
+ *
+ * \sa ir_switch_jump
+ */
+class ir_loop_jump : public ir_jump {
+public:
+   enum jump_mode {
+      jump_break,
+      jump_continue
+   };
+
+   ir_loop_jump(jump_mode mode)
+      : ir_jump(ir_type_loop_jump)
+   {
+      this->mode = mode;
+   }
+
+   virtual ir_loop_jump *clone(void *mem_ctx, struct hash_table *) const;
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   bool is_break() const
+   {
+      return mode == jump_break;
+   }
+
+   bool is_continue() const
+   {
+      return mode == jump_continue;
+   }
+
+   /** Mode selector for the jump instruction. */
+   enum jump_mode mode;
+};
+
+/**
+ * IR instruction representing discard statements.
+ */
+class ir_discard : public ir_jump {
+public:
+   ir_discard()
+      : ir_jump(ir_type_discard)
+   {
+      this->condition = NULL;
+   }
+
+   ir_discard(ir_rvalue *cond)
+      : ir_jump(ir_type_discard)
+   {
+      this->condition = cond;
+   }
+
+   virtual ir_discard *clone(void *mem_ctx, struct hash_table *ht) const;
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   ir_rvalue *condition;
+};
+/*@}*/
+
+
+/**
+ * Texture sampling opcodes used in ir_texture
+ */
+enum ir_texture_opcode {
+   ir_tex,               /**< Regular texture look-up */
+   ir_txb,               /**< Texture look-up with LOD bias */
+   ir_txl,               /**< Texture look-up with explicit LOD */
+   ir_txd,               /**< Texture look-up with partial derivatives */
+   ir_txf,               /**< Texel fetch with explicit LOD */
+   ir_txf_ms,            /**< Multisample texture fetch */
+   ir_txs,               /**< Texture size */
+   ir_lod,               /**< Texture lod query */
+   ir_tg4,               /**< Texture gather */
+   ir_query_levels,      /**< Texture levels query */
+   ir_texture_samples,   /**< Texture samples query */
+   ir_samples_identical, /**< Query whether all samples are definitely identical. */
+};
+
+
+/**
+ * IR instruction to sample a texture
+ *
+ * The specific form of the IR instruction depends on the \c mode value
+ * selected from \c ir_texture_opcodes. In the printed IR, these will
+ * appear as:
+ *
+ *                                    Texel offset (0 or an expression)
+ *                                    | Projection divisor
+ *                                    | |  Shadow comparitor
+ *                                    | |  |
+ *                                    v v  v
+ * (tex <type> <sampler> <coordinate> 0 1 ( ))
+ * (txb <type> <sampler> <coordinate> 0 1 ( ) <bias>)
+ * (txl <type> <sampler> <coordinate> 0 1 ( ) <lod>)
+ * (txd <type> <sampler> <coordinate> 0 1 ( ) (dPdx dPdy))
+ * (txf <type> <sampler> <coordinate> 0       <lod>)
+ * (txf_ms
+ *      <type> <sampler> <coordinate>         <sample_index>)
+ * (txs <type> <sampler> <lod>)
+ * (lod <type> <sampler> <coordinate>)
+ * (tg4 <type> <sampler> <coordinate> <offset> <component>)
+ * (query_levels <type> <sampler>)
+ * (samples_identical <sampler> <coordinate>)
+ */
+class ir_texture : public ir_rvalue {
+public:
+   ir_texture(enum ir_texture_opcode op)
+      : ir_rvalue(ir_type_texture),
+        op(op), sampler(NULL), coordinate(NULL), projector(NULL),
+        shadow_comparitor(NULL), offset(NULL)
+   {
+      memset(&lod_info, 0, sizeof(lod_info));
+   }
+
+   virtual ir_texture *clone(void *mem_ctx, struct hash_table *) const;
+
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
+
+   /**
+    * Return a string representing the ir_texture_opcode.
+    */
+   const char *opcode_string();
+
+   /** Set the sampler and type. */
+   void set_sampler(ir_dereference *sampler, const glsl_type *type);
+
+   /**
+    * Do a reverse-lookup to translate a string into an ir_texture_opcode.
+    */
+   static ir_texture_opcode get_opcode(const char *);
+
+   enum ir_texture_opcode op;
+
+   /** Sampler to use for the texture access.
*/ + ir_dereference *sampler; + + /** Texture coordinate to sample */ + ir_rvalue *coordinate; + + /** + * Value used for projective divide. + * + * If there is no projective divide (the common case), this will be + * \c NULL. Optimization passes should check for this to point to a constant + * of 1.0 and replace that with \c NULL. + */ + ir_rvalue *projector; + + /** + * Coordinate used for comparison on shadow look-ups. + * + * If there is no shadow comparison, this will be \c NULL. For the + * \c ir_txf opcode, this *must* be \c NULL. + */ + ir_rvalue *shadow_comparitor; + + /** Texel offset. */ + ir_rvalue *offset; + + union { + ir_rvalue *lod; /**< Floating point LOD */ + ir_rvalue *bias; /**< Floating point LOD bias */ + ir_rvalue *sample_index; /**< MSAA sample index */ + ir_rvalue *component; /**< Gather component selector */ + struct { + ir_rvalue *dPdx; /**< Partial derivative of coordinate wrt X */ + ir_rvalue *dPdy; /**< Partial derivative of coordinate wrt Y */ + } grad; + } lod_info; +}; + + +struct ir_swizzle_mask { + unsigned x:2; + unsigned y:2; + unsigned z:2; + unsigned w:2; + + /** + * Number of components in the swizzle. + */ + unsigned num_components:3; + + /** + * Does the swizzle contain duplicate components? + * + * L-value swizzles cannot contain duplicate components. + */ + unsigned has_duplicates:1; +}; + + +class ir_swizzle : public ir_rvalue { +public: + ir_swizzle(ir_rvalue *, unsigned x, unsigned y, unsigned z, unsigned w, + unsigned count); + + ir_swizzle(ir_rvalue *val, const unsigned *components, unsigned count); + + ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask); + + virtual ir_swizzle *clone(void *mem_ctx, struct hash_table *) const; + + virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); + + /** + * Construct an ir_swizzle from the textual representation. Can fail. + */ + static ir_swizzle *create(ir_rvalue *, const char *, unsigned vector_length); + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + virtual bool equals(const ir_instruction *ir, + enum ir_node_type ignore = ir_type_unset) const; + + bool is_lvalue() const + { + return val->is_lvalue() && !mask.has_duplicates; + } + + /** + * Get the variable that is ultimately referenced by an r-value + */ + virtual ir_variable *variable_referenced() const; + + ir_rvalue *val; + ir_swizzle_mask mask; + +private: + /** + * Initialize the mask component of a swizzle + * + * This is used by the \c ir_swizzle constructors. 
 */
+   void init_mask(const unsigned *components, unsigned count);
+};
+
+
+class ir_dereference : public ir_rvalue {
+public:
+   virtual ir_dereference *clone(void *mem_ctx, struct hash_table *) const = 0;
+
+   bool is_lvalue() const;
+
+   /**
+    * Get the variable that is ultimately referenced by an r-value
+    */
+   virtual ir_variable *variable_referenced() const = 0;
+
+protected:
+   ir_dereference(enum ir_node_type t)
+      : ir_rvalue(t)
+   {
+   }
+};
+
+
+class ir_dereference_variable : public ir_dereference {
+public:
+   ir_dereference_variable(ir_variable *var);
+
+   virtual ir_dereference_variable *clone(void *mem_ctx,
+                                          struct hash_table *) const;
+
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
+
+   /**
+    * Get the variable that is ultimately referenced by an r-value
+    */
+   virtual ir_variable *variable_referenced() const
+   {
+      return this->var;
+   }
+
+   virtual ir_variable *whole_variable_referenced()
+   {
+      /* ir_dereference_variable objects always dereference the entire
+       * variable. However, if this dereference is dereferenced by anything
+       * else, the complete dereference chain is not a whole-variable
+       * dereference. This method should only be called on the topmost
+       * ir_rvalue in a dereference chain.
+       */
+      return this->var;
+   }
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   /**
+    * Object being dereferenced.
+    */
+   ir_variable *var;
+};
+
+
+class ir_dereference_array : public ir_dereference {
+public:
+   ir_dereference_array(ir_rvalue *value, ir_rvalue *array_index);
+
+   ir_dereference_array(ir_variable *var, ir_rvalue *array_index);
+
+   virtual ir_dereference_array *clone(void *mem_ctx,
+                                       struct hash_table *) const;
+
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
+
+   /**
+    * Get the variable that is ultimately referenced by an r-value
+    */
+   virtual ir_variable *variable_referenced() const
+   {
+      return this->array->variable_referenced();
+   }
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   ir_rvalue *array;
+   ir_rvalue *array_index;
+
+private:
+   void set_array(ir_rvalue *value);
+};
+
+
+class ir_dereference_record : public ir_dereference {
+public:
+   ir_dereference_record(ir_rvalue *value, const char *field);
+
+   ir_dereference_record(ir_variable *var, const char *field);
+
+   virtual ir_dereference_record *clone(void *mem_ctx,
+                                        struct hash_table *) const;
+
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+   /**
+    * Get the variable that is ultimately referenced by an r-value
+    */
+   virtual ir_variable *variable_referenced() const
+   {
+      return this->record->variable_referenced();
+   }
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   ir_rvalue *record;
+   const char *field;
+};
+
+
+/**
+ * Data stored in an ir_constant
+ */
+union ir_constant_data {
+   unsigned u[16];
+   int i[16];
+   float f[16];
+   bool b[16];
+   double d[16];
+};
+
+
+class ir_constant : public ir_rvalue {
+public:
+   ir_constant(const struct glsl_type *type, const ir_constant_data *data);
+   ir_constant(bool b, unsigned vector_elements=1);
+   ir_constant(unsigned int u, unsigned vector_elements=1);
+   ir_constant(int i, unsigned vector_elements=1);
+   ir_constant(float f, unsigned vector_elements=1);
+   ir_constant(double d, unsigned vector_elements=1);
+
+   /**
+    * Construct an ir_constant from a list of ir_constant values
+    */
+   ir_constant(const struct glsl_type *type, exec_list *values);
+
+   /**
+    * Construct an ir_constant from a scalar component of another ir_constant
+    *
+    * The new \c ir_constant inherits the type of the component from the
+    * source constant.
+    *
+    * \note
+    * In the case of a matrix constant, the new constant is a scalar, \b not
+    * a vector.
+    */
+   ir_constant(const ir_constant *c, unsigned i);
+
+   /**
+    * Return a new ir_constant of the specified type containing all zeros.
+    */
+   static ir_constant *zero(void *mem_ctx, const glsl_type *type);
+
+   virtual ir_constant *clone(void *mem_ctx, struct hash_table *) const;
+
+   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
+
+   virtual void accept(ir_visitor *v)
+   {
+      v->visit(this);
+   }
+
+   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
+
+   virtual bool equals(const ir_instruction *ir,
+                       enum ir_node_type ignore = ir_type_unset) const;
+
+   /**
+    * Get a particular component of a constant as a specific type
+    *
+    * This is useful, for example, to get a value from an integer constant
+    * as a float or bool. This appears frequently when constructors are
+    * called with all constant parameters.
+    */
+   /*@{*/
+   bool get_bool_component(unsigned i) const;
+   float get_float_component(unsigned i) const;
+   double get_double_component(unsigned i) const;
+   int get_int_component(unsigned i) const;
+   unsigned get_uint_component(unsigned i) const;
+   /*@}*/
+
+   ir_constant *get_array_element(unsigned i) const;
+
+   ir_constant *get_record_field(const char *name);
+
+   /**
+    * Copy the values from another constant at a given offset.
+    *
+    * The offset is ignored for array or struct copies; it's only for
+    * scalars or vectors into vectors or matrices.
+    *
+    * With identical types on both sides and zero offset it's clone()
+    * without creating a new object.
+    */
+
+   void copy_offset(ir_constant *src, int offset);
+
+   /**
+    * Copy the values from another constant at a given offset and
+    * following an assign-like mask.
+    *
+    * The mask is ignored for scalars.
+    *
+    * Note that this function only handles what assign can handle,
+    * i.e. at most a vector as source and a column of a matrix as
+    * destination.
+    */
+
+   void copy_masked_offset(ir_constant *src, int offset, unsigned int mask);
+
+   /**
+    * Determine whether a constant has the same value as another constant
+    *
+    * \sa ir_constant::is_zero, ir_constant::is_one,
+    * ir_constant::is_negative_one
+    */
+   bool has_value(const ir_constant *) const;
+
+   /**
+    * Return true if this ir_constant represents the given value.
+    *
+    * For vectors, this checks that each component is the given value.
+    */
+   virtual bool is_value(float f, int i) const;
+   virtual bool is_zero() const;
+   virtual bool is_one() const;
+   virtual bool is_negative_one() const;
+
+   /**
+    * Return true for constants that could be stored as 16-bit unsigned values.
+    *
+    * Note that this will return true even for signed integer ir_constants, as
+    * long as the value is non-negative and fits in 16 bits.
+    */
+   virtual bool is_uint16_constant() const;
+
+   /**
+    * Value of the constant.
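+    * (For instance, a \c vec3 constant keeps its three components in
+    * \c value.f[0] through \c value.f[2].)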
+ * + * The field used to back the values supplied by the constant is determined + * by the type associated with the \c ir_instruction. Constants may be + * scalars, vectors, or matrices. + */ + union ir_constant_data value; + + /* Array elements */ + ir_constant **array_elements; + + /* Structure fields */ + exec_list components; + +private: + /** + * Parameterless constructor only used by the clone method + */ + ir_constant(void); +}; + +/** + * IR instruction to emit a vertex in a geometry shader. + */ +class ir_emit_vertex : public ir_instruction { +public: + ir_emit_vertex(ir_rvalue *stream) + : ir_instruction(ir_type_emit_vertex), + stream(stream) + { + assert(stream); + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_emit_vertex *clone(void *mem_ctx, struct hash_table *ht) const + { + return new(mem_ctx) ir_emit_vertex(this->stream->clone(mem_ctx, ht)); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + int stream_id() const + { + return stream->as_constant()->value.i[0]; + } + + ir_rvalue *stream; +}; + +/** + * IR instruction to complete the current primitive and start a new one in a + * geometry shader. + */ +class ir_end_primitive : public ir_instruction { +public: + ir_end_primitive(ir_rvalue *stream) + : ir_instruction(ir_type_end_primitive), + stream(stream) + { + assert(stream); + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_end_primitive *clone(void *mem_ctx, struct hash_table *ht) const + { + return new(mem_ctx) ir_end_primitive(this->stream->clone(mem_ctx, ht)); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); + + int stream_id() const + { + return stream->as_constant()->value.i[0]; + } + + ir_rvalue *stream; +}; + +/** + * IR instruction for tessellation control and compute shader barrier. + */ +class ir_barrier : public ir_instruction { +public: + ir_barrier() + : ir_instruction(ir_type_barrier) + { + } + + virtual void accept(ir_visitor *v) + { + v->visit(this); + } + + virtual ir_barrier *clone(void *mem_ctx, struct hash_table *) const + { + return new(mem_ctx) ir_barrier(); + } + + virtual ir_visitor_status accept(ir_hierarchical_visitor *); +}; + +/*@}*/ + +/** + * Apply a visitor to each IR node in a list + */ +void +visit_exec_list(exec_list *list, ir_visitor *visitor); + +/** + * Validate invariants on each IR node in a list + */ +void validate_ir_tree(exec_list *instructions); + +struct _mesa_glsl_parse_state; +struct gl_shader_program; + +/** + * Detect whether an unlinked shader contains static recursion + * + * If the list of instructions is determined to contain static recursion, + * \c _mesa_glsl_error will be called to emit error messages for each function + * that is in the recursion cycle. + */ +void +detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, + exec_list *instructions); + +/** + * Detect whether a linked shader contains static recursion + * + * If the list of instructions is determined to contain static recursion, + * \c link_error_printf will be called to emit error messages for each function + * that is in the recursion cycle. In addition, + * \c gl_shader_program::LinkStatus will be set to false. 
 */
+void
+detect_recursion_linked(struct gl_shader_program *prog,
+                        exec_list *instructions);
+
+/**
+ * Make a clone of each IR instruction in a list
+ *
+ * \param in   List of IR instructions that are to be cloned
+ * \param out  List to hold the cloned instructions
+ */
+void
+clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in);
+
+extern void
+_mesa_glsl_initialize_variables(exec_list *instructions,
+                                struct _mesa_glsl_parse_state *state);
+
+extern void
+_mesa_glsl_initialize_derived_variables(gl_shader *shader);
+
+extern void
+_mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state);
+
+extern void
+_mesa_glsl_initialize_builtin_functions();
+
+extern ir_function_signature *
+_mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
+                                 const char *name, exec_list *actual_parameters);
+
+extern ir_function *
+_mesa_glsl_find_builtin_function_by_name(const char *name);
+
+extern gl_shader *
+_mesa_glsl_get_builtin_function_shader(void);
+
+extern ir_function_signature *
+_mesa_get_main_function_signature(gl_shader *sh);
+
+extern void
+_mesa_glsl_release_functions(void);
+
+extern void
+_mesa_glsl_release_builtin_functions(void);
+
+extern void
+reparent_ir(exec_list *list, void *mem_ctx);
+
+struct glsl_symbol_table;
+
+extern void
+import_prototypes(const exec_list *source, exec_list *dest,
+                  struct glsl_symbol_table *symbols, void *mem_ctx);
+
+extern bool
+ir_has_call(ir_instruction *ir);
+
+extern void
+do_set_program_inouts(exec_list *instructions, struct gl_program *prog,
+                      gl_shader_stage shader_stage);
+
+extern char *
+prototype_string(const glsl_type *return_type, const char *name,
+                 exec_list *parameters);
+
+const char *
+mode_string(const ir_variable *var);
+
+/**
+ * Built-in / reserved GL variable names start with "gl_"
+ */
+static inline bool
+is_gl_identifier(const char *s)
+{
+   return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_';
+}
+
+extern "C" {
+#endif /* __cplusplus */
+
+extern void _mesa_print_ir(FILE *f, struct exec_list *instructions,
+                           struct _mesa_glsl_parse_state *state);
+
+extern void
+fprint_ir(FILE *f, const void *instruction);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+unsigned
+vertices_per_prim(GLenum prim);
+
+#endif /* IR_H */
diff --git a/src/compiler/glsl/ir_basic_block.cpp b/src/compiler/glsl/ir_basic_block.cpp
new file mode 100644
index 00000000000..15481aa47f6
--- /dev/null
+++ b/src/compiler/glsl/ir_basic_block.cpp
@@ -0,0 +1,99 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_basic_block.cpp
+ *
+ * Basic block analysis of instruction streams.
+ */
+
+#include "ir.h"
+#include "ir_basic_block.h"
+
+/**
+ * Calls a user function for every basic block in the instruction stream.
+ *
+ * Basic block analysis is pretty easy in our IR thanks to the lack of
+ * unstructured control flow. We've got:
+ *
+ * ir_loop (for () {}, while () {}, do {} while ())
+ * ir_loop_jump (break, continue)
+ * ir_if () {}
+ * ir_return
+ * ir_call()
+ *
+ * Note that the basic blocks returned by this don't encompass all
+ * operations performed by the program -- for example, if conditions
+ * don't get returned, nor do the assignments that will be generated
+ * for ir_call parameters.
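+ *
+ * A minimal usage sketch (the callback and the counter are illustrative
+ * assumptions, not part of this interface; \c instructions is an
+ * \c exec_list *):
+ *
+ *    static void count_bb(ir_instruction *first, ir_instruction *last,
+ *                         void *data)
+ *    {
+ *       (void) first; (void) last;
+ *       (*(unsigned *) data)++;
+ *    }
+ *
+ *    unsigned num_bbs = 0;
+ *    call_for_basic_blocks(instructions, count_bb, &num_bbs);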
 */
+void call_for_basic_blocks(exec_list *instructions,
+                           void (*callback)(ir_instruction *first,
+                                            ir_instruction *last,
+                                            void *data),
+                           void *data)
+{
+   ir_instruction *leader = NULL;
+   ir_instruction *last = NULL;
+
+   foreach_in_list(ir_instruction, ir, instructions) {
+      ir_if *ir_if;
+      ir_loop *ir_loop;
+      ir_function *ir_function;
+
+      if (!leader)
+         leader = ir;
+
+      if ((ir_if = ir->as_if())) {
+         callback(leader, ir, data);
+         leader = NULL;
+
+         call_for_basic_blocks(&ir_if->then_instructions, callback, data);
+         call_for_basic_blocks(&ir_if->else_instructions, callback, data);
+      } else if ((ir_loop = ir->as_loop())) {
+         callback(leader, ir, data);
+         leader = NULL;
+         call_for_basic_blocks(&ir_loop->body_instructions, callback, data);
+      } else if (ir->as_jump() || ir->as_call()) {
+         callback(leader, ir, data);
+         leader = NULL;
+      } else if ((ir_function = ir->as_function())) {
+         /* A function definition doesn't interrupt our basic block
+          * since execution doesn't go into it. We should process the
+          * bodies of its signatures for BBs, though.
+          *
+          * Note that we miss an opportunity for producing more
+          * maximal BBs between the instructions that precede main()
+          * and the body of main(). Perhaps those instructions ought
+          * to live inside of main().
+          */
+         foreach_in_list(ir_function_signature, ir_sig, &ir_function->signatures) {
+            call_for_basic_blocks(&ir_sig->body, callback, data);
+         }
+      }
+      last = ir;
+   }
+   if (leader) {
+      callback(leader, last, data);
+   }
+}
diff --git a/src/compiler/glsl/ir_basic_block.h b/src/compiler/glsl/ir_basic_block.h
new file mode 100644
index 00000000000..dbd678b5c4f
--- /dev/null
+++ b/src/compiler/glsl/ir_basic_block.h
@@ -0,0 +1,28 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +void call_for_basic_blocks(exec_list *instructions, + void (*callback)(ir_instruction *first, + ir_instruction *last, + void *data), + void *data); diff --git a/src/compiler/glsl/ir_builder.cpp b/src/compiler/glsl/ir_builder.cpp new file mode 100644 index 00000000000..c9cf1240dfe --- /dev/null +++ b/src/compiler/glsl/ir_builder.cpp @@ -0,0 +1,612 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "ir_builder.h" +#include "program/prog_instruction.h" + +using namespace ir_builder; + +namespace ir_builder { + +void +ir_factory::emit(ir_instruction *ir) +{ + instructions->push_tail(ir); +} + +ir_variable * +ir_factory::make_temp(const glsl_type *type, const char *name) +{ + ir_variable *var; + + var = new(mem_ctx) ir_variable(type, name, ir_var_temporary); + emit(var); + + return var; +} + +ir_assignment * +assign(deref lhs, operand rhs, operand condition, int writemask) +{ + void *mem_ctx = ralloc_parent(lhs.val); + + ir_assignment *assign = new(mem_ctx) ir_assignment(lhs.val, + rhs.val, + condition.val, + writemask); + + return assign; +} + +ir_assignment * +assign(deref lhs, operand rhs) +{ + return assign(lhs, rhs, (1 << lhs.val->type->vector_elements) - 1); +} + +ir_assignment * +assign(deref lhs, operand rhs, int writemask) +{ + return assign(lhs, rhs, (ir_rvalue *) NULL, writemask); +} + +ir_assignment * +assign(deref lhs, operand rhs, operand condition) +{ + return assign(lhs, rhs, condition, (1 << lhs.val->type->vector_elements) - 1); +} + +ir_return * +ret(operand retval) +{ + void *mem_ctx = ralloc_parent(retval.val); + return new(mem_ctx) ir_return(retval.val); +} + +ir_swizzle * +swizzle(operand a, int swizzle, int components) +{ + void *mem_ctx = ralloc_parent(a.val); + + return new(mem_ctx) ir_swizzle(a.val, + GET_SWZ(swizzle, 0), + GET_SWZ(swizzle, 1), + GET_SWZ(swizzle, 2), + GET_SWZ(swizzle, 3), + components); +} + +ir_swizzle * +swizzle_for_size(operand a, unsigned components) +{ + void *mem_ctx = ralloc_parent(a.val); + + if (a.val->type->vector_elements < components) + components = a.val->type->vector_elements; + + unsigned s[4] = { 0, 1, 2, 3 }; + for (int i = components; i < 4; i++) + s[i] = components - 1; + + return new(mem_ctx) ir_swizzle(a.val, s, components); +} + +ir_swizzle * +swizzle_xxxx(operand a) +{ + return swizzle(a, SWIZZLE_XXXX, 4); +} + +ir_swizzle * +swizzle_yyyy(operand a) +{ + return swizzle(a, SWIZZLE_YYYY, 4); +} + +ir_swizzle * +swizzle_zzzz(operand a) +{ + return swizzle(a, SWIZZLE_ZZZZ, 4); +} + +ir_swizzle * +swizzle_wwww(operand a) +{ + return swizzle(a, SWIZZLE_WWWW, 4); +} + +ir_swizzle * +swizzle_x(operand a) +{ + return swizzle(a, SWIZZLE_XXXX, 1); +} + +ir_swizzle * +swizzle_y(operand a) +{ + return swizzle(a, SWIZZLE_YYYY, 1); +} + +ir_swizzle * +swizzle_z(operand a) +{ + return swizzle(a, SWIZZLE_ZZZZ, 1); +} + +ir_swizzle * +swizzle_w(operand a) +{ + return swizzle(a, SWIZZLE_WWWW, 1); +} + +ir_swizzle * +swizzle_xy(operand a) +{ + return swizzle(a, SWIZZLE_XYZW, 2); +} + +ir_swizzle * +swizzle_xyz(operand a) +{ + return swizzle(a, SWIZZLE_XYZW, 3); +} + +ir_swizzle * +swizzle_xyzw(operand a) +{ + return swizzle(a, SWIZZLE_XYZW, 4); +} + +ir_expression * +expr(ir_expression_operation op, operand a) +{ + void *mem_ctx = ralloc_parent(a.val); + + return new(mem_ctx) ir_expression(op, a.val); +} + +ir_expression * +expr(ir_expression_operation op, operand a, operand b) +{ + void *mem_ctx = ralloc_parent(a.val); + + return new(mem_ctx) ir_expression(op, a.val, b.val); +} + +ir_expression * +expr(ir_expression_operation op, operand a, operand b, operand c) +{ + void *mem_ctx = ralloc_parent(a.val); + + return new(mem_ctx) ir_expression(op, a.val, b.val, c.val); +} + +ir_expression *add(operand a, operand b) +{ + return expr(ir_binop_add, a, b); +} + +ir_expression *sub(operand a, operand b) +{ + return expr(ir_binop_sub, a, b); +} + +ir_expression *min2(operand a, operand b) +{ + return expr(ir_binop_min, a, b); +} + 
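A note on usage: ir_factory tracks the destination instruction list and memory context, while the free helpers above each ralloc their node into the context of their first operand. A minimal sketch of how a lowering pass might drive them; the function, the variable names, and the vec4 type are hypothetical illustrations, not part of this patch:

   /* Hypothetical pass fragment: emit "sum = saturate(a + b)". */
   static void
   emit_saturated_add(exec_list *instructions, void *mem_ctx,
                      ir_variable *a, ir_variable *b)
   {
      ir_factory body(instructions, mem_ctx);

      /* make_temp() allocates an ir_var_temporary and emits its
       * declaration into the list. */
      ir_variable *sum = body.make_temp(glsl_type::vec4_type, "sum");

      /* a and b convert to operand implicitly, so the tree reads like
       * scalar math; assign() without a writemask covers every
       * component of the LHS. */
      body.emit(assign(sum, saturate(add(a, b))));
   }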
+ir_expression *max2(operand a, operand b) +{ + return expr(ir_binop_max, a, b); +} + +ir_expression *mul(operand a, operand b) +{ + return expr(ir_binop_mul, a, b); +} + +ir_expression *imul_high(operand a, operand b) +{ + return expr(ir_binop_imul_high, a, b); +} + +ir_expression *div(operand a, operand b) +{ + return expr(ir_binop_div, a, b); +} + +ir_expression *carry(operand a, operand b) +{ + return expr(ir_binop_carry, a, b); +} + +ir_expression *borrow(operand a, operand b) +{ + return expr(ir_binop_borrow, a, b); +} + +ir_expression *trunc(operand a) +{ + return expr(ir_unop_trunc, a); +} + +ir_expression *round_even(operand a) +{ + return expr(ir_unop_round_even, a); +} + +ir_expression *fract(operand a) +{ + return expr(ir_unop_fract, a); +} + +/* dot for vectors, mul for scalars */ +ir_expression *dot(operand a, operand b) +{ + assert(a.val->type == b.val->type); + + if (a.val->type->vector_elements == 1) + return expr(ir_binop_mul, a, b); + + return expr(ir_binop_dot, a, b); +} + +ir_expression* +clamp(operand a, operand b, operand c) +{ + return expr(ir_binop_min, expr(ir_binop_max, a, b), c); +} + +ir_expression * +saturate(operand a) +{ + return expr(ir_unop_saturate, a); +} + +ir_expression * +abs(operand a) +{ + return expr(ir_unop_abs, a); +} + +ir_expression * +neg(operand a) +{ + return expr(ir_unop_neg, a); +} + +ir_expression * +sin(operand a) +{ + return expr(ir_unop_sin, a); +} + +ir_expression * +cos(operand a) +{ + return expr(ir_unop_cos, a); +} + +ir_expression * +exp(operand a) +{ + return expr(ir_unop_exp, a); +} + +ir_expression * +rsq(operand a) +{ + return expr(ir_unop_rsq, a); +} + +ir_expression * +sqrt(operand a) +{ + return expr(ir_unop_sqrt, a); +} + +ir_expression * +log(operand a) +{ + return expr(ir_unop_log, a); +} + +ir_expression * +sign(operand a) +{ + return expr(ir_unop_sign, a); +} + +ir_expression * +subr_to_int(operand a) +{ + return expr(ir_unop_subroutine_to_int, a); +} + +ir_expression* +equal(operand a, operand b) +{ + return expr(ir_binop_equal, a, b); +} + +ir_expression* +nequal(operand a, operand b) +{ + return expr(ir_binop_nequal, a, b); +} + +ir_expression* +less(operand a, operand b) +{ + return expr(ir_binop_less, a, b); +} + +ir_expression* +greater(operand a, operand b) +{ + return expr(ir_binop_greater, a, b); +} + +ir_expression* +lequal(operand a, operand b) +{ + return expr(ir_binop_lequal, a, b); +} + +ir_expression* +gequal(operand a, operand b) +{ + return expr(ir_binop_gequal, a, b); +} + +ir_expression* +logic_not(operand a) +{ + return expr(ir_unop_logic_not, a); +} + +ir_expression* +logic_and(operand a, operand b) +{ + return expr(ir_binop_logic_and, a, b); +} + +ir_expression* +logic_or(operand a, operand b) +{ + return expr(ir_binop_logic_or, a, b); +} + +ir_expression* +bit_not(operand a) +{ + return expr(ir_unop_bit_not, a); +} + +ir_expression* +bit_and(operand a, operand b) +{ + return expr(ir_binop_bit_and, a, b); +} + +ir_expression* +bit_or(operand a, operand b) +{ + return expr(ir_binop_bit_or, a, b); +} + +ir_expression* +lshift(operand a, operand b) +{ + return expr(ir_binop_lshift, a, b); +} + +ir_expression* +rshift(operand a, operand b) +{ + return expr(ir_binop_rshift, a, b); +} + +ir_expression* +f2i(operand a) +{ + return expr(ir_unop_f2i, a); +} + +ir_expression* +bitcast_f2i(operand a) +{ + return expr(ir_unop_bitcast_f2i, a); +} + +ir_expression* +i2f(operand a) +{ + return expr(ir_unop_i2f, a); +} + +ir_expression* +bitcast_i2f(operand a) +{ + return expr(ir_unop_bitcast_i2f, a); +} + 
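Two of the helpers defined here are compositions rather than single opcodes, which is worth keeping in mind when reading the IR they produce: clamp() nests ir_binop_max inside ir_binop_min, and dot() degrades to an ordinary multiply for scalars. A short sketch, with hypothetical variables:

   /* Hypothetical: s, lo, hi are float ir_variables; v is a vec4. */
   static void
   composition_examples(ir_variable *s, ir_variable *lo, ir_variable *hi,
                        ir_variable *v)
   {
      /* Builds min(max(s, lo), hi): two nested ir_expression nodes. */
      ir_rvalue *clamped = clamp(s, lo, hi);

      /* vec4 operands yield ir_binop_dot; float operands would have
       * yielded ir_binop_mul, so callers need not special-case
       * single-component types. */
      ir_rvalue *len2 = dot(v, v);

      (void) clamped;
      (void) len2;
   }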
+ir_expression* +i2u(operand a) +{ + return expr(ir_unop_i2u, a); +} + +ir_expression* +u2i(operand a) +{ + return expr(ir_unop_u2i, a); +} + +ir_expression* +f2u(operand a) +{ + return expr(ir_unop_f2u, a); +} + +ir_expression* +bitcast_f2u(operand a) +{ + return expr(ir_unop_bitcast_f2u, a); +} + +ir_expression* +u2f(operand a) +{ + return expr(ir_unop_u2f, a); +} + +ir_expression* +bitcast_u2f(operand a) +{ + return expr(ir_unop_bitcast_u2f, a); +} + +ir_expression* +i2b(operand a) +{ + return expr(ir_unop_i2b, a); +} + +ir_expression* +b2i(operand a) +{ + return expr(ir_unop_b2i, a); +} + +ir_expression * +f2b(operand a) +{ + return expr(ir_unop_f2b, a); +} + +ir_expression * +b2f(operand a) +{ + return expr(ir_unop_b2f, a); +} + +ir_expression * +interpolate_at_centroid(operand a) +{ + return expr(ir_unop_interpolate_at_centroid, a); +} + +ir_expression * +interpolate_at_offset(operand a, operand b) +{ + return expr(ir_binop_interpolate_at_offset, a, b); +} + +ir_expression * +interpolate_at_sample(operand a, operand b) +{ + return expr(ir_binop_interpolate_at_sample, a, b); +} + +ir_expression * +f2d(operand a) +{ + return expr(ir_unop_f2d, a); +} + +ir_expression * +i2d(operand a) +{ + return expr(ir_unop_i2d, a); +} + +ir_expression * +u2d(operand a) +{ + return expr(ir_unop_u2d, a); +} + +ir_expression * +fma(operand a, operand b, operand c) +{ + return expr(ir_triop_fma, a, b, c); +} + +ir_expression * +lrp(operand x, operand y, operand a) +{ + return expr(ir_triop_lrp, x, y, a); +} + +ir_expression * +csel(operand a, operand b, operand c) +{ + return expr(ir_triop_csel, a, b, c); +} + +ir_expression * +bitfield_extract(operand a, operand b, operand c) +{ + return expr(ir_triop_bitfield_extract, a, b, c); +} + +ir_expression * +bitfield_insert(operand a, operand b, operand c, operand d) +{ + void *mem_ctx = ralloc_parent(a.val); + return new(mem_ctx) ir_expression(ir_quadop_bitfield_insert, + a.val->type, a.val, b.val, c.val, d.val); +} + +ir_if* +if_tree(operand condition, + ir_instruction *then_branch) +{ + assert(then_branch != NULL); + + void *mem_ctx = ralloc_parent(condition.val); + + ir_if *result = new(mem_ctx) ir_if(condition.val); + result->then_instructions.push_tail(then_branch); + return result; +} + +ir_if* +if_tree(operand condition, + ir_instruction *then_branch, + ir_instruction *else_branch) +{ + assert(then_branch != NULL); + assert(else_branch != NULL); + + void *mem_ctx = ralloc_parent(condition.val); + + ir_if *result = new(mem_ctx) ir_if(condition.val); + result->then_instructions.push_tail(then_branch); + result->else_instructions.push_tail(else_branch); + return result; +} + +} /* namespace ir_builder */ diff --git a/src/compiler/glsl/ir_builder.h b/src/compiler/glsl/ir_builder.h new file mode 100644 index 00000000000..b483ebf6269 --- /dev/null +++ b/src/compiler/glsl/ir_builder.h @@ -0,0 +1,230 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * 
Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "ir.h" + +namespace ir_builder { + +#ifndef WRITEMASK_X +enum writemask { + WRITEMASK_X = 0x1, + WRITEMASK_Y = 0x2, + WRITEMASK_Z = 0x4, + WRITEMASK_W = 0x8, +}; +#endif + +/** + * This little class exists to let the helper expression generators + * take either an ir_rvalue * or an ir_variable * to be automatically + * dereferenced, while still providing compile-time type checking. + * + * You don't have to explicitly call the constructor -- C++ will see + * that you passed an ir_variable, and silently call the + * operand(ir_variable *var) constructor behind your back. + */ +class operand { +public: + operand(ir_rvalue *val) + : val(val) + { + } + + operand(ir_variable *var) + { + void *mem_ctx = ralloc_parent(var); + val = new(mem_ctx) ir_dereference_variable(var); + } + + ir_rvalue *val; +}; + +/** Automatic generator for ir_dereference_variable on assignment LHS. + * + * \sa operand + */ +class deref { +public: + deref(ir_dereference *val) + : val(val) + { + } + + deref(ir_variable *var) + { + void *mem_ctx = ralloc_parent(var); + val = new(mem_ctx) ir_dereference_variable(var); + } + + + ir_dereference *val; +}; + +class ir_factory { +public: + ir_factory(exec_list *instructions = NULL, void *mem_ctx = NULL) + : instructions(instructions), + mem_ctx(mem_ctx) + { + return; + } + + void emit(ir_instruction *ir); + ir_variable *make_temp(const glsl_type *type, const char *name); + + ir_constant* + constant(float f) + { + return new(mem_ctx) ir_constant(f); + } + + ir_constant* + constant(int i) + { + return new(mem_ctx) ir_constant(i); + } + + ir_constant* + constant(unsigned u) + { + return new(mem_ctx) ir_constant(u); + } + + ir_constant* + constant(bool b) + { + return new(mem_ctx) ir_constant(b); + } + + exec_list *instructions; + void *mem_ctx; +}; + +ir_assignment *assign(deref lhs, operand rhs); +ir_assignment *assign(deref lhs, operand rhs, int writemask); +ir_assignment *assign(deref lhs, operand rhs, operand condition); +ir_assignment *assign(deref lhs, operand rhs, operand condition, int writemask); + +ir_return *ret(operand retval); + +ir_expression *expr(ir_expression_operation op, operand a); +ir_expression *expr(ir_expression_operation op, operand a, operand b); +ir_expression *expr(ir_expression_operation op, operand a, operand b, operand c); +ir_expression *add(operand a, operand b); +ir_expression *sub(operand a, operand b); +ir_expression *mul(operand a, operand b); +ir_expression *imul_high(operand a, operand b); +ir_expression *div(operand a, operand b); +ir_expression *carry(operand a, operand b); +ir_expression *borrow(operand a, operand b); +ir_expression *trunc(operand a); +ir_expression *round_even(operand a); +ir_expression *fract(operand a); +ir_expression *dot(operand a, operand b); +ir_expression *clamp(operand a, operand b, operand c); +ir_expression *saturate(operand a); +ir_expression *abs(operand a); +ir_expression *neg(operand a); +ir_expression *sin(operand a); +ir_expression *cos(operand a); +ir_expression *exp(operand a); +ir_expression 
*rsq(operand a); +ir_expression *sqrt(operand a); +ir_expression *log(operand a); +ir_expression *sign(operand a); + +ir_expression *subr_to_int(operand a); +ir_expression *equal(operand a, operand b); +ir_expression *nequal(operand a, operand b); +ir_expression *less(operand a, operand b); +ir_expression *greater(operand a, operand b); +ir_expression *lequal(operand a, operand b); +ir_expression *gequal(operand a, operand b); + +ir_expression *logic_not(operand a); +ir_expression *logic_and(operand a, operand b); +ir_expression *logic_or(operand a, operand b); + +ir_expression *bit_not(operand a); +ir_expression *bit_or(operand a, operand b); +ir_expression *bit_and(operand a, operand b); +ir_expression *lshift(operand a, operand b); +ir_expression *rshift(operand a, operand b); + +ir_expression *f2i(operand a); +ir_expression *bitcast_f2i(operand a); +ir_expression *i2f(operand a); +ir_expression *bitcast_i2f(operand a); +ir_expression *f2u(operand a); +ir_expression *bitcast_f2u(operand a); +ir_expression *u2f(operand a); +ir_expression *bitcast_u2f(operand a); +ir_expression *i2u(operand a); +ir_expression *u2i(operand a); +ir_expression *b2i(operand a); +ir_expression *i2b(operand a); +ir_expression *f2b(operand a); +ir_expression *b2f(operand a); + +ir_expression *f2d(operand a); +ir_expression *i2d(operand a); +ir_expression *u2d(operand a); + +ir_expression *min2(operand a, operand b); +ir_expression *max2(operand a, operand b); + +ir_expression *interpolate_at_centroid(operand a); +ir_expression *interpolate_at_offset(operand a, operand b); +ir_expression *interpolate_at_sample(operand a, operand b); + +ir_expression *fma(operand a, operand b, operand c); +ir_expression *lrp(operand x, operand y, operand a); +ir_expression *csel(operand a, operand b, operand c); +ir_expression *bitfield_extract(operand a, operand b, operand c); +ir_expression *bitfield_insert(operand a, operand b, operand c, operand d); + +ir_swizzle *swizzle(operand a, int swizzle, int components); +/** + * Swizzle away later components, but preserve the ordering. 
+ */
+ir_swizzle *swizzle_for_size(operand a, unsigned components);
+
+ir_swizzle *swizzle_xxxx(operand a);
+ir_swizzle *swizzle_yyyy(operand a);
+ir_swizzle *swizzle_zzzz(operand a);
+ir_swizzle *swizzle_wwww(operand a);
+ir_swizzle *swizzle_x(operand a);
+ir_swizzle *swizzle_y(operand a);
+ir_swizzle *swizzle_z(operand a);
+ir_swizzle *swizzle_w(operand a);
+ir_swizzle *swizzle_xy(operand a);
+ir_swizzle *swizzle_xyz(operand a);
+ir_swizzle *swizzle_xyzw(operand a);
+
+ir_if *if_tree(operand condition,
+               ir_instruction *then_branch);
+ir_if *if_tree(operand condition,
+               ir_instruction *then_branch,
+               ir_instruction *else_branch);
+
+} /* namespace ir_builder */
diff --git a/src/compiler/glsl/ir_clone.cpp b/src/compiler/glsl/ir_clone.cpp
new file mode 100644
index 00000000000..0965b0d3719
--- /dev/null
+++ b/src/compiler/glsl/ir_clone.cpp
@@ -0,0 +1,440 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <string.h>
+#include "main/compiler.h"
+#include "ir.h"
+#include "compiler/glsl_types.h"
+#include "program/hash_table.h"
+
+ir_rvalue *
+ir_rvalue::clone(void *mem_ctx, struct hash_table *) const
+{
+   /* The only possible instantiation is the generic error value. */
+   return error_value(mem_ctx);
+}
+
+/**
+ * Duplicate an IR variable
+ */
+ir_variable *
+ir_variable::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_variable *var = new(mem_ctx) ir_variable(this->type, this->name,
+                                               (ir_variable_mode) this->data.mode);
+
+   var->data.max_array_access = this->data.max_array_access;
+   if (this->is_interface_instance()) {
+      var->u.max_ifc_array_access =
+         rzalloc_array(var, unsigned, this->interface_type->length);
+      memcpy(var->u.max_ifc_array_access, this->u.max_ifc_array_access,
+             this->interface_type->length * sizeof(unsigned));
+   }
+
+   memcpy(&var->data, &this->data, sizeof(var->data));
+
+   if (this->get_state_slots()) {
+      ir_state_slot *s = var->allocate_state_slots(this->get_num_state_slots());
+      memcpy(s, this->get_state_slots(),
+             sizeof(s[0]) * var->get_num_state_slots());
+   }
+
+   if (this->constant_value)
+      var->constant_value = this->constant_value->clone(mem_ctx, ht);
+
+   if (this->constant_initializer)
+      var->constant_initializer =
+         this->constant_initializer->clone(mem_ctx, ht);
+
+   var->interface_type = this->interface_type;
+
+   if (ht) {
+      hash_table_insert(ht, var, (void *)const_cast<ir_variable *>(this));
+   }
+
+   return var;
+}
+
+ir_swizzle *
+ir_swizzle::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   return new(mem_ctx) ir_swizzle(this->val->clone(mem_ctx, ht), this->mask);
+}
+
+ir_return *
+ir_return::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_rvalue *new_value = NULL;
+
+   if (this->value)
+      new_value = this->value->clone(mem_ctx, ht);
+
+   return new(mem_ctx) ir_return(new_value);
+}
+
+ir_discard *
+ir_discard::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_rvalue *new_condition = NULL;
+
+   if (this->condition != NULL)
+      new_condition = this->condition->clone(mem_ctx, ht);
+
+   return new(mem_ctx) ir_discard(new_condition);
+}
+
+ir_loop_jump *
+ir_loop_jump::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   (void)ht;
+
+   return new(mem_ctx) ir_loop_jump(this->mode);
+}
+
+ir_if *
+ir_if::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_if *new_if = new(mem_ctx) ir_if(this->condition->clone(mem_ctx, ht));
+
+   foreach_in_list(ir_instruction, ir, &this->then_instructions) {
+      new_if->then_instructions.push_tail(ir->clone(mem_ctx, ht));
+   }
+
+   foreach_in_list(ir_instruction, ir, &this->else_instructions) {
+      new_if->else_instructions.push_tail(ir->clone(mem_ctx, ht));
+   }
+
+   return new_if;
+}
+
+ir_loop *
+ir_loop::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_loop *new_loop = new(mem_ctx) ir_loop();
+
+   foreach_in_list(ir_instruction, ir, &this->body_instructions) {
+      new_loop->body_instructions.push_tail(ir->clone(mem_ctx, ht));
+   }
+
+   return new_loop;
+}
+
+ir_call *
+ir_call::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_dereference_variable *new_return_ref = NULL;
+   if (this->return_deref != NULL)
+      new_return_ref = this->return_deref->clone(mem_ctx, ht);
+
+   exec_list new_parameters;
+
+   foreach_in_list(ir_instruction, ir, &this->actual_parameters) {
+      new_parameters.push_tail(ir->clone(mem_ctx, ht));
+   }
+
+   return new(mem_ctx) ir_call(this->callee, new_return_ref, &new_parameters);
+}
+
+ir_expression *
+ir_expression::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_rvalue *op[ARRAY_SIZE(this->operands)] = { NULL, };
+   unsigned int i;
+
+   for (i = 0; i < get_num_operands(); i++) {
+      op[i] = this->operands[i]->clone(mem_ctx, ht);
+   }
+
+   return new(mem_ctx) ir_expression(this->operation, this->type,
+                                     op[0], op[1], op[2], op[3]);
+}
+
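Throughout this file the hash table maps original nodes to their clones. ir_variable::clone above seeds it, and the dereference clones just below consult it, so a cloned use ends up pointing at the cloned variable rather than at the original. A sketch of that contract, with hypothetical arguments:

   /* Hypothetical: clone a variable, then one of its uses. */
   static void
   clone_var_and_use(void *mem_ctx, ir_variable *var,
                     ir_dereference_variable *use)
   {
      struct hash_table *ht =
         hash_table_ctor(0, hash_table_pointer_hash,
                         hash_table_pointer_compare);

      ir_variable *var_copy = var->clone(mem_ctx, ht);   /* seeds old -> new */
      ir_dereference_variable *use_copy = use->clone(mem_ctx, ht);

      assert(use_copy->var == var_copy);   /* the use was repointed */
      hash_table_dtor(ht);
   }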
+ir_dereference_variable *
+ir_dereference_variable::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_variable *new_var;
+
+   if (ht) {
+      new_var = (ir_variable *)hash_table_find(ht, this->var);
+      if (!new_var)
+         new_var = this->var;
+   } else {
+      new_var = this->var;
+   }
+
+   return new(mem_ctx) ir_dereference_variable(new_var);
+}
+
+ir_dereference_array *
+ir_dereference_array::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   return new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, ht),
+                                            this->array_index->clone(mem_ctx,
+                                                                     ht));
+}
+
+ir_dereference_record *
+ir_dereference_record::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   return new(mem_ctx) ir_dereference_record(this->record->clone(mem_ctx, ht),
+                                             this->field);
+}
+
+ir_texture *
+ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_texture *new_tex = new(mem_ctx) ir_texture(this->op);
+   new_tex->type = this->type;
+
+   new_tex->sampler = this->sampler->clone(mem_ctx, ht);
+   if (this->coordinate)
+      new_tex->coordinate = this->coordinate->clone(mem_ctx, ht);
+   if (this->projector)
+      new_tex->projector = this->projector->clone(mem_ctx, ht);
+   if (this->shadow_comparitor) {
+      new_tex->shadow_comparitor = this->shadow_comparitor->clone(mem_ctx, ht);
+   }
+
+   if (this->offset != NULL)
+      new_tex->offset = this->offset->clone(mem_ctx, ht);
+
+   switch (this->op) {
+   case ir_tex:
+   case ir_lod:
+   case ir_query_levels:
+   case ir_texture_samples:
+   case ir_samples_identical:
+      break;
+   case ir_txb:
+      new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
+      break;
+   case ir_txl:
+   case ir_txf:
+   case ir_txs:
+      new_tex->lod_info.lod = this->lod_info.lod->clone(mem_ctx, ht);
+      break;
+   case ir_txf_ms:
+      new_tex->lod_info.sample_index = this->lod_info.sample_index->clone(mem_ctx, ht);
+      break;
+   case ir_txd:
+      new_tex->lod_info.grad.dPdx = this->lod_info.grad.dPdx->clone(mem_ctx, ht);
+      new_tex->lod_info.grad.dPdy = this->lod_info.grad.dPdy->clone(mem_ctx, ht);
+      break;
+   case ir_tg4:
+      new_tex->lod_info.component = this->lod_info.component->clone(mem_ctx, ht);
+      break;
+   }
+
+   return new_tex;
+}
+
+ir_assignment *
+ir_assignment::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_rvalue *new_condition = NULL;
+
+   if (this->condition)
+      new_condition = this->condition->clone(mem_ctx, ht);
+
+   ir_assignment *cloned =
+      new(mem_ctx) ir_assignment(this->lhs->clone(mem_ctx, ht),
+                                 this->rhs->clone(mem_ctx, ht),
+                                 new_condition);
+   cloned->write_mask = this->write_mask;
+   return cloned;
+}
+
+ir_function *
+ir_function::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_function *copy = new(mem_ctx) ir_function(this->name);
+
+   copy->is_subroutine = this->is_subroutine;
+   copy->subroutine_index = this->subroutine_index;
+   copy->num_subroutine_types = this->num_subroutine_types;
+   copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types);
+   for (int i = 0; i < copy->num_subroutine_types; i++)
+      copy->subroutine_types[i] = this->subroutine_types[i];
+
+   foreach_in_list(const ir_function_signature, sig, &this->signatures) {
+      ir_function_signature *sig_copy = sig->clone(mem_ctx, ht);
+      copy->add_signature(sig_copy);
+
+      if (ht != NULL)
+         hash_table_insert(ht, sig_copy,
+                           (void *)const_cast<ir_function_signature *>(sig));
+   }
+
+   return copy;
+}
+
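ir_function::clone above also records each cloned signature under its original, which is what lets clone_ir_list() (at the end of this file) retarget ir_call nodes in a second pass. The two signature-cloning entry points that follow differ only in how they treat the body: clone() deep-copies it, while clone_prototype() deliberately leaves the copy undefined, which is the variant wanted when importing prototypes. A hypothetical sketch:

   /* Hypothetical: copy just the interface of a signature, e.g. when
    * importing a built-in's prototype into another shader. */
   static ir_function_signature *
   import_signature(void *mem_ctx, const ir_function_signature *sig)
   {
      /* Parameters are cloned, the body is not; is_defined stays false
       * and origin points back at the source signature. */
      ir_function_signature *proto = sig->clone_prototype(mem_ctx, NULL);
      assert(!proto->is_defined);
      return proto;
   }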
+ir_function_signature *
+ir_function_signature::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_function_signature *copy = this->clone_prototype(mem_ctx, ht);
+
+   copy->is_defined = this->is_defined;
+
+   /* Clone the instruction list.
+    */
+   foreach_in_list(const ir_instruction, inst, &this->body) {
+      ir_instruction *const inst_copy = inst->clone(mem_ctx, ht);
+      copy->body.push_tail(inst_copy);
+   }
+
+   return copy;
+}
+
+ir_function_signature *
+ir_function_signature::clone_prototype(void *mem_ctx, struct hash_table *ht) const
+{
+   ir_function_signature *copy =
+      new(mem_ctx) ir_function_signature(this->return_type);
+
+   copy->is_defined = false;
+   copy->builtin_avail = this->builtin_avail;
+   copy->origin = this;
+
+   /* Clone the parameter list, but NOT the body.
+    */
+   foreach_in_list(const ir_variable, param, &this->parameters) {
+      assert(const_cast<ir_variable *>(param)->as_variable() != NULL);
+
+      ir_variable *const param_copy = param->clone(mem_ctx, ht);
+      copy->parameters.push_tail(param_copy);
+   }
+
+   return copy;
+}
+
+ir_constant *
+ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
+{
+   (void)ht;
+
+   switch (this->type->base_type) {
+   case GLSL_TYPE_UINT:
+   case GLSL_TYPE_INT:
+   case GLSL_TYPE_FLOAT:
+   case GLSL_TYPE_DOUBLE:
+   case GLSL_TYPE_BOOL:
+      return new(mem_ctx) ir_constant(this->type, &this->value);
+
+   case GLSL_TYPE_STRUCT: {
+      ir_constant *c = new(mem_ctx) ir_constant;
+
+      c->type = this->type;
+      for (exec_node *node = this->components.head
+              ; !node->is_tail_sentinel()
+              ; node = node->next) {
+         ir_constant *const orig = (ir_constant *) node;
+
+         c->components.push_tail(orig->clone(mem_ctx, NULL));
+      }
+
+      return c;
+   }
+
+   case GLSL_TYPE_ARRAY: {
+      ir_constant *c = new(mem_ctx) ir_constant;
+
+      c->type = this->type;
+      c->array_elements = ralloc_array(c, ir_constant *, this->type->length);
+      for (unsigned i = 0; i < this->type->length; i++) {
+         c->array_elements[i] = this->array_elements[i]->clone(mem_ctx, NULL);
+      }
+      return c;
+   }
+
+   case GLSL_TYPE_SAMPLER:
+   case GLSL_TYPE_IMAGE:
+   case GLSL_TYPE_ATOMIC_UINT:
+   case GLSL_TYPE_VOID:
+   case GLSL_TYPE_ERROR:
+   case GLSL_TYPE_SUBROUTINE:
+   case GLSL_TYPE_INTERFACE:
+      assert(!"Should not get here.");
+      break;
+   }
+
+   return NULL;
+}
+
+
+class fixup_ir_call_visitor : public ir_hierarchical_visitor {
+public:
+   fixup_ir_call_visitor(struct hash_table *ht)
+   {
+      this->ht = ht;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_call *ir)
+   {
+      /* Try to find the function signature referenced by the ir_call in the
+       * table.  If it is found, replace it with the value from the table.
+       */
+      ir_function_signature *sig =
+         (ir_function_signature *) hash_table_find(this->ht, ir->callee);
+      if (sig != NULL)
+         ir->callee = sig;
+
+      /* Since this may be used before function call parameters are flattened,
+       * the children also need to be processed.
+       */
+      return visit_continue;
+   }
+
+private:
+   struct hash_table *ht;
+};
+
+
+static void
+fixup_function_calls(struct hash_table *ht, exec_list *instructions)
+{
+   fixup_ir_call_visitor v(ht);
+   v.run(instructions);
+}
+
+
+void
+clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in)
+{
+   struct hash_table *ht =
+      hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare);
+
+   foreach_in_list(const ir_instruction, original, in) {
+      ir_instruction *copy = original->clone(mem_ctx, ht);
+
+      out->push_tail(copy);
+   }
+
+   /* Make a pass over the cloned tree to fix up ir_call nodes to point to the
+    * cloned ir_function_signature nodes.  This cannot be done automatically
+    * during cloning because the ir_call might be a forward reference (i.e.,
+    * the function signature that it references may not have been cloned yet).
+    */
+   fixup_function_calls(ht, out);
+
+   hash_table_dtor(ht);
+}
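clone_ir_list() above is the usual entry point for everything in this file: one pass clones each instruction while the table fills in, and the fixup pass then resolves forward references. A trivial hypothetical caller:

   /* Hypothetical: deep-copy an instruction stream into a new memory
    * context, e.g. while linking; all old->new bookkeeping and ir_call
    * fixup happens inside clone_ir_list(). */
   static void
   deep_copy_ir(void *mem_ctx, const exec_list *in, exec_list *out)
   {
      clone_ir_list(mem_ctx, out, in);
   }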
diff --git a/src/compiler/glsl/ir_constant_expression.cpp b/src/compiler/glsl/ir_constant_expression.cpp
new file mode 100644
index 00000000000..fbbf7794da6
--- /dev/null
+++ b/src/compiler/glsl/ir_constant_expression.cpp
@@ -0,0 +1,2092 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_constant_expression.cpp
+ * Evaluate and process constant valued expressions
+ *
+ * In GLSL, constant valued expressions are used in several places.  These
+ * must be processed and evaluated very early in the compilation process.
+ *
+ *  * Sizes of arrays
+ *  * Initializers for uniforms
+ *  * Initializers for \c const variables
+ */
+
+#include <math.h>
+#include "main/core.h" /* for MAX2, MIN2, CLAMP */
+#include "util/rounding.h" /* for _mesa_roundeven */
+#include "util/half_float.h"
+#include "ir.h"
+#include "compiler/glsl_types.h"
+#include "program/hash_table.h"
+
+static float
+dot_f(ir_constant *op0, ir_constant *op1)
+{
+   assert(op0->type->is_float() && op1->type->is_float());
+
+   float result = 0;
+   for (unsigned c = 0; c < op0->type->components(); c++)
+      result += op0->value.f[c] * op1->value.f[c];
+
+   return result;
+}
+
+static double
+dot_d(ir_constant *op0, ir_constant *op1)
+{
+   assert(op0->type->is_double() && op1->type->is_double());
+
+   double result = 0;
+   for (unsigned c = 0; c < op0->type->components(); c++)
+      result += op0->value.d[c] * op1->value.d[c];
+
+   return result;
+}
+
+/* This method is the only one supported by gcc.  Unions in particular
+ * are iffy, and read-through-converted-pointer is killed by strict
+ * aliasing.  OTOH, the compiler sees through the memcpy, so the
+ * resulting asm is reasonable.
+ */
+static float
+bitcast_u2f(unsigned int u)
+{
+   assert(sizeof(float) == sizeof(unsigned int));
+   float f;
+   memcpy(&f, &u, sizeof(f));
+   return f;
+}
+
+static unsigned int
+bitcast_f2u(float f)
+{
+   assert(sizeof(float) == sizeof(unsigned int));
+   unsigned int u;
+   memcpy(&u, &f, sizeof(f));
+   return u;
+}
+
+/**
+ * Evaluate one component of a floating-point 4x8 packing function.
+ */
+typedef uint8_t
+(*pack_1x8_func_t)(float);
+
+/**
+ * Evaluate one component of a floating-point 2x16 packing function.
+ */
+typedef uint16_t
+(*pack_1x16_func_t)(float);
+
+/**
+ * Evaluate one component of a floating-point 4x8 unpacking function.
+ */ +typedef float +(*unpack_1x8_func_t)(uint8_t); + +/** + * Evaluate one component of a floating-point 2x16 unpacking function. + */ +typedef float +(*unpack_1x16_func_t)(uint16_t); + +/** + * Evaluate a 2x16 floating-point packing function. + */ +static uint32_t +pack_2x16(pack_1x16_func_t pack_1x16, + float x, float y) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * packSnorm2x16 + * ------------- + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * The specifications for the other packing functions contain similar + * language. + */ + uint32_t u = 0; + u |= ((uint32_t) pack_1x16(x) << 0); + u |= ((uint32_t) pack_1x16(y) << 16); + return u; +} + +/** + * Evaluate a 4x8 floating-point packing function. + */ +static uint32_t +pack_4x8(pack_1x8_func_t pack_1x8, + float x, float y, float z, float w) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packSnorm4x8 + * ------------ + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * The specifications for the other packing functions contain similar + * language. + */ + uint32_t u = 0; + u |= ((uint32_t) pack_1x8(x) << 0); + u |= ((uint32_t) pack_1x8(y) << 8); + u |= ((uint32_t) pack_1x8(z) << 16); + u |= ((uint32_t) pack_1x8(w) << 24); + return u; +} + +/** + * Evaluate a 2x16 floating-point unpacking function. + */ +static void +unpack_2x16(unpack_1x16_func_t unpack_1x16, + uint32_t u, + float *x, float *y) +{ + /* From section 8.4 of the GLSL ES 3.00 spec: + * + * unpackSnorm2x16 + * --------------- + * The first component of the returned vector will be extracted from + * the least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * The specifications for the other unpacking functions contain similar + * language. + */ + *x = unpack_1x16((uint16_t) (u & 0xffff)); + *y = unpack_1x16((uint16_t) (u >> 16)); +} + +/** + * Evaluate a 4x8 floating-point unpacking function. + */ +static void +unpack_4x8(unpack_1x8_func_t unpack_1x8, uint32_t u, + float *x, float *y, float *z, float *w) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * unpackSnorm4x8 + * -------------- + * The first component of the returned vector will be extracted from + * the least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * The specifications for the other unpacking functions contain similar + * language. + */ + *x = unpack_1x8((uint8_t) (u & 0xff)); + *y = unpack_1x8((uint8_t) (u >> 8)); + *z = unpack_1x8((uint8_t) (u >> 16)); + *w = unpack_1x8((uint8_t) (u >> 24)); +} + +/** + * Evaluate one component of packSnorm4x8. + */ +static uint8_t +pack_snorm_1x8(float x) +{ + /* From section 8.4 of the GLSL 4.30 spec: + * + * packSnorm4x8 + * ------------ + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + */ + return (uint8_t) + _mesa_lroundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f); +} + +/** + * Evaluate one component of packSnorm2x16. 
+ */
+static uint16_t
+pack_snorm_1x16(float x)
+{
+   /* From section 8.4 of the GLSL ES 3.00 spec:
+    *
+    *    packSnorm2x16
+    *    -------------
+    *    The conversion for component c of v to fixed point is done as
+    *    follows:
+    *
+    *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+    */
+   return (uint16_t)
+      _mesa_lroundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm4x8.
+ */
+static float
+unpack_snorm_1x8(uint8_t u)
+{
+   /* From section 8.4 of the GLSL 4.30 spec:
+    *
+    *    unpackSnorm4x8
+    *    --------------
+    *    The conversion for unpacked fixed-point value f to floating point is
+    *    done as follows:
+    *
+    *      unpackSnorm4x8: clamp(f / 127.0, -1, +1)
+    */
+   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of unpackSnorm2x16.
+ */
+static float
+unpack_snorm_1x16(uint16_t u)
+{
+   /* From section 8.4 of the GLSL ES 3.00 spec:
+    *
+    *    unpackSnorm2x16
+    *    ---------------
+    *    The conversion for unpacked fixed-point value f to floating point is
+    *    done as follows:
+    *
+    *      unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
+    */
+   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of packUnorm4x8.
+ */
+static uint8_t
+pack_unorm_1x8(float x)
+{
+   /* From section 8.4 of the GLSL 4.30 spec:
+    *
+    *    packUnorm4x8
+    *    ------------
+    *    The conversion for component c of v to fixed point is done as
+    *    follows:
+    *
+    *      packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
+    */
+   return (uint8_t) (int) _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
+}
+
+/**
+ * Evaluate one component of packUnorm2x16.
+ */
+static uint16_t
+pack_unorm_1x16(float x)
+{
+   /* From section 8.4 of the GLSL ES 3.00 spec:
+    *
+    *    packUnorm2x16
+    *    -------------
+    *    The conversion for component c of v to fixed point is done as
+    *    follows:
+    *
+    *      packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+    */
+   return (uint16_t) (int)
+      _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
+}
+
+/**
+ * Evaluate one component of unpackUnorm4x8.
+ */
+static float
+unpack_unorm_1x8(uint8_t u)
+{
+   /* From section 8.4 of the GLSL 4.30 spec:
+    *
+    *    unpackUnorm4x8
+    *    --------------
+    *    The conversion for unpacked fixed-point value f to floating point is
+    *    done as follows:
+    *
+    *      unpackUnorm4x8: f / 255.0
+    */
+   return (float) u / 255.0f;
+}
+
+/**
+ * Evaluate one component of unpackUnorm2x16.
+ */
+static float
+unpack_unorm_1x16(uint16_t u)
+{
+   /* From section 8.4 of the GLSL ES 3.00 spec:
+    *
+    *    unpackUnorm2x16
+    *    ---------------
+    *    The conversion for unpacked fixed-point value f to floating point is
+    *    done as follows:
+    *
+    *      unpackUnorm2x16: f / 65535.0
+    */
+   return (float) u / 65535.0f;
+}
+
+/**
+ * Evaluate one component of packHalf2x16.
+ */
+static uint16_t
+pack_half_1x16(float x)
+{
+   return _mesa_float_to_half(x);
+}
+
+/**
+ * Evaluate one component of unpackHalf2x16.
+ */
+static float
+unpack_half_1x16(uint16_t u)
+{
+   return _mesa_half_to_float(u);
+}
+
+/**
+ * Get the constant that is ultimately referenced by an r-value, in a constant
+ * expression evaluation context.
+ *
+ * The offset is used when the reference is to a specific column of a matrix.
+ */ +static bool +constant_referenced(const ir_dereference *deref, + struct hash_table *variable_context, + ir_constant *&store, int &offset) +{ + store = NULL; + offset = 0; + + if (variable_context == NULL) + return false; + + switch (deref->ir_type) { + case ir_type_dereference_array: { + const ir_dereference_array *const da = + (const ir_dereference_array *) deref; + + ir_constant *const index_c = + da->array_index->constant_expression_value(variable_context); + + if (!index_c || !index_c->type->is_scalar() || !index_c->type->is_integer()) + break; + + const int index = index_c->type->base_type == GLSL_TYPE_INT ? + index_c->get_int_component(0) : + index_c->get_uint_component(0); + + ir_constant *substore; + int suboffset; + + const ir_dereference *const deref = da->array->as_dereference(); + if (!deref) + break; + + if (!constant_referenced(deref, variable_context, substore, suboffset)) + break; + + const glsl_type *const vt = da->array->type; + if (vt->is_array()) { + store = substore->get_array_element(index); + offset = 0; + } else if (vt->is_matrix()) { + store = substore; + offset = index * vt->vector_elements; + } else if (vt->is_vector()) { + store = substore; + offset = suboffset + index; + } + + break; + } + + case ir_type_dereference_record: { + const ir_dereference_record *const dr = + (const ir_dereference_record *) deref; + + const ir_dereference *const deref = dr->record->as_dereference(); + if (!deref) + break; + + ir_constant *substore; + int suboffset; + + if (!constant_referenced(deref, variable_context, substore, suboffset)) + break; + + /* Since we're dropping it on the floor... + */ + assert(suboffset == 0); + + store = substore->get_record_field(dr->field); + break; + } + + case ir_type_dereference_variable: { + const ir_dereference_variable *const dv = + (const ir_dereference_variable *) deref; + + store = (ir_constant *) hash_table_find(variable_context, dv->var); + break; + } + + default: + assert(!"Should not get here."); + break; + } + + return store != NULL; +} + + +ir_constant * +ir_rvalue::constant_expression_value(struct hash_table *) +{ + assert(this->type->is_error()); + return NULL; +} + +ir_constant * +ir_expression::constant_expression_value(struct hash_table *variable_context) +{ + if (this->type->is_error()) + return NULL; + + ir_constant *op[ARRAY_SIZE(this->operands)] = { NULL, }; + ir_constant_data data; + + memset(&data, 0, sizeof(data)); + + for (unsigned operand = 0; operand < this->get_num_operands(); operand++) { + op[operand] = this->operands[operand]->constant_expression_value(variable_context); + if (!op[operand]) + return NULL; + } + + if (op[1] != NULL) + switch (this->operation) { + case ir_binop_lshift: + case ir_binop_rshift: + case ir_binop_ldexp: + case ir_binop_interpolate_at_offset: + case ir_binop_interpolate_at_sample: + case ir_binop_vector_extract: + case ir_triop_csel: + case ir_triop_bitfield_extract: + break; + + default: + assert(op[0]->type->base_type == op[1]->type->base_type); + break; + } + + bool op0_scalar = op[0]->type->is_scalar(); + bool op1_scalar = op[1] != NULL && op[1]->type->is_scalar(); + + /* When iterating over a vector or matrix's components, we want to increase + * the loop counter. However, for scalars, we want to stay at 0. + */ + unsigned c0_inc = op0_scalar ? 0 : 1; + unsigned c1_inc = op1_scalar ? 
0 : 1; + unsigned components; + if (op1_scalar || !op[1]) { + components = op[0]->type->components(); + } else { + components = op[1]->type->components(); + } + + void *ctx = ralloc_parent(this); + + /* Handle array operations here, rather than below. */ + if (op[0]->type->is_array()) { + assert(op[1] != NULL && op[1]->type->is_array()); + switch (this->operation) { + case ir_binop_all_equal: + return new(ctx) ir_constant(op[0]->has_value(op[1])); + case ir_binop_any_nequal: + return new(ctx) ir_constant(!op[0]->has_value(op[1])); + default: + break; + } + return NULL; + } + + switch (this->operation) { + case ir_unop_bit_not: + switch (op[0]->type->base_type) { + case GLSL_TYPE_INT: + for (unsigned c = 0; c < components; c++) + data.i[c] = ~ op[0]->value.i[c]; + break; + case GLSL_TYPE_UINT: + for (unsigned c = 0; c < components; c++) + data.u[c] = ~ op[0]->value.u[c]; + break; + default: + assert(0); + } + break; + + case ir_unop_logic_not: + assert(op[0]->type->base_type == GLSL_TYPE_BOOL); + for (unsigned c = 0; c < op[0]->type->components(); c++) + data.b[c] = !op[0]->value.b[c]; + break; + + case ir_unop_f2i: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = (int) op[0]->value.f[c]; + } + break; + case ir_unop_f2u: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = (unsigned) op[0]->value.f[c]; + } + break; + case ir_unop_i2f: + assert(op[0]->type->base_type == GLSL_TYPE_INT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = (float) op[0]->value.i[c]; + } + break; + case ir_unop_u2f: + assert(op[0]->type->base_type == GLSL_TYPE_UINT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = (float) op[0]->value.u[c]; + } + break; + case ir_unop_b2f: + assert(op[0]->type->base_type == GLSL_TYPE_BOOL); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = op[0]->value.b[c] ? 1.0F : 0.0F; + } + break; + case ir_unop_f2b: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.b[c] = op[0]->value.f[c] != 0.0F ? true : false; + } + break; + case ir_unop_b2i: + assert(op[0]->type->base_type == GLSL_TYPE_BOOL); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.u[c] = op[0]->value.b[c] ? 1 : 0; + } + break; + case ir_unop_i2b: + assert(op[0]->type->is_integer()); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.b[c] = op[0]->value.u[c] ? 
true : false; + } + break; + case ir_unop_u2i: + assert(op[0]->type->base_type == GLSL_TYPE_UINT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = op[0]->value.u[c]; + } + break; + case ir_unop_i2u: + assert(op[0]->type->base_type == GLSL_TYPE_INT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.u[c] = op[0]->value.i[c]; + } + break; + case ir_unop_bitcast_i2f: + assert(op[0]->type->base_type == GLSL_TYPE_INT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = bitcast_u2f(op[0]->value.i[c]); + } + break; + case ir_unop_bitcast_f2i: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = bitcast_f2u(op[0]->value.f[c]); + } + break; + case ir_unop_bitcast_u2f: + assert(op[0]->type->base_type == GLSL_TYPE_UINT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = bitcast_u2f(op[0]->value.u[c]); + } + break; + case ir_unop_bitcast_f2u: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.u[c] = bitcast_f2u(op[0]->value.f[c]); + } + break; + case ir_unop_d2f: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = op[0]->value.d[c]; + } + break; + case ir_unop_f2d: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.d[c] = op[0]->value.f[c]; + } + break; + case ir_unop_d2i: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.i[c] = op[0]->value.d[c]; + } + break; + case ir_unop_i2d: + assert(op[0]->type->base_type == GLSL_TYPE_INT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.d[c] = op[0]->value.i[c]; + } + break; + case ir_unop_d2u: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.u[c] = op[0]->value.d[c]; + } + break; + case ir_unop_u2d: + assert(op[0]->type->base_type == GLSL_TYPE_UINT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.d[c] = op[0]->value.u[c]; + } + break; + case ir_unop_d2b: + assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.b[c] = op[0]->value.d[c] != 0.0; + } + break; + case ir_unop_trunc: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = trunc(op[0]->value.d[c]); + else + data.f[c] = truncf(op[0]->value.f[c]); + } + break; + + case ir_unop_round_even: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = _mesa_roundeven(op[0]->value.d[c]); + else + data.f[c] = _mesa_roundevenf(op[0]->value.f[c]); + } + break; + + case ir_unop_ceil: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = ceil(op[0]->value.d[c]); + else + data.f[c] = ceilf(op[0]->value.f[c]); + } + break; + + case ir_unop_floor: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = floor(op[0]->value.d[c]); + else + data.f[c] = floorf(op[0]->value.f[c]); + } + break; + + case ir_unop_fract: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (this->type->base_type) { + case 
GLSL_TYPE_UINT: + data.u[c] = 0; + break; + case GLSL_TYPE_INT: + data.i[c] = 0; + break; + case GLSL_TYPE_FLOAT: + data.f[c] = op[0]->value.f[c] - floor(op[0]->value.f[c]); + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = op[0]->value.d[c] - floor(op[0]->value.d[c]); + break; + default: + assert(0); + } + } + break; + + case ir_unop_sin: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = sinf(op[0]->value.f[c]); + } + break; + + case ir_unop_cos: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = cosf(op[0]->value.f[c]); + } + break; + + case ir_unop_neg: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = -((int) op[0]->value.u[c]); + break; + case GLSL_TYPE_INT: + data.i[c] = -op[0]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.f[c] = -op[0]->value.f[c]; + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = -op[0]->value.d[c]; + break; + default: + assert(0); + } + } + break; + + case ir_unop_abs: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = op[0]->value.u[c]; + break; + case GLSL_TYPE_INT: + data.i[c] = op[0]->value.i[c]; + if (data.i[c] < 0) + data.i[c] = -data.i[c]; + break; + case GLSL_TYPE_FLOAT: + data.f[c] = fabs(op[0]->value.f[c]); + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = fabs(op[0]->value.d[c]); + break; + default: + assert(0); + } + } + break; + + case ir_unop_sign: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = op[0]->value.i[c] > 0; + break; + case GLSL_TYPE_INT: + data.i[c] = (op[0]->value.i[c] > 0) - (op[0]->value.i[c] < 0); + break; + case GLSL_TYPE_FLOAT: + data.f[c] = float((op[0]->value.f[c] > 0)-(op[0]->value.f[c] < 0)); + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = double((op[0]->value.d[c] > 0)-(op[0]->value.d[c] < 0)); + break; + default: + assert(0); + } + } + break; + + case ir_unop_rcp: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + switch (this->type->base_type) { + case GLSL_TYPE_UINT: + if (op[0]->value.u[c] != 0.0) + data.u[c] = 1 / op[0]->value.u[c]; + break; + case GLSL_TYPE_INT: + if (op[0]->value.i[c] != 0.0) + data.i[c] = 1 / op[0]->value.i[c]; + break; + case GLSL_TYPE_FLOAT: + if (op[0]->value.f[c] != 0.0) + data.f[c] = 1.0F / op[0]->value.f[c]; + break; + case GLSL_TYPE_DOUBLE: + if (op[0]->value.d[c] != 0.0) + data.d[c] = 1.0 / op[0]->value.d[c]; + break; + default: + assert(0); + } + } + break; + + case ir_unop_rsq: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = 1.0 / sqrt(op[0]->value.d[c]); + else + data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]); + } + break; + + case ir_unop_sqrt: + for (unsigned c = 0; c < op[0]->type->components(); c++) { + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[c] = sqrt(op[0]->value.d[c]); + else + data.f[c] = sqrtf(op[0]->value.f[c]); + } + break; + + case ir_unop_exp: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = expf(op[0]->value.f[c]); + } + break; + + case ir_unop_exp2: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = exp2f(op[0]->value.f[c]); + 
} + break; + + case ir_unop_log: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = logf(op[0]->value.f[c]); + } + break; + + case ir_unop_log2: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = log2f(op[0]->value.f[c]); + } + break; + + case ir_unop_dFdx: + case ir_unop_dFdx_coarse: + case ir_unop_dFdx_fine: + case ir_unop_dFdy: + case ir_unop_dFdy_coarse: + case ir_unop_dFdy_fine: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = 0.0; + } + break; + + case ir_unop_pack_snorm_2x16: + assert(op[0]->type == glsl_type::vec2_type); + data.u[0] = pack_2x16(pack_snorm_1x16, + op[0]->value.f[0], + op[0]->value.f[1]); + break; + case ir_unop_pack_snorm_4x8: + assert(op[0]->type == glsl_type::vec4_type); + data.u[0] = pack_4x8(pack_snorm_1x8, + op[0]->value.f[0], + op[0]->value.f[1], + op[0]->value.f[2], + op[0]->value.f[3]); + break; + case ir_unop_unpack_snorm_2x16: + assert(op[0]->type == glsl_type::uint_type); + unpack_2x16(unpack_snorm_1x16, + op[0]->value.u[0], + &data.f[0], &data.f[1]); + break; + case ir_unop_unpack_snorm_4x8: + assert(op[0]->type == glsl_type::uint_type); + unpack_4x8(unpack_snorm_1x8, + op[0]->value.u[0], + &data.f[0], &data.f[1], &data.f[2], &data.f[3]); + break; + case ir_unop_pack_unorm_2x16: + assert(op[0]->type == glsl_type::vec2_type); + data.u[0] = pack_2x16(pack_unorm_1x16, + op[0]->value.f[0], + op[0]->value.f[1]); + break; + case ir_unop_pack_unorm_4x8: + assert(op[0]->type == glsl_type::vec4_type); + data.u[0] = pack_4x8(pack_unorm_1x8, + op[0]->value.f[0], + op[0]->value.f[1], + op[0]->value.f[2], + op[0]->value.f[3]); + break; + case ir_unop_unpack_unorm_2x16: + assert(op[0]->type == glsl_type::uint_type); + unpack_2x16(unpack_unorm_1x16, + op[0]->value.u[0], + &data.f[0], &data.f[1]); + break; + case ir_unop_unpack_unorm_4x8: + assert(op[0]->type == glsl_type::uint_type); + unpack_4x8(unpack_unorm_1x8, + op[0]->value.u[0], + &data.f[0], &data.f[1], &data.f[2], &data.f[3]); + break; + case ir_unop_pack_half_2x16: + assert(op[0]->type == glsl_type::vec2_type); + data.u[0] = pack_2x16(pack_half_1x16, + op[0]->value.f[0], + op[0]->value.f[1]); + break; + case ir_unop_unpack_half_2x16: + assert(op[0]->type == glsl_type::uint_type); + unpack_2x16(unpack_half_1x16, + op[0]->value.u[0], + &data.f[0], &data.f[1]); + break; + case ir_binop_pow: + assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); + for (unsigned c = 0; c < op[0]->type->components(); c++) { + data.f[c] = powf(op[0]->value.f[c], op[1]->value.f[c]); + } + break; + + case ir_binop_dot: + if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) + data.d[0] = dot_d(op[0], op[1]); + else + data.f[0] = dot_f(op[0], op[1]); + break; + + case ir_binop_min: + assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); + for (unsigned c = 0, c0 = 0, c1 = 0; + c < components; + c0 += c0_inc, c1 += c1_inc, c++) { + + switch (op[0]->type->base_type) { + case GLSL_TYPE_UINT: + data.u[c] = MIN2(op[0]->value.u[c0], op[1]->value.u[c1]); + break; + case GLSL_TYPE_INT: + data.i[c] = MIN2(op[0]->value.i[c0], op[1]->value.i[c1]); + break; + case GLSL_TYPE_FLOAT: + data.f[c] = MIN2(op[0]->value.f[c0], op[1]->value.f[c1]); + break; + case GLSL_TYPE_DOUBLE: + data.d[c] = MIN2(op[0]->value.d[c0], op[1]->value.d[c1]); + break; + default: + assert(0); + } + } + + break; + case ir_binop_max: + 
+   case ir_binop_min:
+      assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data.u[c] = MIN2(op[0]->value.u[c0], op[1]->value.u[c1]);
+            break;
+         case GLSL_TYPE_INT:
+            data.i[c] = MIN2(op[0]->value.i[c0], op[1]->value.i[c1]);
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.f[c] = MIN2(op[0]->value.f[c0], op[1]->value.f[c1]);
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.d[c] = MIN2(op[0]->value.d[c0], op[1]->value.d[c1]);
+            break;
+         default:
+            assert(0);
+         }
+      }
+
+      break;
+   case ir_binop_max:
+      assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data.u[c] = MAX2(op[0]->value.u[c0], op[1]->value.u[c1]);
+            break;
+         case GLSL_TYPE_INT:
+            data.i[c] = MAX2(op[0]->value.i[c0], op[1]->value.i[c1]);
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.f[c] = MAX2(op[0]->value.f[c0], op[1]->value.f[c1]);
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.d[c] = MAX2(op[0]->value.d[c0], op[1]->value.d[c1]);
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+
+   case ir_binop_add:
+      assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data.u[c] = op[0]->value.u[c0] + op[1]->value.u[c1];
+            break;
+         case GLSL_TYPE_INT:
+            data.i[c] = op[0]->value.i[c0] + op[1]->value.i[c1];
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.f[c] = op[0]->value.f[c0] + op[1]->value.f[c1];
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.d[c] = op[0]->value.d[c0] + op[1]->value.d[c1];
+            break;
+         default:
+            assert(0);
+         }
+      }
+
+      break;
+   case ir_binop_sub:
+      assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data.u[c] = op[0]->value.u[c0] - op[1]->value.u[c1];
+            break;
+         case GLSL_TYPE_INT:
+            data.i[c] = op[0]->value.i[c0] - op[1]->value.i[c1];
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1];
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1];
+            break;
+         default:
+            assert(0);
+         }
+      }
+
+      break;
+   case ir_binop_mul:
+      /* Check for equal types, or unequal types involving scalars */
+      if ((op[0]->type == op[1]->type && !op[0]->type->is_matrix())
+          || op0_scalar || op1_scalar) {
+         for (unsigned c = 0, c0 = 0, c1 = 0;
+              c < components;
+              c0 += c0_inc, c1 += c1_inc, c++) {
+
+            switch (op[0]->type->base_type) {
+            case GLSL_TYPE_UINT:
+               data.u[c] = op[0]->value.u[c0] * op[1]->value.u[c1];
+               break;
+            case GLSL_TYPE_INT:
+               data.i[c] = op[0]->value.i[c0] * op[1]->value.i[c1];
+               break;
+            case GLSL_TYPE_FLOAT:
+               data.f[c] = op[0]->value.f[c0] * op[1]->value.f[c1];
+               break;
+            case GLSL_TYPE_DOUBLE:
+               data.d[c] = op[0]->value.d[c0] * op[1]->value.d[c1];
+               break;
+            default:
+               assert(0);
+            }
+         }
+      } else {
+         assert(op[0]->type->is_matrix() || op[1]->type->is_matrix());
+
+         /* Multiply an N-by-M matrix with an M-by-P matrix.  Since either
+          * matrix can be a GLSL vector, either N or P can be 1.
+          *
+          * For vec*mat, the vector is treated as a row vector.  This
+          * means the vector is a 1-row x M-column matrix.
+          *
+          * For mat*vec, the vector is treated as a column vector.  Since
+          * matrix_columns is 1 for vectors, this just works.
+          */
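+         /* Concretely: vec2 * mat2 gives n = 1, m = 2, p = 2, i.e. a 1x2
+          * result stored as a vec2.  The += accumulation below relies on
+          * data having been zero-initialized earlier in this function.
+          */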
+         const unsigned n = op[0]->type->is_vector()
+            ? 1 : op[0]->type->vector_elements;
+         const unsigned m = op[1]->type->vector_elements;
+         const unsigned p = op[1]->type->matrix_columns;
+         for (unsigned j = 0; j < p; j++) {
+            for (unsigned i = 0; i < n; i++) {
+               for (unsigned k = 0; k < m; k++) {
+                  if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+                     data.d[i+n*j] += op[0]->value.d[i+n*k]*op[1]->value.d[k+m*j];
+                  else
+                     data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j];
+               }
+            }
+         }
+      }
+
+      break;
+   case ir_binop_div:
+      /* FINISHME: Emit warning when division-by-zero is detected. */
+      assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            if (op[1]->value.u[c1] == 0) {
+               data.u[c] = 0;
+            } else {
+               data.u[c] = op[0]->value.u[c0] / op[1]->value.u[c1];
+            }
+            break;
+         case GLSL_TYPE_INT:
+            if (op[1]->value.i[c1] == 0) {
+               data.i[c] = 0;
+            } else {
+               data.i[c] = op[0]->value.i[c0] / op[1]->value.i[c1];
+            }
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.f[c] = op[0]->value.f[c0] / op[1]->value.f[c1];
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.d[c] = op[0]->value.d[c0] / op[1]->value.d[c1];
+            break;
+         default:
+            assert(0);
+         }
+      }
+
+      break;
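+   /* A worked example of why the float/double cases of ir_binop_mod below
+    * use floor rather than fmod: mod(-1.5, 2.0) is computed as
+    * -1.5 - 2.0 * floor(-0.75) = -1.5 - 2.0 * (-1.0) = 0.5, whereas
+    * fmod(-1.5, 2.0) would give -1.5.
+    */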
+   case ir_binop_mod:
+      /* FINISHME: Emit warning when division-by-zero is detected. */
+      assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar);
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            if (op[1]->value.u[c1] == 0) {
+               data.u[c] = 0;
+            } else {
+               data.u[c] = op[0]->value.u[c0] % op[1]->value.u[c1];
+            }
+            break;
+         case GLSL_TYPE_INT:
+            if (op[1]->value.i[c1] == 0) {
+               data.i[c] = 0;
+            } else {
+               data.i[c] = op[0]->value.i[c0] % op[1]->value.i[c1];
+            }
+            break;
+         case GLSL_TYPE_FLOAT:
+            /* We don't use fmod because it rounds toward zero; GLSL specifies
+             * the use of floor.
+             */
+            data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1]
+               * floorf(op[0]->value.f[c0] / op[1]->value.f[c1]);
+            break;
+         case GLSL_TYPE_DOUBLE:
+            /* We don't use fmod because it rounds toward zero; GLSL specifies
+             * the use of floor.
+             */
+            data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1]
+               * floor(op[0]->value.d[c0] / op[1]->value.d[c1]);
+            break;
+         default:
+            assert(0);
+         }
+      }
+
+      break;
+
+   case ir_binop_logic_and:
+      assert(op[0]->type->base_type == GLSL_TYPE_BOOL);
+      for (unsigned c = 0; c < op[0]->type->components(); c++)
+         data.b[c] = op[0]->value.b[c] && op[1]->value.b[c];
+      break;
+   case ir_binop_logic_xor:
+      assert(op[0]->type->base_type == GLSL_TYPE_BOOL);
+      for (unsigned c = 0; c < op[0]->type->components(); c++)
+         data.b[c] = op[0]->value.b[c] ^ op[1]->value.b[c];
+      break;
+   case ir_binop_logic_or:
+      assert(op[0]->type->base_type == GLSL_TYPE_BOOL);
+      for (unsigned c = 0; c < op[0]->type->components(); c++)
+         data.b[c] = op[0]->value.b[c] || op[1]->value.b[c];
+      break;
+
+   case ir_binop_less:
+      assert(op[0]->type == op[1]->type);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data.b[c] = op[0]->value.u[c] < op[1]->value.u[c];
+            break;
+         case GLSL_TYPE_INT:
+            data.b[c] = op[0]->value.i[c] < op[1]->value.i[c];
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.b[c] = op[0]->value.f[c] < op[1]->value.f[c];
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.b[c] = op[0]->value.d[c] < op[1]->value.d[c];
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+   case ir_binop_greater:
+      assert(op[0]->type == op[1]->type);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data.b[c] = op[0]->value.u[c] > op[1]->value.u[c];
+            break;
+         case GLSL_TYPE_INT:
+            data.b[c] = op[0]->value.i[c] > op[1]->value.i[c];
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.b[c] = op[0]->value.f[c] > op[1]->value.f[c];
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.b[c] = op[0]->value.d[c] > op[1]->value.d[c];
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+   case ir_binop_lequal:
+      assert(op[0]->type == op[1]->type);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data.b[c] = op[0]->value.u[c] <= op[1]->value.u[c];
+            break;
+         case GLSL_TYPE_INT:
+            data.b[c] = op[0]->value.i[c] <= op[1]->value.i[c];
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.b[c] = op[0]->value.f[c] <= op[1]->value.f[c];
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.b[c] = op[0]->value.d[c] <= op[1]->value.d[c];
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+   case ir_binop_gequal:
+      assert(op[0]->type == op[1]->type);
+      for (unsigned c = 0; c < op[0]->type->components(); c++) {
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data.b[c] = op[0]->value.u[c] >= op[1]->value.u[c];
+            break;
+         case GLSL_TYPE_INT:
+            data.b[c] = op[0]->value.i[c] >= op[1]->value.i[c];
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.b[c] = op[0]->value.f[c] >= op[1]->value.f[c];
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.b[c] = op[0]->value.d[c] >= op[1]->value.d[c];
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+   case ir_binop_equal:
+      assert(op[0]->type == op[1]->type);
+      for (unsigned c = 0; c < components; c++) {
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data.b[c] = op[0]->value.u[c] == op[1]->value.u[c];
+            break;
+         case GLSL_TYPE_INT:
+            data.b[c] = op[0]->value.i[c] == op[1]->value.i[c];
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.b[c] = op[0]->value.f[c] == op[1]->value.f[c];
+            break;
+         case GLSL_TYPE_BOOL:
+            data.b[c] = op[0]->value.b[c] == op[1]->value.b[c];
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.b[c] = op[0]->value.d[c] == op[1]->value.d[c];
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+   case ir_binop_nequal:
+      assert(op[0]->type == op[1]->type);
+      for (unsigned c = 0; c < components; c++) {
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_UINT:
+            data.b[c] = op[0]->value.u[c] != op[1]->value.u[c];
+            break;
+         case GLSL_TYPE_INT:
+            data.b[c] = op[0]->value.i[c] != op[1]->value.i[c];
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.b[c] = op[0]->value.f[c] != op[1]->value.f[c];
+            break;
+         case GLSL_TYPE_BOOL:
+            data.b[c] = op[0]->value.b[c] != op[1]->value.b[c];
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.b[c] = op[0]->value.d[c] != op[1]->value.d[c];
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+   case ir_binop_all_equal:
+      data.b[0] = op[0]->has_value(op[1]);
+      break;
+   case ir_binop_any_nequal:
+      data.b[0] = !op[0]->has_value(op[1]);
+      break;
+
+   case ir_binop_lshift:
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         if (op[0]->type->base_type == GLSL_TYPE_INT &&
+             op[1]->type->base_type == GLSL_TYPE_INT) {
+            data.i[c] = op[0]->value.i[c0] << op[1]->value.i[c1];
+
+         } else if (op[0]->type->base_type == GLSL_TYPE_INT &&
+                    op[1]->type->base_type == GLSL_TYPE_UINT) {
+            data.i[c] = op[0]->value.i[c0] << op[1]->value.u[c1];
+
+         } else if (op[0]->type->base_type == GLSL_TYPE_UINT &&
+                    op[1]->type->base_type == GLSL_TYPE_INT) {
+            data.u[c] = op[0]->value.u[c0] << op[1]->value.i[c1];
+
+         } else if (op[0]->type->base_type == GLSL_TYPE_UINT &&
+                    op[1]->type->base_type == GLSL_TYPE_UINT) {
+            data.u[c] = op[0]->value.u[c0] << op[1]->value.u[c1];
+         }
+      }
+      break;
+
+   case ir_binop_rshift:
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         if (op[0]->type->base_type == GLSL_TYPE_INT &&
+             op[1]->type->base_type == GLSL_TYPE_INT) {
+            data.i[c] = op[0]->value.i[c0] >> op[1]->value.i[c1];
+
+         } else if (op[0]->type->base_type == GLSL_TYPE_INT &&
+                    op[1]->type->base_type == GLSL_TYPE_UINT) {
+            data.i[c] = op[0]->value.i[c0] >> op[1]->value.u[c1];
+
+         } else if (op[0]->type->base_type == GLSL_TYPE_UINT &&
+                    op[1]->type->base_type == GLSL_TYPE_INT) {
+            data.u[c] = op[0]->value.u[c0] >> op[1]->value.i[c1];
+
+         } else if (op[0]->type->base_type == GLSL_TYPE_UINT &&
+                    op[1]->type->base_type == GLSL_TYPE_UINT) {
+            data.u[c] = op[0]->value.u[c0] >> op[1]->value.u[c1];
+         }
+      }
+      break;
+
+   case ir_binop_bit_and:
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_INT:
+            data.i[c] = op[0]->value.i[c0] & op[1]->value.i[c1];
+            break;
+         case GLSL_TYPE_UINT:
+            data.u[c] = op[0]->value.u[c0] & op[1]->value.u[c1];
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+
+   case ir_binop_bit_or:
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_INT:
+            data.i[c] = op[0]->value.i[c0] | op[1]->value.i[c1];
+            break;
+         case GLSL_TYPE_UINT:
+            data.u[c] = op[0]->value.u[c0] | op[1]->value.u[c1];
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+
+   case ir_binop_vector_extract: {
+      const int c = CLAMP(op[1]->value.i[0], 0,
+                          (int) op[0]->type->vector_elements - 1);
+
+      switch (op[0]->type->base_type) {
+      case GLSL_TYPE_UINT:
+         data.u[0] = op[0]->value.u[c];
+         break;
+      case GLSL_TYPE_INT:
+         data.i[0] = op[0]->value.i[c];
+         break;
+      case GLSL_TYPE_FLOAT:
+         data.f[0] = op[0]->value.f[c];
+         break;
+      case GLSL_TYPE_DOUBLE:
+         data.d[0] = op[0]->value.d[c];
+         break;
+      case GLSL_TYPE_BOOL:
+         data.b[0] = op[0]->value.b[c];
+         break;
+      default:
+         assert(0);
+      }
+      break;
+   }
+
+   case ir_binop_bit_xor:
+      for (unsigned c = 0, c0 = 0, c1 = 0;
+           c < components;
+           c0 += c0_inc, c1 += c1_inc, c++) {
+
+         switch (op[0]->type->base_type) {
+         case GLSL_TYPE_INT:
+            data.i[c] = op[0]->value.i[c0] ^ op[1]->value.i[c1];
+            break;
+         case GLSL_TYPE_UINT:
+            data.u[c] = op[0]->value.u[c0] ^ op[1]->value.u[c1];
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+
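+   /* Tracing the reversal loop below with v = 0x00000001: the for loop body
+    * never runs (v >>= 1 makes it 0 immediately), so r = 1 and s is still
+    * 31; r <<= s then yields 0x80000000, the bit-reversed input.
+    */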
+   case ir_unop_bitfield_reverse:
+      /* http://graphics.stanford.edu/~seander/bithacks.html#BitReverseObvious */
+      for (unsigned c = 0; c < components; c++) {
+         unsigned int v = op[0]->value.u[c]; // input bits to be reversed
+         unsigned int r = v; // r will be reversed bits of v; first get LSB of v
+         int s = sizeof(v) * CHAR_BIT - 1; // extra shift needed at end
+
+         for (v >>= 1; v; v >>= 1) {
+            r <<= 1;
+            r |= v & 1;
+            s--;
+         }
+         r <<= s; // shift when v's highest bits are zero
+
+         data.u[c] = r;
+      }
+      break;
+
+   case ir_unop_bit_count:
+      for (unsigned c = 0; c < components; c++) {
+         unsigned count = 0;
+         unsigned v = op[0]->value.u[c];
+
+         for (; v; count++) {
+            v &= v - 1;
+         }
+         data.u[c] = count;
+      }
+      break;
+
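+   /* The find_msb case below scans from bit 31 downward for the first bit
+    * that differs from top_bit: for unsigned (and non-negative signed)
+    * inputs top_bit is 0, so e.g. 0x00000010 shifts left 27 times and
+    * yields 31 - 27 = 4; for negative signed inputs top_bit is set, so the
+    * loop finds the first 0 bit instead, while 0 and -1 return -1 via the
+    * early check.
+    */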
+   case ir_unop_find_msb:
+      for (unsigned c = 0; c < components; c++) {
+         int v = op[0]->value.i[c];
+
+         if (v == 0 || (op[0]->type->base_type == GLSL_TYPE_INT && v == -1))
+            data.i[c] = -1;
+         else {
+            int count = 0;
+            unsigned top_bit = op[0]->type->base_type == GLSL_TYPE_UINT
+                               ? 0 : v & (1u << 31);
+
+            while (((v & (1u << 31)) == top_bit) && count != 32) {
+               count++;
+               v <<= 1;
+            }
+
+            data.i[c] = 31 - count;
+         }
+      }
+      break;
+
+   case ir_unop_find_lsb:
+      for (unsigned c = 0; c < components; c++) {
+         if (op[0]->value.i[c] == 0)
+            data.i[c] = -1;
+         else {
+            unsigned pos = 0;
+            unsigned v = op[0]->value.u[c];
+
+            for (; !(v & 1); v >>= 1) {
+               pos++;
+            }
+            data.u[c] = pos;
+         }
+      }
+      break;
+
+   case ir_unop_saturate:
+      for (unsigned c = 0; c < components; c++) {
+         data.f[c] = CLAMP(op[0]->value.f[c], 0.0f, 1.0f);
+      }
+      break;
+   case ir_unop_pack_double_2x32: {
+      /* XXX needs to be checked on big-endian */
+      uint64_t temp;
+      temp = (uint64_t)op[0]->value.u[0] | ((uint64_t)op[0]->value.u[1] << 32);
+      data.d[0] = *(double *)&temp;
+
+      break;
+   }
+   case ir_unop_unpack_double_2x32:
+      /* XXX needs to be checked on big-endian */
+      data.u[0] = *(uint32_t *)&op[0]->value.d[0];
+      data.u[1] = *((uint32_t *)&op[0]->value.d[0] + 1);
+      break;
+
+   case ir_triop_bitfield_extract: {
+      for (unsigned c = 0; c < components; c++) {
+         int offset = op[1]->value.i[c];
+         int bits = op[2]->value.i[c];
+
+         if (bits == 0)
+            data.u[c] = 0;
+         else if (offset < 0 || bits < 0)
+            data.u[c] = 0; /* Undefined, per spec. */
+         else if (offset + bits > 32)
+            data.u[c] = 0; /* Undefined, per spec. */
+         else {
+            if (op[0]->type->base_type == GLSL_TYPE_INT) {
+               /* int so that the right shift will sign-extend. */
+               int value = op[0]->value.i[c];
+               value <<= 32 - bits - offset;
+               value >>= 32 - bits;
+               data.i[c] = value;
+            } else {
+               unsigned value = op[0]->value.u[c];
+               value <<= 32 - bits - offset;
+               value >>= 32 - bits;
+               data.u[c] = value;
+            }
+         }
+      }
+      break;
+   }
+
+   case ir_binop_ldexp:
+      for (unsigned c = 0; c < components; c++) {
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) {
+            data.d[c] = ldexp(op[0]->value.d[c], op[1]->value.i[c]);
+            /* Flush subnormal values to zero. */
+            if (!isnormal(data.d[c]))
+               data.d[c] = copysign(0.0, op[0]->value.d[c]);
+         } else {
+            data.f[c] = ldexpf(op[0]->value.f[c], op[1]->value.i[c]);
+            /* Flush subnormal values to zero.
+             */
+            if (!isnormal(data.f[c]))
+               data.f[c] = copysignf(0.0f, op[0]->value.f[c]);
+         }
+      }
+      break;
+
+   case ir_triop_fma:
+      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[1]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[2]->type->base_type == GLSL_TYPE_DOUBLE);
+
+      for (unsigned c = 0; c < components; c++) {
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = op[0]->value.d[c] * op[1]->value.d[c]
+                      + op[2]->value.d[c];
+         else
+            data.f[c] = op[0]->value.f[c] * op[1]->value.f[c]
+                      + op[2]->value.f[c];
+      }
+      break;
+
+   case ir_triop_lrp: {
+      assert(op[0]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[0]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[1]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[1]->type->base_type == GLSL_TYPE_DOUBLE);
+      assert(op[2]->type->base_type == GLSL_TYPE_FLOAT ||
+             op[2]->type->base_type == GLSL_TYPE_DOUBLE);
+
+      unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1;
+      for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) {
+         if (op[0]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = op[0]->value.d[c] * (1.0 - op[2]->value.d[c2]) +
+                        (op[1]->value.d[c] * op[2]->value.d[c2]);
+         else
+            data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) +
+                        (op[1]->value.f[c] * op[2]->value.f[c2]);
+      }
+      break;
+   }
+
+   case ir_triop_csel:
+      for (unsigned c = 0; c < components; c++) {
+         if (op[1]->type->base_type == GLSL_TYPE_DOUBLE)
+            data.d[c] = op[0]->value.b[c] ? op[1]->value.d[c]
+                                          : op[2]->value.d[c];
+         else
+            data.u[c] = op[0]->value.b[c] ? op[1]->value.u[c]
+                                          : op[2]->value.u[c];
+      }
+      break;
+
+   case ir_triop_vector_insert: {
+      const unsigned idx = op[2]->value.u[0];
+
+      memcpy(&data, &op[0]->value, sizeof(data));
+
+      switch (this->type->base_type) {
+      case GLSL_TYPE_INT:
+         data.i[idx] = op[1]->value.i[0];
+         break;
+      case GLSL_TYPE_UINT:
+         data.u[idx] = op[1]->value.u[0];
+         break;
+      case GLSL_TYPE_FLOAT:
+         data.f[idx] = op[1]->value.f[0];
+         break;
+      case GLSL_TYPE_BOOL:
+         data.b[idx] = op[1]->value.b[0];
+         break;
+      case GLSL_TYPE_DOUBLE:
+         data.d[idx] = op[1]->value.d[0];
+         break;
+      default:
+         assert(!"Should not get here.");
+         break;
+      }
+      break;
+   }
+
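+   /* For the insert mask computed below: offset = 8, bits = 4 gives
+    * insert_mask = ((1 << 4) - 1) << 8 = 0x00000f00, so bits 8..11 of
+    * op[1] replace bits 8..11 of op[0] and all other bits are preserved.
+    */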
+   case ir_quadop_bitfield_insert: {
+      for (unsigned c = 0; c < components; c++) {
+         int offset = op[2]->value.i[c];
+         int bits = op[3]->value.i[c];
+
+         if (bits == 0)
+            data.u[c] = op[0]->value.u[c];
+         else if (offset < 0 || bits < 0)
+            data.u[c] = 0; /* Undefined, per spec. */
+         else if (offset + bits > 32)
+            data.u[c] = 0; /* Undefined, per spec. */
+         else {
+            unsigned insert_mask = ((1ull << bits) - 1) << offset;
+
+            unsigned insert = op[1]->value.u[c];
+            insert <<= offset;
+            insert &= insert_mask;
+
+            unsigned base = op[0]->value.u[c];
+            base &= ~insert_mask;
+
+            data.u[c] = base | insert;
+         }
+      }
+      break;
+   }
+
+   case ir_quadop_vector:
+      for (unsigned c = 0; c < this->type->vector_elements; c++) {
+         switch (this->type->base_type) {
+         case GLSL_TYPE_INT:
+            data.i[c] = op[c]->value.i[0];
+            break;
+         case GLSL_TYPE_UINT:
+            data.u[c] = op[c]->value.u[0];
+            break;
+         case GLSL_TYPE_FLOAT:
+            data.f[c] = op[c]->value.f[0];
+            break;
+         case GLSL_TYPE_DOUBLE:
+            data.d[c] = op[c]->value.d[0];
+            break;
+         default:
+            assert(0);
+         }
+      }
+      break;
+
+   default:
+      /* FINISHME: Should handle all expression types.
+       */
+      return NULL;
+   }
+
+   return new(ctx) ir_constant(this->type, &data);
+}
+
+
+ir_constant *
+ir_texture::constant_expression_value(struct hash_table *)
+{
+   /* texture lookups aren't constant expressions */
+   return NULL;
+}
+
+
+ir_constant *
+ir_swizzle::constant_expression_value(struct hash_table *variable_context)
+{
+   ir_constant *v = this->val->constant_expression_value(variable_context);
+
+   if (v != NULL) {
+      ir_constant_data data = { { 0 } };
+
+      const unsigned swiz_idx[4] = {
+         this->mask.x, this->mask.y, this->mask.z, this->mask.w
+      };
+
+      for (unsigned i = 0; i < this->mask.num_components; i++) {
+         switch (v->type->base_type) {
+         case GLSL_TYPE_UINT:
+         case GLSL_TYPE_INT:    data.u[i] = v->value.u[swiz_idx[i]]; break;
+         case GLSL_TYPE_FLOAT:  data.f[i] = v->value.f[swiz_idx[i]]; break;
+         case GLSL_TYPE_BOOL:   data.b[i] = v->value.b[swiz_idx[i]]; break;
+         case GLSL_TYPE_DOUBLE: data.d[i] = v->value.d[swiz_idx[i]]; break;
+         default:               assert(!"Should not get here."); break;
+         }
+      }
+
+      void *ctx = ralloc_parent(this);
+      return new(ctx) ir_constant(this->type, &data);
+   }
+   return NULL;
+}
+
+
+ir_constant *
+ir_dereference_variable::constant_expression_value(struct hash_table *variable_context)
+{
+   assert(var);
+
+   /* Give priority to the context hashtable, if it exists */
+   if (variable_context) {
+      ir_constant *value = (ir_constant *)hash_table_find(variable_context, var);
+      if (value)
+         return value;
+   }
+
+   /* The constant_value of a uniform variable is its initializer,
+    * not the lifetime constant value of the uniform.
+    */
+   if (var->data.mode == ir_var_uniform)
+      return NULL;
+
+   if (!var->constant_value)
+      return NULL;
+
+   return var->constant_value->clone(ralloc_parent(var), NULL);
+}
+
+
+ir_constant *
+ir_dereference_array::constant_expression_value(struct hash_table *variable_context)
+{
+   ir_constant *array = this->array->constant_expression_value(variable_context);
+   ir_constant *idx = this->array_index->constant_expression_value(variable_context);
+
+   if ((array != NULL) && (idx != NULL)) {
+      void *ctx = ralloc_parent(this);
+      if (array->type->is_matrix()) {
+         /* Array access of a matrix results in a vector.
+          */
+         const unsigned column = idx->value.u[0];
+
+         const glsl_type *const column_type = array->type->column_type();
+
+         /* Offset in the constant matrix to the first element of the column
+          * to be extracted.
+          */
+         const unsigned mat_idx = column * column_type->vector_elements;
+
+         ir_constant_data data = { { 0 } };
+
+         switch (column_type->base_type) {
+         case GLSL_TYPE_UINT:
+         case GLSL_TYPE_INT:
+            for (unsigned i = 0; i < column_type->vector_elements; i++)
+               data.u[i] = array->value.u[mat_idx + i];
+
+            break;
+
+         case GLSL_TYPE_FLOAT:
+            for (unsigned i = 0; i < column_type->vector_elements; i++)
+               data.f[i] = array->value.f[mat_idx + i];
+
+            break;
+
+         case GLSL_TYPE_DOUBLE:
+            for (unsigned i = 0; i < column_type->vector_elements; i++)
+               data.d[i] = array->value.d[mat_idx + i];
+
+            break;
+
+         default:
+            assert(!"Should not get here.");
+            break;
+         }
+
+         return new(ctx) ir_constant(column_type, &data);
+      } else if (array->type->is_vector()) {
+         const unsigned component = idx->value.u[0];
+
+         return new(ctx) ir_constant(array, component);
+      } else {
+         const unsigned index = idx->value.u[0];
+         return array->get_array_element(index)->clone(ctx, NULL);
+      }
+   }
+   return NULL;
+}
+
+
+ir_constant *
+ir_dereference_record::constant_expression_value(struct hash_table *)
+{
+   ir_constant *v = this->record->constant_expression_value();
+
+   return (v != NULL) ? v->get_record_field(this->field) : NULL;
+}
+
+
+ir_constant *
+ir_assignment::constant_expression_value(struct hash_table *)
+{
+   /* FINISHME: Handle CEs involving assignment (return RHS) */
+   return NULL;
+}
+
+
+ir_constant *
+ir_constant::constant_expression_value(struct hash_table *)
+{
+   return this;
+}
+
+
+ir_constant *
+ir_call::constant_expression_value(struct hash_table *variable_context)
+{
+   return this->callee->constant_expression_value(&this->actual_parameters, variable_context);
+}
+
+
+bool ir_function_signature::constant_expression_evaluate_expression_list(const struct exec_list &body,
+                                                                         struct hash_table *variable_context,
+                                                                         ir_constant **result)
+{
+   foreach_in_list(ir_instruction, inst, &body) {
+      switch (inst->ir_type) {
+
+      /* (declare () type symbol) */
+      case ir_type_variable: {
+         ir_variable *var = inst->as_variable();
+         hash_table_insert(variable_context, ir_constant::zero(this, var->type), var);
+         break;
+      }
+
+      /* (assign [condition] (write-mask) (ref) (value)) */
+      case ir_type_assignment: {
+         ir_assignment *asg = inst->as_assignment();
+         if (asg->condition) {
+            ir_constant *cond = asg->condition->constant_expression_value(variable_context);
+            if (!cond)
+               return false;
+            if (!cond->get_bool_component(0))
+               break;
+         }
+
+         ir_constant *store = NULL;
+         int offset = 0;
+
+         if (!constant_referenced(asg->lhs, variable_context, store, offset))
+            return false;
+
+         ir_constant *value = asg->rhs->constant_expression_value(variable_context);
+
+         if (!value)
+            return false;
+
+         store->copy_masked_offset(value, offset, asg->write_mask);
+         break;
+      }
+
+      /* (return (expression)) */
+      case ir_type_return:
+         assert(result);
+         *result = inst->as_return()->value->constant_expression_value(variable_context);
+         return *result != NULL;
+
+      /* (call name (ref) (params)) */
+      case ir_type_call: {
+         ir_call *call = inst->as_call();
+
+         /* Just say no to void functions in constant expressions.  We
+          * don't need them at that point.
+          */
+
+         if (!call->return_deref)
+            return false;
+
+         ir_constant *store = NULL;
+         int offset = 0;
+
+         if (!constant_referenced(call->return_deref, variable_context,
+                                  store, offset))
+            return false;
+
+         ir_constant *value = call->constant_expression_value(variable_context);
+
+         if (!value)
+            return false;
+
+         store->copy_offset(value, offset);
+         break;
+      }
+
+      /* (if condition (then-instructions) (else-instructions)) */
+      case ir_type_if: {
+         ir_if *iif = inst->as_if();
+
+         ir_constant *cond = iif->condition->constant_expression_value(variable_context);
+         if (!cond || !cond->type->is_boolean())
+            return false;
+
+         exec_list &branch = cond->get_bool_component(0) ? iif->then_instructions : iif->else_instructions;
+
+         *result = NULL;
+         if (!constant_expression_evaluate_expression_list(branch, variable_context, result))
+            return false;
+
+         /* If there was a return in the branch chosen, drop out now. */
+         if (*result)
+            return true;
+
+         break;
+      }
+
+      /* Every other expression type, we drop out. */
+      default:
+         return false;
+      }
+   }
+
+   /* Reaching the end of the block is not an error condition */
+   if (result)
+      *result = NULL;
+
+   return true;
+}
+
+ir_constant *
+ir_function_signature::constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context)
+{
+   const glsl_type *type = this->return_type;
+   if (type == glsl_type::void_type)
+      return NULL;
+
+   /* From the GLSL 1.20 spec, page 23:
+    * "Function calls to user-defined functions (non-built-in functions)
+    * cannot be used to form constant expressions."
+    */
+   if (!this->is_builtin())
+      return NULL;
+
+   /*
+    * Of the builtin functions, only the texture lookups and the noise
+    * ones must not be used in constant expressions.  They all include
+    * specific opcodes so they don't need to be special-cased at this
+    * point.
+    */
+
+   /* Initialize the table of dereferencable names with the function
+    * parameters.  Verify their const-ness on the way.
+    *
+    * We expect the correctness of the number of parameters to have
+    * been checked earlier.
+    */
+   hash_table *deref_hash = hash_table_ctor(8, hash_table_pointer_hash,
+                                            hash_table_pointer_compare);
+
+   /* If "origin" is non-NULL, then the function body is there.  So we
+    * have to use the variable objects from the object with the body,
+    * but the parameter instantiation on the current object.
+    */
+   const exec_node *parameter_info = origin ? origin->parameters.head : parameters.head;
+
+   foreach_in_list(ir_rvalue, n, actual_parameters) {
+      ir_constant *constant = n->constant_expression_value(variable_context);
+      if (constant == NULL) {
+         hash_table_dtor(deref_hash);
+         return NULL;
+      }
+
+      ir_variable *var = (ir_variable *)parameter_info;
+      hash_table_insert(deref_hash, constant, var);
+
+      parameter_info = parameter_info->next;
+   }
+
+   ir_constant *result = NULL;
+
+   /* Now run the builtin function until something non-constant
+    * happens or we get the result.
+    */
+   if (constant_expression_evaluate_expression_list(origin ? origin->body : body, deref_hash, &result) && result)
+      result = result->clone(ralloc_parent(this), NULL);
+
+   hash_table_dtor(deref_hash);
+
+   return result;
+}
diff --git a/src/compiler/glsl/ir_equals.cpp b/src/compiler/glsl/ir_equals.cpp
new file mode 100644
index 00000000000..b86f4ea16bb
--- /dev/null
+++ b/src/compiler/glsl/ir_equals.cpp
@@ -0,0 +1,211 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ir.h"
+
+/**
+ * Helper for checking equality when one instruction might be NULL, since you
+ * can't access a's vtable in that case.
+ */
+static bool
+possibly_null_equals(const ir_instruction *a, const ir_instruction *b,
+                     enum ir_node_type ignore)
+{
+   if (!a || !b)
+      return !a && !b;
+
+   return a->equals(b, ignore);
+}
+
+/**
+ * The base equality function: Return not equal for anything we don't know
+ * about.
+ */
+bool
+ir_instruction::equals(const ir_instruction *, enum ir_node_type) const
+{
+   return false;
+}
+
+bool
+ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const
+{
+   const ir_constant *other = ir->as_constant();
+   if (!other)
+      return false;
+
+   if (type != other->type)
+      return false;
+
+   for (unsigned i = 0; i < type->components(); i++) {
+      if (type->base_type == GLSL_TYPE_DOUBLE) {
+         if (value.d[i] != other->value.d[i])
+            return false;
+      } else {
+         if (value.u[i] != other->value.u[i])
+            return false;
+      }
+   }
+
+   return true;
+}
+
+bool
+ir_dereference_variable::equals(const ir_instruction *ir,
+                                enum ir_node_type) const
+{
+   const ir_dereference_variable *other = ir->as_dereference_variable();
+   if (!other)
+      return false;
+
+   return var == other->var;
+}
+
+bool
+ir_dereference_array::equals(const ir_instruction *ir,
+                             enum ir_node_type ignore) const
+{
+   const ir_dereference_array *other = ir->as_dereference_array();
+   if (!other)
+      return false;
+
+   if (type != other->type)
+      return false;
+
+   if (!array->equals(other->array, ignore))
+      return false;
+
+   if (!array_index->equals(other->array_index, ignore))
+      return false;
+
+   return true;
+}
+
+bool
+ir_swizzle::equals(const ir_instruction *ir,
+                   enum ir_node_type ignore) const
+{
+   const ir_swizzle *other = ir->as_swizzle();
+   if (!other)
+      return false;
+
+   if (type != other->type)
+      return false;
+
+   if (ignore != ir_type_swizzle) {
+      if (mask.x != other->mask.x ||
+          mask.y != other->mask.y ||
+          mask.z != other->mask.z ||
+          mask.w != other->mask.w) {
+         return false;
+      }
+   }
+
+   return val->equals(other->val, ignore);
+}
+
+bool
+ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const
+{
+   const ir_texture *other = ir->as_texture();
+   if (!other)
+      return false;
+
+   if (type != other->type)
+      return false;
+
+   if (op != other->op)
+      return false;
+
+   if (!possibly_null_equals(coordinate, other->coordinate, ignore))
+      return false;
+
+   if (!possibly_null_equals(projector, other->projector, ignore))
+      return false;
+
+   if (!possibly_null_equals(shadow_comparitor, other->shadow_comparitor, ignore))
+      return false;
+
+   if (!possibly_null_equals(offset, other->offset, ignore))
+      return false;
+
+   if (!sampler->equals(other->sampler, ignore))
+      return false;
+
+   switch (op) {
+   case ir_tex:
+   case ir_lod:
+   case ir_query_levels:
+   case ir_texture_samples:
+   case ir_samples_identical:
+      break;
+   case ir_txb:
+      if (!lod_info.bias->equals(other->lod_info.bias, ignore))
+         return false;
+      break;
+   case ir_txl:
+   case ir_txf:
+   case ir_txs:
+      if (!lod_info.lod->equals(other->lod_info.lod, ignore))
+         return false;
+      break;
+   case ir_txd:
+      if (!lod_info.grad.dPdx->equals(other->lod_info.grad.dPdx, ignore) ||
+          !lod_info.grad.dPdy->equals(other->lod_info.grad.dPdy, ignore))
+         return false;
+      break;
+   case ir_txf_ms:
+      if (!lod_info.sample_index->equals(other->lod_info.sample_index, ignore))
+         return false;
+      break;
+   case ir_tg4:
+      if (!lod_info.component->equals(other->lod_info.component, ignore))
+         return false;
+      break;
+   default:
+      assert(!"Unrecognized texture op");
+   }
+
+   return true;
+}
+
+bool
+ir_expression::equals(const ir_instruction *ir, enum ir_node_type ignore) const
+{
+   const ir_expression *other = ir->as_expression();
+   if (!other)
+      return false;
+
+   if (type != other->type)
+      return false;
+
+   if (operation != other->operation)
+      return false;
+
+   for (unsigned i = 0; i < get_num_operands(); i++) {
+      if (!operands[i]->equals(other->operands[i], ignore))
+         return false;
+   }
+
+   return true;
+}
diff --git a/src/compiler/glsl/ir_expression_flattening.cpp b/src/compiler/glsl/ir_expression_flattening.cpp
new file mode 100644
index 00000000000..c13ae811d78
--- /dev/null
+++ b/src/compiler/glsl/ir_expression_flattening.cpp
@@ -0,0 +1,86 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_expression_flattening.cpp
+ *
+ * Takes the leaves of expression trees and makes them dereferences of
+ * assignments of the leaves to temporaries, according to a predicate.
+ *
+ * This is used for breaking down matrix operations, where it's easier to
+ * create a temporary and work on each of its vector components individually.
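+ *
+ * For instance, with a predicate that matched only the multiply node, an
+ * assignment like
+ *
+ *    a = b * c + d;
+ *
+ * would become roughly
+ *
+ *    flattening_tmp = b * c;
+ *    a = flattening_tmp + d;
+ *
+ * using the "flattening_tmp" temporaries the visitor below creates.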
+ */
+
+#include "ir.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_expression_flattening.h"
+
+class ir_expression_flattening_visitor : public ir_rvalue_visitor {
+public:
+   ir_expression_flattening_visitor(bool (*predicate)(ir_instruction *ir))
+   {
+      this->predicate = predicate;
+   }
+
+   virtual ~ir_expression_flattening_visitor()
+   {
+      /* empty */
+   }
+
+   void handle_rvalue(ir_rvalue **rvalue);
+   bool (*predicate)(ir_instruction *ir);
+};
+
+void
+do_expression_flattening(exec_list *instructions,
+                         bool (*predicate)(ir_instruction *ir))
+{
+   ir_expression_flattening_visitor v(predicate);
+
+   foreach_in_list(ir_instruction, ir, instructions) {
+      ir->accept(&v);
+   }
+}
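+
+/* A sketch of how a caller might drive this pass, with a hypothetical
+ * predicate (is_matrix_val is illustrative, not part of this file):
+ *
+ *    static bool is_matrix_val(ir_instruction *ir)
+ *    {
+ *       ir_rvalue *rv = ir->as_rvalue();
+ *       return rv != NULL && rv->type->is_matrix();
+ *    }
+ *
+ *    do_expression_flattening(instructions, is_matrix_val);
+ *
+ * which would pull every matrix-valued rvalue out into its own temporary.
+ */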
+
+void
+ir_expression_flattening_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   ir_variable *var;
+   ir_assignment *assign;
+   ir_rvalue *ir = *rvalue;
+
+   if (!ir || !this->predicate(ir))
+      return;
+
+   void *ctx = ralloc_parent(ir);
+
+   var = new(ctx) ir_variable(ir->type, "flattening_tmp", ir_var_temporary);
+   base_ir->insert_before(var);
+
+   assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var),
+                                   ir,
+                                   NULL);
+   base_ir->insert_before(assign);
+
+   *rvalue = new(ctx) ir_dereference_variable(var);
+}
diff --git a/src/compiler/glsl/ir_expression_flattening.h b/src/compiler/glsl/ir_expression_flattening.h
new file mode 100644
index 00000000000..2eda1590001
--- /dev/null
+++ b/src/compiler/glsl/ir_expression_flattening.h
@@ -0,0 +1,38 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file ir_expression_flattening.h
+ *
+ * Takes the leaves of expression trees and makes them dereferences of
+ * assignments of the leaves to temporaries, according to a predicate.
+ *
+ * This is used for automatic function inlining, where we want to take
+ * an expression containing a call and move the call out to its own
+ * assignment so that we can inline it at the appropriate place in the
+ * instruction stream.
+ */
+
+void do_expression_flattening(exec_list *instructions,
+                              bool (*predicate)(ir_instruction *ir));
diff --git a/src/compiler/glsl/ir_function.cpp b/src/compiler/glsl/ir_function.cpp
new file mode 100644
index 00000000000..0b4cb4bd30d
--- /dev/null
+++ b/src/compiler/glsl/ir_function.cpp
@@ -0,0 +1,404 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "compiler/glsl_types.h"
+#include "ir.h"
+#include "glsl_parser_extras.h"
+#include "main/errors.h"
+
+typedef enum {
+   PARAMETER_LIST_NO_MATCH,
+   PARAMETER_LIST_EXACT_MATCH,
+   PARAMETER_LIST_INEXACT_MATCH /**< Match requires implicit conversion. */
+} parameter_list_match_t;
+
+/**
+ * \brief Check if two parameter lists match.
+ *
+ * \param list_a Parameters of the function definition.
+ * \param list_b Actual parameters passed to the function.
+ * \see matching_signature()
+ */
+static parameter_list_match_t
+parameter_lists_match(_mesa_glsl_parse_state *state,
+                      const exec_list *list_a, const exec_list *list_b)
+{
+   const exec_node *node_a = list_a->head;
+   const exec_node *node_b = list_b->head;
+
+   /* This is set to true if there is an inexact match requiring an implicit
+    * conversion. */
+   bool inexact_match = false;
+
+   for (/* empty */
+        ; !node_a->is_tail_sentinel()
+        ; node_a = node_a->next, node_b = node_b->next) {
+      /* If all of the parameters from the other parameter list have been
+       * exhausted, the lists have different length and, by definition,
+       * do not match.
+       */
+      if (node_b->is_tail_sentinel())
+         return PARAMETER_LIST_NO_MATCH;
+
+      const ir_variable *const param = (ir_variable *) node_a;
+      const ir_rvalue *const actual = (ir_rvalue *) node_b;
+
+      if (param->type == actual->type)
+         continue;
+
+      /* Try to find an implicit conversion from actual to param. */
+      inexact_match = true;
+      switch ((enum ir_variable_mode)(param->data.mode)) {
+      case ir_var_auto:
+      case ir_var_uniform:
+      case ir_var_shader_storage:
+      case ir_var_temporary:
+         /* These are all error conditions.  It is invalid for a parameter to
+          * a function to be declared as auto (not in, out, or inout) or
+          * as uniform.
+ */ + assert(0); + return PARAMETER_LIST_NO_MATCH; + + case ir_var_const_in: + case ir_var_function_in: + if (!actual->type->can_implicitly_convert_to(param->type, state)) + return PARAMETER_LIST_NO_MATCH; + break; + + case ir_var_function_out: + if (!param->type->can_implicitly_convert_to(actual->type, state)) + return PARAMETER_LIST_NO_MATCH; + break; + + case ir_var_function_inout: + /* Since there are no bi-directional automatic conversions (e.g., + * there is int -> float but no float -> int), inout parameters must + * be exact matches. + */ + return PARAMETER_LIST_NO_MATCH; + + default: + assert(false); + return PARAMETER_LIST_NO_MATCH; + } + } + + /* If all of the parameters from the other parameter list have been + * exhausted, the lists have different length and, by definition, do not + * match. + */ + if (!node_b->is_tail_sentinel()) + return PARAMETER_LIST_NO_MATCH; + + if (inexact_match) + return PARAMETER_LIST_INEXACT_MATCH; + else + return PARAMETER_LIST_EXACT_MATCH; +} + + +/* Classes of parameter match, sorted (mostly) best matches first. + * See is_better_parameter_match() below for the exceptions. + * */ +typedef enum { + PARAMETER_EXACT_MATCH, + PARAMETER_FLOAT_TO_DOUBLE, + PARAMETER_INT_TO_FLOAT, + PARAMETER_INT_TO_DOUBLE, + PARAMETER_OTHER_CONVERSION, +} parameter_match_t; + + +static parameter_match_t +get_parameter_match_type(const ir_variable *param, + const ir_rvalue *actual) +{ + const glsl_type *from_type; + const glsl_type *to_type; + + if (param->data.mode == ir_var_function_out) { + from_type = param->type; + to_type = actual->type; + } else { + from_type = actual->type; + to_type = param->type; + } + + if (from_type == to_type) + return PARAMETER_EXACT_MATCH; + + if (to_type->base_type == GLSL_TYPE_DOUBLE) { + if (from_type->base_type == GLSL_TYPE_FLOAT) + return PARAMETER_FLOAT_TO_DOUBLE; + return PARAMETER_INT_TO_DOUBLE; + } + + if (to_type->base_type == GLSL_TYPE_FLOAT) + return PARAMETER_INT_TO_FLOAT; + + /* int -> uint and any other oddball conversions */ + return PARAMETER_OTHER_CONVERSION; +} + + +static bool +is_better_parameter_match(parameter_match_t a_match, + parameter_match_t b_match) +{ + /* From section 6.1 of the GLSL 4.00 spec (and the ARB_gpu_shader5 spec): + * + * 1. An exact match is better than a match involving any implicit + * conversion. + * + * 2. A match involving an implicit conversion from float to double + * is better than match involving any other implicit conversion. + * + * [XXX: Not in GLSL 4.0: Only in ARB_gpu_shader5: + * 3. A match involving an implicit conversion from either int or uint + * to float is better than a match involving an implicit conversion + * from either int or uint to double.] + * + * If none of the rules above apply to a particular pair of conversions, + * neither conversion is considered better than the other. + * + * -- + * + * Notably, the int->uint conversion is *not* considered to be better + * or worse than int/uint->float or int/uint->double. 
+   if (a_match >= PARAMETER_INT_TO_FLOAT && b_match == PARAMETER_OTHER_CONVERSION)
+      return false;
+
+   return a_match < b_match;
+}
+
+
+static bool
+is_best_inexact_overload(const exec_list *actual_parameters,
+                         ir_function_signature **matches,
+                         int num_matches,
+                         ir_function_signature *sig)
+{
+   /* From section 6.1 of the GLSL 4.00 spec (and the ARB_gpu_shader5 spec):
+    *
+    * "A function definition A is considered a better
+    * match than function definition B if:
+    *
+    * * for at least one function argument, the conversion for that argument
+    *   in A is better than the corresponding conversion in B; and
+    *
+    * * there is no function argument for which the conversion in B is better
+    *   than the corresponding conversion in A.
+    *
+    * If a single function definition is considered a better match than every
+    * other matching function definition, it will be used.  Otherwise, a
+    * semantic error occurs and the shader will fail to compile."
+    */
+   for (ir_function_signature **other = matches;
+        other < matches + num_matches; other++) {
+      if (*other == sig)
+         continue;
+
+      const exec_node *node_a = sig->parameters.head;
+      const exec_node *node_b = (*other)->parameters.head;
+      const exec_node *node_p = actual_parameters->head;
+
+      bool better_for_some_parameter = false;
+
+      for (/* empty */
+           ; !node_a->is_tail_sentinel()
+           ; node_a = node_a->next,
+             node_b = node_b->next,
+             node_p = node_p->next) {
+         parameter_match_t a_match = get_parameter_match_type(
+               (const ir_variable *)node_a,
+               (const ir_rvalue *)node_p);
+         parameter_match_t b_match = get_parameter_match_type(
+               (const ir_variable *)node_b,
+               (const ir_rvalue *)node_p);
+
+         if (is_better_parameter_match(a_match, b_match))
+            better_for_some_parameter = true;
+
+         if (is_better_parameter_match(b_match, a_match))
+            return false;     /* B is better for this parameter */
+      }
+
+      if (!better_for_some_parameter)
+         return false;     /* A must be better than B for some parameter */
+   }
+
+   return true;
+}
+
+
+static ir_function_signature *
+choose_best_inexact_overload(_mesa_glsl_parse_state *state,
+                             const exec_list *actual_parameters,
+                             ir_function_signature **matches,
+                             int num_matches)
+{
+   if (num_matches == 0)
+      return NULL;
+
+   if (num_matches == 1)
+      return *matches;
+
+   /* Without GLSL 4.0 / ARB_gpu_shader5, there is no overload resolution
+    * among multiple inexact matches.  Note that state may be NULL here if
+    * called from the linker; in that case we assume everything supported in
+    * any GLSL version is available.
+    */
+   if (!state || state->is_version(400, 0) || state->ARB_gpu_shader5_enable) {
+      for (ir_function_signature **sig = matches; sig < matches + num_matches; sig++) {
+         if (is_best_inexact_overload(actual_parameters, matches, num_matches, *sig))
+            return *sig;
+      }
+   }
+
+   return NULL;   /* no best candidate */
+}
+
+
+ir_function_signature *
+ir_function::matching_signature(_mesa_glsl_parse_state *state,
+                                const exec_list *actual_parameters,
+                                bool allow_builtins)
+{
+   bool is_exact;
+   return matching_signature(state, actual_parameters, allow_builtins,
+                             &is_exact);
+}
+
+ir_function_signature *
+ir_function::matching_signature(_mesa_glsl_parse_state *state,
+                                const exec_list *actual_parameters,
+                                bool allow_builtins,
+                                bool *is_exact)
+{
+   ir_function_signature **inexact_matches = NULL;
+   ir_function_signature **inexact_matches_temp;
+   ir_function_signature *match = NULL;
+   int num_inexact_matches = 0;
+
+   /* From page 42 (page 49 of the PDF) of the GLSL 1.20 spec:
+    *
+    * "If an exact match is found, the other signatures are ignored, and
+    * the exact match is used.  Otherwise, if no exact match is found, then
+    * the implicit conversions in Section 4.1.10 "Implicit Conversions" will
+    * be applied to the calling arguments if this can make their types match
+    * a signature.  In this case, it is a semantic error if there are
+    * multiple ways to apply these conversions to the actual arguments of a
+    * call such that the call can be made to match multiple signatures."
+    */
+   foreach_in_list(ir_function_signature, sig, &this->signatures) {
+      /* Skip over any built-ins that aren't available in this shader. */
+      if (sig->is_builtin() && (!allow_builtins ||
+                                !sig->is_builtin_available(state)))
+         continue;
+
+      switch (parameter_lists_match(state, &sig->parameters, actual_parameters)) {
+      case PARAMETER_LIST_EXACT_MATCH:
+         *is_exact = true;
+         free(inexact_matches);
+         return sig;
+      case PARAMETER_LIST_INEXACT_MATCH:
+         inexact_matches_temp = (ir_function_signature **)
+               realloc(inexact_matches,
+                       sizeof(*inexact_matches) *
+                       (num_inexact_matches + 1));
+         if (inexact_matches_temp == NULL) {
+            _mesa_error_no_memory(__func__);
+            free(inexact_matches);
+            return NULL;
+         }
+         inexact_matches = inexact_matches_temp;
+         inexact_matches[num_inexact_matches++] = sig;
+         continue;
+      case PARAMETER_LIST_NO_MATCH:
+         continue;
+      default:
+         assert(false);
+         return NULL;
+      }
+   }
+
+   /* There is no exact match (we would have returned it by now).  If there
+    * are multiple inexact matches, the call is ambiguous, which is an error.
+    *
+    * FINISHME: Report a decent error.  Returning NULL will likely result in
+    * FINISHME: a "no matching signature" error; it should report that the
+    * FINISHME: call is ambiguous.  But reporting errors from here is hard.
+    */
+   *is_exact = false;
+
+   match = choose_best_inexact_overload(state, actual_parameters,
+                                        inexact_matches, num_inexact_matches);
+
+   free(inexact_matches);
+   return match;
+}
+
+
+static bool
+parameter_lists_match_exact(const exec_list *list_a, const exec_list *list_b)
+{
+   const exec_node *node_a = list_a->head;
+   const exec_node *node_b = list_b->head;
+
+   for (/* empty */
+        ; !node_a->is_tail_sentinel() && !node_b->is_tail_sentinel()
+        ; node_a = node_a->next, node_b = node_b->next) {
+      ir_variable *a = (ir_variable *) node_a;
+      ir_variable *b = (ir_variable *) node_b;
+
+      /* If the types of the parameters do not match, the parameter lists
+       * are different.
+       */
+      if (a->type != b->type)
+         return false;
+   }
+
+   /* Unless both lists are exhausted, they differ in length and, by
+    * definition, do not match.
+    */
+   return (node_a->is_tail_sentinel() == node_b->is_tail_sentinel());
+}
+
+ir_function_signature *
+ir_function::exact_matching_signature(_mesa_glsl_parse_state *state,
+                                      const exec_list *actual_parameters)
+{
+   foreach_in_list(ir_function_signature, sig, &this->signatures) {
+      /* Skip over any built-ins that aren't available in this shader. */
+      if (sig->is_builtin() && !sig->is_builtin_available(state))
+         continue;
+
+      if (parameter_lists_match_exact(&sig->parameters, actual_parameters))
+         return sig;
+   }
+   return NULL;
+}
diff --git a/src/compiler/glsl/ir_function_can_inline.cpp b/src/compiler/glsl/ir_function_can_inline.cpp
new file mode 100644
index 00000000000..3b1d15f80fc
--- /dev/null
+++ b/src/compiler/glsl/ir_function_can_inline.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_function_can_inline.cpp
+ *
+ * Determines if we can inline a function call using ir_function_inlining.cpp.
+ *
+ * The primary restriction is that we can't return from the function other
+ * than as the last instruction.  In lower_jumps.cpp, we can lower return
+ * statements not at the end of the function to other control flow in order to
+ * deal with this restriction.
+ */
+
+#include "ir.h"
+
+class ir_function_can_inline_visitor : public ir_hierarchical_visitor {
+public:
+   ir_function_can_inline_visitor()
+   {
+      this->num_returns = 0;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_return *);
+
+   int num_returns;
+};
+
+ir_visitor_status
+ir_function_can_inline_visitor::visit_enter(ir_return *ir)
+{
+   (void) ir;
+   this->num_returns++;
+   return visit_continue;
+}
+
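+/* For example, a callee whose body is just "return a + b;" counts one
+ * return and can be inlined, while "if (c) return a; return b;" counts two
+ * and is rejected until lower_jumps rewrites the early return into other
+ * control flow.
+ */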
+ */ + ir_instruction *last = (ir_instruction *)callee->body.get_tail(); + if (last == NULL || !last->as_return()) + v.num_returns++; + + return v.num_returns == 1; +} diff --git a/src/compiler/glsl/ir_function_detect_recursion.cpp b/src/compiler/glsl/ir_function_detect_recursion.cpp new file mode 100644 index 00000000000..b2334d2e403 --- /dev/null +++ b/src/compiler/glsl/ir_function_detect_recursion.cpp @@ -0,0 +1,358 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_function_detect_recursion.cpp + * Determine whether a shader contains static recursion. + * + * Consider the (possibly disjoint) graph of function calls in a shader. If a + * program contains recursion, this graph will contain a cycle. If a function + * is part of a cycle, it will have a caller and it will have a callee (it + * calls another function). + * + * To detect recursion, the function call graph is constructed. The graph is + * repeatedly reduced by removing any function that either has no callees + * (leaf functions) or has no caller. Eventually the only functions that + * remain will be the functions in the cycles. + * + * The GLSL spec is a bit wishy-washy about recursion. + * + * From page 39 (page 45 of the PDF) of the GLSL 1.10 spec: + * + * "Behavior is undefined if recursion is used. Recursion means having any + * function appearing more than once at any one time in the run-time stack + * of function calls. That is, a function may not call itself either + * directly or indirectly. Compilers may give diagnostic messages when + * this is detectable at compile time, but not all such cases can be + * detected at compile time." + * + * From page 79 (page 85 of the PDF): + * + * "22) Should recursion be supported? + * + * DISCUSSION: Probably not necessary, but another example of limiting + * the language based on how it would directly map to hardware. One + * thought is that recursion would benefit ray tracing shaders. On the + * other hand, many recursion operations can also be implemented with the + * user managing the recursion through arrays. RenderMan doesn't support + * recursion. This could be added at a later date, if it proved to be + * necessary. + * + * RESOLVED on September 10, 2002: Implementations are not required to + * support recursion. + * + * CLOSED on September 10, 2002." 
+ *
+ * From page 79 (page 85 of the PDF):
+ *
+ * "56) Is it an error for an implementation to support recursion if the
+ * specification says recursion is not supported?
+ *
+ * ADDED on September 10, 2002.
+ *
+ * DISCUSSION: This issues is related to Issue (22). If we say that
+ * recursion (or some other piece of functionality) is not supported, is
+ * it an error for an implementation to support it? Perhaps the
+ * specification should remain silent on these kind of things so that they
+ * could be gracefully added later as an extension or as part of the
+ * standard.
+ *
+ * RESOLUTION: Languages, in general, have programs that are not
+ * well-formed in ways a compiler cannot detect. Portability is only
+ * ensured for well-formed programs. Detecting recursion is an example of
+ * this. The language will say a well-formed program may not recurse, but
+ * compilers are not forced to detect that recursion may happen.
+ *
+ * CLOSED: November 29, 2002."
+ *
+ * In GLSL 1.10 the behavior of recursion is undefined. Compilers don't have
+ * to reject shaders (at compile-time or link-time) that contain recursion.
+ * Instead they could work, or crash, or kill a kitten.
+ *
+ * From page 44 (page 50 of the PDF) of the GLSL 1.20 spec:
+ *
+ * "Recursion is not allowed, not even statically. Static recursion is
+ * present if the static function call graph of the program contains
+ * cycles."
+ *
+ * This language clears things up a bit, but it still leaves a lot of
+ * questions unanswered.
+ *
+ * - Is the error generated at compile-time or link-time?
+ *
+ * - Is it an error to have a recursive function that is never statically
+ * called by main or any function called directly or indirectly by main?
+ * Technically speaking, such a function is not in the "static function
+ * call graph of the program" at all.
+ *
+ * \bug
+ * If a shader has multiple cycles, this algorithm may erroneously complain
+ * about functions that aren't in any cycle, but are in the part of the call
+ * tree that connects them. For example, if the call graph consists of a
+ * cycle between A and B, and a cycle between D and E, and B also calls C
+ * which calls D, then this algorithm will report C as a function which "has
+ * static recursion" even though it is not part of any cycle.
+ *
+ * A better algorithm for cycle detection that doesn't have this drawback can
+ * be found here:
+ *
+ * http://en.wikipedia.org/wiki/Tarjan%E2%80%99s_strongly_connected_components_algorithm
+ *
+ * \author Ian Romanick
+ */
+#include "main/core.h"
+#include "ir.h"
+#include "glsl_parser_extras.h"
+#include "linker.h"
+#include "program/hash_table.h"
+#include "program.h"
+
+namespace {
+
+struct call_node : public exec_node {
+ class function *func;
+};
+
+class function {
+public:
+ function(ir_function_signature *sig)
+ : sig(sig)
+ {
+ /* empty */
+ }
+
+ DECLARE_RALLOC_CXX_OPERATORS(function)
+
+ ir_function_signature *sig;
+
+ /** List of functions called by this function. */
+ exec_list callees;
+
+ /** List of functions that call this function.
*/ + exec_list callers; +}; + +class has_recursion_visitor : public ir_hierarchical_visitor { +public: + has_recursion_visitor() + : current(NULL) + { + progress = false; + this->mem_ctx = ralloc_context(NULL); + this->function_hash = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + } + + ~has_recursion_visitor() + { + hash_table_dtor(this->function_hash); + ralloc_free(this->mem_ctx); + } + + function *get_function(ir_function_signature *sig) + { + function *f = (function *) hash_table_find(this->function_hash, sig); + if (f == NULL) { + f = new(mem_ctx) function(sig); + hash_table_insert(this->function_hash, f, sig); + } + + return f; + } + + virtual ir_visitor_status visit_enter(ir_function_signature *sig) + { + this->current = this->get_function(sig); + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_function_signature *sig) + { + (void) sig; + this->current = NULL; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_call *call) + { + /* At global scope this->current will be NULL. Since there is no way to + * call global scope, it can never be part of a cycle. Don't bother + * adding calls from global scope to the graph. + */ + if (this->current == NULL) + return visit_continue; + + function *const target = this->get_function(call->callee); + + /* Create a link from the caller to the callee. + */ + call_node *node = new(mem_ctx) call_node; + node->func = target; + this->current->callees.push_tail(node); + + /* Create a link from the callee to the caller. + */ + node = new(mem_ctx) call_node; + node->func = this->current; + target->callers.push_tail(node); + return visit_continue; + } + + function *current; + struct hash_table *function_hash; + void *mem_ctx; + bool progress; +}; + +} /* anonymous namespace */ + +static void +destroy_links(exec_list *list, function *f) +{ + foreach_in_list_safe(call_node, node, list) { + /* If this is the right function, remove it. Note that the loop cannot + * terminate now. There can be multiple links to a function if it is + * either called multiple times or calls multiple times. 
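+ *
+ * For example (hypothetical graph), if A calls B twice, B's callers
+ * list holds two call_nodes whose func points at A, and unlinking A
+ * must remove both of them.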
+ */ + if (node->func == f) + node->remove(); + } +} + + +/** + * Remove a function if it has either no in or no out links + */ +static void +remove_unlinked_functions(const void *key, void *data, void *closure) +{ + has_recursion_visitor *visitor = (has_recursion_visitor *) closure; + function *f = (function *) data; + + if (f->callers.is_empty() || f->callees.is_empty()) { + while (!f->callers.is_empty()) { + struct call_node *n = (struct call_node *) f->callers.pop_head(); + destroy_links(& n->func->callees, f); + } + + while (!f->callees.is_empty()) { + struct call_node *n = (struct call_node *) f->callees.pop_head(); + destroy_links(& n->func->callers, f); + } + + hash_table_remove(visitor->function_hash, key); + visitor->progress = true; + } +} + + +static void +emit_errors_unlinked(const void *key, void *data, void *closure) +{ + struct _mesa_glsl_parse_state *state = + (struct _mesa_glsl_parse_state *) closure; + function *f = (function *) data; + YYLTYPE loc; + + (void) key; + + char *proto = prototype_string(f->sig->return_type, + f->sig->function_name(), + &f->sig->parameters); + + memset(&loc, 0, sizeof(loc)); + _mesa_glsl_error(&loc, state, + "function `%s' has static recursion", + proto); + ralloc_free(proto); +} + + +static void +emit_errors_linked(const void *key, void *data, void *closure) +{ + struct gl_shader_program *prog = + (struct gl_shader_program *) closure; + function *f = (function *) data; + + (void) key; + + char *proto = prototype_string(f->sig->return_type, + f->sig->function_name(), + &f->sig->parameters); + + linker_error(prog, "function `%s' has static recursion.\n", proto); + ralloc_free(proto); +} + + +void +detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, + exec_list *instructions) +{ + has_recursion_visitor v; + + /* Collect all of the information about which functions call which other + * functions. + */ + v.run(instructions); + + /* Remove from the set all of the functions that either have no caller or + * call no other functions. Repeat until no functions are removed. + */ + do { + v.progress = false; + hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v); + } while (v.progress); + + + /* At this point any functions still in the hash must be part of a cycle. + */ + hash_table_call_foreach(v.function_hash, emit_errors_unlinked, state); +} + + +void +detect_recursion_linked(struct gl_shader_program *prog, + exec_list *instructions) +{ + has_recursion_visitor v; + + /* Collect all of the information about which functions call which other + * functions. + */ + v.run(instructions); + + /* Remove from the set all of the functions that either have no caller or + * call no other functions. Repeat until no functions are removed. + */ + do { + v.progress = false; + hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v); + } while (v.progress); + + + /* At this point any functions still in the hash must be part of a cycle. 
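+ *
+ * For example (hypothetical program), given main -> A, A -> B and
+ * B -> A, the reduction above removes main (it has no callers), while
+ * A and B each keep a caller and a callee and survive as the reported
+ * cycle.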
+ */ + hash_table_call_foreach(v.function_hash, emit_errors_linked, prog); +} diff --git a/src/compiler/glsl/ir_function_inlining.h b/src/compiler/glsl/ir_function_inlining.h new file mode 100644 index 00000000000..6db011bbcae --- /dev/null +++ b/src/compiler/glsl/ir_function_inlining.h @@ -0,0 +1,30 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_function_inlining.h + * + * Replaces calls to functions with the body of the function. + */ + +bool can_inline(ir_call *call); diff --git a/src/compiler/glsl/ir_hierarchical_visitor.cpp b/src/compiler/glsl/ir_hierarchical_visitor.cpp new file mode 100644 index 00000000000..1d23a776643 --- /dev/null +++ b/src/compiler/glsl/ir_hierarchical_visitor.cpp @@ -0,0 +1,383 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#include "ir.h" +#include "ir_hierarchical_visitor.h" + +ir_hierarchical_visitor::ir_hierarchical_visitor() +{ + this->base_ir = NULL; + this->callback_enter = NULL; + this->callback_leave = NULL; + this->data_enter = NULL; + this->data_leave = NULL; + this->in_assignee = false; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_rvalue *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_variable *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_constant *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_loop_jump *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_dereference_variable *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit(ir_barrier *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_loop *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_loop *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_function_signature *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_function_signature *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_function *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_function *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_expression *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_expression *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_texture *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_texture *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_swizzle *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status 
+ir_hierarchical_visitor::visit_leave(ir_swizzle *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_dereference_array *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_dereference_array *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_dereference_record *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_dereference_record *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_assignment *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_assignment *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_call *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_call *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_return *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_return *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_discard *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_discard *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_if *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_if *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_emit_vertex *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_emit_vertex *ir) +{ + if (this->callback_leave != NULL) + this->callback_leave(ir, this->data_leave); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_enter(ir_end_primitive *ir) +{ + if (this->callback_enter != NULL) + this->callback_enter(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_hierarchical_visitor::visit_leave(ir_end_primitive *ir) +{ + if (this->callback_leave 
!= NULL)
+ this->callback_leave(ir, this->data_leave);
+
+ return visit_continue;
+}
+
+void
+ir_hierarchical_visitor::run(exec_list *instructions)
+{
+ visit_list_elements(this, instructions);
+}
+
+
+void
+visit_tree(ir_instruction *ir,
+ void (*callback_enter)(class ir_instruction *ir, void *data),
+ void *data_enter,
+ void (*callback_leave)(class ir_instruction *ir, void *data),
+ void *data_leave)
+{
+ ir_hierarchical_visitor v;
+
+ v.callback_enter = callback_enter;
+ v.callback_leave = callback_leave;
+ v.data_enter = data_enter;
+ v.data_leave = data_leave;
+
+ ir->accept(&v);
+}
diff --git a/src/compiler/glsl/ir_hierarchical_visitor.h b/src/compiler/glsl/ir_hierarchical_visitor.h
new file mode 100644
index 00000000000..28517b6e4f4
--- /dev/null
+++ b/src/compiler/glsl/ir_hierarchical_visitor.h
@@ -0,0 +1,209 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef IR_HIERARCHICAL_VISITOR_H
+#define IR_HIERARCHICAL_VISITOR_H
+
+/**
+ * Enumeration values returned by visit methods to guide processing
+ */
+enum ir_visitor_status {
+ visit_continue, /**< Continue visiting as normal. */
+ visit_continue_with_parent, /**< Don't visit siblings, continue w/parent. */
+ visit_stop /**< Stop visiting immediately. */
+};
+
+
+#ifdef __cplusplus
+/**
+ * Base class of hierarchical visitors of IR instruction trees
+ *
+ * Hierarchical visitors differ from traditional visitors in a couple of
+ * important ways. Rather than having a single \c visit method for each
+ * subclass in the composite, there are three kinds of visit methods.
+ * Leaf-node classes have a traditional \c visit method. Internal-node
+ * classes have a \c visit_enter method, which is invoked just before
+ * processing child nodes, and a \c visit_leave method which is invoked just
+ * after processing child nodes.
+ *
+ * In addition, each visit method and the \c accept methods in the composite
+ * have a return value which guides the navigation. Any of the visit methods
+ * can choose to continue visiting the tree as normal (by returning \c
+ * visit_continue), terminate visiting any further nodes immediately (by
+ * returning \c visit_stop), or stop visiting sibling nodes (by returning \c
+ * visit_continue_with_parent).
+ *
+ * These two changes combine to allow navigation of children to be implemented
+ * in the composite's \c accept method. The \c accept method for a leaf-node
+ * class will simply call the \c visit method, as usual, and pass its return
+ * value on. The \c accept method for internal-node classes will call the \c
+ * visit_enter method, call the \c accept method of each child node, and,
+ * finally, call the \c visit_leave method. If any of these return a value
+ * other than \c visit_continue, the correct action must be taken.
+ *
+ * The final benefit is that the hierarchical visitor base class need not be
+ * abstract. Default implementations of every \c visit, \c visit_enter, and
+ * \c visit_leave method can be provided. By default each of these methods
+ * simply returns \c visit_continue. This allows a significant reduction in
+ * derived class code.
+ *
+ * For more information about hierarchical visitors, see:
+ *
+ * http://c2.com/cgi/wiki?HierarchicalVisitorPattern
+ * http://c2.com/cgi/wiki?HierarchicalVisitorDiscussion
+ */
+
+class ir_hierarchical_visitor {
+public:
+ ir_hierarchical_visitor();
+
+ /**
+ * \name Visit methods for leaf-node classes
+ */
+ /*@{*/
+ virtual ir_visitor_status visit(class ir_rvalue *);
+ virtual ir_visitor_status visit(class ir_variable *);
+ virtual ir_visitor_status visit(class ir_constant *);
+ virtual ir_visitor_status visit(class ir_loop_jump *);
+ virtual ir_visitor_status visit(class ir_barrier *);
+
+ /**
+ * ir_dereference_variable isn't technically a leaf, but it is treated as a
+ * leaf here for a couple reasons. By not automatically visiting the one
+ * child ir_variable node from the ir_dereference_variable, ir_variable
+ * nodes can always be handled as variable declarations. Code that used
+ * non-hierarchical visitors had to set an "in a dereference" flag to
+ * determine how to handle an ir_variable. By forcing the visitor to
+ * handle the ir_variable within the ir_dereference_variable visitor, this
+ * kludge can be avoided.
+ *
+ * In addition, I can envision no use for having separate enter and leave
+ * methods. Anything that could be done in the enter and leave methods
+ * could just as well be done in the visit method.
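+ *
+ * Concretely (hypothetical IR), in (assign () (var_ref y) (var_ref x))
+ * each (var_ref ...) is visited as a single ir_dereference_variable
+ * leaf -- with in_assignee set for the left-hand side -- and the
+ * underlying ir_variable nodes for x and y are never visited.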
+ */ + virtual ir_visitor_status visit(class ir_dereference_variable *); + /*@}*/ + + /** + * \name Visit methods for internal-node classes + */ + /*@{*/ + virtual ir_visitor_status visit_enter(class ir_loop *); + virtual ir_visitor_status visit_leave(class ir_loop *); + virtual ir_visitor_status visit_enter(class ir_function_signature *); + virtual ir_visitor_status visit_leave(class ir_function_signature *); + virtual ir_visitor_status visit_enter(class ir_function *); + virtual ir_visitor_status visit_leave(class ir_function *); + virtual ir_visitor_status visit_enter(class ir_expression *); + virtual ir_visitor_status visit_leave(class ir_expression *); + virtual ir_visitor_status visit_enter(class ir_texture *); + virtual ir_visitor_status visit_leave(class ir_texture *); + virtual ir_visitor_status visit_enter(class ir_swizzle *); + virtual ir_visitor_status visit_leave(class ir_swizzle *); + virtual ir_visitor_status visit_enter(class ir_dereference_array *); + virtual ir_visitor_status visit_leave(class ir_dereference_array *); + virtual ir_visitor_status visit_enter(class ir_dereference_record *); + virtual ir_visitor_status visit_leave(class ir_dereference_record *); + virtual ir_visitor_status visit_enter(class ir_assignment *); + virtual ir_visitor_status visit_leave(class ir_assignment *); + virtual ir_visitor_status visit_enter(class ir_call *); + virtual ir_visitor_status visit_leave(class ir_call *); + virtual ir_visitor_status visit_enter(class ir_return *); + virtual ir_visitor_status visit_leave(class ir_return *); + virtual ir_visitor_status visit_enter(class ir_discard *); + virtual ir_visitor_status visit_leave(class ir_discard *); + virtual ir_visitor_status visit_enter(class ir_if *); + virtual ir_visitor_status visit_leave(class ir_if *); + virtual ir_visitor_status visit_enter(class ir_emit_vertex *); + virtual ir_visitor_status visit_leave(class ir_emit_vertex *); + virtual ir_visitor_status visit_enter(class ir_end_primitive *); + virtual ir_visitor_status visit_leave(class ir_end_primitive *); + /*@}*/ + + + /** + * Utility function to process a linked list of instructions with a visitor + */ + void run(struct exec_list *instructions); + + /* Some visitors may need to insert new variable declarations and + * assignments for portions of a subtree, which means they need a + * pointer to the current instruction in the stream, not just their + * node in the tree rooted at that instruction. + * + * This is implemented by visit_list_elements -- if the visitor is + * not called by it, nothing good will happen. + */ + class ir_instruction *base_ir; + + /** + * Callback function that is invoked on entry to each node visited. + * + * \warning + * Visitor classes derived from \c ir_hierarchical_visitor \b may \b not + * invoke this function. This can be used, for example, to cause the + * callback to be invoked on every node type except one. + */ + void (*callback_enter)(class ir_instruction *ir, void *data); + + /** + * Callback function that is invoked on exit of each node visited. + * + * \warning + * Visitor classes derived from \c ir_hierarchical_visitor \b may \b not + * invoke this function. This can be used, for example, to cause the + * callback to be invoked on every node type except one. 
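+ *
+ * As an illustration (hypothetical callback), the callbacks are
+ * typically wired up through visit_tree(), declared below:
+ *
+ *    static void count_node(ir_instruction *ir, void *data)
+ *    {
+ *       (void) ir;
+ *       (*(unsigned *) data)++;
+ *    }
+ *
+ *    unsigned n = 0;
+ *    visit_tree(ir, count_node, &n);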
+ */
+ void (*callback_leave)(class ir_instruction *ir, void *data);
+
+ /**
+ * Extra data parameter passed to the per-node callback_enter function
+ */
+ void *data_enter;
+
+ /**
+ * Extra data parameter passed to the per-node callback_leave function
+ */
+ void *data_leave;
+
+ /**
+ * Currently in the LHS of an assignment?
+ *
+ * This is set and cleared by the \c ir_assignment::accept method.
+ */
+ bool in_assignee;
+};
+
+void visit_tree(ir_instruction *ir,
+ void (*callback_enter)(class ir_instruction *ir, void *data),
+ void *data_enter,
+ void (*callback_leave)(class ir_instruction *ir, void *data) = NULL,
+ void *data_leave = NULL);
+
+ir_visitor_status visit_list_elements(ir_hierarchical_visitor *v, exec_list *l,
+ bool statement_list = true);
+#endif /* __cplusplus */
+
+#endif /* IR_HIERARCHICAL_VISITOR_H */
diff --git a/src/compiler/glsl/ir_hv_accept.cpp b/src/compiler/glsl/ir_hv_accept.cpp
new file mode 100644
index 00000000000..213992af28c
--- /dev/null
+++ b/src/compiler/glsl/ir_hv_accept.cpp
@@ -0,0 +1,439 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ir.h"
+
+/**
+ * \file ir_hv_accept.cpp
+ * Implementations of all hierarchical visitor accept methods for IR
+ * instructions.
+ */
+
+/**
+ * Process a list of nodes using a hierarchical visitor.
+ *
+ * If statement_list is true (the default), this is a list of statements, so
+ * v->base_ir will be set to point to each statement just before iterating
+ * over it, and restored after iteration is complete. If statement_list is
+ * false, this is a list that appears inside a statement (e.g. a parameter
+ * list), so v->base_ir will be left alone.
+ *
+ * \warning
+ * This function will operate correctly if a node being processed is removed
+ * from the list. However, if nodes are added to the list after the node being
+ * processed, some of the added nodes may not be processed.
+ */ +ir_visitor_status +visit_list_elements(ir_hierarchical_visitor *v, exec_list *l, + bool statement_list) +{ + ir_instruction *prev_base_ir = v->base_ir; + + foreach_in_list_safe(ir_instruction, ir, l) { + if (statement_list) + v->base_ir = ir; + ir_visitor_status s = ir->accept(v); + + if (s != visit_continue) + return s; + } + if (statement_list) + v->base_ir = prev_base_ir; + + return visit_continue; +} + + +ir_visitor_status +ir_rvalue::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} + + +ir_visitor_status +ir_variable::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} + + +ir_visitor_status +ir_loop::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = visit_list_elements(v, &this->body_instructions); + if (s == visit_stop) + return s; + + return v->visit_leave(this); +} + + +ir_visitor_status +ir_loop_jump::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} + + +ir_visitor_status +ir_function_signature::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = visit_list_elements(v, &this->parameters); + if (s == visit_stop) + return s; + + s = visit_list_elements(v, &this->body); + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_function::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = visit_list_elements(v, &this->signatures, false); + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_expression::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + for (unsigned i = 0; i < this->get_num_operands(); i++) { + switch (this->operands[i]->accept(v)) { + case visit_continue: + break; + + case visit_continue_with_parent: + // I wish for Java's labeled break-statement here. + goto done; + + case visit_stop: + return s; + } + } + +done: + return v->visit_leave(this); +} + +ir_visitor_status +ir_texture::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->sampler->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + if (this->coordinate) { + s = this->coordinate->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + if (this->projector) { + s = this->projector->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + if (this->shadow_comparitor) { + s = this->shadow_comparitor->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + if (this->offset) { + s = this->offset->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? 
visit_continue : s; + } + + switch (this->op) { + case ir_tex: + case ir_lod: + case ir_query_levels: + case ir_texture_samples: + case ir_samples_identical: + break; + case ir_txb: + s = this->lod_info.bias->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + break; + case ir_txl: + case ir_txf: + case ir_txs: + s = this->lod_info.lod->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + break; + case ir_txf_ms: + s = this->lod_info.sample_index->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + break; + case ir_txd: + s = this->lod_info.grad.dPdx->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->lod_info.grad.dPdy->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + break; + case ir_tg4: + s = this->lod_info.component->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + break; + } + + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_swizzle::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->val->accept(v); + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_dereference_variable::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} + + +ir_visitor_status +ir_dereference_array::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + /* The array index is not the target of the assignment, so clear the + * 'in_assignee' flag. Restore it after returning from the array index. + */ + const bool was_in_assignee = v->in_assignee; + v->in_assignee = false; + s = this->array_index->accept(v); + v->in_assignee = was_in_assignee; + + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->array->accept(v); + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_dereference_record::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->record->accept(v); + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_assignment::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + v->in_assignee = true; + s = this->lhs->accept(v); + v->in_assignee = false; + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->rhs->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + if (this->condition) + s = this->condition->accept(v); + + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_constant::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} + + +ir_visitor_status +ir_call::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? 
visit_continue : s; + + if (this->return_deref != NULL) { + v->in_assignee = true; + s = this->return_deref->accept(v); + v->in_assignee = false; + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + s = visit_list_elements(v, &this->actual_parameters, false); + if (s == visit_stop) + return s; + + return v->visit_leave(this); +} + + +ir_visitor_status +ir_return::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + ir_rvalue *val = this->get_value(); + if (val) { + s = val->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + return v->visit_leave(this); +} + + +ir_visitor_status +ir_discard::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + if (this->condition != NULL) { + s = this->condition->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + } + + return v->visit_leave(this); +} + + +ir_visitor_status +ir_if::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->condition->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + if (s != visit_continue_with_parent) { + s = visit_list_elements(v, &this->then_instructions); + if (s == visit_stop) + return s; + } + + if (s != visit_continue_with_parent) { + s = visit_list_elements(v, &this->else_instructions); + if (s == visit_stop) + return s; + } + + return v->visit_leave(this); +} + +ir_visitor_status +ir_emit_vertex::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->stream->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + return (s == visit_stop) ? s : v->visit_leave(this); +} + + +ir_visitor_status +ir_end_primitive::accept(ir_hierarchical_visitor *v) +{ + ir_visitor_status s = v->visit_enter(this); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + s = this->stream->accept(v); + if (s != visit_continue) + return (s == visit_continue_with_parent) ? visit_continue : s; + + return (s == visit_stop) ? 
s : v->visit_leave(this); +} + +ir_visitor_status +ir_barrier::accept(ir_hierarchical_visitor *v) +{ + return v->visit(this); +} diff --git a/src/compiler/glsl/ir_import_prototypes.cpp b/src/compiler/glsl/ir_import_prototypes.cpp new file mode 100644 index 00000000000..b0429fbc3af --- /dev/null +++ b/src/compiler/glsl/ir_import_prototypes.cpp @@ -0,0 +1,125 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_import_prototypes.cpp + * Import function prototypes from one IR tree into another. + * + * \author Ian Romanick + */ +#include "ir.h" +#include "glsl_symbol_table.h" + +namespace { + +/** + * Visitor used to import function prototypes + * + * Normally the \c clone method of either \c ir_function or + * \c ir_function_signature could be used. However, we don't want a complete + * clone of the \c ir_function_signature. We want everything \b except the + * body of the function. + */ +class import_prototype_visitor : public ir_hierarchical_visitor { +public: + /** + */ + import_prototype_visitor(exec_list *list, glsl_symbol_table *symbols, + void *mem_ctx) + { + this->mem_ctx = mem_ctx; + this->list = list; + this->symbols = symbols; + this->function = NULL; + } + + virtual ir_visitor_status visit_enter(ir_function *ir) + { + assert(this->function == NULL); + + this->function = this->symbols->get_function(ir->name); + if (!this->function) { + this->function = new(this->mem_ctx) ir_function(ir->name); + + list->push_tail(this->function); + + /* Add the new function to the symbol table. + */ + this->symbols->add_function(this->function); + } + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_function *ir) + { + (void) ir; + assert(this->function != NULL); + + this->function = NULL; + return visit_continue; + } + + ir_visitor_status visit_enter(ir_function_signature *ir) + { + assert(this->function != NULL); + + ir_function_signature *copy = ir->clone_prototype(mem_ctx, NULL); + + this->function->add_signature(copy); + + /* Do not process child nodes of the ir_function_signature. There can + * never be any nodes inside the ir_function_signature that we care + * about. Instead continue with the next sibling. 
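+ *
+ * Returning visit_continue_with_parent means the signature's parameter
+ * and body lists are never walked; traversal resumes with the next
+ * signature in the enclosing ir_function.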
+ */
+ return visit_continue_with_parent;
+ }
+
+private:
+ exec_list *list;
+ ir_function *function;
+ glsl_symbol_table *symbols;
+ void *mem_ctx;
+};
+
+} /* anonymous namespace */
+
+/**
+ * Import function prototypes from one IR tree into another
+ *
+ * \param source Source instruction stream containing functions whose
+ * prototypes are to be imported
+ * \param dest Destination instruction stream where new \c ir_function and
+ * \c ir_function_signature nodes will be stored
+ * \param symbols Symbol table where new functions will be stored
+ * \param mem_ctx ralloc memory context used for new allocations
+ */
+void
+import_prototypes(const exec_list *source, exec_list *dest,
+ glsl_symbol_table *symbols, void *mem_ctx)
+{
+ import_prototype_visitor v(dest, symbols, mem_ctx);
+
+ /* Making source be const is just extra documentation.
+ */
+ v.run(const_cast<exec_list *>(source));
}
diff --git a/src/compiler/glsl/ir_optimization.h b/src/compiler/glsl/ir_optimization.h
new file mode 100644
index 00000000000..be86f547f77
--- /dev/null
+++ b/src/compiler/glsl/ir_optimization.h
@@ -0,0 +1,147 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+/**
+ * \file ir_optimization.h
+ *
+ * Prototypes for optimization passes to be called by the compiler and drivers.
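+ *
+ * Most passes here take the shader's instruction list and return true when
+ * they made progress. A typical (hypothetical) caller loops until the IR
+ * reaches a fixed point:
+ *
+ *    bool progress;
+ *    do {
+ *       progress = false;
+ *       progress |= do_constant_folding(ir);
+ *       progress |= do_dead_code_unlinked(ir);
+ *    } while (progress);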
+ */ + +/* Operations for lower_instructions() */ +#define SUB_TO_ADD_NEG 0x01 +#define DIV_TO_MUL_RCP 0x02 +#define EXP_TO_EXP2 0x04 +#define POW_TO_EXP2 0x08 +#define LOG_TO_LOG2 0x10 +#define MOD_TO_FLOOR 0x20 +#define INT_DIV_TO_MUL_RCP 0x40 +#define LDEXP_TO_ARITH 0x80 +#define CARRY_TO_ARITH 0x100 +#define BORROW_TO_ARITH 0x200 +#define SAT_TO_CLAMP 0x400 +#define DOPS_TO_DFRAC 0x800 +#define DFREXP_DLDEXP_TO_ARITH 0x1000 + +/** + * \see class lower_packing_builtins_visitor + */ +enum lower_packing_builtins_op { + LOWER_PACK_UNPACK_NONE = 0x0000, + + LOWER_PACK_SNORM_2x16 = 0x0001, + LOWER_UNPACK_SNORM_2x16 = 0x0002, + + LOWER_PACK_UNORM_2x16 = 0x0004, + LOWER_UNPACK_UNORM_2x16 = 0x0008, + + LOWER_PACK_HALF_2x16 = 0x0010, + LOWER_UNPACK_HALF_2x16 = 0x0020, + + LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040, + LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080, + + LOWER_PACK_SNORM_4x8 = 0x0100, + LOWER_UNPACK_SNORM_4x8 = 0x0200, + + LOWER_PACK_UNORM_4x8 = 0x0400, + LOWER_UNPACK_UNORM_4x8 = 0x0800, + + LOWER_PACK_USE_BFI = 0x1000, + LOWER_PACK_USE_BFE = 0x2000, +}; + +bool do_common_optimization(exec_list *ir, bool linked, + bool uniform_locations_assigned, + const struct gl_shader_compiler_options *options, + bool native_integers); + +bool do_rebalance_tree(exec_list *instructions); +bool do_algebraic(exec_list *instructions, bool native_integers, + const struct gl_shader_compiler_options *options); +bool opt_conditional_discard(exec_list *instructions); +bool do_constant_folding(exec_list *instructions); +bool do_constant_variable(exec_list *instructions); +bool do_constant_variable_unlinked(exec_list *instructions); +bool do_copy_propagation(exec_list *instructions); +bool do_copy_propagation_elements(exec_list *instructions); +bool do_constant_propagation(exec_list *instructions); +void do_dead_builtin_varyings(struct gl_context *ctx, + gl_shader *producer, gl_shader *consumer, + unsigned num_tfeedback_decls, + class tfeedback_decl *tfeedback_decls); +bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned); +bool do_dead_code_local(exec_list *instructions); +bool do_dead_code_unlinked(exec_list *instructions); +bool do_dead_functions(exec_list *instructions); +bool opt_flip_matrices(exec_list *instructions); +bool do_function_inlining(exec_list *instructions); +bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false); +bool do_lower_texture_projection(exec_list *instructions); +bool do_if_simplification(exec_list *instructions); +bool opt_flatten_nested_if_blocks(exec_list *instructions); +bool do_discard_simplification(exec_list *instructions); +bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0); +bool do_mat_op_to_vec(exec_list *instructions); +bool do_minmax_prune(exec_list *instructions); +bool do_noop_swizzle(exec_list *instructions); +bool do_structure_splitting(exec_list *instructions); +bool do_swizzle_swizzle(exec_list *instructions); +bool do_vectorize(exec_list *instructions); +bool do_tree_grafting(exec_list *instructions); +bool do_vec_index_to_cond_assign(exec_list *instructions); +bool do_vec_index_to_swizzle(exec_list *instructions); +bool lower_discard(exec_list *instructions); +void lower_discard_flow(exec_list *instructions); +bool lower_instructions(exec_list *instructions, unsigned what_to_lower); +bool lower_noise(exec_list *instructions); +bool lower_variable_index_to_cond_assign(gl_shader_stage 
stage, + exec_list *instructions, bool lower_input, bool lower_output, + bool lower_temp, bool lower_uniform); +bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); +bool lower_const_arrays_to_uniforms(exec_list *instructions); +bool lower_clip_distance(gl_shader *shader); +void lower_output_reads(unsigned stage, exec_list *instructions); +bool lower_packing_builtins(exec_list *instructions, int op_mask); +void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size); +void lower_ubo_reference(struct gl_shader *shader); +void lower_packed_varyings(void *mem_ctx, + unsigned locations_used, ir_variable_mode mode, + unsigned gs_input_vertices, gl_shader *shader); +bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index); +bool lower_vector_derefs(gl_shader *shader); +void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader); +bool optimize_redundant_jumps(exec_list *instructions); +bool optimize_split_arrays(exec_list *instructions, bool linked); +bool lower_offset_arrays(exec_list *instructions); +void optimize_dead_builtin_variables(exec_list *instructions, + enum ir_variable_mode other); +bool lower_tess_level(gl_shader *shader); + +bool lower_vertex_id(gl_shader *shader); + +bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state); + +ir_rvalue * +compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, void *mem_ctx); diff --git a/src/compiler/glsl/ir_print_visitor.cpp b/src/compiler/glsl/ir_print_visitor.cpp new file mode 100644 index 00000000000..960b23fe0ed --- /dev/null +++ b/src/compiler/glsl/ir_print_visitor.cpp @@ -0,0 +1,604 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "ir_print_visitor.h"
+#include "compiler/glsl_types.h"
+#include "glsl_parser_extras.h"
+#include "main/macros.h"
+#include "util/hash_table.h"
+
+static void print_type(FILE *f, const glsl_type *t);
+
+void
+ir_instruction::print(void) const
+{
+ this->fprint(stdout);
+}
+
+void
+ir_instruction::fprint(FILE *f) const
+{
+ ir_instruction *deconsted = const_cast<ir_instruction *>(this);
+
+ ir_print_visitor v(f);
+ deconsted->accept(&v);
+}
+
+extern "C" {
+void
+_mesa_print_ir(FILE *f, exec_list *instructions,
+ struct _mesa_glsl_parse_state *state)
+{
+ if (state) {
+ for (unsigned i = 0; i < state->num_user_structures; i++) {
+ const glsl_type *const s = state->user_structures[i];
+
+ fprintf(f, "(structure (%s) (%s@%p) (%u) (\n",
+ s->name, s->name, (void *) s, s->length);
+
+ for (unsigned j = 0; j < s->length; j++) {
+ fprintf(f, "\t((");
+ print_type(f, s->fields.structure[j].type);
+ fprintf(f, ")(%s))\n", s->fields.structure[j].name);
+ }
+
+ fprintf(f, ")\n");
+ }
+ }
+
+ fprintf(f, "(\n");
+ foreach_in_list(ir_instruction, ir, instructions) {
+ ir->fprint(f);
+ if (ir->ir_type != ir_type_function)
+ fprintf(f, "\n");
+ }
+ fprintf(f, ")\n");
+}
+
+void
+fprint_ir(FILE *f, const void *instruction)
+{
+ const ir_instruction *ir = (const ir_instruction *)instruction;
+ ir->fprint(f);
+}
+
+} /* extern "C" */
+
+ir_print_visitor::ir_print_visitor(FILE *f)
+ : f(f)
+{
+ indentation = 0;
+ printable_names =
+ _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
+ symbols = _mesa_symbol_table_ctor();
+ mem_ctx = ralloc_context(NULL);
+}
+
+ir_print_visitor::~ir_print_visitor()
+{
+ _mesa_hash_table_destroy(printable_names, NULL);
+ _mesa_symbol_table_dtor(symbols);
+ ralloc_free(mem_ctx);
+}
+
+void ir_print_visitor::indent(void)
+{
+ for (int i = 0; i < indentation; i++)
+ fprintf(f, " ");
+}
+
+const char *
+ir_print_visitor::unique_name(ir_variable *var)
+{
+ /* var->name can be NULL in function prototypes when a type is given for a
+ * parameter but no name is given. In that case, just return an empty
+ * string. Don't worry about tracking the generated name in the printable
+ * names hash because this is the only scope where it can ever appear.
+ */
+ if (var->name == NULL) {
+ static unsigned arg = 1;
+ return ralloc_asprintf(this->mem_ctx, "parameter@%u", arg++);
+ }
+
+ /* Do we already have a name for this variable?
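+ * Two distinct variables that are both named "tmp", for instance, are
+ * printed as tmp and tmp@2, which keeps the dump unambiguous.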
*/ + struct hash_entry * entry = + _mesa_hash_table_search(this->printable_names, var); + + if (entry != NULL) { + return (const char *) entry->data; + } + + /* If there's no conflict, just use the original name */ + const char* name = NULL; + if (_mesa_symbol_table_find_symbol(this->symbols, -1, var->name) == NULL) { + name = var->name; + } else { + static unsigned i = 1; + name = ralloc_asprintf(this->mem_ctx, "%s@%u", var->name, ++i); + } + _mesa_hash_table_insert(this->printable_names, var, (void *) name); + _mesa_symbol_table_add_symbol(this->symbols, -1, name, var); + return name; +} + +static void +print_type(FILE *f, const glsl_type *t) +{ + if (t->base_type == GLSL_TYPE_ARRAY) { + fprintf(f, "(array "); + print_type(f, t->fields.array); + fprintf(f, " %u)", t->length); + } else if ((t->base_type == GLSL_TYPE_STRUCT) + && !is_gl_identifier(t->name)) { + fprintf(f, "%s@%p", t->name, (void *) t); + } else { + fprintf(f, "%s", t->name); + } +} + +void ir_print_visitor::visit(ir_rvalue *) +{ + fprintf(f, "error"); +} + +void ir_print_visitor::visit(ir_variable *ir) +{ + fprintf(f, "(declare "); + + char loc[256] = {0}; + if (ir->data.location != -1) + snprintf(loc, sizeof(loc), "location=%i ", ir->data.location); + + const char *const cent = (ir->data.centroid) ? "centroid " : ""; + const char *const samp = (ir->data.sample) ? "sample " : ""; + const char *const patc = (ir->data.patch) ? "patch " : ""; + const char *const inv = (ir->data.invariant) ? "invariant " : ""; + const char *const mode[] = { "", "uniform ", "shader_storage ", + "shader_shared ", "shader_in ", "shader_out ", + "in ", "out ", "inout ", + "const_in ", "sys ", "temporary " }; + STATIC_ASSERT(ARRAY_SIZE(mode) == ir_var_mode_count); + const char *const stream [] = {"", "stream1 ", "stream2 ", "stream3 "}; + const char *const interp[] = { "", "smooth", "flat", "noperspective" }; + STATIC_ASSERT(ARRAY_SIZE(interp) == INTERP_QUALIFIER_COUNT); + + fprintf(f, "(%s%s%s%s%s%s%s%s) ", + loc, cent, samp, patc, inv, mode[ir->data.mode], + stream[ir->data.stream], + interp[ir->data.interpolation]); + + print_type(f, ir->type); + fprintf(f, " %s)", unique_name(ir)); +} + + +void ir_print_visitor::visit(ir_function_signature *ir) +{ + _mesa_symbol_table_push_scope(symbols); + fprintf(f, "(signature "); + indentation++; + + print_type(f, ir->return_type); + fprintf(f, "\n"); + indent(); + + fprintf(f, "(parameters\n"); + indentation++; + + foreach_in_list(ir_variable, inst, &ir->parameters) { + indent(); + inst->accept(this); + fprintf(f, "\n"); + } + indentation--; + + indent(); + fprintf(f, ")\n"); + + indent(); + + fprintf(f, "(\n"); + indentation++; + + foreach_in_list(ir_instruction, inst, &ir->body) { + indent(); + inst->accept(this); + fprintf(f, "\n"); + } + indentation--; + indent(); + fprintf(f, "))\n"); + indentation--; + _mesa_symbol_table_pop_scope(symbols); +} + + +void ir_print_visitor::visit(ir_function *ir) +{ + fprintf(f, "(%s function %s\n", ir->is_subroutine ? 
"subroutine" : "", ir->name); + indentation++; + foreach_in_list(ir_function_signature, sig, &ir->signatures) { + indent(); + sig->accept(this); + fprintf(f, "\n"); + } + indentation--; + indent(); + fprintf(f, ")\n\n"); +} + + +void ir_print_visitor::visit(ir_expression *ir) +{ + fprintf(f, "(expression "); + + print_type(f, ir->type); + + fprintf(f, " %s ", ir->operator_string()); + + for (unsigned i = 0; i < ir->get_num_operands(); i++) { + ir->operands[i]->accept(this); + } + + fprintf(f, ") "); +} + + +void ir_print_visitor::visit(ir_texture *ir) +{ + fprintf(f, "(%s ", ir->opcode_string()); + + if (ir->op == ir_samples_identical) { + ir->sampler->accept(this); + fprintf(f, " "); + ir->coordinate->accept(this); + fprintf(f, ")"); + return; + } + + print_type(f, ir->type); + fprintf(f, " "); + + ir->sampler->accept(this); + fprintf(f, " "); + + if (ir->op != ir_txs && ir->op != ir_query_levels && + ir->op != ir_texture_samples) { + ir->coordinate->accept(this); + + fprintf(f, " "); + + if (ir->offset != NULL) { + ir->offset->accept(this); + } else { + fprintf(f, "0"); + } + + fprintf(f, " "); + } + + if (ir->op != ir_txf && ir->op != ir_txf_ms && + ir->op != ir_txs && ir->op != ir_tg4 && + ir->op != ir_query_levels && ir->op != ir_texture_samples) { + if (ir->projector) + ir->projector->accept(this); + else + fprintf(f, "1"); + + if (ir->shadow_comparitor) { + fprintf(f, " "); + ir->shadow_comparitor->accept(this); + } else { + fprintf(f, " ()"); + } + } + + fprintf(f, " "); + switch (ir->op) + { + case ir_tex: + case ir_lod: + case ir_query_levels: + case ir_texture_samples: + break; + case ir_txb: + ir->lod_info.bias->accept(this); + break; + case ir_txl: + case ir_txf: + case ir_txs: + ir->lod_info.lod->accept(this); + break; + case ir_txf_ms: + ir->lod_info.sample_index->accept(this); + break; + case ir_txd: + fprintf(f, "("); + ir->lod_info.grad.dPdx->accept(this); + fprintf(f, " "); + ir->lod_info.grad.dPdy->accept(this); + fprintf(f, ")"); + break; + case ir_tg4: + ir->lod_info.component->accept(this); + break; + case ir_samples_identical: + unreachable(!"ir_samples_identical was already handled"); + }; + fprintf(f, ")"); +} + + +void ir_print_visitor::visit(ir_swizzle *ir) +{ + const unsigned swiz[4] = { + ir->mask.x, + ir->mask.y, + ir->mask.z, + ir->mask.w, + }; + + fprintf(f, "(swiz "); + for (unsigned i = 0; i < ir->mask.num_components; i++) { + fprintf(f, "%c", "xyzw"[swiz[i]]); + } + fprintf(f, " "); + ir->val->accept(this); + fprintf(f, ")"); +} + + +void ir_print_visitor::visit(ir_dereference_variable *ir) +{ + ir_variable *var = ir->variable_referenced(); + fprintf(f, "(var_ref %s) ", unique_name(var)); +} + + +void ir_print_visitor::visit(ir_dereference_array *ir) +{ + fprintf(f, "(array_ref "); + ir->array->accept(this); + ir->array_index->accept(this); + fprintf(f, ") "); +} + + +void ir_print_visitor::visit(ir_dereference_record *ir) +{ + fprintf(f, "(record_ref "); + ir->record->accept(this); + fprintf(f, " %s) ", ir->field); +} + + +void ir_print_visitor::visit(ir_assignment *ir) +{ + fprintf(f, "(assign "); + + if (ir->condition) + ir->condition->accept(this); + + char mask[5]; + unsigned j = 0; + + for (unsigned i = 0; i < 4; i++) { + if ((ir->write_mask & (1 << i)) != 0) { + mask[j] = "xyzw"[i]; + j++; + } + } + mask[j] = '\0'; + + fprintf(f, " (%s) ", mask); + + ir->lhs->accept(this); + + fprintf(f, " "); + + ir->rhs->accept(this); + fprintf(f, ") "); +} + + +void ir_print_visitor::visit(ir_constant *ir) +{ + fprintf(f, "(constant "); + print_type(f, 
ir->type); + fprintf(f, " ("); + + if (ir->type->is_array()) { + for (unsigned i = 0; i < ir->type->length; i++) + ir->get_array_element(i)->accept(this); + } else if (ir->type->is_record()) { + ir_constant *value = (ir_constant *) ir->components.get_head(); + for (unsigned i = 0; i < ir->type->length; i++) { + fprintf(f, "(%s ", ir->type->fields.structure[i].name); + value->accept(this); + fprintf(f, ")"); + + value = (ir_constant *) value->next; + } + } else { + for (unsigned i = 0; i < ir->type->components(); i++) { + if (i != 0) + fprintf(f, " "); + switch (ir->type->base_type) { + case GLSL_TYPE_UINT: fprintf(f, "%u", ir->value.u[i]); break; + case GLSL_TYPE_INT: fprintf(f, "%d", ir->value.i[i]); break; + case GLSL_TYPE_FLOAT: + if (ir->value.f[i] == 0.0f) + /* 0.0 == -0.0, so print with %f to get the proper sign. */ + fprintf(f, "%f", ir->value.f[i]); + else if (fabs(ir->value.f[i]) < 0.000001f) + fprintf(f, "%a", ir->value.f[i]); + else if (fabs(ir->value.f[i]) > 1000000.0f) + fprintf(f, "%e", ir->value.f[i]); + else + fprintf(f, "%f", ir->value.f[i]); + break; + case GLSL_TYPE_BOOL: fprintf(f, "%d", ir->value.b[i]); break; + case GLSL_TYPE_DOUBLE: + if (ir->value.d[i] == 0.0) + /* 0.0 == -0.0, so print with %f to get the proper sign. */ + fprintf(f, "%.1f", ir->value.d[i]); + else if (fabs(ir->value.d[i]) < 0.000001) + fprintf(f, "%a", ir->value.d[i]); + else if (fabs(ir->value.d[i]) > 1000000.0) + fprintf(f, "%e", ir->value.d[i]); + else + fprintf(f, "%f", ir->value.d[i]); + break; + default: assert(0); + } + } + } + fprintf(f, ")) "); +} + + +void +ir_print_visitor::visit(ir_call *ir) +{ + fprintf(f, "(call %s ", ir->callee_name()); + if (ir->return_deref) + ir->return_deref->accept(this); + fprintf(f, " ("); + foreach_in_list(ir_rvalue, param, &ir->actual_parameters) { + param->accept(this); + } + fprintf(f, "))\n"); +} + + +void +ir_print_visitor::visit(ir_return *ir) +{ + fprintf(f, "(return"); + + ir_rvalue *const value = ir->get_value(); + if (value) { + fprintf(f, " "); + value->accept(this); + } + + fprintf(f, ")"); +} + + +void +ir_print_visitor::visit(ir_discard *ir) +{ + fprintf(f, "(discard "); + + if (ir->condition != NULL) { + fprintf(f, " "); + ir->condition->accept(this); + } + + fprintf(f, ")"); +} + + +void +ir_print_visitor::visit(ir_if *ir) +{ + fprintf(f, "(if "); + ir->condition->accept(this); + + fprintf(f, "(\n"); + indentation++; + + foreach_in_list(ir_instruction, inst, &ir->then_instructions) { + indent(); + inst->accept(this); + fprintf(f, "\n"); + } + + indentation--; + indent(); + fprintf(f, ")\n"); + + indent(); + if (!ir->else_instructions.is_empty()) { + fprintf(f, "(\n"); + indentation++; + + foreach_in_list(ir_instruction, inst, &ir->else_instructions) { + indent(); + inst->accept(this); + fprintf(f, "\n"); + } + indentation--; + indent(); + fprintf(f, "))\n"); + } else { + fprintf(f, "())\n"); + } +} + + +void +ir_print_visitor::visit(ir_loop *ir) +{ + fprintf(f, "(loop (\n"); + indentation++; + + foreach_in_list(ir_instruction, inst, &ir->body_instructions) { + indent(); + inst->accept(this); + fprintf(f, "\n"); + } + indentation--; + indent(); + fprintf(f, "))\n"); +} + + +void +ir_print_visitor::visit(ir_loop_jump *ir) +{ + fprintf(f, "%s", ir->is_break() ? 
"break" : "continue"); +} + +void +ir_print_visitor::visit(ir_emit_vertex *ir) +{ + fprintf(f, "(emit-vertex "); + ir->stream->accept(this); + fprintf(f, ")\n"); +} + +void +ir_print_visitor::visit(ir_end_primitive *ir) +{ + fprintf(f, "(end-primitive "); + ir->stream->accept(this); + fprintf(f, ")\n"); +} + +void +ir_print_visitor::visit(ir_barrier *) +{ + fprintf(f, "(barrier)\n"); +} diff --git a/src/compiler/glsl/ir_print_visitor.h b/src/compiler/glsl/ir_print_visitor.h new file mode 100644 index 00000000000..965e63ade8b --- /dev/null +++ b/src/compiler/glsl/ir_print_visitor.h @@ -0,0 +1,96 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef IR_PRINT_VISITOR_H +#define IR_PRINT_VISITOR_H + +#include "ir.h" +#include "ir_visitor.h" + +extern "C" { +#include "program/symbol_table.h" +} + +/** + * Abstract base class of visitors of IR instruction trees + */ +class ir_print_visitor : public ir_visitor { +public: + ir_print_visitor(FILE *f); + virtual ~ir_print_visitor(); + + void indent(void); + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. + */ + /*@{*/ + virtual void visit(ir_rvalue *); + virtual void visit(ir_variable *); + virtual void visit(ir_function_signature *); + virtual void visit(ir_function *); + virtual void visit(ir_expression *); + virtual void visit(ir_texture *); + virtual void visit(ir_swizzle *); + virtual void visit(ir_dereference_variable *); + virtual void visit(ir_dereference_array *); + virtual void visit(ir_dereference_record *); + virtual void visit(ir_assignment *); + virtual void visit(ir_constant *); + virtual void visit(ir_call *); + virtual void visit(ir_return *); + virtual void visit(ir_discard *); + virtual void visit(ir_if *); + virtual void visit(ir_loop *); + virtual void visit(ir_loop_jump *); + virtual void visit(ir_emit_vertex *); + virtual void visit(ir_end_primitive *); + virtual void visit(ir_barrier *); + /*@}*/ + +private: + /** + * Fetch/generate a unique name for ir_variable. + * + * GLSL IR permits multiple ir_variables to share the same name. This works + * fine until we try to print it, when we really need a unique one. 
+ */ + const char *unique_name(ir_variable *var); + + /** A mapping from ir_variable * -> unique printable names. */ + hash_table *printable_names; + _mesa_symbol_table *symbols; + + void *mem_ctx; + FILE *f; + + int indentation; +}; + +#endif /* IR_PRINT_VISITOR_H */ diff --git a/src/compiler/glsl/ir_reader.cpp b/src/compiler/glsl/ir_reader.cpp new file mode 100644 index 00000000000..15315aac522 --- /dev/null +++ b/src/compiler/glsl/ir_reader.cpp @@ -0,0 +1,1167 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir_reader.h" +#include "glsl_parser_extras.h" +#include "compiler/glsl_types.h" +#include "s_expression.h" + +static const bool debug = false; + +namespace { + +class ir_reader { +public: + ir_reader(_mesa_glsl_parse_state *); + + void read(exec_list *instructions, const char *src, bool scan_for_protos); + +private: + void *mem_ctx; + _mesa_glsl_parse_state *state; + + void ir_read_error(s_expression *, const char *fmt, ...); + + const glsl_type *read_type(s_expression *); + + void scan_for_prototypes(exec_list *, s_expression *); + ir_function *read_function(s_expression *, bool skip_body); + void read_function_sig(ir_function *, s_expression *, bool skip_body); + + void read_instructions(exec_list *, s_expression *, ir_loop *); + ir_instruction *read_instruction(s_expression *, ir_loop *); + ir_variable *read_declaration(s_expression *); + ir_if *read_if(s_expression *, ir_loop *); + ir_loop *read_loop(s_expression *); + ir_call *read_call(s_expression *); + ir_return *read_return(s_expression *); + ir_rvalue *read_rvalue(s_expression *); + ir_assignment *read_assignment(s_expression *); + ir_expression *read_expression(s_expression *); + ir_swizzle *read_swizzle(s_expression *); + ir_constant *read_constant(s_expression *); + ir_texture *read_texture(s_expression *); + ir_emit_vertex *read_emit_vertex(s_expression *); + ir_end_primitive *read_end_primitive(s_expression *); + ir_barrier *read_barrier(s_expression *); + + ir_dereference *read_dereference(s_expression *); + ir_dereference_variable *read_var_ref(s_expression *); +}; + +} /* anonymous namespace */ + +ir_reader::ir_reader(_mesa_glsl_parse_state *state) : state(state) +{ + this->mem_ctx = state; +} + +void +_mesa_glsl_read_ir(_mesa_glsl_parse_state *state, exec_list *instructions, + const char *src, bool scan_for_protos) +{ + ir_reader r(state); + r.read(instructions, src, scan_for_protos); +} + 
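+/* The reader accepts the S-expression form that ir_print_visitor emits.
+ * A minimal, hand-written (illustrative) example of the expected input:
+ *
+ *    ((declare (shader_out) vec4 color)
+ *     (function main
+ *       (signature void (parameters)
+ *         ((assign (xyzw) (var_ref color)
+ *                  (constant vec4 (1.0 0.0 0.0 1.0)))))))
+ */
+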
+void
+ir_reader::read(exec_list *instructions, const char *src, bool scan_for_protos)
+{
+   void *sx_mem_ctx = ralloc_context(NULL);
+   s_expression *expr = s_expression::read_expression(sx_mem_ctx, src);
+   if (expr == NULL) {
+      ir_read_error(NULL, "couldn't parse S-Expression.");
+      return;
+   }
+
+   if (scan_for_protos) {
+      scan_for_prototypes(instructions, expr);
+      if (state->error)
+         return;
+   }
+
+   read_instructions(instructions, expr, NULL);
+   ralloc_free(sx_mem_ctx);
+
+   if (debug)
+      validate_ir_tree(instructions);
+}
+
+void
+ir_reader::ir_read_error(s_expression *expr, const char *fmt, ...)
+{
+   va_list ap;
+
+   state->error = true;
+
+   if (state->current_function != NULL)
+      ralloc_asprintf_append(&state->info_log, "In function %s:\n",
+                             state->current_function->function_name());
+   ralloc_strcat(&state->info_log, "error: ");
+
+   va_start(ap, fmt);
+   ralloc_vasprintf_append(&state->info_log, fmt, ap);
+   va_end(ap);
+   ralloc_strcat(&state->info_log, "\n");
+
+   if (expr != NULL) {
+      ralloc_strcat(&state->info_log, "...in this context:\n   ");
+      expr->print();
+      ralloc_strcat(&state->info_log, "\n\n");
+   }
+}
+
+const glsl_type *
+ir_reader::read_type(s_expression *expr)
+{
+   s_expression *s_base_type;
+   s_int *s_size;
+
+   s_pattern pat[] = { "array", s_base_type, s_size };
+   if (MATCH(expr, pat)) {
+      const glsl_type *base_type = read_type(s_base_type);
+      if (base_type == NULL) {
+         ir_read_error(NULL, "when reading base type of array type");
+         return NULL;
+      }
+
+      return glsl_type::get_array_instance(base_type, s_size->value());
+   }
+
+   s_symbol *type_sym = SX_AS_SYMBOL(expr);
+   if (type_sym == NULL) {
+      ir_read_error(expr, "expected <type>");
+      return NULL;
+   }
+
+   const glsl_type *type = state->symbols->get_type(type_sym->value());
+   if (type == NULL)
+      ir_read_error(expr, "invalid type: %s", type_sym->value());
+
+   return type;
+}
+
+
+void
+ir_reader::scan_for_prototypes(exec_list *instructions, s_expression *expr)
+{
+   s_list *list = SX_AS_LIST(expr);
+   if (list == NULL) {
+      ir_read_error(expr, "Expected (<instruction> ...); found an atom.");
+      return;
+   }
+
+   foreach_in_list(s_list, sub, &list->subexpressions) {
+      if (!sub->is_list())
+         continue; // not a (function ...); ignore it.
+
+      s_symbol *tag = SX_AS_SYMBOL(sub->subexpressions.get_head());
+      if (tag == NULL || strcmp(tag->value(), "function") != 0)
+         continue; // not a (function ...); ignore it.
+
+      ir_function *f = read_function(sub, true);
+      if (f == NULL)
+         return;
+      instructions->push_tail(f);
+   }
+}
+
+ir_function *
+ir_reader::read_function(s_expression *expr, bool skip_body)
+{
+   bool added = false;
+   s_symbol *name;
+
+   s_pattern pat[] = { "function", name };
+   if (!PARTIAL_MATCH(expr, pat)) {
+      ir_read_error(expr, "Expected (function <name> (signature ...) ...)");
+      return NULL;
+   }
+
+   ir_function *f = state->symbols->get_function(name->value());
+   if (f == NULL) {
+      f = new(mem_ctx) ir_function(name->value());
+      added = state->symbols->add_function(f);
+      assert(added);
+   }
+
+   /* Skip over "function" tag and function name (which are guaranteed to be
+    * present by the above PARTIAL_MATCH call).
+    */
+   exec_node *node = ((s_list *) expr)->subexpressions.head->next->next;
+   for (/* nothing */; !node->is_tail_sentinel(); node = node->next) {
+      s_expression *s_sig = (s_expression *) node;
+      read_function_sig(f, s_sig, skip_body);
+   }
+   return added ? f : NULL;
+}
+
+static bool
+always_available(const _mesa_glsl_parse_state *)
+{
+   return true;
+}
+
+void
+ir_reader::read_function_sig(ir_function *f, s_expression *expr, bool skip_body)
+{
+   s_expression *type_expr;
+   s_list *paramlist;
+   s_list *body_list;
+
+   s_pattern pat[] = { "signature", type_expr, paramlist, body_list };
+   if (!MATCH(expr, pat)) {
+      ir_read_error(expr, "Expected (signature <type> (parameters ...) "
+                          "(<instruction> ...))");
+      return;
+   }
+
+   const glsl_type *return_type = read_type(type_expr);
+   if (return_type == NULL)
+      return;
+
+   s_symbol *paramtag = SX_AS_SYMBOL(paramlist->subexpressions.get_head());
+   if (paramtag == NULL || strcmp(paramtag->value(), "parameters") != 0) {
+      ir_read_error(paramlist, "Expected (parameters ...)");
+      return;
+   }
+
+   // Read the parameters list into a temporary place.
+   exec_list hir_parameters;
+   state->symbols->push_scope();
+
+   /* Skip over the "parameters" tag. */
+   exec_node *node = paramlist->subexpressions.head->next;
+   for (/* nothing */; !node->is_tail_sentinel(); node = node->next) {
+      ir_variable *var = read_declaration((s_expression *) node);
+      if (var == NULL)
+         return;
+
+      hir_parameters.push_tail(var);
+   }
+
+   ir_function_signature *sig =
+      f->exact_matching_signature(state, &hir_parameters);
+   if (sig == NULL && skip_body) {
+      /* If scanning for prototypes, generate a new signature. */
+      /* ir_reader doesn't know what languages support a given built-in, so
+       * just say that they're always available.  For now, other mechanisms
+       * guarantee the right built-ins are available.
+       */
+      sig = new(mem_ctx) ir_function_signature(return_type, always_available);
+      f->add_signature(sig);
+   } else if (sig != NULL) {
+      const char *badvar = sig->qualifiers_match(&hir_parameters);
+      if (badvar != NULL) {
+         ir_read_error(expr, "function `%s' parameter `%s' qualifiers "
+                       "don't match prototype", f->name, badvar);
+         return;
+      }
+
+      if (sig->return_type != return_type) {
+         ir_read_error(expr, "function `%s' return type doesn't "
+                       "match prototype", f->name);
+         return;
+      }
+   } else {
+      /* No prototype for this body exists - skip it. */
+      state->symbols->pop_scope();
+      return;
+   }
+   assert(sig != NULL);
+
+   sig->replace_parameters(&hir_parameters);
+
+   if (!skip_body && !body_list->subexpressions.is_empty()) {
+      if (sig->is_defined) {
+         ir_read_error(expr, "function %s redefined", f->name);
+         return;
+      }
+      state->current_function = sig;
+      read_instructions(&sig->body, body_list, NULL);
+      state->current_function = NULL;
+      sig->is_defined = true;
+   }
+
+   state->symbols->pop_scope();
+}
+
+void
+ir_reader::read_instructions(exec_list *instructions, s_expression *expr,
+                             ir_loop *loop_ctx)
+{
+   // Read in a list of instructions
+   s_list *list = SX_AS_LIST(expr);
+   if (list == NULL) {
+      ir_read_error(expr, "Expected (<instruction> ...); found an atom.");
+      return;
+   }
+
+   foreach_in_list(s_expression, sub, &list->subexpressions) {
+      ir_instruction *ir = read_instruction(sub, loop_ctx);
+      if (ir != NULL) {
+         /* Global variable declarations should be moved to the top, before
+          * any functions that might use them.  Functions are added to the
+          * instruction stream when scanning for prototypes, so without this
+          * hack, they always appear before variable declarations.
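+          * For example, a top-level (declare () float gv) read after
+          * (function main ...) still ends up ahead of it in the exec_list.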
+          */
+         if (state->current_function == NULL && ir->as_variable() != NULL)
+            instructions->push_head(ir);
+         else
+            instructions->push_tail(ir);
+      }
+   }
+}
+
+
+ir_instruction *
+ir_reader::read_instruction(s_expression *expr, ir_loop *loop_ctx)
+{
+   s_symbol *symbol = SX_AS_SYMBOL(expr);
+   if (symbol != NULL) {
+      if (strcmp(symbol->value(), "break") == 0 && loop_ctx != NULL)
+         return new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break);
+      if (strcmp(symbol->value(), "continue") == 0 && loop_ctx != NULL)
+         return new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue);
+   }
+
+   s_list *list = SX_AS_LIST(expr);
+   if (list == NULL || list->subexpressions.is_empty()) {
+      ir_read_error(expr, "Invalid instruction.\n");
+      return NULL;
+   }
+
+   s_symbol *tag = SX_AS_SYMBOL(list->subexpressions.get_head());
+   if (tag == NULL) {
+      ir_read_error(expr, "expected instruction tag");
+      return NULL;
+   }
+
+   ir_instruction *inst = NULL;
+   if (strcmp(tag->value(), "declare") == 0) {
+      inst = read_declaration(list);
+   } else if (strcmp(tag->value(), "assign") == 0) {
+      inst = read_assignment(list);
+   } else if (strcmp(tag->value(), "if") == 0) {
+      inst = read_if(list, loop_ctx);
+   } else if (strcmp(tag->value(), "loop") == 0) {
+      inst = read_loop(list);
+   } else if (strcmp(tag->value(), "call") == 0) {
+      inst = read_call(list);
+   } else if (strcmp(tag->value(), "return") == 0) {
+      inst = read_return(list);
+   } else if (strcmp(tag->value(), "function") == 0) {
+      inst = read_function(list, false);
+   } else if (strcmp(tag->value(), "emit-vertex") == 0) {
+      inst = read_emit_vertex(list);
+   } else if (strcmp(tag->value(), "end-primitive") == 0) {
+      inst = read_end_primitive(list);
+   } else if (strcmp(tag->value(), "barrier") == 0) {
+      inst = read_barrier(list);
+   } else {
+      inst = read_rvalue(list);
+      if (inst == NULL)
+         ir_read_error(NULL, "when reading instruction");
+   }
+   return inst;
+}
+
+ir_variable *
+ir_reader::read_declaration(s_expression *expr)
+{
+   s_list *s_quals;
+   s_expression *s_type;
+   s_symbol *s_name;
+
+   s_pattern pat[] = { "declare", s_quals, s_type, s_name };
+   if (!MATCH(expr, pat)) {
+      ir_read_error(expr, "expected (declare (<qualifiers>) <type> <name>)");
+      return NULL;
+   }
+
+   const glsl_type *type = read_type(s_type);
+   if (type == NULL)
+      return NULL;
+
+   ir_variable *var = new(mem_ctx) ir_variable(type, s_name->value(),
+                                               ir_var_auto);
+
+   foreach_in_list(s_symbol, qualifier, &s_quals->subexpressions) {
+      if (!qualifier->is_symbol()) {
+         ir_read_error(expr, "qualifier list must contain only symbols");
+         return NULL;
+      }
+
+      // FINISHME: Check for duplicate/conflicting qualifiers.
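+      // For instance, a declaration such as (illustrative)
+      //    (declare (shader_in flat) int index)
+      // walks this chain once per qualifier symbol.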
+      if (strcmp(qualifier->value(), "centroid") == 0) {
+         var->data.centroid = 1;
+      } else if (strcmp(qualifier->value(), "sample") == 0) {
+         var->data.sample = 1;
+      } else if (strcmp(qualifier->value(), "patch") == 0) {
+         var->data.patch = 1;
+      } else if (strcmp(qualifier->value(), "invariant") == 0) {
+         var->data.invariant = 1;
+      } else if (strcmp(qualifier->value(), "uniform") == 0) {
+         var->data.mode = ir_var_uniform;
+      } else if (strcmp(qualifier->value(), "shader_storage") == 0) {
+         var->data.mode = ir_var_shader_storage;
+      } else if (strcmp(qualifier->value(), "auto") == 0) {
+         var->data.mode = ir_var_auto;
+      } else if (strcmp(qualifier->value(), "in") == 0) {
+         var->data.mode = ir_var_function_in;
+      } else if (strcmp(qualifier->value(), "shader_in") == 0) {
+         var->data.mode = ir_var_shader_in;
+      } else if (strcmp(qualifier->value(), "const_in") == 0) {
+         var->data.mode = ir_var_const_in;
+      } else if (strcmp(qualifier->value(), "out") == 0) {
+         var->data.mode = ir_var_function_out;
+      } else if (strcmp(qualifier->value(), "shader_out") == 0) {
+         var->data.mode = ir_var_shader_out;
+      } else if (strcmp(qualifier->value(), "inout") == 0) {
+         var->data.mode = ir_var_function_inout;
+      } else if (strcmp(qualifier->value(), "temporary") == 0) {
+         var->data.mode = ir_var_temporary;
+      } else if (strcmp(qualifier->value(), "stream1") == 0) {
+         var->data.stream = 1;
+      } else if (strcmp(qualifier->value(), "stream2") == 0) {
+         var->data.stream = 2;
+      } else if (strcmp(qualifier->value(), "stream3") == 0) {
+         var->data.stream = 3;
+      } else if (strcmp(qualifier->value(), "smooth") == 0) {
+         var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
+      } else if (strcmp(qualifier->value(), "flat") == 0) {
+         var->data.interpolation = INTERP_QUALIFIER_FLAT;
+      } else if (strcmp(qualifier->value(), "noperspective") == 0) {
+         var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
+      } else {
+         ir_read_error(expr, "unknown qualifier: %s", qualifier->value());
+         return NULL;
+      }
+   }
+
+   // Add the variable to the symbol table
+   state->symbols->add_variable(var);
+
+   return var;
+}
+
+
+ir_if *
+ir_reader::read_if(s_expression *expr, ir_loop *loop_ctx)
+{
+   s_expression *s_cond;
+   s_expression *s_then;
+   s_expression *s_else;
+
+   s_pattern pat[] = { "if", s_cond, s_then, s_else };
+   if (!MATCH(expr, pat)) {
+      ir_read_error(expr, "expected (if <condition> (<then> ...) (<else> ...))");
(...))"); + return NULL; + } + + ir_rvalue *condition = read_rvalue(s_cond); + if (condition == NULL) { + ir_read_error(NULL, "when reading condition of (if ...)"); + return NULL; + } + + ir_if *iff = new(mem_ctx) ir_if(condition); + + read_instructions(&iff->then_instructions, s_then, loop_ctx); + read_instructions(&iff->else_instructions, s_else, loop_ctx); + if (state->error) { + delete iff; + iff = NULL; + } + return iff; +} + + +ir_loop * +ir_reader::read_loop(s_expression *expr) +{ + s_expression *s_body; + + s_pattern loop_pat[] = { "loop", s_body }; + if (!MATCH(expr, loop_pat)) { + ir_read_error(expr, "expected (loop )"); + return NULL; + } + + ir_loop *loop = new(mem_ctx) ir_loop; + + read_instructions(&loop->body_instructions, s_body, loop); + if (state->error) { + delete loop; + loop = NULL; + } + return loop; +} + + +ir_return * +ir_reader::read_return(s_expression *expr) +{ + s_expression *s_retval; + + s_pattern return_value_pat[] = { "return", s_retval}; + s_pattern return_void_pat[] = { "return" }; + if (MATCH(expr, return_value_pat)) { + ir_rvalue *retval = read_rvalue(s_retval); + if (retval == NULL) { + ir_read_error(NULL, "when reading return value"); + return NULL; + } + return new(mem_ctx) ir_return(retval); + } else if (MATCH(expr, return_void_pat)) { + return new(mem_ctx) ir_return; + } else { + ir_read_error(expr, "expected (return ) or (return)"); + return NULL; + } +} + + +ir_rvalue * +ir_reader::read_rvalue(s_expression *expr) +{ + s_list *list = SX_AS_LIST(expr); + if (list == NULL || list->subexpressions.is_empty()) + return NULL; + + s_symbol *tag = SX_AS_SYMBOL(list->subexpressions.get_head()); + if (tag == NULL) { + ir_read_error(expr, "expected rvalue tag"); + return NULL; + } + + ir_rvalue *rvalue = read_dereference(list); + if (rvalue != NULL || state->error) + return rvalue; + else if (strcmp(tag->value(), "swiz") == 0) { + rvalue = read_swizzle(list); + } else if (strcmp(tag->value(), "expression") == 0) { + rvalue = read_expression(list); + } else if (strcmp(tag->value(), "constant") == 0) { + rvalue = read_constant(list); + } else { + rvalue = read_texture(list); + if (rvalue == NULL && !state->error) + ir_read_error(expr, "unrecognized rvalue tag: %s", tag->value()); + } + + return rvalue; +} + +ir_assignment * +ir_reader::read_assignment(s_expression *expr) +{ + s_expression *cond_expr = NULL; + s_expression *lhs_expr, *rhs_expr; + s_list *mask_list; + + s_pattern pat4[] = { "assign", mask_list, lhs_expr, rhs_expr }; + s_pattern pat5[] = { "assign", cond_expr, mask_list, lhs_expr, rhs_expr }; + if (!MATCH(expr, pat4) && !MATCH(expr, pat5)) { + ir_read_error(expr, "expected (assign [] () " + " )"); + return NULL; + } + + ir_rvalue *condition = NULL; + if (cond_expr != NULL) { + condition = read_rvalue(cond_expr); + if (condition == NULL) { + ir_read_error(NULL, "when reading condition of assignment"); + return NULL; + } + } + + unsigned mask = 0; + + s_symbol *mask_symbol; + s_pattern mask_pat[] = { mask_symbol }; + if (MATCH(mask_list, mask_pat)) { + const char *mask_str = mask_symbol->value(); + unsigned mask_length = strlen(mask_str); + if (mask_length > 4) { + ir_read_error(expr, "invalid write mask: %s", mask_str); + return NULL; + } + + const unsigned idx_map[] = { 3, 0, 1, 2 }; /* w=bit 3, x=0, y=1, z=2 */ + + for (unsigned i = 0; i < mask_length; i++) { + if (mask_str[i] < 'w' || mask_str[i] > 'z') { + ir_read_error(expr, "write mask contains invalid character: %c", + mask_str[i]); + return NULL; + } + mask |= 1 << idx_map[mask_str[i] - 
+      }
+   } else if (!mask_list->subexpressions.is_empty()) {
+      ir_read_error(mask_list, "expected () or (<write mask>)");
+      return NULL;
+   }
+
+   ir_dereference *lhs = read_dereference(lhs_expr);
+   if (lhs == NULL) {
+      ir_read_error(NULL, "when reading left-hand side of assignment");
+      return NULL;
+   }
+
+   ir_rvalue *rhs = read_rvalue(rhs_expr);
+   if (rhs == NULL) {
+      ir_read_error(NULL, "when reading right-hand side of assignment");
+      return NULL;
+   }
+
+   if (mask == 0 && (lhs->type->is_vector() || lhs->type->is_scalar())) {
+      ir_read_error(expr, "non-zero write mask required.");
+      return NULL;
+   }
+
+   return new(mem_ctx) ir_assignment(lhs, rhs, condition, mask);
+}
+
+ir_call *
+ir_reader::read_call(s_expression *expr)
+{
+   s_symbol *name;
+   s_list *params;
+   s_list *s_return = NULL;
+
+   ir_dereference_variable *return_deref = NULL;
+
+   s_pattern void_pat[] = { "call", name, params };
+   s_pattern non_void_pat[] = { "call", name, s_return, params };
+   if (MATCH(expr, non_void_pat)) {
+      return_deref = read_var_ref(s_return);
+      if (return_deref == NULL) {
+         ir_read_error(s_return, "when reading a call's return storage");
+         return NULL;
+      }
+   } else if (!MATCH(expr, void_pat)) {
+      ir_read_error(expr, "expected (call <name> [<return deref>] (<param> ...))");
+      return NULL;
+   }
+
+   exec_list parameters;
+
+   foreach_in_list(s_expression, e, &params->subexpressions) {
+      ir_rvalue *param = read_rvalue(e);
+      if (param == NULL) {
+         ir_read_error(e, "when reading parameter to function call");
+         return NULL;
+      }
+      parameters.push_tail(param);
+   }
+
+   ir_function *f = state->symbols->get_function(name->value());
+   if (f == NULL) {
+      ir_read_error(expr, "found call to undefined function %s",
+                    name->value());
+      return NULL;
+   }
+
+   ir_function_signature *callee =
+      f->matching_signature(state, &parameters, true);
+   if (callee == NULL) {
+      ir_read_error(expr, "couldn't find matching signature for function "
+                    "%s", name->value());
+      return NULL;
+   }
+
+   if (callee->return_type == glsl_type::void_type && return_deref) {
+      ir_read_error(expr, "call has return value storage but void type");
+      return NULL;
+   } else if (callee->return_type != glsl_type::void_type && !return_deref) {
+      ir_read_error(expr, "call has non-void type but no return value storage");
+      return NULL;
+   }
+
+   return new(mem_ctx) ir_call(callee, return_deref, &parameters);
+}
+
+ir_expression *
+ir_reader::read_expression(s_expression *expr)
+{
+   s_expression *s_type;
+   s_symbol *s_op;
+   s_expression *s_arg[4] = {NULL};
+
+   s_pattern pat[] = { "expression", s_type, s_op, s_arg[0] };
+   if (!PARTIAL_MATCH(expr, pat)) {
+      ir_read_error(expr, "expected (expression <type> <operator> "
+                          "<operand> [<operand>] [<operand>] [<operand>])");
+      return NULL;
+   }
+   s_arg[1] = (s_expression *) s_arg[0]->next; // may be tail sentinel
+   s_arg[2] = (s_expression *) s_arg[1]->next; // may be tail sentinel or NULL
+   if (s_arg[2])
+      s_arg[3] = (s_expression *) s_arg[2]->next; // may be tail sentinel or NULL
+
+   const glsl_type *type = read_type(s_type);
+   if (type == NULL)
+      return NULL;
+
+   /* Read the operator */
+   ir_expression_operation op = ir_expression::get_operator(s_op->value());
+   if (op == (ir_expression_operation) -1) {
+      ir_read_error(expr, "invalid operator: %s", s_op->value());
+      return NULL;
+   }
+
+   /* Skip "expression" <type> <operator> by subtracting 3. */
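+   /* e.g. (expression float neg (var_ref x)) has four subexpressions,
+    * so num_operands below comes out as 4 - 3 = 1.
+    */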
+   int num_operands = (int) ((s_list *) expr)->subexpressions.length() - 3;
+
+   int expected_operands = ir_expression::get_num_operands(op);
+   if (num_operands != expected_operands) {
+      ir_read_error(expr, "found %d expression operands, expected %d",
+                    num_operands, expected_operands);
+      return NULL;
+   }
+
+   ir_rvalue *arg[4] = {NULL};
+   for (int i = 0; i < num_operands; i++) {
+      arg[i] = read_rvalue(s_arg[i]);
+      if (arg[i] == NULL) {
+         ir_read_error(NULL, "when reading operand #%d of %s", i, s_op->value());
+         return NULL;
+      }
+   }
+
+   return new(mem_ctx) ir_expression(op, type, arg[0], arg[1], arg[2], arg[3]);
+}
+
+ir_swizzle *
+ir_reader::read_swizzle(s_expression *expr)
+{
+   s_symbol *swiz;
+   s_expression *sub;
+
+   s_pattern pat[] = { "swiz", swiz, sub };
+   if (!MATCH(expr, pat)) {
+      ir_read_error(expr, "expected (swiz <swizzle> <rvalue>)");
+      return NULL;
+   }
+
+   if (strlen(swiz->value()) > 4) {
+      ir_read_error(expr, "expected a valid swizzle; found %s", swiz->value());
+      return NULL;
+   }
+
+   ir_rvalue *rvalue = read_rvalue(sub);
+   if (rvalue == NULL)
+      return NULL;
+
+   ir_swizzle *ir = ir_swizzle::create(rvalue, swiz->value(),
+                                       rvalue->type->vector_elements);
+   if (ir == NULL)
+      ir_read_error(expr, "invalid swizzle");
+
+   return ir;
+}
+
+ir_constant *
+ir_reader::read_constant(s_expression *expr)
+{
+   s_expression *type_expr;
+   s_list *values;
+
+   s_pattern pat[] = { "constant", type_expr, values };
+   if (!MATCH(expr, pat)) {
+      ir_read_error(expr, "expected (constant <type> (...))");
+      return NULL;
+   }
+
+   const glsl_type *type = read_type(type_expr);
+   if (type == NULL)
+      return NULL;
+
+   if (values == NULL) {
+      ir_read_error(expr, "expected (constant <type> (...))");
+      return NULL;
+   }
+
+   if (type->is_array()) {
+      unsigned elements_supplied = 0;
+      exec_list elements;
+      foreach_in_list(s_expression, elt, &values->subexpressions) {
+         ir_constant *ir_elt = read_constant(elt);
+         if (ir_elt == NULL)
+            return NULL;
+         elements.push_tail(ir_elt);
+         elements_supplied++;
+      }
+
+      if (elements_supplied != type->length) {
+         ir_read_error(values, "expected exactly %u array elements, "
+                       "given %u", type->length, elements_supplied);
+         return NULL;
+      }
+      return new(mem_ctx) ir_constant(type, &elements);
+   }
+
+   ir_constant_data data = { { 0 } };
+
+   // Read in list of values (at most 16).
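+   // For example, (constant vec4 (0.0 0.0 0.0 1.0)) supplies four floats.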
+ unsigned k = 0; + foreach_in_list(s_expression, expr, &values->subexpressions) { + if (k >= 16) { + ir_read_error(values, "expected at most 16 numbers"); + return NULL; + } + + if (type->base_type == GLSL_TYPE_FLOAT) { + s_number *value = SX_AS_NUMBER(expr); + if (value == NULL) { + ir_read_error(values, "expected numbers"); + return NULL; + } + data.f[k] = value->fvalue(); + } else { + s_int *value = SX_AS_INT(expr); + if (value == NULL) { + ir_read_error(values, "expected integers"); + return NULL; + } + + switch (type->base_type) { + case GLSL_TYPE_UINT: { + data.u[k] = value->value(); + break; + } + case GLSL_TYPE_INT: { + data.i[k] = value->value(); + break; + } + case GLSL_TYPE_BOOL: { + data.b[k] = value->value(); + break; + } + default: + ir_read_error(values, "unsupported constant type"); + return NULL; + } + } + ++k; + } + if (k != type->components()) { + ir_read_error(values, "expected %u constant values, found %u", + type->components(), k); + return NULL; + } + + return new(mem_ctx) ir_constant(type, &data); +} + +ir_dereference_variable * +ir_reader::read_var_ref(s_expression *expr) +{ + s_symbol *s_var; + s_pattern var_pat[] = { "var_ref", s_var }; + + if (MATCH(expr, var_pat)) { + ir_variable *var = state->symbols->get_variable(s_var->value()); + if (var == NULL) { + ir_read_error(expr, "undeclared variable: %s", s_var->value()); + return NULL; + } + return new(mem_ctx) ir_dereference_variable(var); + } + return NULL; +} + +ir_dereference * +ir_reader::read_dereference(s_expression *expr) +{ + s_expression *s_subject; + s_expression *s_index; + s_symbol *s_field; + + s_pattern array_pat[] = { "array_ref", s_subject, s_index }; + s_pattern record_pat[] = { "record_ref", s_subject, s_field }; + + ir_dereference_variable *var_ref = read_var_ref(expr); + if (var_ref != NULL) { + return var_ref; + } else if (MATCH(expr, array_pat)) { + ir_rvalue *subject = read_rvalue(s_subject); + if (subject == NULL) { + ir_read_error(NULL, "when reading the subject of an array_ref"); + return NULL; + } + + ir_rvalue *idx = read_rvalue(s_index); + if (idx == NULL) { + ir_read_error(NULL, "when reading the index of an array_ref"); + return NULL; + } + return new(mem_ctx) ir_dereference_array(subject, idx); + } else if (MATCH(expr, record_pat)) { + ir_rvalue *subject = read_rvalue(s_subject); + if (subject == NULL) { + ir_read_error(NULL, "when reading the subject of a record_ref"); + return NULL; + } + return new(mem_ctx) ir_dereference_record(subject, s_field->value()); + } + return NULL; +} + +ir_texture * +ir_reader::read_texture(s_expression *expr) +{ + s_symbol *tag = NULL; + s_expression *s_type = NULL; + s_expression *s_sampler = NULL; + s_expression *s_coord = NULL; + s_expression *s_offset = NULL; + s_expression *s_proj = NULL; + s_list *s_shadow = NULL; + s_expression *s_lod = NULL; + s_expression *s_sample_index = NULL; + s_expression *s_component = NULL; + + ir_texture_opcode op = ir_tex; /* silence warning */ + + s_pattern tex_pattern[] = + { "tex", s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow }; + s_pattern lod_pattern[] = + { "lod", s_type, s_sampler, s_coord }; + s_pattern txf_pattern[] = + { "txf", s_type, s_sampler, s_coord, s_offset, s_lod }; + s_pattern txf_ms_pattern[] = + { "txf_ms", s_type, s_sampler, s_coord, s_sample_index }; + s_pattern txs_pattern[] = + { "txs", s_type, s_sampler, s_lod }; + s_pattern tg4_pattern[] = + { "tg4", s_type, s_sampler, s_coord, s_offset, s_component }; + s_pattern query_levels_pattern[] = + { "query_levels", s_type, s_sampler }; 
+ s_pattern texture_samples_pattern[] = + { "samples", s_type, s_sampler }; + s_pattern other_pattern[] = + { tag, s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow, s_lod }; + + if (MATCH(expr, lod_pattern)) { + op = ir_lod; + } else if (MATCH(expr, tex_pattern)) { + op = ir_tex; + } else if (MATCH(expr, txf_pattern)) { + op = ir_txf; + } else if (MATCH(expr, txf_ms_pattern)) { + op = ir_txf_ms; + } else if (MATCH(expr, txs_pattern)) { + op = ir_txs; + } else if (MATCH(expr, tg4_pattern)) { + op = ir_tg4; + } else if (MATCH(expr, query_levels_pattern)) { + op = ir_query_levels; + } else if (MATCH(expr, texture_samples_pattern)) { + op = ir_texture_samples; + } else if (MATCH(expr, other_pattern)) { + op = ir_texture::get_opcode(tag->value()); + if (op == (ir_texture_opcode) -1) + return NULL; + } else { + ir_read_error(NULL, "unexpected texture pattern %s", tag->value()); + return NULL; + } + + ir_texture *tex = new(mem_ctx) ir_texture(op); + + // Read return type + const glsl_type *type = read_type(s_type); + if (type == NULL) { + ir_read_error(NULL, "when reading type in (%s ...)", + tex->opcode_string()); + return NULL; + } + + // Read sampler (must be a deref) + ir_dereference *sampler = read_dereference(s_sampler); + if (sampler == NULL) { + ir_read_error(NULL, "when reading sampler in (%s ...)", + tex->opcode_string()); + return NULL; + } + tex->set_sampler(sampler, type); + + if (op != ir_txs) { + // Read coordinate (any rvalue) + tex->coordinate = read_rvalue(s_coord); + if (tex->coordinate == NULL) { + ir_read_error(NULL, "when reading coordinate in (%s ...)", + tex->opcode_string()); + return NULL; + } + + if (op != ir_txf_ms && op != ir_lod) { + // Read texel offset - either 0 or an rvalue. + s_int *si_offset = SX_AS_INT(s_offset); + if (si_offset == NULL || si_offset->value() != 0) { + tex->offset = read_rvalue(s_offset); + if (tex->offset == NULL) { + ir_read_error(s_offset, "expected 0 or an expression"); + return NULL; + } + } + } + } + + if (op != ir_txf && op != ir_txf_ms && + op != ir_txs && op != ir_lod && op != ir_tg4 && + op != ir_query_levels && op != ir_texture_samples) { + s_int *proj_as_int = SX_AS_INT(s_proj); + if (proj_as_int && proj_as_int->value() == 1) { + tex->projector = NULL; + } else { + tex->projector = read_rvalue(s_proj); + if (tex->projector == NULL) { + ir_read_error(NULL, "when reading projective divide in (%s ..)", + tex->opcode_string()); + return NULL; + } + } + + if (s_shadow->subexpressions.is_empty()) { + tex->shadow_comparitor = NULL; + } else { + tex->shadow_comparitor = read_rvalue(s_shadow); + if (tex->shadow_comparitor == NULL) { + ir_read_error(NULL, "when reading shadow comparitor in (%s ..)", + tex->opcode_string()); + return NULL; + } + } + } + + switch (op) { + case ir_txb: + tex->lod_info.bias = read_rvalue(s_lod); + if (tex->lod_info.bias == NULL) { + ir_read_error(NULL, "when reading LOD bias in (txb ...)"); + return NULL; + } + break; + case ir_txl: + case ir_txf: + case ir_txs: + tex->lod_info.lod = read_rvalue(s_lod); + if (tex->lod_info.lod == NULL) { + ir_read_error(NULL, "when reading LOD in (%s ...)", + tex->opcode_string()); + return NULL; + } + break; + case ir_txf_ms: + tex->lod_info.sample_index = read_rvalue(s_sample_index); + if (tex->lod_info.sample_index == NULL) { + ir_read_error(NULL, "when reading sample_index in (txf_ms ...)"); + return NULL; + } + break; + case ir_txd: { + s_expression *s_dx, *s_dy; + s_pattern dxdy_pat[] = { s_dx, s_dy }; + if (!MATCH(s_lod, dxdy_pat)) { + ir_read_error(s_lod, "expected 
(dPdx dPdy) in (txd ...)"); + return NULL; + } + tex->lod_info.grad.dPdx = read_rvalue(s_dx); + if (tex->lod_info.grad.dPdx == NULL) { + ir_read_error(NULL, "when reading dPdx in (txd ...)"); + return NULL; + } + tex->lod_info.grad.dPdy = read_rvalue(s_dy); + if (tex->lod_info.grad.dPdy == NULL) { + ir_read_error(NULL, "when reading dPdy in (txd ...)"); + return NULL; + } + break; + } + case ir_tg4: + tex->lod_info.component = read_rvalue(s_component); + if (tex->lod_info.component == NULL) { + ir_read_error(NULL, "when reading component in (tg4 ...)"); + return NULL; + } + break; + default: + // tex and lod don't have any extra parameters. + break; + }; + return tex; +} + +ir_emit_vertex * +ir_reader::read_emit_vertex(s_expression *expr) +{ + s_expression *s_stream = NULL; + + s_pattern pat[] = { "emit-vertex", s_stream }; + + if (MATCH(expr, pat)) { + ir_rvalue *stream = read_dereference(s_stream); + if (stream == NULL) { + ir_read_error(NULL, "when reading stream info in emit-vertex"); + return NULL; + } + return new(mem_ctx) ir_emit_vertex(stream); + } + ir_read_error(NULL, "when reading emit-vertex"); + return NULL; +} + +ir_end_primitive * +ir_reader::read_end_primitive(s_expression *expr) +{ + s_expression *s_stream = NULL; + + s_pattern pat[] = { "end-primitive", s_stream }; + + if (MATCH(expr, pat)) { + ir_rvalue *stream = read_dereference(s_stream); + if (stream == NULL) { + ir_read_error(NULL, "when reading stream info in end-primitive"); + return NULL; + } + return new(mem_ctx) ir_end_primitive(stream); + } + ir_read_error(NULL, "when reading end-primitive"); + return NULL; +} + +ir_barrier * +ir_reader::read_barrier(s_expression *expr) +{ + s_pattern pat[] = { "barrier" }; + + if (MATCH(expr, pat)) { + return new(mem_ctx) ir_barrier(); + } + ir_read_error(NULL, "when reading barrier"); + return NULL; +} diff --git a/src/compiler/glsl/ir_reader.h b/src/compiler/glsl/ir_reader.h new file mode 100644 index 00000000000..aef2ca23bd2 --- /dev/null +++ b/src/compiler/glsl/ir_reader.h @@ -0,0 +1,34 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once +#ifndef IR_READER_H +#define IR_READER_H + +#include "ir.h" + +void _mesa_glsl_read_ir(_mesa_glsl_parse_state *state, exec_list *instructions, + const char *src, bool scan_for_prototypes); + +#endif /* IR_READER_H */ diff --git a/src/compiler/glsl/ir_rvalue_visitor.cpp b/src/compiler/glsl/ir_rvalue_visitor.cpp new file mode 100644 index 00000000000..6ab6cf02176 --- /dev/null +++ b/src/compiler/glsl/ir_rvalue_visitor.cpp @@ -0,0 +1,316 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_rvalue_visitor.cpp + * + * Generic class to implement the common pattern we have of wanting to + * visit each ir_rvalue * and possibly change that node to a different + * class. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "compiler/glsl_types.h" + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_expression *ir) +{ + unsigned int operand; + + for (operand = 0; operand < ir->get_num_operands(); operand++) { + handle_rvalue(&ir->operands[operand]); + } + + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir) +{ + handle_rvalue(&ir->coordinate); + handle_rvalue(&ir->projector); + handle_rvalue(&ir->shadow_comparitor); + handle_rvalue(&ir->offset); + + switch (ir->op) { + case ir_tex: + case ir_lod: + case ir_query_levels: + case ir_texture_samples: + case ir_samples_identical: + break; + case ir_txb: + handle_rvalue(&ir->lod_info.bias); + break; + case ir_txf: + case ir_txl: + case ir_txs: + handle_rvalue(&ir->lod_info.lod); + break; + case ir_txf_ms: + handle_rvalue(&ir->lod_info.sample_index); + break; + case ir_txd: + handle_rvalue(&ir->lod_info.grad.dPdx); + handle_rvalue(&ir->lod_info.grad.dPdy); + break; + case ir_tg4: + handle_rvalue(&ir->lod_info.component); + break; + } + + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_swizzle *ir) +{ + handle_rvalue(&ir->val); + return visit_continue; +} + +ir_visitor_status +ir_rvalue_base_visitor::rvalue_visit(ir_dereference_array *ir) +{ + /* The array index is not the target of the assignment, so clear the + * 'in_assignee' flag. Restore it after returning from the array index. 
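+    * For example, in the assignment a[i] = x, the variable "a" is the
+    * assignment target, but the index "i" is only read.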
+    */
+   const bool was_in_assignee = this->in_assignee;
+   this->in_assignee = false;
+   handle_rvalue(&ir->array_index);
+   this->in_assignee = was_in_assignee;
+
+   handle_rvalue(&ir->array);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_rvalue_base_visitor::rvalue_visit(ir_dereference_record *ir)
+{
+   handle_rvalue(&ir->record);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_rvalue_base_visitor::rvalue_visit(ir_assignment *ir)
+{
+   handle_rvalue(&ir->rhs);
+   handle_rvalue(&ir->condition);
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_rvalue_base_visitor::rvalue_visit(ir_call *ir)
+{
+   foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) {
+      ir_rvalue *new_param = param;
+      handle_rvalue(&new_param);
+
+      if (new_param != param) {
+         param->replace_with(new_param);
+      }
+   }
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_rvalue_base_visitor::rvalue_visit(ir_discard *ir)
+{
+   handle_rvalue(&ir->condition);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_rvalue_base_visitor::rvalue_visit(ir_return *ir)
+{
+   handle_rvalue(&ir->value);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_rvalue_base_visitor::rvalue_visit(ir_if *ir)
+{
+   handle_rvalue(&ir->condition);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_rvalue_base_visitor::rvalue_visit(ir_emit_vertex *ir)
+{
+   handle_rvalue(&ir->stream);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_rvalue_base_visitor::rvalue_visit(ir_end_primitive *ir)
+{
+   handle_rvalue(&ir->stream);
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_expression *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_texture *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_swizzle *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_dereference_array *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_dereference_record *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_assignment *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_call *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_discard *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_return *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_if *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_emit_vertex *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_visitor::visit_leave(ir_end_primitive *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_enter_visitor::visit_enter(ir_expression *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_enter_visitor::visit_enter(ir_texture *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_enter_visitor::visit_enter(ir_swizzle *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_enter_visitor::visit_enter(ir_dereference_array *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_enter_visitor::visit_enter(ir_dereference_record *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_enter_visitor::visit_enter(ir_assignment *ir)
+{
+   return rvalue_visit(ir);
+}
+
+ir_visitor_status
+ir_rvalue_enter_visitor::visit_enter(ir_call *ir)
+{
+   return
rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_discard *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_return *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_if *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_emit_vertex *ir) +{ + return rvalue_visit(ir); +} + +ir_visitor_status +ir_rvalue_enter_visitor::visit_enter(ir_end_primitive *ir) +{ + return rvalue_visit(ir); +} diff --git a/src/compiler/glsl/ir_rvalue_visitor.h b/src/compiler/glsl/ir_rvalue_visitor.h new file mode 100644 index 00000000000..185c72a5ba1 --- /dev/null +++ b/src/compiler/glsl/ir_rvalue_visitor.h @@ -0,0 +1,83 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_rvalue_visitor.h + * + * Generic class to implement the common pattern we have of wanting to + * visit each ir_rvalue * and possibly change that node to a different + * class. Just implement handle_rvalue() and you will be called with + * a pointer to each rvalue in the tree. 
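+ *
+ * A minimal, hypothetical subclass that counts the rvalues in a tree
+ * might look like:
+ *
+ *    class ir_rvalue_counter : public ir_rvalue_visitor {
+ *    public:
+ *       ir_rvalue_counter() : count(0) {}
+ *       virtual void handle_rvalue(ir_rvalue **rvalue)
+ *       {
+ *          if (*rvalue != NULL)
+ *             count++;
+ *       }
+ *       unsigned count;
+ *    };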
+ */ + +class ir_rvalue_base_visitor : public ir_hierarchical_visitor { +public: + ir_visitor_status rvalue_visit(ir_assignment *); + ir_visitor_status rvalue_visit(ir_call *); + ir_visitor_status rvalue_visit(ir_dereference_array *); + ir_visitor_status rvalue_visit(ir_dereference_record *); + ir_visitor_status rvalue_visit(ir_discard *); + ir_visitor_status rvalue_visit(ir_expression *); + ir_visitor_status rvalue_visit(ir_if *); + ir_visitor_status rvalue_visit(ir_return *); + ir_visitor_status rvalue_visit(ir_swizzle *); + ir_visitor_status rvalue_visit(ir_texture *); + ir_visitor_status rvalue_visit(ir_emit_vertex *); + ir_visitor_status rvalue_visit(ir_end_primitive *); + + virtual void handle_rvalue(ir_rvalue **rvalue) = 0; +}; + +class ir_rvalue_visitor : public ir_rvalue_base_visitor { +public: + + virtual ir_visitor_status visit_leave(ir_assignment *); + virtual ir_visitor_status visit_leave(ir_call *); + virtual ir_visitor_status visit_leave(ir_dereference_array *); + virtual ir_visitor_status visit_leave(ir_dereference_record *); + virtual ir_visitor_status visit_leave(ir_discard *); + virtual ir_visitor_status visit_leave(ir_expression *); + virtual ir_visitor_status visit_leave(ir_if *); + virtual ir_visitor_status visit_leave(ir_return *); + virtual ir_visitor_status visit_leave(ir_swizzle *); + virtual ir_visitor_status visit_leave(ir_texture *); + virtual ir_visitor_status visit_leave(ir_emit_vertex *); + virtual ir_visitor_status visit_leave(ir_end_primitive *); +}; + +class ir_rvalue_enter_visitor : public ir_rvalue_base_visitor { +public: + + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_call *); + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit_enter(ir_dereference_record *); + virtual ir_visitor_status visit_enter(ir_discard *); + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_if *); + virtual ir_visitor_status visit_enter(ir_return *); + virtual ir_visitor_status visit_enter(ir_swizzle *); + virtual ir_visitor_status visit_enter(ir_texture *); + virtual ir_visitor_status visit_enter(ir_emit_vertex *); + virtual ir_visitor_status visit_enter(ir_end_primitive *); +}; diff --git a/src/compiler/glsl/ir_set_program_inouts.cpp b/src/compiler/glsl/ir_set_program_inouts.cpp new file mode 100644 index 00000000000..df06923b870 --- /dev/null +++ b/src/compiler/glsl/ir_set_program_inouts.cpp @@ -0,0 +1,453 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_set_program_inouts.cpp
+ *
+ * Sets the InputsRead and OutputsWritten of Mesa programs.
+ *
+ * Additionally, for fragment shaders, sets the InterpQualifier array, the
+ * IsCentroid and IsSample bitfields, and the UsesDFdy flag.
+ *
+ * Mesa programs (gl_program, not gl_shader_program) have a set of
+ * flags indicating which varyings are read and written. Computing
+ * which are actually read from some sort of backend code can be
+ * tricky when variable array indexing is involved. So this pass
+ * provides support for setting InputsRead and OutputsWritten right
+ * from the GLSL IR.
+ */
+
+#include "main/core.h" /* for struct gl_program */
+#include "ir.h"
+#include "ir_visitor.h"
+#include "compiler/glsl_types.h"
+
+namespace {
+
+class ir_set_program_inouts_visitor : public ir_hierarchical_visitor {
+public:
+ ir_set_program_inouts_visitor(struct gl_program *prog,
+ gl_shader_stage shader_stage)
+ {
+ this->prog = prog;
+ this->shader_stage = shader_stage;
+ }
+ ~ir_set_program_inouts_visitor()
+ {
+ }
+
+ virtual ir_visitor_status visit_enter(ir_dereference_array *);
+ virtual ir_visitor_status visit_enter(ir_function_signature *);
+ virtual ir_visitor_status visit_enter(ir_expression *);
+ virtual ir_visitor_status visit_enter(ir_discard *);
+ virtual ir_visitor_status visit_enter(ir_texture *);
+ virtual ir_visitor_status visit(ir_dereference_variable *);
+
+private:
+ void mark_whole_variable(ir_variable *var);
+ bool try_mark_partial_variable(ir_variable *var, ir_rvalue *index);
+
+ struct gl_program *prog;
+ gl_shader_stage shader_stage;
+};
+
+} /* anonymous namespace */
+
+static inline bool
+is_shader_inout(ir_variable *var)
+{
+ return var->data.mode == ir_var_shader_in ||
+ var->data.mode == ir_var_shader_out ||
+ var->data.mode == ir_var_system_value;
+}
+
+static void
+mark(struct gl_program *prog, ir_variable *var, int offset, int len,
+ gl_shader_stage stage)
+{
+ /* As of GLSL 1.20, varyings can only be floats, floating-point
+ * vectors or matrices, or arrays of them. For Mesa programs using
+ * InputsRead/OutputsWritten, everything but matrices uses one
+ * slot, while matrices use a slot per column. Presumably
+ * something doing a more clever packing would use something other
+ * than InputsRead/OutputsWritten.
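+ *
+ * Concrete illustration (added for clarity): a `mat3' varying at
+ * location N occupies slots N, N+1 and N+2, so it is marked with
+ * len == 3, while a `vec4' at location N is marked with len == 1.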
+ */ + + for (int i = 0; i < len; i++) { + int idx = var->data.location + var->data.index + offset + i; + bool is_patch_generic = var->data.patch && + idx != VARYING_SLOT_TESS_LEVEL_INNER && + idx != VARYING_SLOT_TESS_LEVEL_OUTER; + GLbitfield64 bitfield; + + if (is_patch_generic) { + assert(idx >= VARYING_SLOT_PATCH0 && idx < VARYING_SLOT_TESS_MAX); + bitfield = BITFIELD64_BIT(idx - VARYING_SLOT_PATCH0); + } + else { + assert(idx < VARYING_SLOT_MAX); + bitfield = BITFIELD64_BIT(idx); + } + + if (var->data.mode == ir_var_shader_in) { + if (is_patch_generic) + prog->PatchInputsRead |= bitfield; + else + prog->InputsRead |= bitfield; + + /* double inputs read is only for vertex inputs */ + if (stage == MESA_SHADER_VERTEX && + var->type->without_array()->is_dual_slot_double()) + prog->DoubleInputsRead |= bitfield; + + if (stage == MESA_SHADER_FRAGMENT) { + gl_fragment_program *fprog = (gl_fragment_program *) prog; + fprog->InterpQualifier[idx] = + (glsl_interp_qualifier) var->data.interpolation; + if (var->data.centroid) + fprog->IsCentroid |= bitfield; + if (var->data.sample) + fprog->IsSample |= bitfield; + } + } else if (var->data.mode == ir_var_system_value) { + prog->SystemValuesRead |= bitfield; + } else { + assert(var->data.mode == ir_var_shader_out); + if (is_patch_generic) + prog->PatchOutputsWritten |= bitfield; + else + prog->OutputsWritten |= bitfield; + } + } +} + +/** + * Mark an entire variable as used. Caller must ensure that the variable + * represents a shader input or output. + */ +void +ir_set_program_inouts_visitor::mark_whole_variable(ir_variable *var) +{ + const glsl_type *type = var->type; + bool vertex_input = false; + if (this->shader_stage == MESA_SHADER_GEOMETRY && + var->data.mode == ir_var_shader_in && type->is_array()) { + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_CTRL && + var->data.mode == ir_var_shader_in) { + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_CTRL && + var->data.mode == ir_var_shader_out && !var->data.patch) { + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_EVAL && + var->data.mode == ir_var_shader_in && !var->data.patch) { + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_VERTEX && + var->data.mode == ir_var_shader_in) + vertex_input = true; + + mark(this->prog, var, 0, type->count_attribute_slots(vertex_input), + this->shader_stage); +} + +/* Default handler: Mark all the locations in the variable as used. */ +ir_visitor_status +ir_set_program_inouts_visitor::visit(ir_dereference_variable *ir) +{ + if (!is_shader_inout(ir->var)) + return visit_continue; + + mark_whole_variable(ir->var); + + return visit_continue; +} + +/** + * Try to mark a portion of the given variable as used. Caller must ensure + * that the variable represents a shader input or output which can be indexed + * into in array fashion (an array or matrix). For the purpose of geometry + * shader inputs (which are always arrays*), this means that the array element + * must be something that can be indexed into in array fashion. + * + * *Except gl_PrimitiveIDIn, as noted below. + * + * For tessellation control shaders all inputs and non-patch outputs are + * arrays. For tessellation evaluation shaders non-patch inputs are arrays. + * + * If the index can't be interpreted as a constant, or some other problem + * occurs, then nothing will be marked and false will be returned. 
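+ *
+ * Worked example (editor's illustration): for `mat4 m[4]' indexed with
+ * the constant 2, num_elems is 4 and elem_width is 4 (one slot per
+ * column), so mark() is called with offset 8 and length 4, covering
+ * slots 8..11 relative to the variable's base location.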
+ */ +bool +ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var, + ir_rvalue *index) +{ + const glsl_type *type = var->type; + + if (this->shader_stage == MESA_SHADER_GEOMETRY && + var->data.mode == ir_var_shader_in) { + /* The only geometry shader input that is not an array is + * gl_PrimitiveIDIn, and in that case, this code will never be reached, + * because gl_PrimitiveIDIn can't be indexed into in array fashion. + */ + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_CTRL && + var->data.mode == ir_var_shader_in) { + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_CTRL && + var->data.mode == ir_var_shader_out && !var->data.patch) { + assert(type->is_array()); + type = type->fields.array; + } + + if (this->shader_stage == MESA_SHADER_TESS_EVAL && + var->data.mode == ir_var_shader_in && !var->data.patch) { + assert(type->is_array()); + type = type->fields.array; + } + + /* TODO: implement proper arrays of arrays support + * for now let the caller mark whole variable as used. + */ + if (type->is_array() && type->fields.array->is_array()) + return false; + + /* The code below only handles: + * + * - Indexing into matrices + * - Indexing into arrays of (matrices, vectors, or scalars) + * + * All other possibilities are either prohibited by GLSL (vertex inputs and + * fragment outputs can't be structs) or should have been eliminated by + * lowering passes (do_vec_index_to_swizzle() gets rid of indexing into + * vectors, and lower_packed_varyings() gets rid of structs that occur in + * varyings). + */ + if (!(type->is_matrix() || + (type->is_array() && + (type->fields.array->is_numeric() || + type->fields.array->is_boolean())))) { + assert(!"Unexpected indexing in ir_set_program_inouts"); + + /* For safety in release builds, in case we ever encounter unexpected + * indexing, give up and let the caller mark the whole variable as used. + */ + return false; + } + + ir_constant *index_as_constant = index->as_constant(); + if (!index_as_constant) + return false; + + unsigned elem_width; + unsigned num_elems; + if (type->is_array()) { + num_elems = type->length; + if (type->fields.array->is_matrix()) + elem_width = type->fields.array->matrix_columns; + else + elem_width = 1; + } else { + num_elems = type->matrix_columns; + elem_width = 1; + } + + if (index_as_constant->value.u[0] >= num_elems) { + /* Constant index outside the bounds of the matrix/array. This could + * arise as a result of constant folding of a legal GLSL program. + * + * Even though the spec says that indexing outside the bounds of a + * matrix/array results in undefined behaviour, we don't want to pass + * out-of-range values to mark() (since this could result in slots that + * don't exist being marked as used), so just let the caller mark the + * whole variable as used. 
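+ *
+ * (Hypothetical illustration: `vec4 v[4]' indexed by an expression that
+ * constant folding reduces to 5. The original program may have been
+ * legal, but marking slot base + 5 would step past the variable, hence
+ * the false return.)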
+ */
+ return false;
+ }
+
+ /* double element width for double types that take two slots */
+ if (this->shader_stage != MESA_SHADER_VERTEX ||
+ var->data.mode != ir_var_shader_in) {
+ if (type->without_array()->is_dual_slot_double())
+ elem_width *= 2;
+ }
+
+ mark(this->prog, var, index_as_constant->value.u[0] * elem_width,
+ elem_width, this->shader_stage);
+ return true;
+}
+
+static bool
+is_multiple_vertices(gl_shader_stage stage, ir_variable *var)
+{
+ if (var->data.patch)
+ return false;
+
+ if (var->data.mode == ir_var_shader_in)
+ return stage == MESA_SHADER_GEOMETRY ||
+ stage == MESA_SHADER_TESS_CTRL ||
+ stage == MESA_SHADER_TESS_EVAL;
+ if (var->data.mode == ir_var_shader_out)
+ return stage == MESA_SHADER_TESS_CTRL;
+
+ return false;
+}
+
+ir_visitor_status
+ir_set_program_inouts_visitor::visit_enter(ir_dereference_array *ir)
+{
+ /* Note: for geometry shader inputs, lower_named_interface_blocks may
+ * create 2D arrays, so we need to be able to handle those. 2D arrays
+ * shouldn't be able to crop up for any other reason.
+ */
+ if (ir_dereference_array * const inner_array =
+ ir->array->as_dereference_array()) {
+ /* ir => foo[i][j]
+ * inner_array => foo[i]
+ */
+ if (ir_dereference_variable * const deref_var =
+ inner_array->array->as_dereference_variable()) {
+ if (is_multiple_vertices(this->shader_stage, deref_var->var)) {
+ /* foo is a geometry or tessellation shader input, so i is
+ * the vertex, and j the part of the input we're accessing.
+ */
+ if (try_mark_partial_variable(deref_var->var, ir->array_index))
+ {
+ /* We've now taken care of foo and j, but i might contain a
+ * subexpression that accesses shader inputs. So manually
+ * visit i and then continue with the parent.
+ */
+ inner_array->array_index->accept(this);
+ return visit_continue_with_parent;
+ }
+ }
+ }
+ } else if (ir_dereference_variable * const deref_var =
+ ir->array->as_dereference_variable()) {
+ /* ir => foo[i], where foo is a variable. */
+ if (is_multiple_vertices(this->shader_stage, deref_var->var)) {
+ /* foo is a geometry or tessellation shader input, so i is
+ * the vertex, and we're accessing the entire input.
+ */
+ mark_whole_variable(deref_var->var);
+ /* We've now taken care of foo, but i might contain a subexpression
+ * that accesses shader inputs. So manually visit i and then
+ * continue with the parent.
+ */
+ ir->array_index->accept(this);
+ return visit_continue_with_parent;
+ } else if (is_shader_inout(deref_var->var)) {
+ /* foo is a shader input/output, but not a geometry shader input,
+ * so i is the part of the input we're accessing.
+ */
+ if (try_mark_partial_variable(deref_var->var, ir->array_index))
+ return visit_continue_with_parent;
+ }
+ }
+
+ /* The expression is something we don't recognize. Just visit its
+ * subexpressions.
+ */
+ return visit_continue;
+}
+
+ir_visitor_status
+ir_set_program_inouts_visitor::visit_enter(ir_function_signature *ir)
+{
+ /* We don't want to descend into the function parameters and
+ * consider them as shader inputs or outputs.
+ */
+ visit_list_elements(this, &ir->body);
+ return visit_continue_with_parent;
+}
+
+ir_visitor_status
+ir_set_program_inouts_visitor::visit_enter(ir_expression *ir)
+{
+ if (this->shader_stage == MESA_SHADER_FRAGMENT &&
+ (ir->operation == ir_unop_dFdy ||
+ ir->operation == ir_unop_dFdy_coarse ||
+ ir->operation == ir_unop_dFdy_fine)) {
+ gl_fragment_program *fprog = (gl_fragment_program *) prog;
+ fprog->UsesDFdy = true;
+ }
+ return visit_continue;
+}
+
+ir_visitor_status
+ir_set_program_inouts_visitor::visit_enter(ir_discard *)
+{
+ /* discards are only allowed in fragment shaders. */
+ assert(this->shader_stage == MESA_SHADER_FRAGMENT);
+
+ gl_fragment_program *fprog = (gl_fragment_program *) prog;
+ fprog->UsesKill = true;
+
+ return visit_continue;
+}
+
+ir_visitor_status
+ir_set_program_inouts_visitor::visit_enter(ir_texture *ir)
+{
+ if (ir->op == ir_tg4)
+ prog->UsesGather = true;
+ return visit_continue;
+}
+
+void
+do_set_program_inouts(exec_list *instructions, struct gl_program *prog,
+ gl_shader_stage shader_stage)
+{
+ ir_set_program_inouts_visitor v(prog, shader_stage);
+
+ prog->InputsRead = 0;
+ prog->OutputsWritten = 0;
+ prog->PatchInputsRead = 0;
+ prog->PatchOutputsWritten = 0;
+ prog->SystemValuesRead = 0;
+ if (shader_stage == MESA_SHADER_FRAGMENT) {
+ gl_fragment_program *fprog = (gl_fragment_program *) prog;
+ memset(fprog->InterpQualifier, 0, sizeof(fprog->InterpQualifier));
+ fprog->IsCentroid = 0;
+ fprog->IsSample = 0;
+ fprog->UsesDFdy = false;
+ fprog->UsesKill = false;
+ }
+ visit_list_elements(&v, instructions);
+}
diff --git a/src/compiler/glsl/ir_uniform.h b/src/compiler/glsl/ir_uniform.h
new file mode 100644
index 00000000000..1854279925b
--- /dev/null
+++ b/src/compiler/glsl/ir_uniform.h
@@ -0,0 +1,216 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef IR_UNIFORM_H
+#define IR_UNIFORM_H
+
+
+/* stdbool.h is necessary because this file is included in both C and C++ code.
+ */
+#include <stdbool.h>
+
+#include "program/prog_parameter.h" /* For union gl_constant_value. */
+
+/**
+ * Used by GL_ARB_explicit_uniform_location extension code in the linker
+ * and glUniform* functions to identify inactive explicit uniform locations.
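+ *
+ * (Editor's note: the typical test, assuming the linker has populated
+ * gl_shader_program::UniformRemapTable, is a comparison such as
+ * `prog->UniformRemapTable[loc] == INACTIVE_UNIFORM_EXPLICIT_LOCATION'.)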
+ */
+#define INACTIVE_UNIFORM_EXPLICIT_LOCATION ((gl_uniform_storage *) -1)
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum PACKED gl_uniform_driver_format {
+ uniform_native = 0, /**< Store data in the native format. */
+ uniform_int_float, /**< Store integer data as floats. */
+};
+
+struct gl_uniform_driver_storage {
+ /**
+ * Number of bytes from one array element to the next.
+ */
+ uint8_t element_stride;
+
+ /**
+ * Number of bytes from one vector in a matrix to the next.
+ */
+ uint8_t vector_stride;
+
+ /**
+ * Base format of the stored data.
+ */
+ enum gl_uniform_driver_format format;
+
+ /**
+ * Pointer to the base of the data.
+ */
+ void *data;
+};
+
+struct gl_opaque_uniform_index {
+ /**
+ * Base opaque uniform index
+ *
+ * If \c gl_uniform_storage::base_type is an opaque type, this
+ * represents its uniform index. If \c
+ * gl_uniform_storage::array_elements is not zero, the array will
+ * use opaque uniform indices \c index through \c index + \c
+ * gl_uniform_storage::array_elements - 1, inclusive.
+ *
+ * Note that the index may be different in each shader stage.
+ */
+ uint8_t index;
+
+ /**
+ * Whether this opaque uniform is used in this shader stage.
+ */
+ bool active;
+};
+
+struct gl_uniform_storage {
+ char *name;
+ /** Type of the data stored in this uniform.
+ *
+ * In the case of an array, it's the type of a single array element.
+ */
+ const struct glsl_type *type;
+
+ /**
+ * The number of elements in this uniform.
+ *
+ * For non-arrays, this is always 0. For arrays, the value is the size of
+ * the array.
+ */
+ unsigned array_elements;
+
+ /**
+ * Has this uniform ever been set?
+ */
+ bool initialized;
+
+ struct gl_opaque_uniform_index opaque[MESA_SHADER_STAGES];
+
+ /**
+ * Storage used by the driver for the uniform
+ */
+ unsigned num_driver_storage;
+ struct gl_uniform_driver_storage *driver_storage;
+
+ /**
+ * Storage used by Mesa for the uniform
+ *
+ * This form of the uniform is used by Mesa's implementation of \c
+ * glGetUniform. It can also be used by drivers to obtain the value of the
+ * uniform if the \c ::driver_storage interface is not used.
+ */
+ union gl_constant_value *storage;
+
+ /** Fields for GL_ARB_uniform_buffer_object
+ * @{
+ */
+
+ /**
+ * GL_UNIFORM_BLOCK_INDEX: index of the uniform block containing
+ * the uniform, or -1 for the default uniform block. Note that the
+ * index is into the linked program's UniformBlocks[] array, not
+ * the linked shader's.
+ */
+ int block_index;
+
+ /** GL_UNIFORM_OFFSET: byte offset within the uniform block, or -1. */
+ int offset;
+
+ /**
+ * GL_UNIFORM_MATRIX_STRIDE: byte stride between columns or rows of
+ * a matrix. Set to 0 for non-matrices in UBOs, or -1 for uniforms
+ * in the default uniform block.
+ */
+ int matrix_stride;
+
+ /**
+ * GL_UNIFORM_ARRAY_STRIDE: byte stride between elements of the
+ * array. Set to zero for non-arrays in UBOs, or -1 for uniforms
+ * in the default uniform block.
+ */
+ int array_stride;
+
+ /** GL_UNIFORM_ROW_MAJOR: true iff it's a row-major matrix in a UBO */
+ bool row_major;
+
+ /** @} */
+
+ /**
+ * This is a compiler-generated uniform that should not be advertised
+ * via the API.
+ */
+ bool hidden;
+
+ /**
+ * This is a built-in uniform that should not be modified through any GL API.
+ */
+ bool builtin;
+
+ /**
+ * This is a shader storage buffer variable, not a uniform.
+ */
+ bool is_shader_storage;
+
+ /**
+ * Index within gl_shader_program::AtomicBuffers[] of the atomic
+ * counter buffer this uniform is stored in, or -1 if this is not
+ * an atomic counter.
+ */
+ int atomic_buffer_index;
+
+ /**
+ * The 'base location' for this uniform in the uniform remap table. For
+ * arrays this is the first element in the array.
+ * For subroutines, this is an index into the shader subroutine uniform
+ * remap table.
+ */
+ unsigned remap_location;
+
+ /**
+ * The number of subroutines compatible with this subroutine uniform.
+ */
+ unsigned num_compatible_subroutines;
+
+ /**
+ * A single integer identifying the number of active array elements of
+ * the top-level shader storage block member (GL_TOP_LEVEL_ARRAY_SIZE).
+ */
+ unsigned top_level_array_size;
+
+ /**
+ * A single integer identifying the stride between array elements of the
+ * top-level shader storage block member (GL_TOP_LEVEL_ARRAY_STRIDE).
+ */
+ unsigned top_level_array_stride;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* IR_UNIFORM_H */
diff --git a/src/compiler/glsl/ir_validate.cpp b/src/compiler/glsl/ir_validate.cpp
new file mode 100644
index 00000000000..cad7069bf98
--- /dev/null
+++ b/src/compiler/glsl/ir_validate.cpp
@@ -0,0 +1,930 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file ir_validate.cpp
+ *
+ * Attempts to verify that various invariants of the IR tree are true.
+ *
+ * In particular, at the moment it makes sure that no single
+ * ir_instruction node except for ir_variable appears multiple times
+ * in the ir tree. ir_variable does appear multiple times: Once as a
+ * declaration in an exec_list, and multiple times as the endpoint of
+ * a dereference chain.
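+ *
+ * For example, in the assignment `x = x + 1.0;' the single ir_variable
+ * for `x' terminates two distinct ir_dereference_variable nodes, one on
+ * each side of the assignment; any other node appearing twice indicates
+ * a broken tree.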
+ */ + +#include "ir.h" +#include "ir_hierarchical_visitor.h" +#include "util/hash_table.h" +#include "util/set.h" +#include "compiler/glsl_types.h" + +namespace { + +class ir_validate : public ir_hierarchical_visitor { +public: + ir_validate() + { + this->ir_set = _mesa_set_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + + this->current_function = NULL; + + this->callback_enter = ir_validate::validate_ir; + this->data_enter = ir_set; + } + + ~ir_validate() + { + _mesa_set_destroy(this->ir_set, NULL); + } + + virtual ir_visitor_status visit(ir_variable *v); + virtual ir_visitor_status visit(ir_dereference_variable *ir); + + virtual ir_visitor_status visit_enter(ir_discard *ir); + virtual ir_visitor_status visit_enter(ir_if *ir); + + virtual ir_visitor_status visit_enter(ir_function *ir); + virtual ir_visitor_status visit_leave(ir_function *ir); + virtual ir_visitor_status visit_enter(ir_function_signature *ir); + + virtual ir_visitor_status visit_leave(ir_expression *ir); + virtual ir_visitor_status visit_leave(ir_swizzle *ir); + + virtual ir_visitor_status visit_enter(class ir_dereference_array *); + + virtual ir_visitor_status visit_enter(ir_assignment *ir); + virtual ir_visitor_status visit_enter(ir_call *ir); + + static void validate_ir(ir_instruction *ir, void *data); + + ir_function *current_function; + + struct set *ir_set; +}; + +} /* anonymous namespace */ + +ir_visitor_status +ir_validate::visit(ir_dereference_variable *ir) +{ + if ((ir->var == NULL) || (ir->var->as_variable() == NULL)) { + printf("ir_dereference_variable @ %p does not specify a variable %p\n", + (void *) ir, (void *) ir->var); + abort(); + } + + if (_mesa_set_search(ir_set, ir->var) == NULL) { + printf("ir_dereference_variable @ %p specifies undeclared variable " + "`%s' @ %p\n", + (void *) ir, ir->var->name, (void *) ir->var); + abort(); + } + + this->validate_ir(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_validate::visit_enter(class ir_dereference_array *ir) +{ + if (!ir->array->type->is_array() && !ir->array->type->is_matrix() && + !ir->array->type->is_vector()) { + printf("ir_dereference_array @ %p does not specify an array, a vector " + "or a matrix\n", + (void *) ir); + ir->print(); + printf("\n"); + abort(); + } + + if (!ir->array_index->type->is_scalar()) { + printf("ir_dereference_array @ %p does not have scalar index: %s\n", + (void *) ir, ir->array_index->type->name); + abort(); + } + + if (!ir->array_index->type->is_integer()) { + printf("ir_dereference_array @ %p does not have integer index: %s\n", + (void *) ir, ir->array_index->type->name); + abort(); + } + + return visit_continue; +} + +ir_visitor_status +ir_validate::visit_enter(ir_discard *ir) +{ + if (ir->condition && ir->condition->type != glsl_type::bool_type) { + printf("ir_discard condition %s type instead of bool.\n", + ir->condition->type->name); + ir->print(); + printf("\n"); + abort(); + } + + return visit_continue; +} + +ir_visitor_status +ir_validate::visit_enter(ir_if *ir) +{ + if (ir->condition->type != glsl_type::bool_type) { + printf("ir_if condition %s type instead of bool.\n", + ir->condition->type->name); + ir->print(); + printf("\n"); + abort(); + } + + return visit_continue; +} + + +ir_visitor_status +ir_validate::visit_enter(ir_function *ir) +{ + /* Function definitions cannot be nested. 
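+ * GLSL itself has no nested or local functions, so hitting this case
+ * can only mean the IR was constructed incorrectly.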
+ */ + if (this->current_function != NULL) { + printf("Function definition nested inside another function " + "definition:\n"); + printf("%s %p inside %s %p\n", + ir->name, (void *) ir, + this->current_function->name, (void *) this->current_function); + abort(); + } + + /* Store the current function hierarchy being traversed. This is used + * by the function signature visitor to ensure that the signatures are + * linked with the correct functions. + */ + this->current_function = ir; + + this->validate_ir(ir, this->data_enter); + + /* Verify that all of the things stored in the list of signatures are, + * in fact, function signatures. + */ + foreach_in_list(ir_instruction, sig, &ir->signatures) { + if (sig->ir_type != ir_type_function_signature) { + printf("Non-signature in signature list of function `%s'\n", + ir->name); + abort(); + } + } + + return visit_continue; +} + +ir_visitor_status +ir_validate::visit_leave(ir_function *ir) +{ + assert(ralloc_parent(ir->name) == ir); + + this->current_function = NULL; + return visit_continue; +} + +ir_visitor_status +ir_validate::visit_enter(ir_function_signature *ir) +{ + if (this->current_function != ir->function()) { + printf("Function signature nested inside wrong function " + "definition:\n"); + printf("%p inside %s %p instead of %s %p\n", + (void *) ir, + this->current_function->name, (void *) this->current_function, + ir->function_name(), (void *) ir->function()); + abort(); + } + + if (ir->return_type == NULL) { + printf("Function signature %p for function %s has NULL return type.\n", + (void *) ir, ir->function_name()); + abort(); + } + + this->validate_ir(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_validate::visit_leave(ir_expression *ir) +{ + switch (ir->operation) { + case ir_unop_bit_not: + assert(ir->operands[0]->type == ir->type); + break; + case ir_unop_logic_not: + assert(ir->type->base_type == GLSL_TYPE_BOOL); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); + break; + + case ir_unop_neg: + case ir_unop_abs: + case ir_unop_sign: + case ir_unop_rcp: + case ir_unop_rsq: + case ir_unop_sqrt: + assert(ir->type == ir->operands[0]->type); + break; + + case ir_unop_exp: + case ir_unop_log: + case ir_unop_exp2: + case ir_unop_log2: + case ir_unop_saturate: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type == ir->operands[0]->type); + break; + + case ir_unop_f2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_f2u: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + case ir_unop_i2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_f2b: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_BOOL); + break; + case ir_unop_b2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_i2b: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_BOOL); + break; + case ir_unop_b2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_u2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_i2u: 
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + case ir_unop_u2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_bitcast_i2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_bitcast_f2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_bitcast_u2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_bitcast_f2u: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + + case ir_unop_trunc: + case ir_unop_round_even: + case ir_unop_ceil: + case ir_unop_floor: + case ir_unop_fract: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->operands[0]->type == ir->type); + break; + case ir_unop_sin: + case ir_unop_cos: + case ir_unop_dFdx: + case ir_unop_dFdx_coarse: + case ir_unop_dFdx_fine: + case ir_unop_dFdy: + case ir_unop_dFdy_coarse: + case ir_unop_dFdy_fine: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->operands[0]->type == ir->type); + break; + + case ir_unop_pack_snorm_2x16: + case ir_unop_pack_unorm_2x16: + case ir_unop_pack_half_2x16: + assert(ir->type == glsl_type::uint_type); + assert(ir->operands[0]->type == glsl_type::vec2_type); + break; + + case ir_unop_pack_snorm_4x8: + case ir_unop_pack_unorm_4x8: + assert(ir->type == glsl_type::uint_type); + assert(ir->operands[0]->type == glsl_type::vec4_type); + break; + + case ir_unop_pack_double_2x32: + assert(ir->type == glsl_type::double_type); + assert(ir->operands[0]->type == glsl_type::uvec2_type); + break; + + case ir_unop_unpack_snorm_2x16: + case ir_unop_unpack_unorm_2x16: + case ir_unop_unpack_half_2x16: + assert(ir->type == glsl_type::vec2_type); + assert(ir->operands[0]->type == glsl_type::uint_type); + break; + + case ir_unop_unpack_snorm_4x8: + case ir_unop_unpack_unorm_4x8: + assert(ir->type == glsl_type::vec4_type); + assert(ir->operands[0]->type == glsl_type::uint_type); + break; + + case ir_unop_unpack_half_2x16_split_x: + case ir_unop_unpack_half_2x16_split_y: + assert(ir->type == glsl_type::float_type); + assert(ir->operands[0]->type == glsl_type::uint_type); + break; + + case ir_unop_unpack_double_2x32: + assert(ir->type == glsl_type::uvec2_type); + assert(ir->operands[0]->type == glsl_type::double_type); + break; + + case ir_unop_bitfield_reverse: + assert(ir->operands[0]->type == ir->type); + assert(ir->type->is_integer()); + break; + + case ir_unop_bit_count: + case ir_unop_find_msb: + case ir_unop_find_lsb: + assert(ir->operands[0]->type->vector_elements == ir->type->vector_elements); + assert(ir->operands[0]->type->is_integer()); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + + case ir_unop_noise: + /* XXX what can we assert here? 
*/ + break; + + case ir_unop_interpolate_at_centroid: + assert(ir->operands[0]->type == ir->type); + assert(ir->operands[0]->type->is_float()); + break; + + case ir_unop_get_buffer_size: + assert(ir->type == glsl_type::int_type); + assert(ir->operands[0]->type == glsl_type::uint_type); + break; + + case ir_unop_ssbo_unsized_array_length: + assert(ir->type == glsl_type::int_type); + assert(ir->operands[0]->type->is_array()); + assert(ir->operands[0]->type->is_unsized_array()); + break; + + case ir_unop_d2f: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_FLOAT); + break; + case ir_unop_f2d: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_d2i: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_i2d: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_d2u: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + case ir_unop_u2d: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_d2b: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_BOOL); + break; + + case ir_unop_frexp_sig: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_DOUBLE); + break; + case ir_unop_frexp_exp: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_unop_subroutine_to_int: + assert(ir->operands[0]->type->base_type == GLSL_TYPE_SUBROUTINE); + assert(ir->type->base_type == GLSL_TYPE_INT); + break; + case ir_binop_add: + case ir_binop_sub: + case ir_binop_mul: + case ir_binop_div: + case ir_binop_mod: + case ir_binop_min: + case ir_binop_max: + case ir_binop_pow: + assert(ir->operands[0]->type->base_type == + ir->operands[1]->type->base_type); + + if (ir->operands[0]->type->is_scalar()) + assert(ir->operands[1]->type == ir->type); + else if (ir->operands[1]->type->is_scalar()) + assert(ir->operands[0]->type == ir->type); + else if (ir->operands[0]->type->is_vector() && + ir->operands[1]->type->is_vector()) { + assert(ir->operands[0]->type == ir->operands[1]->type); + assert(ir->operands[0]->type == ir->type); + } + break; + + case ir_binop_imul_high: + assert(ir->type == ir->operands[0]->type); + assert(ir->type == ir->operands[1]->type); + assert(ir->type->is_integer()); + break; + + case ir_binop_carry: + case ir_binop_borrow: + assert(ir->type == ir->operands[0]->type); + assert(ir->type == ir->operands[1]->type); + assert(ir->type->base_type == GLSL_TYPE_UINT); + break; + + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: + /* The semantics of the IR operators differ from the GLSL <, >, <=, >=, + * ==, and != operators. The IR operators perform a component-wise + * comparison on scalar or vector types and return a boolean scalar or + * vector type of the same size. 
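+ *
+ * For example, ir_binop_less on two vec3 operands yields a bvec3,
+ * matching GLSL's lessThan() built-in rather than the scalar-only
+ * `<' operator.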
+ */ + assert(ir->type->base_type == GLSL_TYPE_BOOL); + assert(ir->operands[0]->type == ir->operands[1]->type); + assert(ir->operands[0]->type->is_vector() + || ir->operands[0]->type->is_scalar()); + assert(ir->operands[0]->type->vector_elements + == ir->type->vector_elements); + break; + + case ir_binop_all_equal: + case ir_binop_any_nequal: + /* GLSL == and != operate on scalars, vectors, matrices and arrays, and + * return a scalar boolean. The IR matches that. + */ + assert(ir->type == glsl_type::bool_type); + assert(ir->operands[0]->type == ir->operands[1]->type); + break; + + case ir_binop_lshift: + case ir_binop_rshift: + assert(ir->operands[0]->type->is_integer() && + ir->operands[1]->type->is_integer()); + if (ir->operands[0]->type->is_scalar()) { + assert(ir->operands[1]->type->is_scalar()); + } + if (ir->operands[0]->type->is_vector() && + ir->operands[1]->type->is_vector()) { + assert(ir->operands[0]->type->components() == + ir->operands[1]->type->components()); + } + assert(ir->type == ir->operands[0]->type); + break; + + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + assert(ir->operands[0]->type->base_type == + ir->operands[1]->type->base_type); + assert(ir->type->is_integer()); + if (ir->operands[0]->type->is_vector() && + ir->operands[1]->type->is_vector()) { + assert(ir->operands[0]->type->vector_elements == + ir->operands[1]->type->vector_elements); + } + break; + + case ir_binop_logic_and: + case ir_binop_logic_xor: + case ir_binop_logic_or: + assert(ir->type->base_type == GLSL_TYPE_BOOL); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); + assert(ir->operands[1]->type->base_type == GLSL_TYPE_BOOL); + break; + + case ir_binop_dot: + assert(ir->type == glsl_type::float_type || + ir->type == glsl_type::double_type); + assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || + ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[0]->type == ir->operands[1]->type); + break; + + case ir_binop_pack_half_2x16_split: + assert(ir->type == glsl_type::uint_type); + assert(ir->operands[0]->type == glsl_type::float_type); + assert(ir->operands[1]->type == glsl_type::float_type); + break; + + case ir_binop_ubo_load: + assert(ir->operands[0]->type == glsl_type::uint_type); + + assert(ir->operands[1]->type == glsl_type::uint_type); + break; + + case ir_binop_ldexp: + assert(ir->operands[0]->type == ir->type); + assert(ir->operands[0]->type->is_float() || + ir->operands[0]->type->is_double()); + assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT); + assert(ir->operands[0]->type->components() == + ir->operands[1]->type->components()); + break; + + case ir_binop_vector_extract: + assert(ir->operands[0]->type->is_vector()); + assert(ir->operands[1]->type->is_scalar() + && ir->operands[1]->type->is_integer()); + break; + + case ir_binop_interpolate_at_offset: + assert(ir->operands[0]->type == ir->type); + assert(ir->operands[0]->type->is_float()); + assert(ir->operands[1]->type->components() == 2); + assert(ir->operands[1]->type->is_float()); + break; + + case ir_binop_interpolate_at_sample: + assert(ir->operands[0]->type == ir->type); + assert(ir->operands[0]->type->is_float()); + assert(ir->operands[1]->type == glsl_type::int_type); + break; + + case ir_triop_fma: + assert(ir->type->base_type == GLSL_TYPE_FLOAT || + ir->type->base_type == GLSL_TYPE_DOUBLE); + assert(ir->type == ir->operands[0]->type); + assert(ir->type == ir->operands[1]->type); + assert(ir->type == 
ir->operands[2]->type);
+ break;
+
+ case ir_triop_lrp:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT ||
+ ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE);
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ assert(ir->operands[2]->type == ir->operands[0]->type ||
+ ir->operands[2]->type == glsl_type::float_type ||
+ ir->operands[2]->type == glsl_type::double_type);
+ break;
+
+ case ir_triop_csel:
+ assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL);
+ assert(ir->type->vector_elements == ir->operands[0]->type->vector_elements);
+ assert(ir->type == ir->operands[1]->type);
+ assert(ir->type == ir->operands[2]->type);
+ break;
+
+ case ir_triop_bitfield_extract:
+ assert(ir->type->is_integer());
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[1]->type == ir->type);
+ assert(ir->operands[2]->type == ir->type);
+ break;
+
+ case ir_triop_vector_insert:
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->operands[1]->type->base_type);
+ assert(ir->operands[2]->type->is_scalar()
+ && ir->operands[2]->type->is_integer());
+ assert(ir->type == ir->operands[0]->type);
+ break;
+
+ case ir_quadop_bitfield_insert:
+ assert(ir->type->is_integer());
+ assert(ir->operands[0]->type == ir->type);
+ assert(ir->operands[1]->type == ir->type);
+ assert(ir->operands[2]->type == ir->type);
+ assert(ir->operands[3]->type == ir->type);
+ break;
+
+ case ir_quadop_vector:
+ /* The vector operator collects some number of scalars and generates a
+ * vector from them.
+ *
+ * - All of the operands must be scalar.
+ * - Number of operands must match the size of the resulting vector.
+ * - Base type of the operands must match the base type of the result.
+ */
+ assert(ir->type->is_vector());
+ switch (ir->type->vector_elements) {
+ case 2:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2] == NULL);
+ assert(ir->operands[3] == NULL);
+ break;
+ case 3:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2]->type->is_scalar());
+ assert(ir->operands[2]->type->base_type == ir->type->base_type);
+ assert(ir->operands[3] == NULL);
+ break;
+ case 4:
+ assert(ir->operands[0]->type->is_scalar());
+ assert(ir->operands[0]->type->base_type == ir->type->base_type);
+ assert(ir->operands[1]->type->is_scalar());
+ assert(ir->operands[1]->type->base_type == ir->type->base_type);
+ assert(ir->operands[2]->type->is_scalar());
+ assert(ir->operands[2]->type->base_type == ir->type->base_type);
+ assert(ir->operands[3]->type->is_scalar());
+ assert(ir->operands[3]->type->base_type == ir->type->base_type);
+ break;
+ default:
+ /* The is_vector assertion above should prevent execution from ever
+ * getting here.
+ */ + assert(!"Should not get here."); + break; + } + } + + return visit_continue; +} + +ir_visitor_status +ir_validate::visit_leave(ir_swizzle *ir) +{ + unsigned int chans[4] = {ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w}; + + for (unsigned int i = 0; i < ir->type->vector_elements; i++) { + if (chans[i] >= ir->val->type->vector_elements) { + printf("ir_swizzle @ %p specifies a channel not present " + "in the value.\n", (void *) ir); + ir->print(); + abort(); + } + } + + return visit_continue; +} + +ir_visitor_status +ir_validate::visit(ir_variable *ir) +{ + /* An ir_variable is the one thing that can (and will) appear multiple times + * in an IR tree. It is added to the hashtable so that it can be used + * in the ir_dereference_variable handler to ensure that a variable is + * declared before it is dereferenced. + */ + if (ir->name && ir->is_name_ralloced()) + assert(ralloc_parent(ir->name) == ir); + + _mesa_set_add(ir_set, ir); + + /* If a variable is an array, verify that the maximum array index is in + * bounds. There was once an error in AST-to-HIR conversion that set this + * to be out of bounds. + */ + if (ir->type->array_size() > 0) { + if (ir->data.max_array_access >= ir->type->length) { + printf("ir_variable has maximum access out of bounds (%d vs %d)\n", + ir->data.max_array_access, ir->type->length - 1); + ir->print(); + abort(); + } + } + + /* If a variable is an interface block (or an array of interface blocks), + * verify that the maximum array index for each interface member is in + * bounds. + */ + if (ir->is_interface_instance()) { + const glsl_struct_field *fields = + ir->get_interface_type()->fields.structure; + for (unsigned i = 0; i < ir->get_interface_type()->length; i++) { + if (fields[i].type->array_size() > 0) { + const unsigned *const max_ifc_array_access = + ir->get_max_ifc_array_access(); + + assert(max_ifc_array_access != NULL); + + if (max_ifc_array_access[i] >= fields[i].type->length) { + printf("ir_variable has maximum access out of bounds for " + "field %s (%d vs %d)\n", fields[i].name, + max_ifc_array_access[i], fields[i].type->length); + ir->print(); + abort(); + } + } + } + } + + if (ir->constant_initializer != NULL && !ir->data.has_initializer) { + printf("ir_variable didn't have an initializer, but has a constant " + "initializer value.\n"); + ir->print(); + abort(); + } + + if (ir->data.mode == ir_var_uniform + && is_gl_identifier(ir->name) + && ir->get_state_slots() == NULL) { + printf("built-in uniform has no state\n"); + ir->print(); + abort(); + } + + return visit_continue; +} + +ir_visitor_status +ir_validate::visit_enter(ir_assignment *ir) +{ + const ir_dereference *const lhs = ir->lhs; + if (lhs->type->is_scalar() || lhs->type->is_vector()) { + if (ir->write_mask == 0) { + printf("Assignment LHS is %s, but write mask is 0:\n", + lhs->type->is_scalar() ? 
"scalar" : "vector"); + ir->print(); + abort(); + } + + int lhs_components = 0; + for (int i = 0; i < 4; i++) { + if (ir->write_mask & (1 << i)) + lhs_components++; + } + + if (lhs_components != ir->rhs->type->vector_elements) { + printf("Assignment count of LHS write mask channels enabled not\n" + "matching RHS vector size (%d LHS, %d RHS).\n", + lhs_components, ir->rhs->type->vector_elements); + ir->print(); + abort(); + } + } + + this->validate_ir(ir, this->data_enter); + + return visit_continue; +} + +ir_visitor_status +ir_validate::visit_enter(ir_call *ir) +{ + ir_function_signature *const callee = ir->callee; + + if (callee->ir_type != ir_type_function_signature) { + printf("IR called by ir_call is not ir_function_signature!\n"); + abort(); + } + + if (ir->return_deref) { + if (ir->return_deref->type != callee->return_type) { + printf("callee type %s does not match return storage type %s\n", + callee->return_type->name, ir->return_deref->type->name); + abort(); + } + } else if (callee->return_type != glsl_type::void_type) { + printf("ir_call has non-void callee but no return storage\n"); + abort(); + } + + const exec_node *formal_param_node = callee->parameters.head; + const exec_node *actual_param_node = ir->actual_parameters.head; + while (true) { + if (formal_param_node->is_tail_sentinel() + != actual_param_node->is_tail_sentinel()) { + printf("ir_call has the wrong number of parameters:\n"); + goto dump_ir; + } + if (formal_param_node->is_tail_sentinel()) { + break; + } + const ir_variable *formal_param + = (const ir_variable *) formal_param_node; + const ir_rvalue *actual_param + = (const ir_rvalue *) actual_param_node; + if (formal_param->type != actual_param->type) { + printf("ir_call parameter type mismatch:\n"); + goto dump_ir; + } + if (formal_param->data.mode == ir_var_function_out + || formal_param->data.mode == ir_var_function_inout) { + if (!actual_param->is_lvalue()) { + printf("ir_call out/inout parameters must be lvalues:\n"); + goto dump_ir; + } + } + formal_param_node = formal_param_node->next; + actual_param_node = actual_param_node->next; + } + + return visit_continue; + +dump_ir: + ir->print(); + printf("callee:\n"); + callee->print(); + abort(); + return visit_stop; +} + +void +ir_validate::validate_ir(ir_instruction *ir, void *data) +{ + struct set *ir_set = (struct set *) data; + + if (_mesa_set_search(ir_set, ir)) { + printf("Instruction node present twice in ir tree:\n"); + ir->print(); + printf("\n"); + abort(); + } + _mesa_set_add(ir_set, ir); +} + +void +check_node_type(ir_instruction *ir, void *data) +{ + (void) data; + + if (ir->ir_type >= ir_type_max) { + printf("Instruction node with unset type\n"); + ir->print(); printf("\n"); + } + ir_rvalue *value = ir->as_rvalue(); + if (value != NULL) + assert(value->type != glsl_type::error_type); +} + +void +validate_ir_tree(exec_list *instructions) +{ + /* We shouldn't have any reason to validate IR in a release build, + * and it's half composed of assert()s anyway which wouldn't do + * anything. 
+ */ +#ifdef DEBUG + ir_validate v; + + v.run(instructions); + + foreach_in_list(ir_instruction, ir, instructions) { + visit_tree(ir, check_node_type, NULL); + } +#endif +} diff --git a/src/compiler/glsl/ir_variable_refcount.cpp b/src/compiler/glsl/ir_variable_refcount.cpp new file mode 100644 index 00000000000..8306be10b9c --- /dev/null +++ b/src/compiler/glsl/ir_variable_refcount.cpp @@ -0,0 +1,153 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_variable_refcount.cpp + * + * Provides a visitor which produces a list of variables referenced, + * how many times they were referenced and assigned, and whether they + * were defined in the scope. 
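+ *
+ * A typical consumer is dead-code elimination: a variable whose
+ * referenced_count equals its assigned_count is never actually read,
+ * so its recorded assignments are candidates for removal.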
+ */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_variable_refcount.h" +#include "compiler/glsl_types.h" +#include "util/hash_table.h" + +ir_variable_refcount_visitor::ir_variable_refcount_visitor() +{ + this->mem_ctx = ralloc_context(NULL); + this->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); +} + +static void +free_entry(struct hash_entry *entry) +{ + ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) entry->data; + + /* Free assignment list */ + exec_node *n; + while ((n = ivre->assign_list.pop_head()) != NULL) { + struct assignment_entry *assignment_entry = + exec_node_data(struct assignment_entry, n, link); + free(assignment_entry); + } + + delete ivre; +} + +ir_variable_refcount_visitor::~ir_variable_refcount_visitor() +{ + ralloc_free(this->mem_ctx); + _mesa_hash_table_destroy(this->ht, free_entry); +} + +// constructor +ir_variable_refcount_entry::ir_variable_refcount_entry(ir_variable *var) +{ + this->var = var; + assigned_count = 0; + declaration = false; + referenced_count = 0; +} + + +ir_variable_refcount_entry * +ir_variable_refcount_visitor::get_variable_entry(ir_variable *var) +{ + assert(var); + + struct hash_entry *e = _mesa_hash_table_search(this->ht, var); + if (e) + return (ir_variable_refcount_entry *)e->data; + + ir_variable_refcount_entry *entry = new ir_variable_refcount_entry(var); + assert(entry->referenced_count == 0); + _mesa_hash_table_insert(this->ht, var, entry); + + return entry; +} + + +ir_visitor_status +ir_variable_refcount_visitor::visit(ir_variable *ir) +{ + ir_variable_refcount_entry *entry = this->get_variable_entry(ir); + if (entry) + entry->declaration = true; + + return visit_continue; +} + + +ir_visitor_status +ir_variable_refcount_visitor::visit(ir_dereference_variable *ir) +{ + ir_variable *const var = ir->variable_referenced(); + ir_variable_refcount_entry *entry = this->get_variable_entry(var); + + if (entry) + entry->referenced_count++; + + return visit_continue; +} + + +ir_visitor_status +ir_variable_refcount_visitor::visit_enter(ir_function_signature *ir) +{ + /* We don't want to descend into the function parameters and + * dead-code eliminate them, so just accept the body here. + */ + visit_list_elements(this, &ir->body); + return visit_continue_with_parent; +} + + +ir_visitor_status +ir_variable_refcount_visitor::visit_leave(ir_assignment *ir) +{ + ir_variable_refcount_entry *entry; + entry = this->get_variable_entry(ir->lhs->variable_referenced()); + if (entry) { + entry->assigned_count++; + + /* Build a list for dead code optimisation. Don't add assignment if it + * was declared out of scope (outside the instruction stream). Also don't + * bother adding any more to the list if there are more references than + * assignments as this means the variable is used and won't be optimised + * out. 
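+ *
+ * Worked example (editor's illustration): a variable assigned twice and
+ * never read reaches referenced_count == assigned_count == 2, since each
+ * assignment's LHS dereference also bumps referenced_count, so both
+ * assignments land on the list; a genuine read in between pushes
+ * referenced_count ahead and stops further additions.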
+ */ + assert(entry->referenced_count >= entry->assigned_count); + if (entry->referenced_count == entry->assigned_count) { + struct assignment_entry *assignment_entry = + (struct assignment_entry *)calloc(1, sizeof(*assignment_entry)); + assignment_entry->assign = ir; + entry->assign_list.push_head(&assignment_entry->link); + } + } + + return visit_continue; +} diff --git a/src/compiler/glsl/ir_variable_refcount.h b/src/compiler/glsl/ir_variable_refcount.h new file mode 100644 index 00000000000..08a11c01495 --- /dev/null +++ b/src/compiler/glsl/ir_variable_refcount.h @@ -0,0 +1,80 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file ir_variable_refcount.h + * + * Provides a visitor which produces a list of variables referenced, + * how many times they were referenced and assigned, and whether they + * were defined in the scope. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "compiler/glsl_types.h" + +struct assignment_entry { + exec_node link; + ir_assignment *assign; +}; + +class ir_variable_refcount_entry +{ +public: + ir_variable_refcount_entry(ir_variable *var); + + ir_variable *var; /* The key: the variable's pointer. */ + + /** + * List of assignments to the variable, if any. + * This is intended to be used for dead code optimisation and may + * not be a complete list. + */ + exec_list assign_list; + + /** Number of times the variable is referenced, including assignments. */ + unsigned referenced_count; + + /** Number of times the variable is assigned. 
*/ + unsigned assigned_count; + + bool declaration; /* If the variable had a decl in the instruction stream */ +}; + +class ir_variable_refcount_visitor : public ir_hierarchical_visitor { +public: + ir_variable_refcount_visitor(void); + ~ir_variable_refcount_visitor(void); + + virtual ir_visitor_status visit(ir_variable *); + virtual ir_visitor_status visit(ir_dereference_variable *); + + virtual ir_visitor_status visit_enter(ir_function_signature *); + virtual ir_visitor_status visit_leave(ir_assignment *); + + ir_variable_refcount_entry *get_variable_entry(ir_variable *var); + + struct hash_table *ht; + + void *mem_ctx; +}; diff --git a/src/compiler/glsl/ir_visitor.h b/src/compiler/glsl/ir_visitor.h new file mode 100644 index 00000000000..7c38481cd53 --- /dev/null +++ b/src/compiler/glsl/ir_visitor.h @@ -0,0 +1,93 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef IR_VISITOR_H +#define IR_VISITOR_H + +#ifdef __cplusplus +/** + * Abstract base class of visitors of IR instruction trees + */ +class ir_visitor { +public: + virtual ~ir_visitor() + { + /* empty */ + } + + /** + * \name Visit methods + * + * As typical for the visitor pattern, there must be one \c visit method for + * each concrete subclass of \c ir_instruction. Virtual base classes within + * the hierarchy should not have \c visit methods. 
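+ *
+ * A concrete visitor overrides each of these methods; ir_print_visitor
+ * is a complete in-tree example.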
+ */
+ /*@{*/
+ virtual void visit(class ir_rvalue *) { assert(!"unhandled error_type"); }
+ virtual void visit(class ir_variable *) = 0;
+ virtual void visit(class ir_function_signature *) = 0;
+ virtual void visit(class ir_function *) = 0;
+ virtual void visit(class ir_expression *) = 0;
+ virtual void visit(class ir_texture *) = 0;
+ virtual void visit(class ir_swizzle *) = 0;
+ virtual void visit(class ir_dereference_variable *) = 0;
+ virtual void visit(class ir_dereference_array *) = 0;
+ virtual void visit(class ir_dereference_record *) = 0;
+ virtual void visit(class ir_assignment *) = 0;
+ virtual void visit(class ir_constant *) = 0;
+ virtual void visit(class ir_call *) = 0;
+ virtual void visit(class ir_return *) = 0;
+ virtual void visit(class ir_discard *) = 0;
+ virtual void visit(class ir_if *) = 0;
+ virtual void visit(class ir_loop *) = 0;
+ virtual void visit(class ir_loop_jump *) = 0;
+ virtual void visit(class ir_emit_vertex *) = 0;
+ virtual void visit(class ir_end_primitive *) = 0;
+ virtual void visit(class ir_barrier *) = 0;
+ /*@}*/
+};
+
+/* NOTE: function calls may never return due to discards inside them.
+ * This is usually not an issue, but keep it in mind if it is.
+ */
+class ir_control_flow_visitor : public ir_visitor {
+public:
+ virtual void visit(class ir_variable *) {}
+ virtual void visit(class ir_expression *) {}
+ virtual void visit(class ir_texture *) {}
+ virtual void visit(class ir_swizzle *) {}
+ virtual void visit(class ir_dereference_variable *) {}
+ virtual void visit(class ir_dereference_array *) {}
+ virtual void visit(class ir_dereference_record *) {}
+ virtual void visit(class ir_assignment *) {}
+ virtual void visit(class ir_constant *) {}
+ virtual void visit(class ir_call *) {}
+ virtual void visit(class ir_emit_vertex *) {}
+ virtual void visit(class ir_end_primitive *) {}
+ virtual void visit(class ir_barrier *) {}
+};
+#endif /* __cplusplus */
+
+#endif /* IR_VISITOR_H */
diff --git a/src/compiler/glsl/link_atomics.cpp b/src/compiler/glsl/link_atomics.cpp
new file mode 100644
index 00000000000..277d4737ab7
--- /dev/null
+++ b/src/compiler/glsl/link_atomics.cpp
@@ -0,0 +1,346 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ + +#include "glsl_parser_extras.h" +#include "ir.h" +#include "ir_uniform.h" +#include "linker.h" +#include "program/hash_table.h" +#include "main/macros.h" + +namespace { + /* + * Atomic counter as seen by the program. + */ + struct active_atomic_counter { + unsigned uniform_loc; + ir_variable *var; + }; + + /* + * Atomic counter buffer referenced by the program. There is a one + * to one correspondence between these and the objects that can be + * queried using glGetActiveAtomicCounterBufferiv(). + */ + struct active_atomic_buffer { + active_atomic_buffer() + : counters(0), num_counters(0), stage_references(), size(0) + {} + + ~active_atomic_buffer() + { + free(counters); + } + + void push_back(unsigned uniform_loc, ir_variable *var) + { + active_atomic_counter *new_counters; + + new_counters = (active_atomic_counter *) + realloc(counters, sizeof(active_atomic_counter) * + (num_counters + 1)); + + if (new_counters == NULL) { + _mesa_error_no_memory(__func__); + return; + } + + counters = new_counters; + counters[num_counters].uniform_loc = uniform_loc; + counters[num_counters].var = var; + num_counters++; + } + + active_atomic_counter *counters; + unsigned num_counters; + unsigned stage_references[MESA_SHADER_STAGES]; + unsigned size; + }; + + int + cmp_actives(const void *a, const void *b) + { + const active_atomic_counter *const first = (active_atomic_counter *) a; + const active_atomic_counter *const second = (active_atomic_counter *) b; + + return int(first->var->data.offset) - int(second->var->data.offset); + } + + bool + check_atomic_counters_overlap(const ir_variable *x, const ir_variable *y) + { + return ((x->data.offset >= y->data.offset && + x->data.offset < y->data.offset + y->type->atomic_size()) || + (y->data.offset >= x->data.offset && + y->data.offset < x->data.offset + x->type->atomic_size())); + } + + void + process_atomic_variable(const glsl_type *t, struct gl_shader_program *prog, + unsigned *uniform_loc, ir_variable *var, + active_atomic_buffer *const buffers, + unsigned *num_buffers, int *offset, + const unsigned shader_stage) + { + /* FIXME: Arrays of arrays get counted separately. For example: + * x1[3][3][2] = 9 counters + * x2[3][2] = 3 counters + * x3[2] = 1 counter + * + * However this code marks all the counters as active even when they + * might not be used. + */ + if (t->is_array() && t->fields.array->is_array()) { + for (unsigned i = 0; i < t->length; i++) { + process_atomic_variable(t->fields.array, prog, uniform_loc, + var, buffers, num_buffers, offset, + shader_stage); + } + } else { + active_atomic_buffer *buf = &buffers[var->data.binding]; + gl_uniform_storage *const storage = + &prog->UniformStorage[*uniform_loc]; + + /* If this is the first time the buffer is used, increment + * the counter of buffers used. 
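+ * (A buffer's size stays zero until its first counter is added, so
+ * size == 0 identifies the first use of a binding.)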
+ */
+ if (buf->size == 0)
+ (*num_buffers)++;
+
+ buf->push_back(*uniform_loc, var);
+
+ buf->stage_references[shader_stage]++;
+ buf->size = MAX2(buf->size, *offset + t->atomic_size());
+
+ storage->offset = *offset;
+ *offset += t->atomic_size();
+
+ (*uniform_loc)++;
+ }
+ }
+
+ active_atomic_buffer *
+ find_active_atomic_counters(struct gl_context *ctx,
+ struct gl_shader_program *prog,
+ unsigned *num_buffers)
+ {
+ active_atomic_buffer *const buffers =
+ new active_atomic_buffer[ctx->Const.MaxAtomicBufferBindings];
+
+ *num_buffers = 0;
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
+ struct gl_shader *sh = prog->_LinkedShaders[i];
+ if (sh == NULL)
+ continue;
+
+ foreach_in_list(ir_instruction, node, sh->ir) {
+ ir_variable *var = node->as_variable();
+
+ if (var && var->type->contains_atomic()) {
+ int offset = var->data.offset;
+ unsigned uniform_loc = var->data.location;
+ process_atomic_variable(var->type, prog, &uniform_loc,
+ var, buffers, num_buffers, &offset, i);
+ }
+ }
+ }
+
+ for (unsigned i = 0; i < ctx->Const.MaxAtomicBufferBindings; i++) {
+ if (buffers[i].size == 0)
+ continue;
+
+ qsort(buffers[i].counters, buffers[i].num_counters,
+ sizeof(active_atomic_counter),
+ cmp_actives);
+
+ for (unsigned j = 1; j < buffers[i].num_counters; j++) {
+ /* If an overlapping counter is found, it must be a reference to the
+ * same counter from a different shader stage.
+ */
+ if (check_atomic_counters_overlap(buffers[i].counters[j-1].var,
+ buffers[i].counters[j].var)
+ && strcmp(buffers[i].counters[j-1].var->name,
+ buffers[i].counters[j].var->name) != 0) {
+ linker_error(prog, "Atomic counter %s declared at offset %d "
+ "which is already in use.",
+ buffers[i].counters[j].var->name,
+ buffers[i].counters[j].var->data.offset);
+ }
+ }
+ }
+ return buffers;
+ }
+}
+
+void
+link_assign_atomic_counter_resources(struct gl_context *ctx,
+ struct gl_shader_program *prog)
+{
+ unsigned num_buffers;
+ unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {};
+ active_atomic_buffer *abs =
+ find_active_atomic_counters(ctx, prog, &num_buffers);
+
+ prog->AtomicBuffers = rzalloc_array(prog, gl_active_atomic_buffer,
+ num_buffers);
+ prog->NumAtomicBuffers = num_buffers;
+
+ unsigned i = 0;
+ for (unsigned binding = 0;
+ binding < ctx->Const.MaxAtomicBufferBindings;
+ binding++) {
+
+ /* If the binding was not used, skip.
+ */
+ if (abs[binding].size == 0)
+ continue;
+
+ active_atomic_buffer &ab = abs[binding];
+ gl_active_atomic_buffer &mab = prog->AtomicBuffers[i];
+
+ /* Assign buffer-specific fields. */
+ mab.Binding = binding;
+ mab.MinimumSize = ab.size;
+ mab.Uniforms = rzalloc_array(prog->AtomicBuffers, GLuint,
+ ab.num_counters);
+ mab.NumUniforms = ab.num_counters;
+
+ /* Assign counter-specific fields. */
+ for (unsigned j = 0; j < ab.num_counters; j++) {
+ ir_variable *const var = ab.counters[j].var;
+ gl_uniform_storage *const storage =
+ &prog->UniformStorage[ab.counters[j].uniform_loc];
+
+ mab.Uniforms[j] = ab.counters[j].uniform_loc;
+ if (!var->data.explicit_binding)
+ var->data.binding = i;
+
+ storage->atomic_buffer_index = i;
+ storage->offset = var->data.offset;
+ storage->array_stride = (var->type->is_array() ?
+ var->type->without_array()->atomic_size() : 0);
+ if (!var->type->is_matrix())
+ storage->matrix_stride = 0;
+ }
+
+ /* Assign stage-specific fields.
+ */
+ for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
+ if (ab.stage_references[j]) {
+ mab.StageReferences[j] = GL_TRUE;
+ num_atomic_buffers[j]++;
+ } else {
+ mab.StageReferences[j] = GL_FALSE;
+ }
+ }
+
+ i++;
+ }
+
+ /* Store a list of pointers to atomic buffers per stage and store the index
+ * to the intra-stage buffer list in uniform storage.
+ */
+ for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
+ if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) {
+ prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j];
+ prog->_LinkedShaders[j]->AtomicBuffers =
+ rzalloc_array(prog, gl_active_atomic_buffer *,
+ num_atomic_buffers[j]);
+
+ unsigned intra_stage_idx = 0;
+ for (unsigned i = 0; i < num_buffers; i++) {
+ struct gl_active_atomic_buffer *atomic_buffer =
+ &prog->AtomicBuffers[i];
+ if (atomic_buffer->StageReferences[j]) {
+ prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] =
+ atomic_buffer;
+
+ for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) {
+ prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index =
+ intra_stage_idx;
+ prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active =
+ true;
+ }
+
+ intra_stage_idx++;
+ }
+ }
+ }
+ }
+
+ delete [] abs;
+ assert(i == num_buffers);
+}
+
+void
+link_check_atomic_counter_resources(struct gl_context *ctx,
+ struct gl_shader_program *prog)
+{
+ unsigned num_buffers;
+ active_atomic_buffer *const abs =
+ find_active_atomic_counters(ctx, prog, &num_buffers);
+ unsigned atomic_counters[MESA_SHADER_STAGES] = {};
+ unsigned atomic_buffers[MESA_SHADER_STAGES] = {};
+ unsigned total_atomic_counters = 0;
+ unsigned total_atomic_buffers = 0;
+
+ /* Sum the required resources. Note that this counts buffers and
+ * counters referenced by several shader stages multiple times
+ * against the combined limit -- that's the behavior the spec
+ * requires.
+ */
+ for (unsigned i = 0; i < ctx->Const.MaxAtomicBufferBindings; i++) {
+ if (abs[i].size == 0)
+ continue;
+
+ for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
+ const unsigned n = abs[i].stage_references[j];
+
+ if (n) {
+ atomic_counters[j] += n;
+ total_atomic_counters += n;
+ atomic_buffers[j]++;
+ total_atomic_buffers++;
+ }
+ }
+ }
+
+ /* Check that they are within the supported limits.
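+ * For example, a buffer referenced from both the vertex and fragment
+ * stages counts twice toward the combined limits.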
*/ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (atomic_counters[i] > ctx->Const.Program[i].MaxAtomicCounters) + linker_error(prog, "Too many %s shader atomic counters", + _mesa_shader_stage_to_string(i)); + + if (atomic_buffers[i] > ctx->Const.Program[i].MaxAtomicBuffers) + linker_error(prog, "Too many %s shader atomic counter buffers", + _mesa_shader_stage_to_string(i)); + } + + if (total_atomic_counters > ctx->Const.MaxCombinedAtomicCounters) + linker_error(prog, "Too many combined atomic counters"); + + if (total_atomic_buffers > ctx->Const.MaxCombinedAtomicBuffers) + linker_error(prog, "Too many combined atomic buffers"); + + delete [] abs; +} diff --git a/src/compiler/glsl/link_functions.cpp b/src/compiler/glsl/link_functions.cpp new file mode 100644 index 00000000000..537f4dc77ac --- /dev/null +++ b/src/compiler/glsl/link_functions.cpp @@ -0,0 +1,348 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/core.h" +#include "glsl_symbol_table.h" +#include "glsl_parser_extras.h" +#include "ir.h" +#include "program.h" +#include "program/hash_table.h" +#include "linker.h" + +static ir_function_signature * +find_matching_signature(const char *name, const exec_list *actual_parameters, + gl_shader **shader_list, unsigned num_shaders, + bool use_builtin); + +namespace { + +class call_link_visitor : public ir_hierarchical_visitor { +public: + call_link_visitor(gl_shader_program *prog, gl_shader *linked, + gl_shader **shader_list, unsigned num_shaders) + { + this->prog = prog; + this->shader_list = shader_list; + this->num_shaders = num_shaders; + this->success = true; + this->linked = linked; + + this->locals = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + } + + ~call_link_visitor() + { + hash_table_dtor(this->locals); + } + + virtual ir_visitor_status visit(ir_variable *ir) + { + hash_table_insert(locals, ir, ir); + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_call *ir) + { + /* If ir is an ir_call from a function that was imported from another + * shader callee will point to an ir_function_signature in the original + * shader. In this case the function signature MUST NOT BE MODIFIED. + * Doing so will modify the original shader. This may prevent that + * shader from being linkable in other programs. 
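+ * (This is why the code below clones the called function into the
+ * linked shader rather than modifying the original signature.)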
+ */
+ const ir_function_signature *const callee = ir->callee;
+ assert(callee != NULL);
+ const char *const name = callee->function_name();
+
+ /* Determine if the requested function signature already exists in the
+ * final linked shader. If it does, use it as the target of the call.
+ */
+ ir_function_signature *sig =
+ find_matching_signature(name, &callee->parameters, &linked, 1,
+ ir->use_builtin);
+ if (sig != NULL) {
+ ir->callee = sig;
+ return visit_continue;
+ }
+
+ /* Try to find the signature in one of the other shaders that is being
+ * linked. If it's not found there, return an error.
+ */
+ sig = find_matching_signature(name, &ir->actual_parameters, shader_list,
+ num_shaders, ir->use_builtin);
+ if (sig == NULL) {
+ /* FINISHME: Log the full signature of unresolved function.
+ */
+ linker_error(this->prog, "unresolved reference to function `%s'\n",
+ name);
+ this->success = false;
+ return visit_stop;
+ }
+
+ /* Find the prototype information in the linked shader. Generate any
+ * details that may be missing.
+ */
+ ir_function *f = linked->symbols->get_function(name);
+ if (f == NULL) {
+ f = new(linked) ir_function(name);
+
+ /* Add the new function to the linked IR. Put it at the end
+ * so that it comes after any global variable declarations
+ * that it refers to.
+ */
+ linked->symbols->add_function(f);
+ linked->ir->push_tail(f);
+ }
+
+ ir_function_signature *linked_sig =
+ f->exact_matching_signature(NULL, &callee->parameters);
+ if ((linked_sig == NULL)
+ || ((linked_sig != NULL)
+ && (linked_sig->is_builtin() != ir->use_builtin))) {
+ linked_sig = new(linked) ir_function_signature(callee->return_type);
+ f->add_signature(linked_sig);
+ }
+
+ /* At this point linked_sig and callee may be the same. If ir is an
+ * ir_call from linked then linked_sig and callee will be
+ * ir_function_signatures that have no definitions (is_defined is false).
+ */
+ assert(!linked_sig->is_defined);
+ assert(linked_sig->body.is_empty());
+
+ /* Create an in-place clone of the function definition. This multistep
+ * process introduces some complexity here, but it has some advantages.
+ * The parameter list and the function body are cloned separately.
+ * The clone of the parameter list is used to prime the hashtable used
+ * to replace variable references in the cloned body.
+ *
+ * The big advantage is that the ir_function_signature does not change.
+ * This means that we don't have to process the rest of the IR tree to
+ * patch ir_call nodes. In addition, there is no way to remove or
+ * replace a signature stored in a function. One could easily be added,
+ * but this avoids the need.
+ */
+ struct hash_table *ht = hash_table_ctor(0, hash_table_pointer_hash,
+ hash_table_pointer_compare);
+ exec_list formal_parameters;
+ foreach_in_list(const ir_instruction, original, &sig->parameters) {
+ assert(const_cast<ir_instruction *>(original)->as_variable());
+
+ ir_instruction *copy = original->clone(linked, ht);
+ formal_parameters.push_tail(copy);
+ }
+
+ linked_sig->replace_parameters(&formal_parameters);
+
+ linked_sig->is_intrinsic = sig->is_intrinsic;
+
+ if (sig->is_defined) {
+ foreach_in_list(const ir_instruction, original, &sig->body) {
+ ir_instruction *copy = original->clone(linked, ht);
+ linked_sig->body.push_tail(copy);
+ }
+
+ linked_sig->is_defined = true;
+ }
+
+ hash_table_dtor(ht);
+
+ /* Patch references inside the function to things outside the function
+ * (i.e., function calls and global variables).
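+ * Re-running this visitor over the freshly cloned body also imports,
+ * transitively, any functions that the cloned body itself calls.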
+ */ + linked_sig->accept(this); + + ir->callee = linked_sig; + + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_call *ir) + { + /* Traverse list of function parameters, and for array parameters + * propagate max_array_access. Otherwise arrays that are only referenced + * from inside functions via function parameters will be incorrectly + * optimized. This will lead to incorrect code being generated (or worse). + * Do it when leaving the node so the children would propagate their + * array accesses first. + */ + + const exec_node *formal_param_node = ir->callee->parameters.get_head(); + if (formal_param_node) { + const exec_node *actual_param_node = ir->actual_parameters.get_head(); + while (!actual_param_node->is_tail_sentinel()) { + ir_variable *formal_param = (ir_variable *) formal_param_node; + ir_rvalue *actual_param = (ir_rvalue *) actual_param_node; + + formal_param_node = formal_param_node->get_next(); + actual_param_node = actual_param_node->get_next(); + + if (formal_param->type->is_array()) { + ir_dereference_variable *deref = actual_param->as_dereference_variable(); + if (deref && deref->var && deref->var->type->is_array()) { + deref->var->data.max_array_access = + MAX2(formal_param->data.max_array_access, + deref->var->data.max_array_access); + } + } + } + } + return visit_continue; + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (hash_table_find(locals, ir->var) == NULL) { + /* The non-function variable must be a global, so try to find the + * variable in the shader's symbol table. If the variable is not + * found, then it's a global that *MUST* be defined in the original + * shader. + */ + ir_variable *var = linked->symbols->get_variable(ir->var->name); + if (var == NULL) { + /* Clone the ir_variable that the dereference already has and add + * it to the linked shader. + */ + var = ir->var->clone(linked, NULL); + linked->symbols->add_variable(var); + linked->ir->push_head(var); + } else { + if (var->type->is_array()) { + /* It is possible to have a global array declared in multiple + * shaders without a size. The array is implicitly sized by + * the maximal access to it in *any* shader. Because of this, + * we need to track the maximal access to the array as linking + * pulls more functions in that access the array. + */ + var->data.max_array_access = + MAX2(var->data.max_array_access, + ir->var->data.max_array_access); + + if (var->type->length == 0 && ir->var->type->length != 0) + var->type = ir->var->type; + } + if (var->is_interface_instance()) { + /* Similarly, we need implicit sizes of arrays within interface + * blocks to be sized by the maximal access in *any* shader. + */ + unsigned *const linked_max_ifc_array_access = + var->get_max_ifc_array_access(); + unsigned *const ir_max_ifc_array_access = + ir->var->get_max_ifc_array_access(); + + assert(linked_max_ifc_array_access != NULL); + assert(ir_max_ifc_array_access != NULL); + + for (unsigned i = 0; i < var->get_interface_type()->length; + i++) { + linked_max_ifc_array_access[i] = + MAX2(linked_max_ifc_array_access[i], + ir_max_ifc_array_access[i]); + } + } + } + + ir->var = var; + } + + return visit_continue; + } + + /** Was function linking successful? */ + bool success; + +private: + /** + * Shader program being linked + * + * This is only used for logging error messages. + */ + gl_shader_program *prog; + + /** List of shaders available for linking. */ + gl_shader **shader_list; + + /** Number of shaders available for linking. 
*/ + unsigned num_shaders; + + /** + * Final linked shader + * + * This is used two ways. It is used to find global variables in the + * linked shader that are accessed by the function. It is also used to add + * global variables from the shader where the function originated. + */ + gl_shader *linked; + + /** + * Table of variables local to the function. + */ + hash_table *locals; +}; + +} /* anonymous namespace */ + +/** + * Searches a list of shaders for a particular function definition + */ +ir_function_signature * +find_matching_signature(const char *name, const exec_list *actual_parameters, + gl_shader **shader_list, unsigned num_shaders, + bool use_builtin) +{ + for (unsigned i = 0; i < num_shaders; i++) { + ir_function *const f = shader_list[i]->symbols->get_function(name); + + if (f == NULL) + continue; + + ir_function_signature *sig = + f->matching_signature(NULL, actual_parameters, use_builtin); + + if ((sig == NULL) || + (!sig->is_defined && !sig->is_intrinsic)) + continue; + + /* If this function expects to bind to a built-in function and the + * signature that we found isn't a built-in, keep looking. Also keep + * looking if we expect a non-built-in but found a built-in. + */ + if (use_builtin != sig->is_builtin()) + continue; + + return sig; + } + + return NULL; +} + + +bool +link_function_calls(gl_shader_program *prog, gl_shader *main, + gl_shader **shader_list, unsigned num_shaders) +{ + call_link_visitor v(prog, main, shader_list, num_shaders); + + v.run(main->ir); + return v.success; +} diff --git a/src/compiler/glsl/link_interface_blocks.cpp b/src/compiler/glsl/link_interface_blocks.cpp new file mode 100644 index 00000000000..64c30fea9a3 --- /dev/null +++ b/src/compiler/glsl/link_interface_blocks.cpp @@ -0,0 +1,357 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file link_interface_blocks.cpp + * Linker support for GLSL's interface blocks. + */ + +#include "ir.h" +#include "glsl_symbol_table.h" +#include "linker.h" +#include "main/macros.h" +#include "util/hash_table.h" + + +namespace { + +/** + * Check if two interfaces match, according to intrastage interface matching + * rules. If they do, and the first interface uses an unsized array, it will + * be updated to reflect the array size declared in the second interface. + */ +bool +intrastage_match(ir_variable *a, + ir_variable *b, + struct gl_shader_program *prog) +{ + /* Types must match. 
+ */
+ if (a->get_interface_type() != b->get_interface_type()) {
+ /* Exception: if both the interface blocks are implicitly declared,
+ * don't force their types to match. They might mismatch due to the two
+ * shaders using different GLSL versions, and that's ok.
+ */
+ if (a->data.how_declared != ir_var_declared_implicitly ||
+ b->data.how_declared != ir_var_declared_implicitly)
+ return false;
+ }
+
+ /* Presence/absence of interface names must match. */
+ if (a->is_interface_instance() != b->is_interface_instance())
+ return false;
+
+ /* For uniforms, instance names need not match. For shader ins/outs,
+ * it's not clear from the spec whether they need to match, but
+ * Mesa's implementation relies on them matching.
+ */
+ if (a->is_interface_instance() && b->data.mode != ir_var_uniform &&
+ b->data.mode != ir_var_shader_storage &&
+ strcmp(a->name, b->name) != 0) {
+ return false;
+ }
+
+ /* If a block is an array then it must match across the shader.
+ * Unsized arrays are also processed and matched against sized arrays.
+ */
+ if (b->type != a->type &&
+ (b->is_interface_instance() || a->is_interface_instance()) &&
+ !validate_intrastage_arrays(prog, b, a))
+ return false;
+
+ return true;
+}
+
+
+/**
+ * Check if two interfaces match, according to interstage (in/out) interface
+ * matching rules.
+ *
+ * If \c extra_array_level is true, the consumer interface is required to be
+ * an array and the producer interface is required to be a non-array.
+ * This is used for tessellation control and geometry shader consumers.
+ */
+bool
+interstage_match(ir_variable *producer,
+ ir_variable *consumer,
+ bool extra_array_level)
+{
+ /* Unsized arrays should not occur during interstage linking. They
+ * should have all been assigned a size by link_intrastage_shaders.
+ */
+ assert(!consumer->type->is_unsized_array());
+ assert(!producer->type->is_unsized_array());
+
+ /* Types must match. */
+ if (consumer->get_interface_type() != producer->get_interface_type()) {
+ /* Exception: if both the interface blocks are implicitly declared,
+ * don't force their types to match. They might mismatch due to the two
+ * shaders using different GLSL versions, and that's ok.
+ */
+ if (consumer->data.how_declared != ir_var_declared_implicitly ||
+ producer->data.how_declared != ir_var_declared_implicitly)
+ return false;
+ }
+
+ /* Ignore outermost array if geom shader */
+ const glsl_type *consumer_instance_type;
+ if (extra_array_level) {
+ consumer_instance_type = consumer->type->fields.array;
+ } else {
+ consumer_instance_type = consumer->type;
+ }
+
+ /* If a block is an array then it must match across shaders.
+ * Since unsized arrays have been ruled out, we can check this by just
+ * making sure the types are equal.
+ */
+ if ((consumer->is_interface_instance() &&
+ consumer_instance_type->is_array()) ||
+ (producer->is_interface_instance() &&
+ producer->type->is_array())) {
+ if (consumer_instance_type != producer->type)
+ return false;
+ }
+
+ return true;
+}
+
+
+/**
+ * This class keeps track of a mapping from an interface block name to the
+ * necessary information about that interface block to determine whether to
+ * generate a link error.
+ *
+ * Note: this class is expected to be short lived, so it doesn't make copies
+ * of the strings it references; it simply borrows the pointers from the
+ * ir_variable class.
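+ *
+ * Blocks with an explicit location are keyed by that location; all
+ * other blocks are keyed by the name of their interface type.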
+ */ +class interface_block_definitions +{ +public: + interface_block_definitions() + : mem_ctx(ralloc_context(NULL)), + ht(_mesa_hash_table_create(NULL, _mesa_key_hash_string, + _mesa_key_string_equal)) + { + } + + ~interface_block_definitions() + { + ralloc_free(mem_ctx); + _mesa_hash_table_destroy(ht, NULL); + } + + /** + * Lookup the interface definition. Return NULL if none is found. + */ + ir_variable *lookup(ir_variable *var) + { + if (var->data.explicit_location && + var->data.location >= VARYING_SLOT_VAR0) { + char location_str[11]; + snprintf(location_str, 11, "%d", var->data.location); + + const struct hash_entry *entry = + _mesa_hash_table_search(ht, location_str); + return entry ? (ir_variable *) entry->data : NULL; + } else { + const struct hash_entry *entry = + _mesa_hash_table_search(ht, var->get_interface_type()->name); + return entry ? (ir_variable *) entry->data : NULL; + } + } + + /** + * Add a new interface definition. + */ + void store(ir_variable *var) + { + if (var->data.explicit_location && + var->data.location >= VARYING_SLOT_VAR0) { + /* If explicit location is given then lookup the variable by location. + * We turn the location into a string and use this as the hash key + * rather than the name. Note: We allocate enough space for a 32-bit + * unsigned location value which is overkill but future proof. + */ + char location_str[11]; + snprintf(location_str, 11, "%d", var->data.location); + _mesa_hash_table_insert(ht, ralloc_strdup(mem_ctx, location_str), var); + } else { + _mesa_hash_table_insert(ht, var->get_interface_type()->name, var); + } + } + +private: + /** + * Ralloc context for data structures allocated by this class. + */ + void *mem_ctx; + + /** + * Hash table mapping interface block name to an \c + * ir_variable. + */ + hash_table *ht; +}; + + +}; /* anonymous namespace */ + + +void +validate_intrastage_interface_blocks(struct gl_shader_program *prog, + const gl_shader **shader_list, + unsigned num_shaders) +{ + interface_block_definitions in_interfaces; + interface_block_definitions out_interfaces; + interface_block_definitions uniform_interfaces; + interface_block_definitions buffer_interfaces; + + for (unsigned int i = 0; i < num_shaders; i++) { + if (shader_list[i] == NULL) + continue; + + foreach_in_list(ir_instruction, node, shader_list[i]->ir) { + ir_variable *var = node->as_variable(); + if (!var) + continue; + + const glsl_type *iface_type = var->get_interface_type(); + + if (iface_type == NULL) + continue; + + interface_block_definitions *definitions; + switch (var->data.mode) { + case ir_var_shader_in: + definitions = &in_interfaces; + break; + case ir_var_shader_out: + definitions = &out_interfaces; + break; + case ir_var_uniform: + definitions = &uniform_interfaces; + break; + case ir_var_shader_storage: + definitions = &buffer_interfaces; + break; + default: + /* Only in, out, and uniform interfaces are legal, so we should + * never get here. + */ + assert(!"illegal interface type"); + continue; + } + + ir_variable *prev_def = definitions->lookup(var); + if (prev_def == NULL) { + /* This is the first time we've seen the interface, so save + * it into the appropriate data structure. 
+ */
+ definitions->store(var);
+ } else if (!intrastage_match(prev_def, var, prog)) {
+ linker_error(prog, "definitions of interface block `%s' do not"
+ " match\n", iface_type->name);
+ return;
+ }
+ }
+ }
+}
+
+void
+validate_interstage_inout_blocks(struct gl_shader_program *prog,
+ const gl_shader *producer,
+ const gl_shader *consumer)
+{
+ interface_block_definitions definitions;
+ /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
+ const bool extra_array_level = (producer->Stage == MESA_SHADER_VERTEX &&
+ consumer->Stage != MESA_SHADER_FRAGMENT) ||
+ consumer->Stage == MESA_SHADER_GEOMETRY;
+
+ /* Add input interfaces from the consumer to the symbol table. */
+ foreach_in_list(ir_instruction, node, consumer->ir) {
+ ir_variable *var = node->as_variable();
+ if (!var || !var->get_interface_type() || var->data.mode != ir_var_shader_in)
+ continue;
+
+ definitions.store(var);
+ }
+
+ /* Verify that the producer's output interfaces match. */
+ foreach_in_list(ir_instruction, node, producer->ir) {
+ ir_variable *var = node->as_variable();
+ if (!var || !var->get_interface_type() || var->data.mode != ir_var_shader_out)
+ continue;
+
+ ir_variable *consumer_def = definitions.lookup(var);
+
+ /* The consumer doesn't use this output block. Ignore it. */
+ if (consumer_def == NULL)
+ continue;
+
+ if (!interstage_match(var, consumer_def, extra_array_level)) {
+ linker_error(prog, "definitions of interface block `%s' do not "
+ "match\n", var->get_interface_type()->name);
+ return;
+ }
+ }
+}
+
+
+void
+validate_interstage_uniform_blocks(struct gl_shader_program *prog,
+ gl_shader **stages, int num_stages)
+{
+ interface_block_definitions definitions;
+
+ for (int i = 0; i < num_stages; i++) {
+ if (stages[i] == NULL)
+ continue;
+
+ const gl_shader *stage = stages[i];
+ foreach_in_list(ir_instruction, node, stage->ir) {
+ ir_variable *var = node->as_variable();
+ if (!var || !var->get_interface_type() ||
+ (var->data.mode != ir_var_uniform &&
+ var->data.mode != ir_var_shader_storage))
+ continue;
+
+ ir_variable *old_def = definitions.lookup(var);
+ if (old_def == NULL) {
+ definitions.store(var);
+ } else {
+ /* Interstage uniform matching rules are the same as intrastage
+ * uniform matching rules (for uniforms, it is as though all
+ * shaders are in the same shader stage).
+ */
+ if (!intrastage_match(old_def, var, prog)) {
+ linker_error(prog, "definitions of interface block `%s' do not "
+ "match\n", var->get_interface_type()->name);
+ return;
+ }
+ }
+ }
+ }
+}
diff --git a/src/compiler/glsl/link_uniform_block_active_visitor.cpp b/src/compiler/glsl/link_uniform_block_active_visitor.cpp
new file mode 100644
index 00000000000..54fea700b53
--- /dev/null
+++ b/src/compiler/glsl/link_uniform_block_active_visitor.cpp
@@ -0,0 +1,296 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "link_uniform_block_active_visitor.h"
+#include "program.h"
+
+static link_uniform_block_active *
+process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var)
+{
+ const hash_entry *const existing_block =
+ _mesa_hash_table_search(ht, var->get_interface_type()->name);
+
+ const glsl_type *const block_type = var->is_interface_instance()
+ ? var->type : var->get_interface_type();
+
+
+ /* If a block with this block-name has not previously been seen, add it.
+ * If a block with this block-name has been seen, it must be identical to
+ * the block currently being examined.
+ */
+ if (existing_block == NULL) {
+ link_uniform_block_active *const b =
+ rzalloc(mem_ctx, struct link_uniform_block_active);
+
+ b->type = block_type;
+ b->has_instance_name = var->is_interface_instance();
+ b->is_shader_storage = var->data.mode == ir_var_shader_storage;
+
+ if (var->data.explicit_binding) {
+ b->has_binding = true;
+ b->binding = var->data.binding;
+ } else {
+ b->has_binding = false;
+ b->binding = 0;
+ }
+
+ _mesa_hash_table_insert(ht, var->get_interface_type()->name, (void *) b);
+ return b;
+ } else {
+ link_uniform_block_active *const b =
+ (link_uniform_block_active *) existing_block->data;
+
+ if (b->type != block_type
+ || b->has_instance_name != var->is_interface_instance())
+ return NULL;
+ else
+ return b;
+ }
+
+ assert(!"Should not get here.");
+ return NULL;
+}
+
+/* For arrays of arrays this function will give us a middle ground between
+ * detecting inactive uniform blocks and structuring them in a way that makes
+ * it easy to calculate the offset for indirect indexing.
+ *
+ * For example, given the shader:
+ *
+ * uniform ArraysOfArraysBlock
+ * {
+ * vec4 a;
+ * } i[3][4][5];
+ *
+ * void main()
+ * {
+ * vec4 b = i[0][1][1].a;
+ * gl_Position = i[2][2][3].a + b;
+ * }
+ *
+ * There are only 2 active blocks above, but for the sake of indirect indexing
+ * and to avoid overcomplicating the code we will end up with a count of 8.
+ * Each dimension has 2 different indices counted, so we end up with 2*2*2.
+ */
+static struct uniform_block_array_elements **
+process_arrays(void *mem_ctx, ir_dereference_array *ir,
+ struct link_uniform_block_active *block)
+{
+ if (ir) {
+ struct uniform_block_array_elements **ub_array_ptr =
+ process_arrays(mem_ctx, ir->array->as_dereference_array(), block);
+ if (*ub_array_ptr == NULL) {
+ *ub_array_ptr = rzalloc(mem_ctx, struct uniform_block_array_elements);
+ (*ub_array_ptr)->ir = ir;
+ }
+
+ struct uniform_block_array_elements *ub_array = *ub_array_ptr;
+ ir_constant *c = ir->array_index->as_constant();
+ if (c) {
+ /* Index is a constant, so mark just that element used,
+ * if not already.
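+ * (With the ArraysOfArraysBlock example above, only the indices 0 and
+ * 2 would be recorded for the outermost dimension.)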
+ */
+ const unsigned idx = c->get_uint_component(0);
+
+ unsigned i;
+ for (i = 0; i < ub_array->num_array_elements; i++) {
+ if (ub_array->array_elements[i] == idx)
+ break;
+ }
+
+ assert(i <= ub_array->num_array_elements);
+
+ if (i == ub_array->num_array_elements) {
+ ub_array->array_elements = reralloc(mem_ctx,
+ ub_array->array_elements,
+ unsigned,
+ ub_array->num_array_elements + 1);
+
+ ub_array->array_elements[ub_array->num_array_elements] = idx;
+
+ ub_array->num_array_elements++;
+ }
+ } else {
+ /* The array index is not a constant,
+ * so mark the entire array used.
+ */
+ assert(ir->array->type->is_array());
+ if (ub_array->num_array_elements < ir->array->type->length) {
+ ub_array->num_array_elements = ir->array->type->length;
+ ub_array->array_elements = reralloc(mem_ctx,
+ ub_array->array_elements,
+ unsigned,
+ ub_array->num_array_elements);
+
+ for (unsigned i = 0; i < ub_array->num_array_elements; i++) {
+ ub_array->array_elements[i] = i;
+ }
+ }
+ }
+ return &ub_array->array;
+ } else {
+ return &block->array;
+ }
+}
+
+ir_visitor_status
+link_uniform_block_active_visitor::visit(ir_variable *var)
+{
+ if (!var->is_in_buffer_block())
+ return visit_continue;
+
+ /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec says:
+ *
+ * "All members of a named uniform block declared with a shared or
+ * std140 layout qualifier are considered active, even if they are not
+ * referenced in any shader in the program. The uniform block itself is
+ * also considered active, even if no member of the block is
+ * referenced."
+ */
+ if (var->get_interface_type()->interface_packing ==
+ GLSL_INTERFACE_PACKING_PACKED)
+ return visit_continue;
+
+ /* Process the block. Bail if there was an error.
+ */
+ link_uniform_block_active *const b =
+ process_block(this->mem_ctx, this->ht, var);
+ if (b == NULL) {
+ linker_error(this->prog,
+ "uniform block `%s' has mismatching definitions",
+ var->get_interface_type()->name);
+ this->success = false;
+ return visit_stop;
+ }
+
+ assert(b->array == NULL);
+ assert(b->type != NULL);
+ assert(!b->type->is_array() || b->has_instance_name);
+
+ /* For uniform block arrays declared with a shared or std140 layout
+ * qualifier, mark all their instances as used.
+ */
+ const glsl_type *type = b->type;
+ struct uniform_block_array_elements **ub_array = &b->array;
+ while (type->is_array()) {
+ assert(b->type->length > 0);
+
+ *ub_array = rzalloc(this->mem_ctx, struct uniform_block_array_elements);
+ (*ub_array)->num_array_elements = type->length;
+ (*ub_array)->array_elements = reralloc(this->mem_ctx,
+ (*ub_array)->array_elements,
+ unsigned,
+ (*ub_array)->num_array_elements);
+
+ for (unsigned i = 0; i < (*ub_array)->num_array_elements; i++) {
+ (*ub_array)->array_elements[i] = i;
+ }
+ ub_array = &(*ub_array)->array;
+ type = type->fields.array;
+ }
+
+ return visit_continue;
+}
+
+ir_visitor_status
+link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir)
+{
+ /* cycle through arrays of arrays */
+ ir_dereference_array *base_ir = ir;
+ while (base_ir->array->ir_type == ir_type_dereference_array)
+ base_ir = base_ir->array->as_dereference_array();
+
+ ir_dereference_variable *const d =
+ base_ir->array->as_dereference_variable();
+ ir_variable *const var = (d == NULL) ? NULL : d->var;
+
+ /* If the r-value being dereferenced is not a variable (e.g., a field of a
+ * structure) or is not a uniform block instance, continue.
+ *
+ * WARNING: It is not enough for the variable to be part of a uniform block.
+ * It must represent the entire block. Arrays (or matrices) inside blocks + * that lack an instance name are handled by the ir_dereference_variable + * function. + */ + if (var == NULL + || !var->is_in_buffer_block() + || !var->is_interface_instance()) + return visit_continue; + + /* Process the block. Bail if there was an error. + */ + link_uniform_block_active *const b = + process_block(this->mem_ctx, this->ht, var); + if (b == NULL) { + linker_error(prog, + "uniform block `%s' has mismatching definitions", + var->get_interface_type()->name); + this->success = false; + return visit_stop; + } + + /* Block arrays must be declared with an instance name. + */ + assert(b->has_instance_name); + assert(b->type != NULL); + + /* If the block array was declared with a shared or + * std140 layout qualifier, all its instances have been already marked + * as used in link_uniform_block_active_visitor::visit(ir_variable *). + */ + if (var->get_interface_type()->interface_packing == + GLSL_INTERFACE_PACKING_PACKED) { + b->var = var; + process_arrays(this->mem_ctx, ir, b); + } + + return visit_continue_with_parent; +} + +ir_visitor_status +link_uniform_block_active_visitor::visit(ir_dereference_variable *ir) +{ + ir_variable *var = ir->var; + + if (!var->is_in_buffer_block()) + return visit_continue; + + assert(!var->is_interface_instance() || !var->type->is_array()); + + /* Process the block. Bail if there was an error. + */ + link_uniform_block_active *const b = + process_block(this->mem_ctx, this->ht, var); + if (b == NULL) { + linker_error(this->prog, + "uniform block `%s' has mismatching definitions", + var->get_interface_type()->name); + this->success = false; + return visit_stop; + } + + assert(b->array == NULL); + assert(b->type != NULL); + + return visit_continue; +} diff --git a/src/compiler/glsl/link_uniform_block_active_visitor.h b/src/compiler/glsl/link_uniform_block_active_visitor.h new file mode 100644 index 00000000000..afb52c14a37 --- /dev/null +++ b/src/compiler/glsl/link_uniform_block_active_visitor.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once +#ifndef LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H +#define LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H + +#include "ir.h" +#include "util/hash_table.h" + +struct uniform_block_array_elements { + unsigned *array_elements; + unsigned num_array_elements; + + ir_dereference_array *ir; + + struct uniform_block_array_elements *array; +}; + +struct link_uniform_block_active { + const glsl_type *type; + ir_variable *var; + + struct uniform_block_array_elements *array; + + unsigned binding; + + bool has_instance_name; + bool has_binding; + bool is_shader_storage; +}; + +class link_uniform_block_active_visitor : public ir_hierarchical_visitor { +public: + link_uniform_block_active_visitor(void *mem_ctx, struct hash_table *ht, + struct gl_shader_program *prog) + : success(true), prog(prog), ht(ht), mem_ctx(mem_ctx) + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit(ir_dereference_variable *); + virtual ir_visitor_status visit(ir_variable *); + + bool success; + +private: + struct gl_shader_program *prog; + struct hash_table *ht; + void *mem_ctx; +}; + +#endif /* LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H */ diff --git a/src/compiler/glsl/link_uniform_blocks.cpp b/src/compiler/glsl/link_uniform_blocks.cpp new file mode 100644 index 00000000000..7d755765852 --- /dev/null +++ b/src/compiler/glsl/link_uniform_blocks.cpp @@ -0,0 +1,472 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include "main/core.h"
+#include "ir.h"
+#include "linker.h"
+#include "ir_uniform.h"
+#include "link_uniform_block_active_visitor.h"
+#include "util/hash_table.h"
+#include "program.h"
+
+namespace {
+
+class ubo_visitor : public program_resource_visitor {
+public:
+ ubo_visitor(void *mem_ctx, gl_uniform_buffer_variable *variables,
+ unsigned num_variables)
+ : index(0), offset(0), buffer_size(0), variables(variables),
+ num_variables(num_variables), mem_ctx(mem_ctx), is_array_instance(false)
+ {
+ /* empty */
+ }
+
+ void process(const glsl_type *type, const char *name)
+ {
+ this->offset = 0;
+ this->buffer_size = 0;
+ this->is_array_instance = strchr(name, ']') != NULL;
+ this->program_resource_visitor::process(type, name);
+ }
+
+ unsigned index;
+ unsigned offset;
+ unsigned buffer_size;
+ gl_uniform_buffer_variable *variables;
+ unsigned num_variables;
+ void *mem_ctx;
+ bool is_array_instance;
+
+private:
+ virtual void visit_field(const glsl_type *type, const char *name,
+ bool row_major)
+ {
+ (void) type;
+ (void) name;
+ (void) row_major;
+ assert(!"Should not get here.");
+ }
+
+ virtual void enter_record(const glsl_type *type, const char *,
+ bool row_major, const unsigned packing) {
+ assert(type->is_record());
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->offset = glsl_align(
+ this->offset, type->std430_base_alignment(row_major));
+ else
+ this->offset = glsl_align(
+ this->offset, type->std140_base_alignment(row_major));
+ }
+
+ virtual void leave_record(const glsl_type *type, const char *,
+ bool row_major, const unsigned packing) {
+ assert(type->is_record());
+
+ /* If this is the last field of a structure, apply rule #9. The
+ * GL_ARB_uniform_buffer_object spec says:
+ *
+ * "The structure may have padding at the end; the base offset of
+ * the member following the sub-structure is rounded up to the next
+ * multiple of the base alignment of the structure."
+ */
+ if (packing == GLSL_INTERFACE_PACKING_STD430)
+ this->offset = glsl_align(
+ this->offset, type->std430_base_alignment(row_major));
+ else
+ this->offset = glsl_align(
+ this->offset, type->std140_base_alignment(row_major));
+ }
+
+ virtual void visit_field(const glsl_type *type, const char *name,
+ bool row_major, const glsl_type *,
+ const unsigned packing,
+ bool last_field)
+ {
+ assert(this->index < this->num_variables);
+
+ gl_uniform_buffer_variable *v = &this->variables[this->index++];
+
+ v->Name = ralloc_strdup(mem_ctx, name);
+ v->Type = type;
+ v->RowMajor = type->without_array()->is_matrix() && row_major;
+
+ if (this->is_array_instance) {
+ v->IndexName = ralloc_strdup(mem_ctx, name);
+
+ char *open_bracket = strchr(v->IndexName, '[');
+ assert(open_bracket != NULL);
+
+ /* Find the '.' that follows the closing ']'. Assert on the strchr()
+ * result itself: subtracting 1 first would make the assertion pass
+ * even when no '.' is found, since (NULL - 1) is never NULL.
+ */
+ char *close_bracket = strchr(open_bracket, '.');
+ assert(close_bracket != NULL);
+ close_bracket -= 1;
+
+ /* Length of the tail without the ']' but with the NUL.
+ */
+ unsigned len = strlen(close_bracket + 1) + 1;
+
+ memmove(open_bracket, close_bracket + 1, len);
+ } else {
+ v->IndexName = v->Name;
+ }
+
+ unsigned alignment = 0;
+ unsigned size = 0;
+
+ /* From ARB_program_interface_query:
+ *
+ * "If the final member of an active shader storage block is array
+ * with no declared size, the minimum buffer size is computed
+ * assuming the array was declared as an array with one element."
+ *
+ * For that reason, we use the base type of the unsized array to calculate
+ * its size. We don't need to check if the unsized array is the last member
+ * of a shader storage block (that check was already done by the parser).
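+ *
+ * As a hypothetical illustration (not part of this patch), for
+ *
+ * buffer B { vec4 head; float tail[]; };
+ *
+ * the minimum size contribution of `tail' is computed as if it had
+ * been declared `float tail[1]'.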
+ */ + const glsl_type *type_for_size = type; + if (type->is_unsized_array()) { + assert(last_field); + type_for_size = type->without_array(); + } + + if (packing == GLSL_INTERFACE_PACKING_STD430) { + alignment = type->std430_base_alignment(v->RowMajor); + size = type_for_size->std430_size(v->RowMajor); + } else { + alignment = type->std140_base_alignment(v->RowMajor); + size = type_for_size->std140_size(v->RowMajor); + } + + this->offset = glsl_align(this->offset, alignment); + v->Offset = this->offset; + + this->offset += size; + + /* From the GL_ARB_uniform_buffer_object spec: + * + * "For uniform blocks laid out according to [std140] rules, the + * minimum buffer object size returned by the + * UNIFORM_BLOCK_DATA_SIZE query is derived by taking the offset of + * the last basic machine unit consumed by the last uniform of the + * uniform block (including any end-of-array or end-of-structure + * padding), adding one, and rounding up to the next multiple of + * the base alignment required for a vec4." + */ + this->buffer_size = glsl_align(this->offset, 16); + } +}; + +class count_block_size : public program_resource_visitor { +public: + count_block_size() : num_active_uniforms(0) + { + /* empty */ + } + + unsigned num_active_uniforms; + +private: + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + (void) type; + (void) name; + (void) row_major; + this->num_active_uniforms++; + } +}; + +} /* anonymous namespace */ + +struct block { + const glsl_type *type; + bool has_instance_name; +}; + +static void +process_block_array(struct uniform_block_array_elements *ub_array, char **name, + size_t name_length, gl_uniform_block *blocks, + ubo_visitor *parcel, gl_uniform_buffer_variable *variables, + const struct link_uniform_block_active *const b, + unsigned *block_index, unsigned *binding_offset, + struct gl_context *ctx, struct gl_shader_program *prog) +{ + if (ub_array) { + for (unsigned j = 0; j < ub_array->num_array_elements; j++) { + size_t new_length = name_length; + + /* Append the subscript to the current variable name */ + ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", + ub_array->array_elements[j]); + + process_block_array(ub_array->array, name, new_length, blocks, + parcel, variables, b, block_index, + binding_offset, ctx, prog); + } + } else { + unsigned i = *block_index; + const glsl_type *type = b->type->without_array(); + + blocks[i].Name = ralloc_strdup(blocks, *name); + blocks[i].Uniforms = &variables[(*parcel).index]; + + /* The GL_ARB_shading_language_420pack spec says: + * + * "If the binding identifier is used with a uniform block + * instanced as an array then the first element of the array + * takes the specified block binding and each subsequent + * element takes the next consecutive uniform block binding + * point." + */ + blocks[i].Binding = (b->has_binding) ? 
b->binding + *binding_offset : 0;
+
+ blocks[i].UniformBufferSize = 0;
+ blocks[i]._Packing = gl_uniform_block_packing(type->interface_packing);
+
+ parcel->process(type, blocks[i].Name);
+
+ blocks[i].UniformBufferSize = parcel->buffer_size;
+
+ /* Check that the SSBO size does not exceed the maximum supported
+ * size for an SSBO.
+ */
+ if (b->is_shader_storage &&
+ parcel->buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+ linker_error(prog, "shader storage block `%s' has size %d, "
+ "which is larger than the maximum allowed (%d)",
+ b->type->name,
+ parcel->buffer_size,
+ ctx->Const.MaxShaderStorageBlockSize);
+ }
+ blocks[i].NumUniforms =
+ (unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms);
+ blocks[i].IsShaderStorage = b->is_shader_storage;
+
+ *block_index = *block_index + 1;
+ *binding_offset = *binding_offset + 1;
+ }
+}
+
+/* This function resizes the array types of the block so that later we can use
+ * this new size to correctly calculate the offset for indirect indexing.
+ */
+static const glsl_type *
+resize_block_array(const glsl_type *type,
+ struct uniform_block_array_elements *ub_array)
+{
+ if (type->is_array()) {
+ struct uniform_block_array_elements *child_array =
+ type->fields.array->is_array() ? ub_array->array : NULL;
+ const glsl_type *new_child_type =
+ resize_block_array(type->fields.array, child_array);
+
+ const glsl_type *new_type =
+ glsl_type::get_array_instance(new_child_type,
+ ub_array->num_array_elements);
+ ub_array->ir->array->type = new_type;
+ return new_type;
+ } else {
+ return type;
+ }
+}
+
+unsigned
+link_uniform_blocks(void *mem_ctx,
+ struct gl_context *ctx,
+ struct gl_shader_program *prog,
+ struct gl_shader **shader_list,
+ unsigned num_shaders,
+ struct gl_uniform_block **blocks_ret)
+{
+ /* This hash table will track all of the uniform blocks that have been
+ * encountered. Since blocks with the same block-name must be the same,
+ * the hash is organized by block-name.
+ */
+ struct hash_table *block_hash =
+ _mesa_hash_table_create(mem_ctx, _mesa_key_hash_string,
+ _mesa_key_string_equal);
+
+ if (block_hash == NULL) {
+ _mesa_error_no_memory(__func__);
+ linker_error(prog, "out of memory\n");
+ return 0;
+ }
+
+ /* Determine which uniform blocks are active.
+ */
+ link_uniform_block_active_visitor v(mem_ctx, block_hash, prog);
+ for (unsigned i = 0; i < num_shaders; i++) {
+ visit_list_elements(&v, shader_list[i]->ir);
+ }
+
+ /* Count the number of active uniform blocks. Count the total number of
+ * active slots in those uniform blocks.
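+ * An array of blocks contributes one block per element, hence the
+ * arrays_of_arrays_size() accounting below.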
+    */
+   unsigned num_blocks = 0;
+   unsigned num_variables = 0;
+   count_block_size block_size;
+   struct hash_entry *entry;
+
+   hash_table_foreach (block_hash, entry) {
+      struct link_uniform_block_active *const b =
+         (struct link_uniform_block_active *) entry->data;
+
+      assert((b->array != NULL) == b->type->is_array());
+
+      if (b->array != NULL &&
+          (b->type->without_array()->interface_packing ==
+           GLSL_INTERFACE_PACKING_PACKED)) {
+         b->type = resize_block_array(b->type, b->array);
+         b->var->type = b->type;
+      }
+
+      block_size.num_active_uniforms = 0;
+      block_size.process(b->type->without_array(), "");
+
+      if (b->array != NULL) {
+         unsigned aoa_size = b->type->arrays_of_arrays_size();
+         num_blocks += aoa_size;
+         num_variables += aoa_size * block_size.num_active_uniforms;
+      } else {
+         num_blocks++;
+         num_variables += block_size.num_active_uniforms;
+      }
+   }
+
+   if (num_blocks == 0) {
+      assert(num_variables == 0);
+      _mesa_hash_table_destroy(block_hash, NULL);
+      return 0;
+   }
+
+   assert(num_variables != 0);
+
+   /* Allocate storage to hold all of the information related to uniform
+    * blocks that can be queried through the API.
+    */
+   gl_uniform_block *blocks =
+      ralloc_array(mem_ctx, gl_uniform_block, num_blocks);
+   gl_uniform_buffer_variable *variables =
+      ralloc_array(blocks, gl_uniform_buffer_variable, num_variables);
+
+   /* Add each variable from each uniform block to the API tracking
+    * structures.
+    */
+   unsigned i = 0;
+   ubo_visitor parcel(blocks, variables, num_variables);
+
+   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140)
+                 == unsigned(ubo_packing_std140));
+   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED)
+                 == unsigned(ubo_packing_shared));
+   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED)
+                 == unsigned(ubo_packing_packed));
+   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430)
+                 == unsigned(ubo_packing_std430));
+
+   hash_table_foreach (block_hash, entry) {
+      const struct link_uniform_block_active *const b =
+         (const struct link_uniform_block_active *) entry->data;
+      const glsl_type *block_type = b->type;
+
+      if (b->array != NULL) {
+         unsigned binding_offset = 0;
+         char *name = ralloc_strdup(NULL, block_type->without_array()->name);
+         size_t name_length = strlen(name);
+
+         assert(b->has_instance_name);
+         process_block_array(b->array, &name, name_length, blocks, &parcel,
+                             variables, b, &i, &binding_offset, ctx, prog);
+         ralloc_free(name);
+      } else {
+         blocks[i].Name = ralloc_strdup(blocks, block_type->name);
+         blocks[i].Uniforms = &variables[parcel.index];
+         blocks[i].Binding = (b->has_binding) ? b->binding : 0;
+         blocks[i].UniformBufferSize = 0;
+         blocks[i]._Packing =
+            gl_uniform_block_packing(block_type->interface_packing);
+
+         parcel.process(block_type,
+                        b->has_instance_name ?
block_type->name : "");
+
+         blocks[i].UniformBufferSize = parcel.buffer_size;
+
+         /* Check that the SSBO size does not exceed the maximum supported size */
+         if (b->is_shader_storage &&
+             parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
+            linker_error(prog, "shader storage block `%s' has size %d, "
+                         "which is larger than the maximum allowed (%d)",
+                         block_type->name,
+                         parcel.buffer_size,
+                         ctx->Const.MaxShaderStorageBlockSize);
+         }
+         blocks[i].NumUniforms =
+            (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
+
+         blocks[i].IsShaderStorage = b->is_shader_storage;
+
+         i++;
+      }
+   }
+
+   assert(parcel.index == num_variables);
+
+   _mesa_hash_table_destroy(block_hash, NULL);
+
+   *blocks_ret = blocks;
+   return num_blocks;
+}
+
+bool
+link_uniform_blocks_are_compatible(const gl_uniform_block *a,
+                                   const gl_uniform_block *b)
+{
+   assert(strcmp(a->Name, b->Name) == 0);
+
+   /* Page 35 (page 42 of the PDF) in section 4.3.7 of the GLSL 1.50 spec says:
+    *
+    *    "Matched block names within an interface (as defined above) must
+    *    match in terms of having the same number of declarations with the
+    *    same sequence of types and the same sequence of member names, as
+    *    well as having the same member-wise layout qualification....if a
+    *    matching block is declared as an array, then the array sizes must
+    *    also match... Any mismatch will generate a link error."
+    *
+    * Arrays are not yet supported, so there is no check for that.
+    */
+   if (a->NumUniforms != b->NumUniforms)
+      return false;
+
+   if (a->_Packing != b->_Packing)
+      return false;
+
+   for (unsigned i = 0; i < a->NumUniforms; i++) {
+      if (strcmp(a->Uniforms[i].Name, b->Uniforms[i].Name) != 0)
+         return false;
+
+      if (a->Uniforms[i].Type != b->Uniforms[i].Type)
+         return false;
+
+      if (a->Uniforms[i].RowMajor != b->Uniforms[i].RowMajor)
+         return false;
+   }
+
+   return true;
+}
diff --git a/src/compiler/glsl/link_uniform_initializers.cpp b/src/compiler/glsl/link_uniform_initializers.cpp
new file mode 100644
index 00000000000..58d21e5125e
--- /dev/null
+++ b/src/compiler/glsl/link_uniform_initializers.cpp
@@ -0,0 +1,355 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "main/core.h"
+#include "ir.h"
+#include "linker.h"
+#include "ir_uniform.h"
+
+/* These functions are put in a "private" namespace instead of being marked
+ * static so that the unit tests can access them.
See + * http://code.google.com/p/googletest/wiki/AdvancedGuide#Testing_Private_Code + */ +namespace linker { + +gl_uniform_storage * +get_storage(gl_uniform_storage *storage, unsigned num_storage, + const char *name) +{ + for (unsigned int i = 0; i < num_storage; i++) { + if (strcmp(name, storage[i].name) == 0) + return &storage[i]; + } + + return NULL; +} + +static unsigned +get_uniform_block_index(const gl_shader_program *shProg, + const char *uniformBlockName) +{ + for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { + if (!strcmp(shProg->BufferInterfaceBlocks[i].Name, uniformBlockName)) + return i; + } + + return GL_INVALID_INDEX; +} + +void +copy_constant_to_storage(union gl_constant_value *storage, + const ir_constant *val, + const enum glsl_base_type base_type, + const unsigned int elements, + unsigned int boolean_true) +{ + for (unsigned int i = 0; i < elements; i++) { + switch (base_type) { + case GLSL_TYPE_UINT: + storage[i].u = val->value.u[i]; + break; + case GLSL_TYPE_INT: + case GLSL_TYPE_SAMPLER: + storage[i].i = val->value.i[i]; + break; + case GLSL_TYPE_FLOAT: + storage[i].f = val->value.f[i]; + break; + case GLSL_TYPE_DOUBLE: + /* XXX need to check on big-endian */ + storage[i * 2].u = *(uint32_t *)&val->value.d[i]; + storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1); + break; + case GLSL_TYPE_BOOL: + storage[i].b = val->value.b[i] ? boolean_true : 0; + break; + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_IMAGE: + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_VOID: + case GLSL_TYPE_SUBROUTINE: + case GLSL_TYPE_ERROR: + /* All other types should have already been filtered by other + * paths in the caller. + */ + assert(!"Should not get here."); + break; + } + } +} + +/** + * Initialize an opaque uniform from the value of an explicit binding + * qualifier specified in the shader. Atomic counters are different because + * they have no storage and should be handled elsewhere. + */ +void +set_opaque_binding(void *mem_ctx, gl_shader_program *prog, + const glsl_type *type, const char *name, int *binding) +{ + + if (type->is_array() && type->fields.array->is_array()) { + const glsl_type *const element_type = type->fields.array; + + for (unsigned int i = 0; i < type->length; i++) { + const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i); + + set_opaque_binding(mem_ctx, prog, element_type, + element_name, binding); + } + } else { + struct gl_uniform_storage *const storage = + get_storage(prog->UniformStorage, prog->NumUniformStorage, name); + + if (storage == NULL) { + assert(storage != NULL); + return; + } + + const unsigned elements = MAX2(storage->array_elements, 1); + + /* Section 4.4.4 (Opaque-Uniform Layout Qualifiers) of the GLSL 4.20 spec + * says: + * + * "If the binding identifier is used with an array, the first element + * of the array takes the specified unit and each subsequent element + * takes the next consecutive unit." 
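+       *
+       * As a hypothetical illustration, given
+       *
+       *    layout(binding = 2) uniform sampler2D samps[3];
+       *
+       * the loop below stores unit 2 for samps[0], unit 3 for samps[1],
+       * and unit 4 for samps[2].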
+ */ + for (unsigned int i = 0; i < elements; i++) { + storage->storage[i].i = (*binding)++; + } + + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + gl_shader *shader = prog->_LinkedShaders[sh]; + + if (shader) { + if (storage->type->base_type == GLSL_TYPE_SAMPLER && + storage->opaque[sh].active) { + for (unsigned i = 0; i < elements; i++) { + const unsigned index = storage->opaque[sh].index + i; + shader->SamplerUnits[index] = storage->storage[i].i; + } + + } else if (storage->type->base_type == GLSL_TYPE_IMAGE && + storage->opaque[sh].active) { + for (unsigned i = 0; i < elements; i++) { + const unsigned index = storage->opaque[sh].index + i; + shader->ImageUnits[index] = storage->storage[i].i; + } + } + } + } + + storage->initialized = true; + } +} + +void +set_block_binding(gl_shader_program *prog, const char *block_name, int binding) +{ + const unsigned block_index = get_uniform_block_index(prog, block_name); + + if (block_index == GL_INVALID_INDEX) { + assert(block_index != GL_INVALID_INDEX); + return; + } + + /* This is a field of a UBO. val is the binding index. */ + for (int i = 0; i < MESA_SHADER_STAGES; i++) { + int stage_index = prog->InterfaceBlockStageIndex[i][block_index]; + + if (stage_index != -1) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + sh->BufferInterfaceBlocks[stage_index].Binding = binding; + } + } +} + +void +set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, + const char *name, const glsl_type *type, + ir_constant *val, unsigned int boolean_true) +{ + const glsl_type *t_without_array = type->without_array(); + if (type->is_record()) { + ir_constant *field_constant; + + field_constant = (ir_constant *)val->components.get_head(); + + for (unsigned int i = 0; i < type->length; i++) { + const glsl_type *field_type = type->fields.structure[i].type; + const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, + type->fields.structure[i].name); + set_uniform_initializer(mem_ctx, prog, field_name, + field_type, field_constant, boolean_true); + field_constant = (ir_constant *)field_constant->next; + } + return; + } else if (t_without_array->is_record() || + (type->is_array() && type->fields.array->is_array())) { + const glsl_type *const element_type = type->fields.array; + + for (unsigned int i = 0; i < type->length; i++) { + const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i); + + set_uniform_initializer(mem_ctx, prog, element_name, + element_type, val->array_elements[i], + boolean_true); + } + return; + } + + struct gl_uniform_storage *const storage = + get_storage(prog->UniformStorage, + prog->NumUniformStorage, + name); + if (storage == NULL) { + assert(storage != NULL); + return; + } + + if (val->type->is_array()) { + const enum glsl_base_type base_type = + val->array_elements[0]->type->base_type; + const unsigned int elements = val->array_elements[0]->type->components(); + unsigned int idx = 0; + unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 
2 : 1; + + assert(val->type->length >= storage->array_elements); + for (unsigned int i = 0; i < storage->array_elements; i++) { + copy_constant_to_storage(& storage->storage[idx], + val->array_elements[i], + base_type, + elements, + boolean_true); + + idx += elements * dmul; + } + } else { + copy_constant_to_storage(storage->storage, + val, + val->type->base_type, + val->type->components(), + boolean_true); + + if (storage->type->is_sampler()) { + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + gl_shader *shader = prog->_LinkedShaders[sh]; + + if (shader && storage->opaque[sh].active) { + unsigned index = storage->opaque[sh].index; + + shader->SamplerUnits[index] = storage->storage[0].i; + } + } + } + } + + storage->initialized = true; +} +} + +void +link_set_uniform_initializers(struct gl_shader_program *prog, + unsigned int boolean_true) +{ + void *mem_ctx = NULL; + + for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *shader = prog->_LinkedShaders[i]; + + if (shader == NULL) + continue; + + foreach_in_list(ir_instruction, node, shader->ir) { + ir_variable *const var = node->as_variable(); + + if (!var || (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_shader_storage)) + continue; + + if (!mem_ctx) + mem_ctx = ralloc_context(NULL); + + if (var->data.explicit_binding) { + const glsl_type *const type = var->type; + + if (type->without_array()->is_sampler() || + type->without_array()->is_image()) { + int binding = var->data.binding; + linker::set_opaque_binding(mem_ctx, prog, var->type, + var->name, &binding); + } else if (var->is_in_buffer_block()) { + const glsl_type *const iface_type = var->get_interface_type(); + + /* If the variable is an array and it is an interface instance, + * we need to set the binding for each array element. Just + * checking that the variable is an array is not sufficient. + * The variable could be an array element of a uniform block + * that lacks an instance name. For example: + * + * uniform U { + * float f[4]; + * }; + * + * In this case "f" would pass is_in_buffer_block (above) and + * type->is_array(), but it will fail is_interface_instance(). + */ + if (var->is_interface_instance() && var->type->is_array()) { + for (unsigned i = 0; i < var->type->length; i++) { + const char *name = + ralloc_asprintf(mem_ctx, "%s[%u]", iface_type->name, i); + + /* Section 4.4.3 (Uniform Block Layout Qualifiers) of the + * GLSL 4.20 spec says: + * + * "If the binding identifier is used with a uniform + * block instanced as an array then the first element + * of the array takes the specified block binding and + * each subsequent element takes the next consecutive + * uniform block binding point." + */ + linker::set_block_binding(prog, name, + var->data.binding + i); + } + } else { + linker::set_block_binding(prog, iface_type->name, + var->data.binding); + } + } else if (type->contains_atomic()) { + /* we don't actually need to do anything. 
*/ + } else { + assert(!"Explicit binding not on a sampler, UBO or atomic."); + } + } else if (var->constant_initializer) { + linker::set_uniform_initializer(mem_ctx, prog, var->name, + var->type, var->constant_initializer, + boolean_true); + } + } + } + + ralloc_free(mem_ctx); +} diff --git a/src/compiler/glsl/link_uniforms.cpp b/src/compiler/glsl/link_uniforms.cpp new file mode 100644 index 00000000000..33b2d4c8646 --- /dev/null +++ b/src/compiler/glsl/link_uniforms.cpp @@ -0,0 +1,1330 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "main/core.h" +#include "ir.h" +#include "linker.h" +#include "ir_uniform.h" +#include "glsl_symbol_table.h" +#include "program/hash_table.h" +#include "program.h" +#include "util/hash_table.h" + +/** + * \file link_uniforms.cpp + * Assign locations for GLSL uniforms. + * + * \author Ian Romanick + */ + +/** + * Used by linker to indicate uniforms that have no location set. + */ +#define UNMAPPED_UNIFORM_LOC ~0u + +/** + * Count the backing storage requirements for a type + */ +static unsigned +values_for_type(const glsl_type *type) +{ + if (type->is_sampler()) { + return 1; + } else if (type->is_array() && type->fields.array->is_sampler()) { + return type->array_size(); + } else { + return type->component_slots(); + } +} + +void +program_resource_visitor::process(const glsl_type *type, const char *name) +{ + assert(type->without_array()->is_record() + || type->without_array()->is_interface()); + + unsigned record_array_count = 1; + char *name_copy = ralloc_strdup(NULL, name); + unsigned packing = type->interface_packing; + + recursion(type, &name_copy, strlen(name), false, NULL, packing, false, + record_array_count); + ralloc_free(name_copy); +} + +void +program_resource_visitor::process(ir_variable *var) +{ + unsigned record_array_count = 1; + const glsl_type *t = var->type; + const glsl_type *t_without_array = var->type->without_array(); + const bool row_major = + var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; + + const unsigned packing = var->get_interface_type() ? + var->get_interface_type()->interface_packing : + var->type->interface_packing; + + /* false is always passed for the row_major parameter to the other + * processing functions because no information is available to do + * otherwise. See the warning in linker.h. + */ + + /* Only strdup the name if we actually will need to modify it. 
*/ + if (var->data.from_named_ifc_block_array) { + /* lower_named_interface_blocks created this variable by lowering an + * interface block array to an array variable. For example if the + * original source code was: + * + * out Blk { vec4 bar } foo[3]; + * + * Then the variable is now: + * + * out vec4 bar[3]; + * + * We need to visit each array element using the names constructed like + * so: + * + * Blk[0].bar + * Blk[1].bar + * Blk[2].bar + */ + assert(t->is_array()); + const glsl_type *ifc_type = var->get_interface_type(); + char *name = ralloc_strdup(NULL, ifc_type->name); + size_t name_length = strlen(name); + for (unsigned i = 0; i < t->length; i++) { + size_t new_length = name_length; + ralloc_asprintf_rewrite_tail(&name, &new_length, "[%u].%s", i, + var->name); + /* Note: row_major is only meaningful for uniform blocks, and + * lowering is only applied to non-uniform interface blocks, so we + * can safely pass false for row_major. + */ + recursion(var->type, &name, new_length, row_major, NULL, packing, + false, record_array_count); + } + ralloc_free(name); + } else if (var->data.from_named_ifc_block_nonarray) { + /* lower_named_interface_blocks created this variable by lowering a + * named interface block (non-array) to an ordinary variable. For + * example if the original source code was: + * + * out Blk { vec4 bar } foo; + * + * Then the variable is now: + * + * out vec4 bar; + * + * We need to visit this variable using the name: + * + * Blk.bar + */ + const glsl_type *ifc_type = var->get_interface_type(); + char *name = ralloc_asprintf(NULL, "%s.%s", ifc_type->name, var->name); + /* Note: row_major is only meaningful for uniform blocks, and lowering + * is only applied to non-uniform interface blocks, so we can safely + * pass false for row_major. + */ + recursion(var->type, &name, strlen(name), row_major, NULL, packing, + false, record_array_count); + ralloc_free(name); + } else if (t_without_array->is_record() || + (t->is_array() && t->fields.array->is_array())) { + char *name = ralloc_strdup(NULL, var->name); + recursion(var->type, &name, strlen(name), row_major, NULL, packing, + false, record_array_count); + ralloc_free(name); + } else if (t_without_array->is_interface()) { + char *name = ralloc_strdup(NULL, t_without_array->name); + recursion(var->type, &name, strlen(name), row_major, NULL, packing, + false, record_array_count); + ralloc_free(name); + } else { + this->set_record_array_count(record_array_count); + this->visit_field(t, var->name, row_major, NULL, packing, false); + } +} + +void +program_resource_visitor::recursion(const glsl_type *t, char **name, + size_t name_length, bool row_major, + const glsl_type *record_type, + const unsigned packing, + bool last_field, + unsigned record_array_count) +{ + /* Records need to have each field processed individually. + * + * Arrays of records need to have each array element processed + * individually, then each field of the resulting array elements processed + * individually. + */ + if (t->is_record() || t->is_interface()) { + if (record_type == NULL && t->is_record()) + record_type = t; + + if (t->is_record()) + this->enter_record(t, *name, row_major, packing); + + for (unsigned i = 0; i < t->length; i++) { + const char *field = t->fields.structure[i].name; + size_t new_length = name_length; + + if (t->fields.structure[i].type->is_record()) + this->visit_field(&t->fields.structure[i]); + + /* Append '.field' to the current variable name. 
*/ + if (name_length == 0) { + ralloc_asprintf_rewrite_tail(name, &new_length, "%s", field); + } else { + ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); + } + + /* The layout of structures at the top level of the block is set + * during parsing. For matrices contained in multiple levels of + * structures in the block, the inner structures have no layout. + * These cases must potentially inherit the layout from the outer + * levels. + */ + bool field_row_major = row_major; + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(t->fields.structure[i].matrix_layout); + if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { + field_row_major = true; + } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { + field_row_major = false; + } + + recursion(t->fields.structure[i].type, name, new_length, + field_row_major, + record_type, + packing, + (i + 1) == t->length, record_array_count); + + /* Only the first leaf-field of the record gets called with the + * record type pointer. + */ + record_type = NULL; + } + + if (t->is_record()) { + (*name)[name_length] = '\0'; + this->leave_record(t, *name, row_major, packing); + } + } else if (t->without_array()->is_record() || + t->without_array()->is_interface() || + (t->is_array() && t->fields.array->is_array())) { + if (record_type == NULL && t->fields.array->is_record()) + record_type = t->fields.array; + + unsigned length = t->length; + /* Shader storage block unsized arrays: add subscript [0] to variable + * names */ + if (t->is_unsized_array()) + length = 1; + + record_array_count *= length; + + for (unsigned i = 0; i < length; i++) { + size_t new_length = name_length; + + /* Append the subscript to the current variable name */ + ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); + + recursion(t->fields.array, name, new_length, row_major, + record_type, + packing, + (i + 1) == t->length, record_array_count); + + /* Only the first leaf-field of the record gets called with the + * record type pointer. + */ + record_type = NULL; + } + } else { + this->set_record_array_count(record_array_count); + this->visit_field(t, *name, row_major, record_type, packing, last_field); + } +} + +void +program_resource_visitor::visit_field(const glsl_type *type, const char *name, + bool row_major, + const glsl_type *, + const unsigned, + bool /* last_field */) +{ + visit_field(type, name, row_major); +} + +void +program_resource_visitor::visit_field(const glsl_struct_field *field) +{ + (void) field; + /* empty */ +} + +void +program_resource_visitor::enter_record(const glsl_type *, const char *, bool, + const unsigned) +{ +} + +void +program_resource_visitor::leave_record(const glsl_type *, const char *, bool, + const unsigned) +{ +} + +void +program_resource_visitor::set_record_array_count(unsigned) +{ +} + +namespace { + +/** + * Class to help calculate the storage requirements for a set of uniforms + * + * As uniforms are added to the active set the number of active uniforms and + * the storage requirements for those uniforms are accumulated. The active + * uniforms are added to the hash table supplied to the constructor. + * + * If the same uniform is added multiple times (i.e., once for each shader + * target), it will only be accounted once. 
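+ *
+ * For example, a uniform declared in both the vertex and the fragment
+ * shader contributes a single entry to num_active_uniforms, but its
+ * storage requirements are still counted toward each stage's own totals
+ * (num_shader_uniform_components and friends).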
+ */ +class count_uniform_size : public program_resource_visitor { +public: + count_uniform_size(struct string_to_uint_map *map, + struct string_to_uint_map *hidden_map) + : num_active_uniforms(0), num_hidden_uniforms(0), num_values(0), + num_shader_samplers(0), num_shader_images(0), + num_shader_uniform_components(0), num_shader_subroutines(0), + is_ubo_var(false), is_shader_storage(false), map(map), + hidden_map(hidden_map) + { + /* empty */ + } + + void start_shader() + { + this->num_shader_samplers = 0; + this->num_shader_images = 0; + this->num_shader_uniform_components = 0; + this->num_shader_subroutines = 0; + } + + void process(ir_variable *var) + { + this->current_var = var; + this->is_ubo_var = var->is_in_buffer_block(); + this->is_shader_storage = var->is_in_shader_storage_block(); + if (var->is_interface_instance()) + program_resource_visitor::process(var->get_interface_type(), + var->get_interface_type()->name); + else + program_resource_visitor::process(var); + } + + /** + * Total number of active uniforms counted + */ + unsigned num_active_uniforms; + + unsigned num_hidden_uniforms; + + /** + * Number of data values required to back the storage for the active uniforms + */ + unsigned num_values; + + /** + * Number of samplers used + */ + unsigned num_shader_samplers; + + /** + * Number of images used + */ + unsigned num_shader_images; + + /** + * Number of uniforms used in the current shader + */ + unsigned num_shader_uniform_components; + + /** + * Number of subroutine uniforms used + */ + unsigned num_shader_subroutines; + + bool is_ubo_var; + bool is_shader_storage; + + struct string_to_uint_map *map; + +private: + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + assert(!type->without_array()->is_record()); + assert(!type->without_array()->is_interface()); + assert(!(type->is_array() && type->fields.array->is_array())); + + (void) row_major; + + /* Count the number of samplers regardless of whether the uniform is + * already in the hash table. The hash table prevents adding the same + * uniform for multiple shader targets, but in this case we want to + * count it for each shader target. + */ + const unsigned values = values_for_type(type); + if (type->contains_subroutine()) { + this->num_shader_subroutines += values; + } else if (type->contains_sampler()) { + this->num_shader_samplers += values; + } else if (type->contains_image()) { + this->num_shader_images += values; + + /* As drivers are likely to represent image uniforms as + * scalar indices, count them against the limit of uniform + * components in the default block. The spec allows image + * uniforms to use up no more than one scalar slot. + */ + if(!is_shader_storage) + this->num_shader_uniform_components += values; + } else { + /* Accumulate the total number of uniform slots used by this shader. + * Note that samplers do not count against this limit because they + * don't use any storage on current hardware. + */ + if (!is_ubo_var && !is_shader_storage) + this->num_shader_uniform_components += values; + } + + /* If the uniform is already in the map, there's nothing more to do. + */ + unsigned id; + if (this->map->get(id, name)) + return; + + if (this->current_var->data.how_declared == ir_var_hidden) { + this->hidden_map->put(this->num_hidden_uniforms, name); + this->num_hidden_uniforms++; + } else { + this->map->put(this->num_active_uniforms-this->num_hidden_uniforms, + name); + } + + /* Each leaf uniform occupies one entry in the list of active + * uniforms. 
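+       *
+       * For example, a hypothetical "uniform vec4 v[4];" is a single
+       * leaf uniform backed by sixteen float values, whereas
+       * "uniform struct { vec4 a; float b; } s;" yields two leaves,
+       * "s.a" and "s.b".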
+ */ + this->num_active_uniforms++; + this->num_values += values; + } + + struct string_to_uint_map *hidden_map; + + /** + * Current variable being processed. + */ + ir_variable *current_var; +}; + +} /* anonymous namespace */ + +/** + * Class to help parcel out pieces of backing storage to uniforms + * + * Each uniform processed has some range of the \c gl_constant_value + * structures associated with it. The association is done by finding + * the uniform in the \c string_to_uint_map and using the value from + * the map to connect that slot in the \c gl_uniform_storage table + * with the next available slot in the \c gl_constant_value array. + * + * \warning + * This class assumes that every uniform that will be processed is + * already in the \c string_to_uint_map. In addition, it assumes that + * the \c gl_uniform_storage and \c gl_constant_value arrays are "big + * enough." + */ +class parcel_out_uniform_storage : public program_resource_visitor { +public: + parcel_out_uniform_storage(struct string_to_uint_map *map, + struct gl_uniform_storage *uniforms, + union gl_constant_value *values) + : map(map), uniforms(uniforms), values(values) + { + } + + void start_shader(gl_shader_stage shader_type) + { + assert(shader_type < MESA_SHADER_STAGES); + this->shader_type = shader_type; + + this->shader_samplers_used = 0; + this->shader_shadow_samplers = 0; + this->next_sampler = 0; + this->next_image = 0; + this->next_subroutine = 0; + this->record_array_count = 1; + memset(this->targets, 0, sizeof(this->targets)); + } + + void set_and_process(struct gl_shader_program *prog, + ir_variable *var) + { + current_var = var; + field_counter = 0; + this->record_next_sampler = new string_to_uint_map; + + ubo_block_index = -1; + if (var->is_in_buffer_block()) { + if (var->is_interface_instance() && var->type->is_array()) { + unsigned l = strlen(var->get_interface_type()->name); + + for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { + if (strncmp(var->get_interface_type()->name, + prog->BufferInterfaceBlocks[i].Name, + l) == 0 + && prog->BufferInterfaceBlocks[i].Name[l] == '[') { + ubo_block_index = i; + break; + } + } + } else { + for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { + if (strcmp(var->get_interface_type()->name, + prog->BufferInterfaceBlocks[i].Name) == 0) { + ubo_block_index = i; + break; + } + } + } + assert(ubo_block_index != -1); + + /* Uniform blocks that were specified with an instance name must be + * handled a little bit differently. The name of the variable is the + * name used to reference the uniform block instead of being the name + * of a variable within the block. Therefore, searching for the name + * within the block will fail. + */ + if (var->is_interface_instance()) { + ubo_byte_offset = 0; + process(var->get_interface_type(), + var->get_interface_type()->name); + } else { + const struct gl_uniform_block *const block = + &prog->BufferInterfaceBlocks[ubo_block_index]; + + assert(var->data.location != -1); + + const struct gl_uniform_buffer_variable *const ubo_var = + &block->Uniforms[var->data.location]; + + ubo_byte_offset = ubo_var->Offset; + process(var); + } + } else { + /* Store any explicit location and reset data location so we can + * reuse this variable for storing the uniform slot number. 
+ */ + this->explicit_location = current_var->data.location; + current_var->data.location = -1; + + process(var); + } + delete this->record_next_sampler; + } + + int ubo_block_index; + int ubo_byte_offset; + gl_shader_stage shader_type; + +private: + void handle_samplers(const glsl_type *base_type, + struct gl_uniform_storage *uniform, const char *name) + { + if (base_type->is_sampler()) { + uniform->opaque[shader_type].active = true; + + /* Handle multiple samplers inside struct arrays */ + if (this->record_array_count > 1) { + unsigned inner_array_size = MAX2(1, uniform->array_elements); + char *name_copy = ralloc_strdup(NULL, name); + + /* Remove all array subscripts from the sampler name */ + char *str_start; + const char *str_end; + while((str_start = strchr(name_copy, '[')) && + (str_end = strchr(name_copy, ']'))) { + memmove(str_start, str_end + 1, 1 + strlen(str_end)); + } + + unsigned index = 0; + if (this->record_next_sampler->get(index, name_copy)) { + /* In this case, we've already seen this uniform so we just use + * the next sampler index recorded the last time we visited. + */ + uniform->opaque[shader_type].index = index; + index = inner_array_size + uniform->opaque[shader_type].index; + this->record_next_sampler->put(index, name_copy); + + ralloc_free(name_copy); + /* Return as everything else has already been initialised in a + * previous pass. + */ + return; + } else { + /* We've never seen this uniform before so we need to allocate + * enough indices to store it. + * + * Nested struct arrays behave like arrays of arrays so we need + * to increase the index by the total number of elements of the + * sampler in case there is more than one sampler inside the + * structs. This allows the offset to be easily calculated for + * indirect indexing. + */ + uniform->opaque[shader_type].index = this->next_sampler; + this->next_sampler += + inner_array_size * this->record_array_count; + + /* Store the next index for future passes over the struct array + */ + index = uniform->opaque[shader_type].index + inner_array_size; + this->record_next_sampler->put(index, name_copy); + ralloc_free(name_copy); + } + } else { + /* Increment the sampler by 1 for non-arrays and by the number of + * array elements for arrays. + */ + uniform->opaque[shader_type].index = this->next_sampler; + this->next_sampler += MAX2(1, uniform->array_elements); + } + + const gl_texture_index target = base_type->sampler_index(); + const unsigned shadow = base_type->sampler_shadow; + for (unsigned i = uniform->opaque[shader_type].index; + i < MIN2(this->next_sampler, MAX_SAMPLERS); + i++) { + this->targets[i] = target; + this->shader_samplers_used |= 1U << i; + this->shader_shadow_samplers |= shadow << i; + } + } + } + + void handle_images(const glsl_type *base_type, + struct gl_uniform_storage *uniform) + { + if (base_type->is_image()) { + uniform->opaque[shader_type].index = this->next_image; + uniform->opaque[shader_type].active = true; + + /* Increment the image index by 1 for non-arrays and by the + * number of array elements for arrays. + */ + this->next_image += MAX2(1, uniform->array_elements); + + } + } + + void handle_subroutines(const glsl_type *base_type, + struct gl_uniform_storage *uniform) + { + if (base_type->is_subroutine()) { + uniform->opaque[shader_type].index = this->next_subroutine; + uniform->opaque[shader_type].active = true; + + /* Increment the subroutine index by 1 for non-arrays and by the + * number of array elements for arrays. 
+ */ + this->next_subroutine += MAX2(1, uniform->array_elements); + + } + } + + virtual void set_record_array_count(unsigned record_array_count) + { + this->record_array_count = record_array_count; + } + + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + (void) type; + (void) name; + (void) row_major; + assert(!"Should not get here."); + } + + virtual void enter_record(const glsl_type *type, const char *, + bool row_major, const unsigned packing) { + assert(type->is_record()); + if (this->ubo_block_index == -1) + return; + if (packing == GLSL_INTERFACE_PACKING_STD430) + this->ubo_byte_offset = glsl_align( + this->ubo_byte_offset, type->std430_base_alignment(row_major)); + else + this->ubo_byte_offset = glsl_align( + this->ubo_byte_offset, type->std140_base_alignment(row_major)); + } + + virtual void leave_record(const glsl_type *type, const char *, + bool row_major, const unsigned packing) { + assert(type->is_record()); + if (this->ubo_block_index == -1) + return; + if (packing == GLSL_INTERFACE_PACKING_STD430) + this->ubo_byte_offset = glsl_align( + this->ubo_byte_offset, type->std430_base_alignment(row_major)); + else + this->ubo_byte_offset = glsl_align( + this->ubo_byte_offset, type->std140_base_alignment(row_major)); + } + + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major, const glsl_type *record_type, + const unsigned packing, + bool /* last_field */) + { + assert(!type->without_array()->is_record()); + assert(!type->without_array()->is_interface()); + assert(!(type->is_array() && type->fields.array->is_array())); + + unsigned id; + bool found = this->map->get(id, name); + assert(found); + + if (!found) + return; + + const glsl_type *base_type; + if (type->is_array()) { + this->uniforms[id].array_elements = type->length; + base_type = type->fields.array; + } else { + this->uniforms[id].array_elements = 0; + base_type = type; + } + + /* Initialise opaque data */ + this->uniforms[id].opaque[shader_type].index = ~0; + this->uniforms[id].opaque[shader_type].active = false; + + /* This assigns uniform indices to sampler and image uniforms. */ + handle_samplers(base_type, &this->uniforms[id], name); + handle_images(base_type, &this->uniforms[id]); + handle_subroutines(base_type, &this->uniforms[id]); + + /* For array of arrays or struct arrays the base location may have + * already been set so don't set it again. + */ + if (ubo_block_index == -1 && current_var->data.location == -1) { + current_var->data.location = id; + } + + /* If there is already storage associated with this uniform or if the + * uniform is set as builtin, it means that it was set while processing + * an earlier shader stage. For example, we may be processing the + * uniform in the fragment shader, but the uniform was already processed + * in the vertex shader. + */ + if (this->uniforms[id].storage != NULL || this->uniforms[id].builtin) { + return; + } + + /* Assign explicit locations. */ + if (current_var->data.explicit_location) { + /* Set sequential locations for struct fields. 
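+          * For example, a hypothetical
+          *
+          *    layout(location = 3) uniform struct { float a; float b; } s;
+          *
+          * gives "s.a" remap location 3 and "s.b" remap location 4 via
+          * field_counter.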
*/
+         if (current_var->type->without_array()->is_record() ||
+             current_var->type->is_array_of_arrays()) {
+            const unsigned entries = MAX2(1, this->uniforms[id].array_elements);
+            this->uniforms[id].remap_location =
+               this->explicit_location + field_counter;
+            field_counter += entries;
+         } else {
+            this->uniforms[id].remap_location = this->explicit_location;
+         }
+      } else {
+         /* Initialize to indicate that no location is set */
+         this->uniforms[id].remap_location = UNMAPPED_UNIFORM_LOC;
+      }
+
+      this->uniforms[id].name = ralloc_strdup(this->uniforms, name);
+      this->uniforms[id].type = base_type;
+      this->uniforms[id].initialized = 0;
+      this->uniforms[id].num_driver_storage = 0;
+      this->uniforms[id].driver_storage = NULL;
+      this->uniforms[id].atomic_buffer_index = -1;
+      this->uniforms[id].hidden =
+         current_var->data.how_declared == ir_var_hidden;
+      this->uniforms[id].builtin = is_gl_identifier(name);
+
+      /* Do not assign storage if the uniform is builtin */
+      if (!this->uniforms[id].builtin)
+         this->uniforms[id].storage = this->values;
+
+      this->uniforms[id].is_shader_storage =
+         current_var->is_in_shader_storage_block();
+
+      if (this->ubo_block_index != -1) {
+         this->uniforms[id].block_index = this->ubo_block_index;
+
+         unsigned alignment = type->std140_base_alignment(row_major);
+         if (packing == GLSL_INTERFACE_PACKING_STD430)
+            alignment = type->std430_base_alignment(row_major);
+         this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
+         this->uniforms[id].offset = this->ubo_byte_offset;
+         if (packing == GLSL_INTERFACE_PACKING_STD430)
+            this->ubo_byte_offset += type->std430_size(row_major);
+         else
+            this->ubo_byte_offset += type->std140_size(row_major);
+
+         if (type->is_array()) {
+            if (packing == GLSL_INTERFACE_PACKING_STD430)
+               this->uniforms[id].array_stride =
+                  type->without_array()->std430_array_stride(row_major);
+            else
+               this->uniforms[id].array_stride =
+                  glsl_align(type->without_array()->std140_size(row_major),
+                             16);
+         } else {
+            this->uniforms[id].array_stride = 0;
+         }
+
+         if (type->without_array()->is_matrix()) {
+            const glsl_type *matrix = type->without_array();
+            const unsigned N = matrix->base_type == GLSL_TYPE_DOUBLE ? 8 : 4;
+            const unsigned items =
+               row_major ? matrix->matrix_columns : matrix->vector_elements;
+
+            assert(items <= 4);
+            if (packing == GLSL_INTERFACE_PACKING_STD430)
+               this->uniforms[id].matrix_stride = items < 3 ? items * N :
+                                                   glsl_align(items * N, 16);
+            else
+               this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
+            this->uniforms[id].row_major = row_major;
+         } else {
+            this->uniforms[id].matrix_stride = 0;
+            this->uniforms[id].row_major = false;
+         }
+      } else {
+         this->uniforms[id].block_index = -1;
+         this->uniforms[id].offset = -1;
+         this->uniforms[id].array_stride = -1;
+         this->uniforms[id].matrix_stride = -1;
+         this->uniforms[id].row_major = false;
+      }
+
+      this->values += values_for_type(type);
+   }
+
+   struct string_to_uint_map *map;
+
+   struct gl_uniform_storage *uniforms;
+   unsigned next_sampler;
+   unsigned next_image;
+   unsigned next_subroutine;
+
+   /**
+    * Field counter is used to ensure that uniform structures with
+    * explicit locations get sequential locations.
+    */
+   unsigned field_counter;
+
+   /**
+    * Current variable being processed.
+    */
+   ir_variable *current_var;
+
+   /* Used to store the explicit location from current_var so that we can
+    * reuse the location field for storing the uniform slot id.
+    */
+   int explicit_location;
+
+   /* Stores total struct array elements including nested structs */
+   unsigned record_array_count;
+
+   /* Map for temporarily storing next sampler index when handling samplers in
+    * struct arrays.
+    */
+   struct string_to_uint_map *record_next_sampler;
+
+public:
+   union gl_constant_value *values;
+
+   gl_texture_index targets[MAX_SAMPLERS];
+
+   /**
+    * Mask of samplers used by the current shader stage.
+    */
+   unsigned shader_samplers_used;
+
+   /**
+    * Mask of samplers used by the current shader stage for shadows.
+    */
+   unsigned shader_shadow_samplers;
+};
+
+/**
+ * Merges a uniform block into an array of uniform blocks that may or
+ * may not already contain a copy of it.
+ *
+ * Returns the index of the new block in the array.
+ */
+int
+link_cross_validate_uniform_block(void *mem_ctx,
+                                  struct gl_uniform_block **linked_blocks,
+                                  unsigned int *num_linked_blocks,
+                                  struct gl_uniform_block *new_block)
+{
+   for (unsigned int i = 0; i < *num_linked_blocks; i++) {
+      struct gl_uniform_block *old_block = &(*linked_blocks)[i];
+
+      if (strcmp(old_block->Name, new_block->Name) == 0)
+         return link_uniform_blocks_are_compatible(old_block, new_block)
+            ? i : -1;
+   }
+
+   *linked_blocks = reralloc(mem_ctx, *linked_blocks,
+                             struct gl_uniform_block,
+                             *num_linked_blocks + 1);
+   int linked_block_index = (*num_linked_blocks)++;
+   struct gl_uniform_block *linked_block = &(*linked_blocks)[linked_block_index];
+
+   memcpy(linked_block, new_block, sizeof(*new_block));
+   linked_block->Uniforms = ralloc_array(*linked_blocks,
+                                         struct gl_uniform_buffer_variable,
+                                         linked_block->NumUniforms);
+
+   memcpy(linked_block->Uniforms,
+          new_block->Uniforms,
+          sizeof(*linked_block->Uniforms) * linked_block->NumUniforms);
+
+   for (unsigned int i = 0; i < linked_block->NumUniforms; i++) {
+      struct gl_uniform_buffer_variable *ubo_var =
+         &linked_block->Uniforms[i];
+
+      if (ubo_var->Name == ubo_var->IndexName) {
+         ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name);
+         ubo_var->IndexName = ubo_var->Name;
+      } else {
+         ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name);
+         ubo_var->IndexName = ralloc_strdup(*linked_blocks, ubo_var->IndexName);
+      }
+   }
+
+   return linked_block_index;
+}
+
+/**
+ * Walks the IR and updates the references to uniform blocks in the
+ * ir_variables to point at the linked shader's list (previously, they
+ * would point at the uniform block list in one of the pre-linked
+ * shaders).
+ */
+static void
+link_update_uniform_buffer_variables(struct gl_shader *shader)
+{
+   foreach_in_list(ir_instruction, node, shader->ir) {
+      ir_variable *const var = node->as_variable();
+
+      if ((var == NULL) || !var->is_in_buffer_block())
+         continue;
+
+      assert(var->data.mode == ir_var_uniform ||
+             var->data.mode == ir_var_shader_storage);
+
+      if (var->is_interface_instance()) {
+         var->data.location = 0;
+         continue;
+      }
+
+      bool found = false;
+      char sentinel = '\0';
+
+      if (var->type->is_record()) {
+         sentinel = '.';
+      } else if (var->type->is_array() && (var->type->fields.array->is_array()
+                 || var->type->without_array()->is_record())) {
+         sentinel = '[';
+      }
+
+      const unsigned l = strlen(var->name);
+      for (unsigned i = 0; i < shader->NumBufferInterfaceBlocks; i++) {
+         for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i].NumUniforms; j++) {
+            if (sentinel) {
+               const char *begin = shader->BufferInterfaceBlocks[i].Uniforms[j].Name;
+               const char *end = strchr(begin, sentinel);
+
+               if (end == NULL)
+                  continue;
+
+               if ((ptrdiff_t) l != (end - begin))
+                  continue;
+
+               if (strncmp(var->name, begin, l) == 0) {
+                  found = true;
+                  var->data.location = j;
+                  break;
+               }
+            } else if (!strcmp(var->name,
+                               shader->BufferInterfaceBlocks[i].Uniforms[j].Name)) {
+               found = true;
+               var->data.location = j;
+               break;
+            }
+         }
+         if (found)
+            break;
+      }
+      assert(found);
+   }
+}
+
+static void
+link_set_image_access_qualifiers(struct gl_shader_program *prog,
+                                 gl_shader *sh, unsigned shader_stage,
+                                 ir_variable *var, const glsl_type *type,
+                                 char **name, size_t name_length)
+{
+   /* Handle arrays of arrays */
+   if (type->is_array() && type->fields.array->is_array()) {
+      for (unsigned i = 0; i < type->length; i++) {
+         size_t new_length = name_length;
+
+         /* Append the subscript to the current variable name */
+         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
+
+         link_set_image_access_qualifiers(prog, sh, shader_stage, var,
+                                          type->fields.array, name,
+                                          new_length);
+      }
+   } else {
+      unsigned id = 0;
+      bool found = prog->UniformHash->get(id, *name);
+      assert(found);
+      (void) found;
+      const gl_uniform_storage *storage = &prog->UniformStorage[id];
+      const unsigned index = storage->opaque[shader_stage].index;
+      const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
+                             var->data.image_write_only ? GL_WRITE_ONLY :
+                             GL_READ_WRITE);
+
+      for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j)
+         sh->ImageAccess[index + j] = access;
+   }
+}
+
+/**
+ * Combine the hidden uniform hash map with the uniform hash map so that the
+ * hidden uniforms will be given indices at the end of the uniform storage
+ * array.
+ */
+static void
+assign_hidden_uniform_slot_id(const char *name, unsigned hidden_id,
+                              void *closure)
+{
+   count_uniform_size *uniform_size = (count_uniform_size *) closure;
+   unsigned hidden_uniform_start = uniform_size->num_active_uniforms -
+      uniform_size->num_hidden_uniforms;
+
+   uniform_size->map->put(hidden_uniform_start + hidden_id, name);
+}
+
+void
+link_assign_uniform_locations(struct gl_shader_program *prog,
+                              unsigned int boolean_true)
+{
+   ralloc_free(prog->UniformStorage);
+   prog->UniformStorage = NULL;
+   prog->NumUniformStorage = 0;
+
+   if (prog->UniformHash != NULL) {
+      prog->UniformHash->clear();
+   } else {
+      prog->UniformHash = new string_to_uint_map;
+   }
+
+   /* First pass: Count the uniform resources used by the user-defined
+    * uniforms.  While this happens, each active uniform will have an index
+    * assigned to it.
+ * + * Note: this is *NOT* the index that is returned to the application by + * glGetUniformLocation. + */ + struct string_to_uint_map *hiddenUniforms = new string_to_uint_map; + count_uniform_size uniform_size(prog->UniformHash, hiddenUniforms); + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh == NULL) + continue; + + /* Uniforms that lack an initializer in the shader code have an initial + * value of zero. This includes sampler uniforms. + * + * Page 24 (page 30 of the PDF) of the GLSL 1.20 spec says: + * + * "The link time initial value is either the value of the variable's + * initializer, if present, or 0 if no initializer is present. Sampler + * types cannot have initializers." + */ + memset(sh->SamplerUnits, 0, sizeof(sh->SamplerUnits)); + memset(sh->ImageUnits, 0, sizeof(sh->ImageUnits)); + + link_update_uniform_buffer_variables(sh); + + /* Reset various per-shader target counts. + */ + uniform_size.start_shader(); + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_shader_storage)) + continue; + + uniform_size.process(var); + } + + sh->num_samplers = uniform_size.num_shader_samplers; + sh->NumImages = uniform_size.num_shader_images; + sh->num_uniform_components = uniform_size.num_shader_uniform_components; + sh->num_combined_uniform_components = sh->num_uniform_components; + + for (unsigned i = 0; i < sh->NumBufferInterfaceBlocks; i++) { + if (!sh->BufferInterfaceBlocks[i].IsShaderStorage) { + sh->num_combined_uniform_components += + sh->BufferInterfaceBlocks[i].UniformBufferSize / 4; + } + } + } + + const unsigned num_uniforms = uniform_size.num_active_uniforms; + const unsigned num_data_slots = uniform_size.num_values; + const unsigned hidden_uniforms = uniform_size.num_hidden_uniforms; + + /* assign hidden uniforms a slot id */ + hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size); + delete hiddenUniforms; + + /* On the outside chance that there were no uniforms, bail out. + */ + if (num_uniforms == 0) + return; + + struct gl_uniform_storage *uniforms = + rzalloc_array(prog, struct gl_uniform_storage, num_uniforms); + union gl_constant_value *data = + rzalloc_array(uniforms, union gl_constant_value, num_data_slots); +#ifndef NDEBUG + union gl_constant_value *data_end = &data[num_data_slots]; +#endif + + parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data); + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + parcel.start_shader((gl_shader_stage)i); + + foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_uniform && + var->data.mode != ir_var_shader_storage)) + continue; + + parcel.set_and_process(prog, var); + } + + prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used; + prog->_LinkedShaders[i]->shadow_samplers = parcel.shader_shadow_samplers; + + STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) == + sizeof(parcel.targets)); + memcpy(prog->_LinkedShaders[i]->SamplerTargets, parcel.targets, + sizeof(prog->_LinkedShaders[i]->SamplerTargets)); + } + + /* Reserve all the explicit locations of the active uniforms. 
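+    * For example, a hypothetical "layout(location = 2) uniform float u[3];"
+    * reserves UniformRemapTable entries 2, 3 and 4 in the loop below.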
*/
+   for (unsigned i = 0; i < num_uniforms; i++) {
+      if (uniforms[i].type->is_subroutine() ||
+          uniforms[i].is_shader_storage)
+         continue;
+
+      if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) {
+         /* How many new entries for this uniform? */
+         const unsigned entries = MAX2(1, uniforms[i].array_elements);
+
+         /* Set remap table entries to point to the correct gl_uniform_storage. */
+         for (unsigned j = 0; j < entries; j++) {
+            unsigned element_loc = uniforms[i].remap_location + j;
+            assert(prog->UniformRemapTable[element_loc] ==
+                   INACTIVE_UNIFORM_EXPLICIT_LOCATION);
+            prog->UniformRemapTable[element_loc] = &uniforms[i];
+         }
+      }
+   }
+
+   /* Reserve locations for rest of the uniforms. */
+   for (unsigned i = 0; i < num_uniforms; i++) {
+
+      if (uniforms[i].type->is_subroutine() ||
+          uniforms[i].is_shader_storage)
+         continue;
+
+      /* Built-in uniforms should not get any location. */
+      if (uniforms[i].builtin)
+         continue;
+
+      /* Explicit ones have been set already. */
+      if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC)
+         continue;
+
+      /* how many new entries for this uniform? */
+      const unsigned entries = MAX2(1, uniforms[i].array_elements);
+
+      /* resize remap table to fit new entries */
+      prog->UniformRemapTable =
+         reralloc(prog,
+                  prog->UniformRemapTable,
+                  gl_uniform_storage *,
+                  prog->NumUniformRemapTable + entries);
+
+      /* set pointers for this uniform */
+      for (unsigned j = 0; j < entries; j++)
+         prog->UniformRemapTable[prog->NumUniformRemapTable+j] = &uniforms[i];
+
+      /* set the base location in remap table for the uniform */
+      uniforms[i].remap_location = prog->NumUniformRemapTable;
+
+      prog->NumUniformRemapTable += entries;
+   }
+
+   /* Reserve all the explicit locations of the active subroutine uniforms. */
+   for (unsigned i = 0; i < num_uniforms; i++) {
+      if (!uniforms[i].type->is_subroutine())
+         continue;
+
+      if (uniforms[i].remap_location == UNMAPPED_UNIFORM_LOC)
+         continue;
+
+      for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
+         struct gl_shader *sh = prog->_LinkedShaders[j];
+         if (!sh)
+            continue;
+
+         if (!uniforms[i].opaque[j].active)
+            continue;
+
+         /* How many new entries for this uniform? */
+         const unsigned entries = MAX2(1, uniforms[i].array_elements);
+
+         /* Set remap table entries to point to the correct gl_uniform_storage.
*/ + for (unsigned k = 0; k < entries; k++) { + unsigned element_loc = uniforms[i].remap_location + k; + assert(sh->SubroutineUniformRemapTable[element_loc] == + INACTIVE_UNIFORM_EXPLICIT_LOCATION); + sh->SubroutineUniformRemapTable[element_loc] = &uniforms[i]; + } + } + } + + /* reserve subroutine locations */ + for (unsigned i = 0; i < num_uniforms; i++) { + + if (!uniforms[i].type->is_subroutine()) + continue; + const unsigned entries = MAX2(1, uniforms[i].array_elements); + + if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) + continue; + for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { + struct gl_shader *sh = prog->_LinkedShaders[j]; + if (!sh) + continue; + + if (!uniforms[i].opaque[j].active) + continue; + + sh->SubroutineUniformRemapTable = + reralloc(sh, + sh->SubroutineUniformRemapTable, + gl_uniform_storage *, + sh->NumSubroutineUniformRemapTable + entries); + + for (unsigned k = 0; k < entries; k++) + sh->SubroutineUniformRemapTable[sh->NumSubroutineUniformRemapTable + k] = &uniforms[i]; + uniforms[i].remap_location = sh->NumSubroutineUniformRemapTable; + sh->NumSubroutineUniformRemapTable += entries; + } + } + +#ifndef NDEBUG + for (unsigned i = 0; i < num_uniforms; i++) { + assert(uniforms[i].storage != NULL || uniforms[i].builtin); + } + + assert(parcel.values == data_end); +#endif + + prog->NumUniformStorage = num_uniforms; + prog->NumHiddenUniforms = hidden_uniforms; + prog->UniformStorage = uniforms; + + /** + * Scan the program for image uniforms and store image unit access + * information into the gl_shader data structure. + */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh == NULL) + continue; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *var = node->as_variable(); + + if (var && var->data.mode == ir_var_uniform && + var->type->contains_image()) { + char *name_copy = ralloc_strdup(NULL, var->name); + link_set_image_access_qualifiers(prog, sh, i, var, var->type, + &name_copy, strlen(var->name)); + ralloc_free(name_copy); + } + } + } + + link_set_uniform_initializers(prog, boolean_true); + + return; +} diff --git a/src/compiler/glsl/link_varyings.cpp b/src/compiler/glsl/link_varyings.cpp new file mode 100644 index 00000000000..264b69ca619 --- /dev/null +++ b/src/compiler/glsl/link_varyings.cpp @@ -0,0 +1,1888 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+/**
+ * \file link_varyings.cpp
+ *
+ * Linker functions related specifically to linking varyings between shader
+ * stages.
+ */
+
+
+#include "main/mtypes.h"
+#include "glsl_symbol_table.h"
+#include "glsl_parser_extras.h"
+#include "ir_optimization.h"
+#include "linker.h"
+#include "link_varyings.h"
+#include "main/macros.h"
+#include "program/hash_table.h"
+#include "program.h"
+
+
+/**
+ * Get the varying type stripped of the outermost array if we're processing
+ * a stage whose varyings are arrays indexed by a vertex number (such as
+ * geometry shader inputs).
+ */
+static const glsl_type *
+get_varying_type(const ir_variable *var, gl_shader_stage stage)
+{
+   const glsl_type *type = var->type;
+
+   if (!var->data.patch &&
+       ((var->data.mode == ir_var_shader_out &&
+         stage == MESA_SHADER_TESS_CTRL) ||
+        (var->data.mode == ir_var_shader_in &&
+         (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
+          stage == MESA_SHADER_GEOMETRY)))) {
+      assert(type->is_array());
+      type = type->fields.array;
+   }
+
+   return type;
+}
+
+/**
+ * Validate the types and qualifiers of an output from one stage against the
+ * matching input to another stage.
+ */
+static void
+cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
+                                    const ir_variable *input,
+                                    const ir_variable *output,
+                                    gl_shader_stage consumer_stage,
+                                    gl_shader_stage producer_stage)
+{
+   /* Check that the types match between stages.
+    */
+   const glsl_type *type_to_match = input->type;
+
+   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
+   const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
+                                   consumer_stage != MESA_SHADER_FRAGMENT) ||
+                                  consumer_stage == MESA_SHADER_GEOMETRY;
+   if (extra_array_level) {
+      assert(type_to_match->is_array());
+      type_to_match = type_to_match->fields.array;
+   }
+
+   if (type_to_match != output->type) {
+      /* There is a bit of a special case for gl_TexCoord.  This
+       * built-in is unsized by default.  Applications that access it with
+       * a variable index must redeclare it with a size.  There is some
+       * language in the GLSL spec that implies the fragment shader
+       * and vertex shader do not have to agree on this size.  Other
+       * drivers behave this way, and one or two applications seem to
+       * rely on it.
+       *
+       * Neither declaration needs to be modified here because the array
+       * sizes are fixed later when update_array_sizes is called.
+       *
+       * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
+       *
+       *     "Unlike user-defined varying variables, the built-in
+       *     varying variables don't have a strict one-to-one
+       *     correspondence between the vertex language and the
+       *     fragment language."
+       */
+      if (!output->type->is_array() || !is_gl_identifier(output->name)) {
+         linker_error(prog,
+                      "%s shader output `%s' declared as type `%s', "
+                      "but %s shader input declared as type `%s'\n",
+                      _mesa_shader_stage_to_string(producer_stage),
+                      output->name,
+                      output->type->name,
+                      _mesa_shader_stage_to_string(consumer_stage),
+                      input->type->name);
+         return;
+      }
+   }
+
+   /* Check that all of the qualifiers match between stages.
+    */
+   if (input->data.centroid != output->data.centroid) {
+      linker_error(prog,
+                   "%s shader output `%s' %s centroid qualifier, "
+                   "but %s shader input %s centroid qualifier\n",
+                   _mesa_shader_stage_to_string(producer_stage),
+                   output->name,
+                   (output->data.centroid) ? "has" : "lacks",
+                   _mesa_shader_stage_to_string(consumer_stage),
+                   (input->data.centroid) ? "has" : "lacks");
"has" : "lacks"); + return; + } + + if (input->data.sample != output->data.sample) { + linker_error(prog, + "%s shader output `%s' %s sample qualifier, " + "but %s shader input %s sample qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + (output->data.sample) ? "has" : "lacks", + _mesa_shader_stage_to_string(consumer_stage), + (input->data.sample) ? "has" : "lacks"); + return; + } + + if (input->data.patch != output->data.patch) { + linker_error(prog, + "%s shader output `%s' %s patch qualifier, " + "but %s shader input %s patch qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + (output->data.patch) ? "has" : "lacks", + _mesa_shader_stage_to_string(consumer_stage), + (input->data.patch) ? "has" : "lacks"); + return; + } + + if (!prog->IsES && input->data.invariant != output->data.invariant) { + linker_error(prog, + "%s shader output `%s' %s invariant qualifier, " + "but %s shader input %s invariant qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + (output->data.invariant) ? "has" : "lacks", + _mesa_shader_stage_to_string(consumer_stage), + (input->data.invariant) ? "has" : "lacks"); + return; + } + + /* GLSL >= 4.40 removes text requiring interpolation qualifiers + * to match cross stage, they must only match within the same stage. + * + * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec: + * + * "It is a link-time error if, within the same stage, the interpolation + * qualifiers of variables of the same name do not match. + * + */ + if (input->data.interpolation != output->data.interpolation && + prog->Version < 440) { + linker_error(prog, + "%s shader output `%s' specifies %s " + "interpolation qualifier, " + "but %s shader input specifies %s " + "interpolation qualifier\n", + _mesa_shader_stage_to_string(producer_stage), + output->name, + interpolation_string(output->data.interpolation), + _mesa_shader_stage_to_string(consumer_stage), + interpolation_string(input->data.interpolation)); + return; + } +} + +/** + * Validate front and back color outputs against single color input + */ +static void +cross_validate_front_and_back_color(struct gl_shader_program *prog, + const ir_variable *input, + const ir_variable *front_color, + const ir_variable *back_color, + gl_shader_stage consumer_stage, + gl_shader_stage producer_stage) +{ + if (front_color != NULL && front_color->data.assigned) + cross_validate_types_and_qualifiers(prog, input, front_color, + consumer_stage, producer_stage); + + if (back_color != NULL && back_color->data.assigned) + cross_validate_types_and_qualifiers(prog, input, back_color, + consumer_stage, producer_stage); +} + +/** + * Validate that outputs from one stage match inputs of another + */ +void +cross_validate_outputs_to_inputs(struct gl_shader_program *prog, + gl_shader *producer, gl_shader *consumer) +{ + glsl_symbol_table parameters; + ir_variable *explicit_locations[MAX_VARYING] = { NULL, }; + + /* Find all shader outputs in the "producer" stage. + */ + foreach_in_list(ir_instruction, node, producer->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_shader_out)) + continue; + + if (!var->data.explicit_location + || var->data.location < VARYING_SLOT_VAR0) + parameters.add_variable(var); + else { + /* User-defined varyings with explicit locations are handled + * differently because they do not need to have matching names. 
+ */ + const unsigned idx = var->data.location - VARYING_SLOT_VAR0; + + if (explicit_locations[idx] != NULL) { + linker_error(prog, + "%s shader has multiple outputs explicitly " + "assigned to location %d\n", + _mesa_shader_stage_to_string(producer->Stage), + idx); + return; + } + + explicit_locations[idx] = var; + } + } + + + /* Find all shader inputs in the "consumer" stage. Any variables that have + * matching outputs already in the symbol table must have the same type and + * qualifiers. + * + * Exception: if the consumer is the geometry shader, then the inputs + * should be arrays and the type of the array element should match the type + * of the corresponding producer output. + */ + foreach_in_list(ir_instruction, node, consumer->ir) { + ir_variable *const input = node->as_variable(); + + if ((input == NULL) || (input->data.mode != ir_var_shader_in)) + continue; + + if (strcmp(input->name, "gl_Color") == 0 && input->data.used) { + const ir_variable *const front_color = + parameters.get_variable("gl_FrontColor"); + + const ir_variable *const back_color = + parameters.get_variable("gl_BackColor"); + + cross_validate_front_and_back_color(prog, input, + front_color, back_color, + consumer->Stage, producer->Stage); + } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) { + const ir_variable *const front_color = + parameters.get_variable("gl_FrontSecondaryColor"); + + const ir_variable *const back_color = + parameters.get_variable("gl_BackSecondaryColor"); + + cross_validate_front_and_back_color(prog, input, + front_color, back_color, + consumer->Stage, producer->Stage); + } else { + /* The rules for connecting inputs and outputs change in the presence + * of explicit locations. In this case, we no longer care about the + * names of the variables. Instead, we care only about the + * explicitly assigned location. + */ + ir_variable *output = NULL; + if (input->data.explicit_location + && input->data.location >= VARYING_SLOT_VAR0) { + output = explicit_locations[input->data.location - VARYING_SLOT_VAR0]; + + if (output == NULL) { + linker_error(prog, + "%s shader input `%s' with explicit location " + "has no matching output\n", + _mesa_shader_stage_to_string(consumer->Stage), + input->name); + } + } else { + output = parameters.get_variable(input->name); + } + + if (output != NULL) { + cross_validate_types_and_qualifiers(prog, input, output, + consumer->Stage, producer->Stage); + } else { + /* Check for input vars with unmatched output vars in prev stage + * taking into account that interface blocks could have a matching + * output but with different name, so we ignore them. + */ + assert(!input->data.assigned); + if (input->data.used && !input->get_interface_type() && + !input->data.explicit_location && !prog->SeparateShader) + linker_error(prog, + "%s shader input `%s' " + "has no matching output in the previous stage\n", + _mesa_shader_stage_to_string(consumer->Stage), + input->name); + } + } + } +} + +/** + * Demote shader inputs and outputs that are not used in other stages, and + * remove them via dead code elimination. 
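+ *
+ * For example, a vertex shader output that is never read by the next stage
+ * is flagged as is_unmatched_generic_inout, demoted below from
+ * ir_var_shader_out to ir_var_auto, and then deleted by the do_dead_code()
+ * loop.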
+ */ +void +remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object, + gl_shader *sh, + enum ir_variable_mode mode) +{ + if (is_separate_shader_object) + return; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != int(mode))) + continue; + + /* A shader 'in' or 'out' variable is only really an input or output if + * its value is used by other shader stages. This will cause the + * variable to have a location assigned. + */ + if (var->data.is_unmatched_generic_inout) { + assert(var->data.mode != ir_var_temporary); + var->data.mode = ir_var_auto; + } + } + + /* Eliminate code that is now dead due to unused inputs/outputs being + * demoted. + */ + while (do_dead_code(sh->ir, false)) + ; + +} + +/** + * Initialize this object based on a string that was passed to + * glTransformFeedbackVaryings. + * + * If the input is mal-formed, this call still succeeds, but it sets + * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var() + * will fail to find any matching variable. + */ +void +tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx, + const char *input) +{ + /* We don't have to be pedantic about what is a valid GLSL variable name, + * because any variable with an invalid name can't exist in the IR anyway. + */ + + this->location = -1; + this->orig_name = input; + this->lowered_builtin_array_variable = none; + this->skip_components = 0; + this->next_buffer_separator = false; + this->matched_candidate = NULL; + this->stream_id = 0; + + if (ctx->Extensions.ARB_transform_feedback3) { + /* Parse gl_NextBuffer. */ + if (strcmp(input, "gl_NextBuffer") == 0) { + this->next_buffer_separator = true; + return; + } + + /* Parse gl_SkipComponents. */ + if (strcmp(input, "gl_SkipComponents1") == 0) + this->skip_components = 1; + else if (strcmp(input, "gl_SkipComponents2") == 0) + this->skip_components = 2; + else if (strcmp(input, "gl_SkipComponents3") == 0) + this->skip_components = 3; + else if (strcmp(input, "gl_SkipComponents4") == 0) + this->skip_components = 4; + + if (this->skip_components) + return; + } + + /* Parse a declaration. */ + const char *base_name_end; + long subscript = parse_program_resource_name(input, &base_name_end); + this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input); + if (this->var_name == NULL) { + _mesa_error_no_memory(__func__); + return; + } + + if (subscript >= 0) { + this->array_subscript = subscript; + this->is_subscripted = true; + } else { + this->is_subscripted = false; + } + + /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this + * class must behave specially to account for the fact that gl_ClipDistance + * is converted from a float[8] to a vec4[2]. + */ + if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerClipDistance && + strcmp(this->var_name, "gl_ClipDistance") == 0) { + this->lowered_builtin_array_variable = clip_distance; + } + + if (ctx->Const.LowerTessLevel && + (strcmp(this->var_name, "gl_TessLevelOuter") == 0)) + this->lowered_builtin_array_variable = tess_level_outer; + if (ctx->Const.LowerTessLevel && + (strcmp(this->var_name, "gl_TessLevelInner") == 0)) + this->lowered_builtin_array_variable = tess_level_inner; +} + + +/** + * Determine whether two tfeedback_decl objects refer to the same variable and + * array index (if applicable). 
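+ *
+ * For example (hypothetical strings passed to glTransformFeedbackVaryings),
+ * "foo[1]" and "foo[1]" are the same, "foo[0]" and "foo[1]" are not, and
+ * "foo" and "foo[1]" are not (only one of them is subscripted).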
+ */ +bool +tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y) +{ + assert(x.is_varying() && y.is_varying()); + + if (strcmp(x.var_name, y.var_name) != 0) + return false; + if (x.is_subscripted != y.is_subscripted) + return false; + if (x.is_subscripted && x.array_subscript != y.array_subscript) + return false; + return true; +} + + +/** + * Assign a location and stream ID for this tfeedback_decl object based on the + * transform feedback candidate found by find_candidate. + * + * If an error occurs, the error is reported through linker_error() and false + * is returned. + */ +bool +tfeedback_decl::assign_location(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + assert(this->is_varying()); + + unsigned fine_location + = this->matched_candidate->toplevel_var->data.location * 4 + + this->matched_candidate->toplevel_var->data.location_frac + + this->matched_candidate->offset; + + if (this->matched_candidate->type->is_array()) { + /* Array variable */ + const unsigned matrix_cols = + this->matched_candidate->type->fields.array->matrix_columns; + const unsigned vector_elements = + this->matched_candidate->type->fields.array->vector_elements; + const unsigned dmul = + this->matched_candidate->type->fields.array->is_double() ? 2 : 1; + unsigned actual_array_size; + switch (this->lowered_builtin_array_variable) { + case clip_distance: + actual_array_size = prog->LastClipDistanceArraySize; + break; + case tess_level_outer: + actual_array_size = 4; + break; + case tess_level_inner: + actual_array_size = 2; + break; + case none: + default: + actual_array_size = this->matched_candidate->type->array_size(); + break; + } + + if (this->is_subscripted) { + /* Check array bounds. */ + if (this->array_subscript >= actual_array_size) { + linker_error(prog, "Transform feedback varying %s has index " + "%i, but the array size is %u.", + this->orig_name, this->array_subscript, + actual_array_size); + return false; + } + unsigned array_elem_size = this->lowered_builtin_array_variable ? 
+ 1 : vector_elements * matrix_cols * dmul; + fine_location += array_elem_size * this->array_subscript; + this->size = 1; + } else { + this->size = actual_array_size; + } + this->vector_elements = vector_elements; + this->matrix_columns = matrix_cols; + if (this->lowered_builtin_array_variable) + this->type = GL_FLOAT; + else + this->type = this->matched_candidate->type->fields.array->gl_type; + } else { + /* Regular variable (scalar, vector, or matrix) */ + if (this->is_subscripted) { + linker_error(prog, "Transform feedback varying %s requested, " + "but %s is not an array.", + this->orig_name, this->var_name); + return false; + } + this->size = 1; + this->vector_elements = this->matched_candidate->type->vector_elements; + this->matrix_columns = this->matched_candidate->type->matrix_columns; + this->type = this->matched_candidate->type->gl_type; + } + this->location = fine_location / 4; + this->location_frac = fine_location % 4; + + /* From GL_EXT_transform_feedback: + * A program will fail to link if: + * + * * the total number of components to capture in any varying + * variable in is greater than the constant + * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the + * buffer mode is SEPARATE_ATTRIBS_EXT; + */ + if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && + this->num_components() > + ctx->Const.MaxTransformFeedbackSeparateComponents) { + linker_error(prog, "Transform feedback varying %s exceeds " + "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.", + this->orig_name); + return false; + } + + /* Only transform feedback varyings can be assigned to non-zero streams, + * so assign the stream id here. + */ + this->stream_id = this->matched_candidate->toplevel_var->data.stream; + + return true; +} + + +unsigned +tfeedback_decl::get_num_outputs() const +{ + if (!this->is_varying()) { + return 0; + } + return (this->num_components() + this->location_frac + 3)/4; +} + + +/** + * Update gl_transform_feedback_info to reflect this tfeedback_decl. + * + * If an error occurs, the error is reported through linker_error() and false + * is returned. + */ +bool +tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, + struct gl_transform_feedback_info *info, + unsigned buffer, const unsigned max_outputs) const +{ + assert(!this->next_buffer_separator); + + /* Handle gl_SkipComponents. */ + if (this->skip_components) { + info->BufferStride[buffer] += this->skip_components; + return true; + } + + /* From GL_EXT_transform_feedback: + * A program will fail to link if: + * + * * the total number of components to capture is greater than + * the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT + * and the buffer mode is INTERLEAVED_ATTRIBS_EXT. 
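+    *
+    * As a rough worked example (hypothetical varyings): interleaving a vec4
+    * and a mat3 into one buffer adds 4 + 9 = 13 components to that buffer's
+    * stride, which is what the check below compares against the limit.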
+ */ + if (prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS && + info->BufferStride[buffer] + this->num_components() > + ctx->Const.MaxTransformFeedbackInterleavedComponents) { + linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " + "limit has been exceeded."); + return false; + } + + unsigned location = this->location; + unsigned location_frac = this->location_frac; + unsigned num_components = this->num_components(); + while (num_components > 0) { + unsigned output_size = MIN2(num_components, 4 - location_frac); + assert(info->NumOutputs < max_outputs); + info->Outputs[info->NumOutputs].ComponentOffset = location_frac; + info->Outputs[info->NumOutputs].OutputRegister = location; + info->Outputs[info->NumOutputs].NumComponents = output_size; + info->Outputs[info->NumOutputs].StreamId = stream_id; + info->Outputs[info->NumOutputs].OutputBuffer = buffer; + info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer]; + ++info->NumOutputs; + info->BufferStride[buffer] += output_size; + info->BufferStream[buffer] = this->stream_id; + num_components -= output_size; + location++; + location_frac = 0; + } + + info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, this->orig_name); + info->Varyings[info->NumVarying].Type = this->type; + info->Varyings[info->NumVarying].Size = this->size; + info->NumVarying++; + + return true; +} + + +const tfeedback_candidate * +tfeedback_decl::find_candidate(gl_shader_program *prog, + hash_table *tfeedback_candidates) +{ + const char *name = this->var_name; + switch (this->lowered_builtin_array_variable) { + case none: + name = this->var_name; + break; + case clip_distance: + name = "gl_ClipDistanceMESA"; + break; + case tess_level_outer: + name = "gl_TessLevelOuterMESA"; + break; + case tess_level_inner: + name = "gl_TessLevelInnerMESA"; + break; + } + this->matched_candidate = (const tfeedback_candidate *) + hash_table_find(tfeedback_candidates, name); + if (!this->matched_candidate) { + /* From GL_EXT_transform_feedback: + * A program will fail to link if: + * + * * any variable name specified in the array is not + * declared as an output in the geometry shader (if present) or + * the vertex shader (if no geometry shader is present); + */ + linker_error(prog, "Transform feedback varying %s undeclared.", + this->orig_name); + } + return this->matched_candidate; +} + + +/** + * Parse all the transform feedback declarations that were passed to + * glTransformFeedbackVaryings() and store them in tfeedback_decl objects. + * + * If an error occurs, the error is reported through linker_error() and false + * is returned. + */ +bool +parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, + const void *mem_ctx, unsigned num_names, + char **varying_names, tfeedback_decl *decls) +{ + for (unsigned i = 0; i < num_names; ++i) { + decls[i].init(ctx, mem_ctx, varying_names[i]); + + if (!decls[i].is_varying()) + continue; + + /* From GL_EXT_transform_feedback: + * A program will fail to link if: + * + * * any two entries in the array specify the same varying + * variable; + * + * We interpret this to mean "any two entries in the array + * specify the same varying variable and array index", since transform + * feedback of arrays would be useless otherwise. 
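+       *
+       * For example (hypothetical names), capturing both "foo[0]" and
+       * "foo[1]" is accepted, while listing "foo[1]" twice triggers the
+       * link error below.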
+ */ + for (unsigned j = 0; j < i; ++j) { + if (!decls[j].is_varying()) + continue; + + if (tfeedback_decl::is_same(decls[i], decls[j])) { + linker_error(prog, "Transform feedback varying %s specified " + "more than once.", varying_names[i]); + return false; + } + } + } + return true; +} + + +/** + * Store transform feedback location assignments into + * prog->LinkedTransformFeedback based on the data stored in tfeedback_decls. + * + * If an error occurs, the error is reported through linker_error() and false + * is returned. + */ +bool +store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, + unsigned num_tfeedback_decls, + tfeedback_decl *tfeedback_decls) +{ + bool separate_attribs_mode = + prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS; + + ralloc_free(prog->LinkedTransformFeedback.Varyings); + ralloc_free(prog->LinkedTransformFeedback.Outputs); + + memset(&prog->LinkedTransformFeedback, 0, + sizeof(prog->LinkedTransformFeedback)); + + prog->LinkedTransformFeedback.Varyings = + rzalloc_array(prog, + struct gl_transform_feedback_varying_info, + num_tfeedback_decls); + + unsigned num_outputs = 0; + for (unsigned i = 0; i < num_tfeedback_decls; ++i) + num_outputs += tfeedback_decls[i].get_num_outputs(); + + prog->LinkedTransformFeedback.Outputs = + rzalloc_array(prog, + struct gl_transform_feedback_output, + num_outputs); + + unsigned num_buffers = 0; + + if (separate_attribs_mode) { + /* GL_SEPARATE_ATTRIBS */ + for (unsigned i = 0; i < num_tfeedback_decls; ++i) { + if (!tfeedback_decls[i].store(ctx, prog, &prog->LinkedTransformFeedback, + num_buffers, num_outputs)) + return false; + + num_buffers++; + } + } + else { + /* GL_INVERLEAVED_ATTRIBS */ + int buffer_stream_id = -1; + for (unsigned i = 0; i < num_tfeedback_decls; ++i) { + if (tfeedback_decls[i].is_next_buffer_separator()) { + num_buffers++; + buffer_stream_id = -1; + continue; + } else if (buffer_stream_id == -1) { + /* First varying writing to this buffer: remember its stream */ + buffer_stream_id = (int) tfeedback_decls[i].get_stream_id(); + } else if (buffer_stream_id != + (int) tfeedback_decls[i].get_stream_id()) { + /* Varying writes to the same buffer from a different stream */ + linker_error(prog, + "Transform feedback can't capture varyings belonging " + "to different vertex streams in a single buffer. " + "Varying %s writes to buffer from stream %u, other " + "varyings in the same buffer write from stream %u.", + tfeedback_decls[i].name(), + tfeedback_decls[i].get_stream_id(), + buffer_stream_id); + return false; + } + + if (!tfeedback_decls[i].store(ctx, prog, + &prog->LinkedTransformFeedback, + num_buffers, num_outputs)) + return false; + } + num_buffers++; + } + + assert(prog->LinkedTransformFeedback.NumOutputs == num_outputs); + + prog->LinkedTransformFeedback.NumBuffers = num_buffers; + return true; +} + +namespace { + +/** + * Data structure recording the relationship between outputs of one shader + * stage (the "producer") and inputs of another (the "consumer"). 
+ */ +class varying_matches +{ +public: + varying_matches(bool disable_varying_packing, + gl_shader_stage producer_stage, + gl_shader_stage consumer_stage); + ~varying_matches(); + void record(ir_variable *producer_var, ir_variable *consumer_var); + unsigned assign_locations(struct gl_shader_program *prog, + uint64_t reserved_slots, bool separate_shader); + void store_locations() const; + +private: + /** + * If true, this driver disables varying packing, so all varyings need to + * be aligned on slot boundaries, and take up a number of slots equal to + * their number of matrix columns times their array size. + */ + const bool disable_varying_packing; + + /** + * Enum representing the order in which varyings are packed within a + * packing class. + * + * Currently we pack vec4's first, then vec2's, then scalar values, then + * vec3's. This order ensures that the only vectors that are at risk of + * having to be "double parked" (split between two adjacent varying slots) + * are the vec3's. + */ + enum packing_order_enum { + PACKING_ORDER_VEC4, + PACKING_ORDER_VEC2, + PACKING_ORDER_SCALAR, + PACKING_ORDER_VEC3, + }; + + static unsigned compute_packing_class(const ir_variable *var); + static packing_order_enum compute_packing_order(const ir_variable *var); + static int match_comparator(const void *x_generic, const void *y_generic); + + /** + * Structure recording the relationship between a single producer output + * and a single consumer input. + */ + struct match { + /** + * Packing class for this varying, computed by compute_packing_class(). + */ + unsigned packing_class; + + /** + * Packing order for this varying, computed by compute_packing_order(). + */ + packing_order_enum packing_order; + unsigned num_components; + + /** + * The output variable in the producer stage. + */ + ir_variable *producer_var; + + /** + * The input variable in the consumer stage. + */ + ir_variable *consumer_var; + + /** + * The location which has been assigned for this varying. This is + * expressed in multiples of a float, with the first generic varying + * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the + * value 0. + */ + unsigned generic_location; + } *matches; + + /** + * The number of elements in the \c matches array that are currently in + * use. + */ + unsigned num_matches; + + /** + * The number of elements that were set aside for the \c matches array when + * it was allocated. + */ + unsigned matches_capacity; + + gl_shader_stage producer_stage; + gl_shader_stage consumer_stage; +}; + +} /* anonymous namespace */ + +varying_matches::varying_matches(bool disable_varying_packing, + gl_shader_stage producer_stage, + gl_shader_stage consumer_stage) + : disable_varying_packing(disable_varying_packing), + producer_stage(producer_stage), + consumer_stage(consumer_stage) +{ + /* Note: this initial capacity is rather arbitrarily chosen to be large + * enough for many cases without wasting an unreasonable amount of space. + * varying_matches::record() will resize the array if there are more than + * this number of varyings. + */ + this->matches_capacity = 8; + this->matches = (match *) + malloc(sizeof(*this->matches) * this->matches_capacity); + this->num_matches = 0; +} + + +varying_matches::~varying_matches() +{ + free(this->matches); +} + + +/** + * Record the given producer/consumer variable pair in the list of variables + * that should later be assigned locations. 
+ *
+ * It is permissible for \c consumer_var to be NULL (this happens if a
+ * variable is output by the producer and consumed by transform feedback, but
+ * not consumed by the consumer).
+ *
+ * If \c producer_var has already been paired up with a consumer_var, or
+ * producer_var is part of fixed pipeline functionality (and hence already has
+ * a location assigned), this function has no effect.
+ *
+ * Note: as a side effect this function may change the interpolation type of
+ * \c producer_var, but only when the change couldn't possibly affect
+ * rendering.
+ */
+void
+varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
+{
+   assert(producer_var != NULL || consumer_var != NULL);
+
+   if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
+       producer_var->data.explicit_location)) ||
+       (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
+       consumer_var->data.explicit_location))) {
+      /* Either a location already exists for this variable (since it is part
+       * of fixed functionality), or it has already been recorded as part of a
+       * previous match.
+       */
+      return;
+   }
+
+   if ((consumer_var == NULL && producer_var->type->contains_integer()) ||
+       (consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) {
+      /* Since this varying is not being consumed by the fragment shader, its
+       * interpolation type cannot possibly affect rendering.
+       * Also, this variable is non-flat and is (or contains) an integer.
+       * If the consumer stage is unknown, don't modify the interpolation
+       * type as it could affect rendering later with separate shaders.
+       *
+       * lower_packed_varyings requires all integer varyings to be flat,
+       * regardless of where they appear.  We can trivially satisfy that
+       * requirement by changing the interpolation type to flat here.
+       */
+      if (producer_var) {
+         producer_var->data.centroid = false;
+         producer_var->data.sample = false;
+         producer_var->data.interpolation = INTERP_QUALIFIER_FLAT;
+      }
+
+      if (consumer_var) {
+         consumer_var->data.centroid = false;
+         consumer_var->data.sample = false;
+         consumer_var->data.interpolation = INTERP_QUALIFIER_FLAT;
+      }
+   }
+
+   if (this->num_matches == this->matches_capacity) {
+      this->matches_capacity *= 2;
+      this->matches = (match *)
+         realloc(this->matches,
+                 sizeof(*this->matches) * this->matches_capacity);
+   }
+
+   const ir_variable *const var = (producer_var != NULL)
+      ? producer_var : consumer_var;
+   const gl_shader_stage stage = (producer_var != NULL)
+      ? producer_stage : consumer_stage;
+   const glsl_type *type = get_varying_type(var, stage);
+
+   this->matches[this->num_matches].packing_class
+      = this->compute_packing_class(var);
+   this->matches[this->num_matches].packing_order
+      = this->compute_packing_order(var);
+   if (this->disable_varying_packing) {
+      unsigned slots = type->count_attribute_slots(false);
+      this->matches[this->num_matches].num_components = slots * 4;
+   } else {
+      this->matches[this->num_matches].num_components
+         = type->component_slots();
+   }
+   this->matches[this->num_matches].producer_var = producer_var;
+   this->matches[this->num_matches].consumer_var = consumer_var;
+   this->num_matches++;
+   if (producer_var)
+      producer_var->data.is_unmatched_generic_inout = 0;
+   if (consumer_var)
+      consumer_var->data.is_unmatched_generic_inout = 0;
+}
+
+
+/**
+ * Choose locations for all of the variable matches that were previously
+ * passed to varying_matches::record().
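+ *
+ * As the final return statement below shows, the value returned is the
+ * number of vec4 slots occupied by generic varyings, i.e.
+ * (generic_location + 3) / 4.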
+ */
+unsigned
+varying_matches::assign_locations(struct gl_shader_program *prog,
+                                  uint64_t reserved_slots,
+                                  bool separate_shader)
+{
+   /* We disable varying sorting for separate shader programs for the
+    * following reasons:
+    *
+    * 1/ All programs must sort the code in the same order to guarantee the
+    *    interface matching. However varying_matches::record() will change the
+    *    interpolation qualifier of some stages.
+    *
+    * 2/ GLSL version 4.50 removes the matching constraint on the
+    *    interpolation qualifier.
+    *
+    *    From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec:
+    *
+    *      "The type and presence of interpolation qualifiers of variables with
+    *      the same name declared in all linked shaders for the same cross-stage
+    *      interface must match, otherwise the link command will fail.
+    *
+    *      When comparing an output from one stage to an input of a subsequent
+    *      stage, the input and output don't match if their interpolation
+    *      qualifiers (or lack thereof) are not the same."
+    *
+    *      "It is a link-time error if, within the same stage, the interpolation
+    *      qualifiers of variables of the same name do not match."
+    */
+   if (!separate_shader) {
+      /* Sort varying matches into an order that makes them easy to pack. */
+      qsort(this->matches, this->num_matches, sizeof(*this->matches),
+            &varying_matches::match_comparator);
+   }
+
+   unsigned generic_location = 0;
+   unsigned generic_patch_location = MAX_VARYING*4;
+
+   for (unsigned i = 0; i < this->num_matches; i++) {
+      unsigned *location = &generic_location;
+
+      const ir_variable *var;
+      const glsl_type *type;
+      bool is_vertex_input = false;
+      if (matches[i].consumer_var) {
+         var = matches[i].consumer_var;
+         type = get_varying_type(var, consumer_stage);
+         if (consumer_stage == MESA_SHADER_VERTEX)
+            is_vertex_input = true;
+      } else {
+         var = matches[i].producer_var;
+         type = get_varying_type(var, producer_stage);
+      }
+
+      if (var->data.patch)
+         location = &generic_patch_location;
+
+      /* Advance to the next slot if this varying has a different packing
+       * class than the previous one, and we're not already on a slot
+       * boundary.
+       */
+      if (i > 0 &&
+          this->matches[i - 1].packing_class
+          != this->matches[i].packing_class) {
+         *location = ALIGN(*location, 4);
+      }
+
+      unsigned num_elements = type->count_attribute_slots(is_vertex_input);
+      unsigned slot_end = this->disable_varying_packing ? 4 :
+         type->without_array()->vector_elements;
+      slot_end += *location - 1;
+
+      /* FIXME: We could be smarter in the below code and loop back over
+       * trying to fill any locations that we skipped because we couldn't pack
+       * the varying between an explicit location. For now just let the user
+       * hit the linking error if we run out of room and suggest they use
+       * explicit locations.
+       */
+      for (unsigned j = 0; j < num_elements; j++) {
+         while ((slot_end < MAX_VARYING * 4u) &&
+                ((reserved_slots & (UINT64_C(1) << *location / 4u) ||
+                  (reserved_slots & (UINT64_C(1) << slot_end / 4u))))) {
+
+            *location = ALIGN(*location + 1, 4);
+            slot_end = *location;
+
+            /* reset the counter and try again */
+            j = 0;
+         }
+
+         /* Increase the slot to make sure there is enough room for next
+          * array element.
+          */
+         if (this->disable_varying_packing)
+            slot_end += 4;
+         else
+            slot_end += type->without_array()->vector_elements;
+      }
+
+      if (!var->data.patch && *location >= MAX_VARYING * 4u) {
+         linker_error(prog, "insufficient contiguous locations available for "
+                      "%s; it is possible an array or struct could not be "
+                      "packed between varyings with explicit locations. Try "
+                      "using an explicit location for arrays and structs.",
+                      var->name);
+      }
+
+      this->matches[i].generic_location = *location;
+
+      *location += this->matches[i].num_components;
+   }
+
+   return (generic_location + 3) / 4;
+}
+
+
+/**
+ * Update the producer and consumer shaders to reflect the locations
+ * assignments that were made by varying_matches::assign_locations().
+ */
+void
+varying_matches::store_locations() const
+{
+   for (unsigned i = 0; i < this->num_matches; i++) {
+      ir_variable *producer_var = this->matches[i].producer_var;
+      ir_variable *consumer_var = this->matches[i].consumer_var;
+      unsigned generic_location = this->matches[i].generic_location;
+      unsigned slot = generic_location / 4;
+      unsigned offset = generic_location % 4;
+
+      if (producer_var) {
+         producer_var->data.location = VARYING_SLOT_VAR0 + slot;
+         producer_var->data.location_frac = offset;
+      }
+
+      if (consumer_var) {
+         assert(consumer_var->data.location == -1);
+         consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
+         consumer_var->data.location_frac = offset;
+      }
+   }
+}
+
+
+/**
+ * Compute the "packing class" of the given varying.  This is an unsigned
+ * integer with the property that two variables in the same packing class can
+ * be safely packed into the same vec4.
+ */
+unsigned
+varying_matches::compute_packing_class(const ir_variable *var)
+{
+   /* Without help from the back-end, there is no way to pack together
+    * variables with different interpolation types, because
+    * lower_packed_varyings must choose exactly one interpolation type for
+    * each packed varying it creates.
+    *
+    * However, we can safely pack together floats, ints, and uints, because:
+    *
+    * - varyings of base type "int" and "uint" must use the "flat"
+    *   interpolation type, which can only occur in GLSL 1.30 and above.
+    *
+    * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
+    *   can store flat floats as ints without losing any information (using
+    *   the ir_unop_bitcast_* opcodes).
+    *
+    * Therefore, the packing class depends only on the interpolation type.
+    */
+   unsigned packing_class = var->data.centroid | (var->data.sample << 1) |
+                            (var->data.patch << 2);
+   packing_class *= 4;
+   packing_class += var->data.interpolation;
+   return packing_class;
+}
+
+
+/**
+ * Compute the "packing order" of the given varying.  This is a sort key we
+ * use to determine when to attempt to pack the given varying relative to
+ * other varyings in the same packing class.
+ */
+varying_matches::packing_order_enum
+varying_matches::compute_packing_order(const ir_variable *var)
+{
+   const glsl_type *element_type = var->type;
+
+   while (element_type->base_type == GLSL_TYPE_ARRAY) {
+      element_type = element_type->fields.array;
+   }
+
+   switch (element_type->component_slots() % 4) {
+   case 1: return PACKING_ORDER_SCALAR;
+   case 2: return PACKING_ORDER_VEC2;
+   case 3: return PACKING_ORDER_VEC3;
+   case 0: return PACKING_ORDER_VEC4;
+   default:
+      assert(!"Unexpected value of vector_elements");
+      return PACKING_ORDER_VEC4;
+   }
+}
+
+
+/**
+ * Comparison function passed to qsort() to sort varyings by packing_class and
+ * then by packing_order.
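+ *
+ * For example, two hypothetical matches with the same packing_class but
+ * packing orders PACKING_ORDER_VEC2 and PACKING_ORDER_VEC4 sort with the
+ * vec4 entry first, since PACKING_ORDER_VEC4 compares lower.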
+ */ +int +varying_matches::match_comparator(const void *x_generic, const void *y_generic) +{ + const match *x = (const match *) x_generic; + const match *y = (const match *) y_generic; + + if (x->packing_class != y->packing_class) + return x->packing_class - y->packing_class; + return x->packing_order - y->packing_order; +} + + +/** + * Is the given variable a varying variable to be counted against the + * limit in ctx->Const.MaxVarying? + * This includes variables such as texcoords, colors and generic + * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord. + */ +static bool +var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var) +{ + /* Only fragment shaders will take a varying variable as an input */ + if (stage == MESA_SHADER_FRAGMENT && + var->data.mode == ir_var_shader_in) { + switch (var->data.location) { + case VARYING_SLOT_POS: + case VARYING_SLOT_FACE: + case VARYING_SLOT_PNTC: + return false; + default: + return true; + } + } + return false; +} + + +/** + * Visitor class that generates tfeedback_candidate structs describing all + * possible targets of transform feedback. + * + * tfeedback_candidate structs are stored in the hash table + * tfeedback_candidates, which is passed to the constructor. This hash table + * maps varying names to instances of the tfeedback_candidate struct. + */ +class tfeedback_candidate_generator : public program_resource_visitor +{ +public: + tfeedback_candidate_generator(void *mem_ctx, + hash_table *tfeedback_candidates) + : mem_ctx(mem_ctx), + tfeedback_candidates(tfeedback_candidates), + toplevel_var(NULL), + varying_floats(0) + { + } + + void process(ir_variable *var) + { + /* All named varying interface blocks should be flattened by now */ + assert(!var->is_interface_instance()); + + this->toplevel_var = var; + this->varying_floats = 0; + program_resource_visitor::process(var); + } + +private: + virtual void visit_field(const glsl_type *type, const char *name, + bool row_major) + { + assert(!type->without_array()->is_record()); + assert(!type->without_array()->is_interface()); + + (void) row_major; + + tfeedback_candidate *candidate + = rzalloc(this->mem_ctx, tfeedback_candidate); + candidate->toplevel_var = this->toplevel_var; + candidate->type = type; + candidate->offset = this->varying_floats; + hash_table_insert(this->tfeedback_candidates, candidate, + ralloc_strdup(this->mem_ctx, name)); + this->varying_floats += type->component_slots(); + } + + /** + * Memory context used to allocate hash table keys and values. + */ + void * const mem_ctx; + + /** + * Hash table in which tfeedback_candidate objects should be stored. + */ + hash_table * const tfeedback_candidates; + + /** + * Pointer to the toplevel variable that is being traversed. + */ + ir_variable *toplevel_var; + + /** + * Total number of varying floats that have been visited so far. This is + * used to determine the offset to each varying within the toplevel + * variable. 
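+    *
+    * For example, with a hypothetical
+    * "varying struct { vec4 foo; float bar[3]; } v;", visit_field() would
+    * record "v.foo" at offset 0 and "v.bar" at offset 4, since a vec4
+    * occupies four floats.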
+ */ + unsigned varying_floats; +}; + + +namespace linker { + +bool +populate_consumer_input_sets(void *mem_ctx, exec_list *ir, + hash_table *consumer_inputs, + hash_table *consumer_interface_inputs, + ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) +{ + memset(consumer_inputs_with_locations, + 0, + sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX); + + foreach_in_list(ir_instruction, node, ir) { + ir_variable *const input_var = node->as_variable(); + + if ((input_var != NULL) && (input_var->data.mode == ir_var_shader_in)) { + if (input_var->type->is_interface()) + return false; + + if (input_var->data.explicit_location) { + /* assign_varying_locations only cares about finding the + * ir_variable at the start of a contiguous location block. + * + * - For !producer, consumer_inputs_with_locations isn't used. + * + * - For !consumer, consumer_inputs_with_locations is empty. + * + * For consumer && producer, if you were trying to set some + * ir_variable to the middle of a location block on the other side + * of producer/consumer, cross_validate_outputs_to_inputs() should + * be link-erroring due to either type mismatch or location + * overlaps. If the variables do match up, then they've got a + * matching data.location and you only looked at + * consumer_inputs_with_locations[var->data.location], not any + * following entries for the array/structure. + */ + consumer_inputs_with_locations[input_var->data.location] = + input_var; + } else if (input_var->get_interface_type() != NULL) { + char *const iface_field_name = + ralloc_asprintf(mem_ctx, "%s.%s", + input_var->get_interface_type()->name, + input_var->name); + hash_table_insert(consumer_interface_inputs, input_var, + iface_field_name); + } else { + hash_table_insert(consumer_inputs, input_var, + ralloc_strdup(mem_ctx, input_var->name)); + } + } + } + + return true; +} + +/** + * Find a variable from the consumer that "matches" the specified variable + * + * This function only finds inputs with names that match. There is no + * validation (here) that the types, etc. are compatible. + */ +ir_variable * +get_matching_input(void *mem_ctx, + const ir_variable *output_var, + hash_table *consumer_inputs, + hash_table *consumer_interface_inputs, + ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) +{ + ir_variable *input_var; + + if (output_var->data.explicit_location) { + input_var = consumer_inputs_with_locations[output_var->data.location]; + } else if (output_var->get_interface_type() != NULL) { + char *const iface_field_name = + ralloc_asprintf(mem_ctx, "%s.%s", + output_var->get_interface_type()->name, + output_var->name); + input_var = + (ir_variable *) hash_table_find(consumer_interface_inputs, + iface_field_name); + } else { + input_var = + (ir_variable *) hash_table_find(consumer_inputs, output_var->name); + } + + return (input_var == NULL || input_var->data.mode != ir_var_shader_in) + ? 
NULL : input_var;
+}
+
+}
+
+static int
+io_variable_cmp(const void *_a, const void *_b)
+{
+   const ir_variable *const a = *(const ir_variable **) _a;
+   const ir_variable *const b = *(const ir_variable **) _b;
+
+   if (a->data.explicit_location && b->data.explicit_location)
+      return b->data.location - a->data.location;
+
+   if (a->data.explicit_location && !b->data.explicit_location)
+      return 1;
+
+   if (!a->data.explicit_location && b->data.explicit_location)
+      return -1;
+
+   return -strcmp(a->name, b->name);
+}
+
+/**
+ * Sort the shader IO variables into canonical order
+ */
+static void
+canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
+{
+   ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
+   unsigned num_variables = 0;
+
+   foreach_in_list(ir_instruction, node, ir) {
+      ir_variable *const var = node->as_variable();
+
+      if (var == NULL || var->data.mode != io_mode)
+         continue;
+
+      /* If we have already encountered more I/O variables than could
+       * successfully link, bail.
+       */
+      if (num_variables == ARRAY_SIZE(var_table))
+         return;
+
+      var_table[num_variables++] = var;
+   }
+
+   if (num_variables == 0)
+      return;
+
+   /* Sort the list in reverse order (io_variable_cmp handles this).  Later
+    * we're going to push the variables on to the IR list as a stack, so we
+    * want the last variable (in canonical order) to be first in the list.
+    */
+   qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
+
+   /* Remove the variable from its current location in the IR, and put it at
+    * the front.
+    */
+   for (unsigned i = 0; i < num_variables; i++) {
+      var_table[i]->remove();
+      ir->push_head(var_table[i]);
+   }
+}
+
+/**
+ * Generate a bitfield map of the explicit locations for shader varyings.
+ *
+ * In theory a 32-bit value would be enough, but a 64-bit value is
+ * future-proof.
+ */
+uint64_t
+reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode)
+{
+   assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
+   assert(MAX_VARYING <= 64); /* avoid an overflow of the returned value */
+
+   uint64_t slots = 0;
+   int var_slot;
+
+   if (!stage)
+      return slots;
+
+   foreach_in_list(ir_instruction, node, stage->ir) {
+      ir_variable *const var = node->as_variable();
+
+      if (var == NULL || var->data.mode != io_mode ||
+          !var->data.explicit_location ||
+          var->data.location < VARYING_SLOT_VAR0)
+         continue;
+
+      var_slot = var->data.location - VARYING_SLOT_VAR0;
+
+      unsigned num_elements = get_varying_type(var, stage->Stage)
+         ->count_attribute_slots(stage->Stage == MESA_SHADER_VERTEX);
+      for (unsigned i = 0; i < num_elements; i++) {
+         if (var_slot >= 0 && var_slot < MAX_VARYING)
+            slots |= UINT64_C(1) << var_slot;
+         var_slot += 1;
+      }
+   }
+
+   return slots;
+}
+
+
+/**
+ * Assign locations for all variables that are produced in one pipeline stage
+ * (the "producer") and consumed in the next stage (the "consumer").
+ *
+ * Variables produced by the producer may also be consumed by transform
+ * feedback.
+ *
+ * \param num_tfeedback_decls is the number of declarations indicating
+ *        variables that may be consumed by transform feedback.
+ *
+ * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
+ *        representing the result of parsing the strings passed to
+ *        glTransformFeedbackVaryings().  assign_location() will be called for
+ *        each of these objects that matches one of the outputs of the
+ *        producer.
+ *
+ * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
+ * be NULL.
In this case, varying locations are assigned solely based on the + * requirements of transform feedback. + */ +bool +assign_varying_locations(struct gl_context *ctx, + void *mem_ctx, + struct gl_shader_program *prog, + gl_shader *producer, gl_shader *consumer, + unsigned num_tfeedback_decls, + tfeedback_decl *tfeedback_decls) +{ + if (ctx->Const.DisableVaryingPacking) { + /* Transform feedback code assumes varyings are packed, so if the driver + * has disabled varying packing, make sure it does not support transform + * feedback. + */ + assert(!ctx->Extensions.EXT_transform_feedback); + } + + /* Tessellation shaders treat inputs and outputs as shared memory and can + * access inputs and outputs of other invocations. + * Therefore, they can't be lowered to temps easily (and definitely not + * efficiently). + */ + bool disable_varying_packing = + ctx->Const.DisableVaryingPacking || + (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || + (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || + (producer && producer->Stage == MESA_SHADER_TESS_CTRL); + + varying_matches matches(disable_varying_packing, + producer ? producer->Stage : (gl_shader_stage)-1, + consumer ? consumer->Stage : (gl_shader_stage)-1); + hash_table *tfeedback_candidates + = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); + hash_table *consumer_inputs + = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); + hash_table *consumer_interface_inputs + = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); + ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = { + NULL, + }; + + unsigned consumer_vertices = 0; + if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY) + consumer_vertices = prog->Geom.VerticesIn; + + /* Operate in a total of four passes. + * + * 1. Sort inputs / outputs into a canonical order. This is necessary so + * that inputs / outputs of separable shaders will be assigned + * predictable locations regardless of the order in which declarations + * appeared in the shader source. + * + * 2. Assign locations for any matching inputs and outputs. + * + * 3. Mark output variables in the producer that do not have locations as + * not being outputs. This lets the optimizer eliminate them. + * + * 4. Mark input variables in the consumer that do not have locations as + * not being inputs. This lets the optimizer eliminate them. 
+ */
+   if (consumer)
+      canonicalize_shader_io(consumer->ir, ir_var_shader_in);
+
+   if (producer)
+      canonicalize_shader_io(producer->ir, ir_var_shader_out);
+
+   if (consumer
+       && !linker::populate_consumer_input_sets(mem_ctx,
+                                                consumer->ir,
+                                                consumer_inputs,
+                                                consumer_interface_inputs,
+                                                consumer_inputs_with_locations)) {
+      assert(!"populate_consumer_input_sets failed");
+      hash_table_dtor(tfeedback_candidates);
+      hash_table_dtor(consumer_inputs);
+      hash_table_dtor(consumer_interface_inputs);
+      return false;
+   }
+
+   if (producer) {
+      foreach_in_list(ir_instruction, node, producer->ir) {
+         ir_variable *const output_var = node->as_variable();
+
+         if ((output_var == NULL) ||
+             (output_var->data.mode != ir_var_shader_out))
+            continue;
+
+         /* Only geometry shaders can use non-zero streams */
+         assert(output_var->data.stream == 0 ||
+                (output_var->data.stream < MAX_VERTEX_STREAMS &&
+                 producer->Stage == MESA_SHADER_GEOMETRY));
+
+         tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
+         g.process(output_var);
+
+         ir_variable *const input_var =
+            linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
+                                       consumer_interface_inputs,
+                                       consumer_inputs_with_locations);
+
+         /* If a matching input variable was found, add this output (and the
+          * input) to the set.  If this is a separable program and there is no
+          * consumer stage, add the output.
+          *
+          * Always add TCS outputs. They are shared by all invocations
+          * within a patch and can be used as shared memory.
+          */
+         if (input_var || (prog->SeparateShader && consumer == NULL) ||
+             producer->Type == GL_TESS_CONTROL_SHADER) {
+            matches.record(output_var, input_var);
+         }
+
+         /* Only stream 0 outputs can be consumed in the next stage */
+         if (input_var && output_var->data.stream != 0) {
+            linker_error(prog, "output %s is assigned to stream=%d but "
+                         "is linked to an input, which requires stream=0",
+                         output_var->name, output_var->data.stream);
+            return false;
+         }
+      }
+   } else {
+      /* If there's no producer stage, then this must be a separable program.
+       * For example, we may have a program that has just a fragment shader.
+       * Later this program will be used with some arbitrary vertex (or
+       * geometry) shader program.  This means that locations must be assigned
+       * for all the inputs.
+       */
+      foreach_in_list(ir_instruction, node, consumer->ir) {
+         ir_variable *const input_var = node->as_variable();
+
+         if ((input_var == NULL) ||
+             (input_var->data.mode != ir_var_shader_in))
+            continue;
+
+         matches.record(NULL, input_var);
+      }
+   }
+
+   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
+      if (!tfeedback_decls[i].is_varying())
+         continue;
+
+      const tfeedback_candidate *matched_candidate
+         = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
+
+      if (matched_candidate == NULL) {
+         hash_table_dtor(tfeedback_candidates);
+         hash_table_dtor(consumer_inputs);
+         hash_table_dtor(consumer_interface_inputs);
+         return false;
+      }
+
+      if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout)
+         matches.record(matched_candidate->toplevel_var, NULL);
+   }
+
+   const uint64_t reserved_slots =
+      reserved_varying_slot(producer, ir_var_shader_out) |
+      reserved_varying_slot(consumer, ir_var_shader_in);
+
+   const unsigned slots_used = matches.assign_locations(prog, reserved_slots,
+                                                        prog->SeparateShader);
+   matches.store_locations();
+
+   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
+      if (!tfeedback_decls[i].is_varying())
+         continue;
+
+      if (!tfeedback_decls[i].assign_location(ctx, prog)) {
+         hash_table_dtor(tfeedback_candidates);
+         hash_table_dtor(consumer_inputs);
+         hash_table_dtor(consumer_interface_inputs);
+         return false;
+      }
+   }
+
+   hash_table_dtor(tfeedback_candidates);
+   hash_table_dtor(consumer_inputs);
+   hash_table_dtor(consumer_interface_inputs);
+
+   if (consumer && producer) {
+      foreach_in_list(ir_instruction, node, consumer->ir) {
+         ir_variable *const var = node->as_variable();
+
+         if (var && var->data.mode == ir_var_shader_in &&
+             var->data.is_unmatched_generic_inout) {
+            if (prog->IsES) {
+               /*
+                * On Page 91 (Page 97 of the PDF) of the GLSL ES 1.0 spec:
+                *
+                *     If the vertex shader declares but doesn't write to a
+                *     varying and the fragment shader declares and reads it,
+                *     is this an error?
+                *
+                *     RESOLUTION: No.
+                */
+               linker_warning(prog, "%s shader varying %s not written "
+                              "by %s shader.\n",
+                              _mesa_shader_stage_to_string(consumer->Stage),
+                              var->name,
+                              _mesa_shader_stage_to_string(producer->Stage));
+            } else if (prog->Version <= 120) {
+               /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
+                *
+                *     Only those varying variables used (i.e. read) in
+                *     the fragment shader executable must be written to
+                *     by the vertex shader executable; declaring
+                *     superfluous varying variables in a vertex shader is
+                *     permissible.
+                *
+                * We interpret this text as meaning that the VS must
+                * write the variable for the FS to read it.  See
+                * "glsl1-varying read but not written" in piglit.
+                */
+               linker_error(prog, "%s shader varying %s not written "
+                            "by %s shader.\n",
+                            _mesa_shader_stage_to_string(consumer->Stage),
+                            var->name,
+                            _mesa_shader_stage_to_string(producer->Stage));
+            }
+         }
+      }
+
+      /* Now that validation is done it's safe to remove unused varyings.  As
+       * we have both a producer and consumer it's safe to remove unused
+       * varyings even if the program is a SSO because the stages are being
+       * linked together, i.e. we have a multi-stage SSO.
+ */ + remove_unused_shader_inputs_and_outputs(false, producer, + ir_var_shader_out); + remove_unused_shader_inputs_and_outputs(false, consumer, + ir_var_shader_in); + } + + if (!disable_varying_packing) { + if (producer) { + lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, + 0, producer); + } + if (consumer) { + lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, + consumer_vertices, consumer); + } + } + + return true; +} + +bool +check_against_output_limit(struct gl_context *ctx, + struct gl_shader_program *prog, + gl_shader *producer) +{ + unsigned output_vectors = 0; + + foreach_in_list(ir_instruction, node, producer->ir) { + ir_variable *const var = node->as_variable(); + + if (var && var->data.mode == ir_var_shader_out && + var_counts_against_varying_limit(producer->Stage, var)) { + /* outputs for fragment shader can't be doubles */ + output_vectors += var->type->count_attribute_slots(false); + } + } + + assert(producer->Stage != MESA_SHADER_FRAGMENT); + unsigned max_output_components = + ctx->Const.Program[producer->Stage].MaxOutputComponents; + + const unsigned output_components = output_vectors * 4; + if (output_components > max_output_components) { + if (ctx->API == API_OPENGLES2 || prog->IsES) + linker_error(prog, "%s shader uses too many output vectors " + "(%u > %u)\n", + _mesa_shader_stage_to_string(producer->Stage), + output_vectors, + max_output_components / 4); + else + linker_error(prog, "%s shader uses too many output components " + "(%u > %u)\n", + _mesa_shader_stage_to_string(producer->Stage), + output_components, + max_output_components); + + return false; + } + + return true; +} + +bool +check_against_input_limit(struct gl_context *ctx, + struct gl_shader_program *prog, + gl_shader *consumer) +{ + unsigned input_vectors = 0; + + foreach_in_list(ir_instruction, node, consumer->ir) { + ir_variable *const var = node->as_variable(); + + if (var && var->data.mode == ir_var_shader_in && + var_counts_against_varying_limit(consumer->Stage, var)) { + /* vertex inputs aren't varying counted */ + input_vectors += var->type->count_attribute_slots(false); + } + } + + assert(consumer->Stage != MESA_SHADER_VERTEX); + unsigned max_input_components = + ctx->Const.Program[consumer->Stage].MaxInputComponents; + + const unsigned input_components = input_vectors * 4; + if (input_components > max_input_components) { + if (ctx->API == API_OPENGLES2 || prog->IsES) + linker_error(prog, "%s shader uses too many input vectors " + "(%u > %u)\n", + _mesa_shader_stage_to_string(consumer->Stage), + input_vectors, + max_input_components / 4); + else + linker_error(prog, "%s shader uses too many input components " + "(%u > %u)\n", + _mesa_shader_stage_to_string(consumer->Stage), + input_components, + max_input_components); + + return false; + } + + return true; +} diff --git a/src/compiler/glsl/link_varyings.h b/src/compiler/glsl/link_varyings.h new file mode 100644 index 00000000000..b2812614ecc --- /dev/null +++ b/src/compiler/glsl/link_varyings.h @@ -0,0 +1,299 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The 
above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once +#ifndef GLSL_LINK_VARYINGS_H +#define GLSL_LINK_VARYINGS_H + +/** + * \file link_varyings.h + * + * Linker functions related specifically to linking varyings between shader + * stages. + */ + + +#include "main/glheader.h" + + +struct gl_shader_program; +struct gl_shader; +class ir_variable; + + +/** + * Data structure describing a varying which is available for use in transform + * feedback. + * + * For example, if the vertex shader contains: + * + * struct S { + * vec4 foo; + * float[3] bar; + * }; + * + * varying S[2] v; + * + * Then there would be tfeedback_candidate objects corresponding to the + * following varyings: + * + * v[0].foo + * v[0].bar + * v[1].foo + * v[1].bar + */ +struct tfeedback_candidate +{ + /** + * Toplevel variable containing this varying. In the above example, this + * would point to the declaration of the varying v. + */ + ir_variable *toplevel_var; + + /** + * Type of this varying. In the above example, this would point to the + * glsl_type for "vec4" or "float[3]". + */ + const glsl_type *type; + + /** + * Offset within the toplevel variable where this varying occurs (counted + * in multiples of the size of a float). + */ + unsigned offset; +}; + + +/** + * Data structure tracking information about a transform feedback declaration + * during linking. + */ +class tfeedback_decl +{ +public: + void init(struct gl_context *ctx, const void *mem_ctx, const char *input); + static bool is_same(const tfeedback_decl &x, const tfeedback_decl &y); + bool assign_location(struct gl_context *ctx, + struct gl_shader_program *prog); + unsigned get_num_outputs() const; + bool store(struct gl_context *ctx, struct gl_shader_program *prog, + struct gl_transform_feedback_info *info, unsigned buffer, + const unsigned max_outputs) const; + const tfeedback_candidate *find_candidate(gl_shader_program *prog, + hash_table *tfeedback_candidates); + + bool is_next_buffer_separator() const + { + return this->next_buffer_separator; + } + + bool is_varying() const + { + return !this->next_buffer_separator && !this->skip_components; + } + + const char *name() const + { + return this->orig_name; + } + + unsigned get_stream_id() const + { + return this->stream_id; + } + + /** + * The total number of varying components taken up by this variable. Only + * valid if assign_location() has been called. + */ + unsigned num_components() const + { + if (this->lowered_builtin_array_variable) + return this->size; + else + return this->vector_elements * this->matrix_columns * this->size * + (this->is_double() ? 
2 : 1);
+   }
+
+   unsigned get_location() const {
+      return this->location;
+   }
+
+private:
+
+   bool is_double() const
+   {
+      switch (this->type) {
+      case GL_DOUBLE:
+      case GL_DOUBLE_VEC2:
+      case GL_DOUBLE_VEC3:
+      case GL_DOUBLE_VEC4:
+      case GL_DOUBLE_MAT2:
+      case GL_DOUBLE_MAT2x3:
+      case GL_DOUBLE_MAT2x4:
+      case GL_DOUBLE_MAT3:
+      case GL_DOUBLE_MAT3x2:
+      case GL_DOUBLE_MAT3x4:
+      case GL_DOUBLE_MAT4:
+      case GL_DOUBLE_MAT4x2:
+      case GL_DOUBLE_MAT4x3:
+         return true;
+      default:
+         return false;
+      }
+   }
+
+   /**
+    * The name that was supplied to glTransformFeedbackVaryings.  Used for
+    * error reporting and glGetTransformFeedbackVarying().
+    */
+   const char *orig_name;
+
+   /**
+    * The name of the variable, parsed from orig_name.
+    */
+   const char *var_name;
+
+   /**
+    * True if the declaration in orig_name represents an array.
+    */
+   bool is_subscripted;
+
+   /**
+    * If is_subscripted is true, the subscript that was specified in orig_name.
+    */
+   unsigned array_subscript;
+
+   /**
+    * Non-zero if the variable is gl_ClipDistance, gl_TessLevelOuter or
+    * gl_TessLevelInner and the driver lowers it to gl_*MESA.
+    */
+   enum {
+      none,
+      clip_distance,
+      tess_level_outer,
+      tess_level_inner,
+   } lowered_builtin_array_variable;
+
+   /**
+    * The vertex shader output location that the linker assigned for this
+    * variable.  -1 if a location hasn't been assigned yet.
+    */
+   int location;
+
+   /**
+    * If non-zero, then this variable may be packed along with other variables
+    * into a single varying slot, so this offset should be applied when
+    * accessing components.  For example, an offset of 1 means that the x
+    * component of this variable is actually stored in component y of the
+    * location specified by \c location.
+    *
+    * Only valid if location != -1.
+    */
+   unsigned location_frac;
+
+   /**
+    * If location != -1, the number of vector elements in this variable, or 1
+    * if this variable is a scalar.
+    */
+   unsigned vector_elements;
+
+   /**
+    * If location != -1, the number of matrix columns in this variable, or 1
+    * if this variable is not a matrix.
+    */
+   unsigned matrix_columns;
+
+   /** Type of the varying returned by glGetTransformFeedbackVarying() */
+   GLenum type;
+
+   /**
+    * If location != -1, the size that should be returned by
+    * glGetTransformFeedbackVarying().
+    */
+   unsigned size;
+
+   /**
+    * How many components to skip.  If non-zero, this is
+    * gl_SkipComponents{1,2,3,4} from ARB_transform_feedback3.
+    */
+   unsigned skip_components;
+
+   /**
+    * Whether this is gl_NextBuffer from ARB_transform_feedback3.
+    */
+   bool next_buffer_separator;
+
+   /**
+    * If find_candidate() has been called, pointer to the tfeedback_candidate
+    * data structure that was found.  Otherwise NULL.
+    */
+   const tfeedback_candidate *matched_candidate;
+
+   /**
+    * StreamId assigned to this varying (defaults to 0).  Can only be set to
+    * values other than 0 in geometry shaders that use the stream layout
+    * modifier.  Accepted values must be in the range [0, MAX_VERTEX_STREAMS-1].
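+    *
+    * For example (illustrative GLSL; the variable name is made up), a
+    * geometry shader output declared as
+    *
+    *    layout(stream = 1) out vec4 secondary_color;
+    *
+    * would be recorded here with stream_id == 1.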
+    */
+   unsigned stream_id;
+};
+
+
+void
+cross_validate_outputs_to_inputs(struct gl_shader_program *prog,
+                                 gl_shader *producer, gl_shader *consumer);
+
+bool
+parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
+                      const void *mem_ctx, unsigned num_names,
+                      char **varying_names, tfeedback_decl *decls);
+
+void
+remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
+                                        gl_shader *sh,
+                                        enum ir_variable_mode mode);
+
+bool
+store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
+                     unsigned num_tfeedback_decls,
+                     tfeedback_decl *tfeedback_decls);
+
+bool
+assign_varying_locations(struct gl_context *ctx,
+                         void *mem_ctx,
+                         struct gl_shader_program *prog,
+                         gl_shader *producer, gl_shader *consumer,
+                         unsigned num_tfeedback_decls,
+                         tfeedback_decl *tfeedback_decls);
+
+bool
+check_against_output_limit(struct gl_context *ctx,
+                           struct gl_shader_program *prog,
+                           gl_shader *producer);
+
+bool
+check_against_input_limit(struct gl_context *ctx,
+                          struct gl_shader_program *prog,
+                          gl_shader *consumer);
+
+#endif /* GLSL_LINK_VARYINGS_H */
diff --git a/src/compiler/glsl/linker.cpp b/src/compiler/glsl/linker.cpp
new file mode 100644
index 00000000000..6657777d74c
--- /dev/null
+++ b/src/compiler/glsl/linker.cpp
@@ -0,0 +1,4676 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file linker.cpp
+ * GLSL linker implementation
+ *
+ * Given a set of shaders that are to be linked to generate a final program,
+ * there are three distinct stages.
+ *
+ * In the first stage shaders are partitioned into groups based on the shader
+ * type.  All shaders of a particular type (e.g., vertex shaders) are linked
+ * together.
+ *
+ * - Undefined references in each shader are resolved to definitions in
+ *   another shader.
+ * - Types and qualifiers of uniforms, outputs, and global variables defined
+ *   in multiple shaders with the same name are verified to be the same.
+ * - Initializers for uniforms and global variables defined
+ *   in multiple shaders with the same name are verified to be the same.
+ *
+ * The result, in the terminology of the GLSL spec, is a set of shader
+ * executables for each processing unit.
+ *
+ * After the first stage is complete, a series of semantic checks is performed
+ * on each of the shader executables.
+ *
+ * - Each shader executable must define a \c main function.
+ * - Each vertex shader executable must write to \c gl_Position.
+ * - Each fragment shader executable must write to either \c gl_FragData or
+ *   \c gl_FragColor.
+ *
+ * In the final stage individual shader executables are linked to create a
+ * complete executable.
+ *
+ * - Types of uniforms defined in multiple shader stages with the same name
+ *   are verified to be the same.
+ * - Initializers for uniforms defined in multiple shader stages with the
+ *   same name are verified to be the same.
+ * - Types and qualifiers of outputs defined in one stage are verified to
+ *   be the same as the types and qualifiers of inputs defined with the same
+ *   name in a later stage.
+ *
+ * \author Ian Romanick
+ */
+
+#include <ctype.h>
+#include "util/strndup.h"
+#include "main/core.h"
+#include "glsl_symbol_table.h"
+#include "glsl_parser_extras.h"
+#include "ir.h"
+#include "program.h"
+#include "program/hash_table.h"
+#include "linker.h"
+#include "link_varyings.h"
+#include "ir_optimization.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_uniform.h"
+
+#include "main/shaderobj.h"
+#include "main/enums.h"
+
+
+void linker_error(gl_shader_program *, const char *, ...);
+
+namespace {
+
+/**
+ * Visitor that determines whether or not a variable is ever written.
+ */
+class find_assignment_visitor : public ir_hierarchical_visitor {
+public:
+   find_assignment_visitor(const char *name)
+      : name(name), found(false)
+   {
+      /* empty */
+   }
+
+   virtual ir_visitor_status visit_enter(ir_assignment *ir)
+   {
+      ir_variable *const var = ir->lhs->variable_referenced();
+
+      if (strcmp(name, var->name) == 0) {
+         found = true;
+         return visit_stop;
+      }
+
+      return visit_continue_with_parent;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_call *ir)
+   {
+      foreach_two_lists(formal_node, &ir->callee->parameters,
+                        actual_node, &ir->actual_parameters) {
+         ir_rvalue *param_rval = (ir_rvalue *) actual_node;
+         ir_variable *sig_param = (ir_variable *) formal_node;
+
+         if (sig_param->data.mode == ir_var_function_out ||
+             sig_param->data.mode == ir_var_function_inout) {
+            ir_variable *var = param_rval->variable_referenced();
+            if (var && strcmp(name, var->name) == 0) {
+               found = true;
+               return visit_stop;
+            }
+         }
+      }
+
+      if (ir->return_deref != NULL) {
+         ir_variable *const var = ir->return_deref->variable_referenced();
+
+         if (strcmp(name, var->name) == 0) {
+            found = true;
+            return visit_stop;
+         }
+      }
+
+      return visit_continue_with_parent;
+   }
+
+   bool variable_found()
+   {
+      return found;
+   }
+
+private:
+   const char *name;       /**< Find writes to a variable with this name. */
+   bool found;             /**< Was a write to the variable found? */
+};
+
+
+/**
+ * Visitor that determines whether or not a variable is ever read.
+ */
+class find_deref_visitor : public ir_hierarchical_visitor {
+public:
+   find_deref_visitor(const char *name)
+      : name(name), found(false)
+   {
+      /* empty */
+   }
+
+   virtual ir_visitor_status visit(ir_dereference_variable *ir)
+   {
+      if (strcmp(this->name, ir->var->name) == 0) {
+         this->found = true;
+         return visit_stop;
+      }
+
+      return visit_continue;
+   }
+
+   bool variable_found() const
+   {
+      return this->found;
+   }
+
+private:
+   const char *name;       /**< Find reads of a variable with this name. */
+   bool found;             /**< Was a read of the variable found?
*/ +}; + + +class geom_array_resize_visitor : public ir_hierarchical_visitor { +public: + unsigned num_vertices; + gl_shader_program *prog; + + geom_array_resize_visitor(unsigned num_vertices, gl_shader_program *prog) + { + this->num_vertices = num_vertices; + this->prog = prog; + } + + virtual ~geom_array_resize_visitor() + { + /* empty */ + } + + virtual ir_visitor_status visit(ir_variable *var) + { + if (!var->type->is_array() || var->data.mode != ir_var_shader_in) + return visit_continue; + + unsigned size = var->type->length; + + /* Generate a link error if the shader has declared this array with an + * incorrect size. + */ + if (size && size != this->num_vertices) { + linker_error(this->prog, "size of array %s declared as %u, " + "but number of input vertices is %u\n", + var->name, size, this->num_vertices); + return visit_continue; + } + + /* Generate a link error if the shader attempts to access an input + * array using an index too large for its actual size assigned at link + * time. + */ + if (var->data.max_array_access >= this->num_vertices) { + linker_error(this->prog, "geometry shader accesses element %i of " + "%s, but only %i input vertices\n", + var->data.max_array_access, var->name, this->num_vertices); + return visit_continue; + } + + var->type = glsl_type::get_array_instance(var->type->fields.array, + this->num_vertices); + var->data.max_array_access = this->num_vertices - 1; + + return visit_continue; + } + + /* Dereferences of input variables need to be updated so that their type + * matches the newly assigned type of the variable they are accessing. */ + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + ir->type = ir->var->type; + return visit_continue; + } + + /* Dereferences of 2D input arrays need to be updated so that their type + * matches the newly assigned type of the array they are accessing. */ + virtual ir_visitor_status visit_leave(ir_dereference_array *ir) + { + const glsl_type *const vt = ir->array->type; + if (vt->is_array()) + ir->type = vt->fields.array; + return visit_continue; + } +}; + +class tess_eval_array_resize_visitor : public ir_hierarchical_visitor { +public: + unsigned num_vertices; + gl_shader_program *prog; + + tess_eval_array_resize_visitor(unsigned num_vertices, gl_shader_program *prog) + { + this->num_vertices = num_vertices; + this->prog = prog; + } + + virtual ~tess_eval_array_resize_visitor() + { + /* empty */ + } + + virtual ir_visitor_status visit(ir_variable *var) + { + if (!var->type->is_array() || var->data.mode != ir_var_shader_in || var->data.patch) + return visit_continue; + + var->type = glsl_type::get_array_instance(var->type->fields.array, + this->num_vertices); + var->data.max_array_access = this->num_vertices - 1; + + return visit_continue; + } + + /* Dereferences of input variables need to be updated so that their type + * matches the newly assigned type of the variable they are accessing. */ + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + ir->type = ir->var->type; + return visit_continue; + } + + /* Dereferences of 2D input arrays need to be updated so that their type + * matches the newly assigned type of the array they are accessing. 
*/ + virtual ir_visitor_status visit_leave(ir_dereference_array *ir) + { + const glsl_type *const vt = ir->array->type; + if (vt->is_array()) + ir->type = vt->fields.array; + return visit_continue; + } +}; + +class barrier_use_visitor : public ir_hierarchical_visitor { +public: + barrier_use_visitor(gl_shader_program *prog) + : prog(prog), in_main(false), after_return(false), control_flow(0) + { + } + + virtual ~barrier_use_visitor() + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_function *ir) + { + if (strcmp(ir->name, "main") == 0) + in_main = true; + + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_function *) + { + in_main = false; + after_return = false; + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_return *) + { + after_return = true; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_if *) + { + ++control_flow; + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_if *) + { + --control_flow; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_loop *) + { + ++control_flow; + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_loop *) + { + --control_flow; + return visit_continue; + } + + /* FINISHME: `switch` is not expressed at the IR level -- it's already + * been lowered to a mess of `if`s. We'll correctly disallow any use of + * barrier() in a conditional path within the switch, but not in a path + * which is always hit. + */ + + virtual ir_visitor_status visit_enter(ir_call *ir) + { + if (ir->use_builtin && strcmp(ir->callee_name(), "barrier") == 0) { + /* Use of barrier(); determine if it is legal: */ + if (!in_main) { + linker_error(prog, "Builtin barrier() may only be used in main"); + return visit_stop; + } + + if (after_return) { + linker_error(prog, "Builtin barrier() may not be used after return"); + return visit_stop; + } + + if (control_flow != 0) { + linker_error(prog, "Builtin barrier() may not be used inside control flow"); + return visit_stop; + } + } + return visit_continue; + } + +private: + gl_shader_program *prog; + bool in_main, after_return; + int control_flow; +}; + +/** + * Visitor that determines the highest stream id to which a (geometry) shader + * emits vertices. It also checks whether End{Stream}Primitive is ever called. 
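+ *
+ * For example (illustrative), a geometry shader that calls
+ * EmitStreamVertex(2) and EndStreamPrimitive(2) is flagged as using
+ * non-zero streams and as calling an end-primitive function.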
+ */ +class find_emit_vertex_visitor : public ir_hierarchical_visitor { +public: + find_emit_vertex_visitor(int max_allowed) + : max_stream_allowed(max_allowed), + invalid_stream_id(0), + invalid_stream_id_from_emit_vertex(false), + end_primitive_found(false), + uses_non_zero_stream(false) + { + /* empty */ + } + + virtual ir_visitor_status visit_leave(ir_emit_vertex *ir) + { + int stream_id = ir->stream_id(); + + if (stream_id < 0) { + invalid_stream_id = stream_id; + invalid_stream_id_from_emit_vertex = true; + return visit_stop; + } + + if (stream_id > max_stream_allowed) { + invalid_stream_id = stream_id; + invalid_stream_id_from_emit_vertex = true; + return visit_stop; + } + + if (stream_id != 0) + uses_non_zero_stream = true; + + return visit_continue; + } + + virtual ir_visitor_status visit_leave(ir_end_primitive *ir) + { + end_primitive_found = true; + + int stream_id = ir->stream_id(); + + if (stream_id < 0) { + invalid_stream_id = stream_id; + invalid_stream_id_from_emit_vertex = false; + return visit_stop; + } + + if (stream_id > max_stream_allowed) { + invalid_stream_id = stream_id; + invalid_stream_id_from_emit_vertex = false; + return visit_stop; + } + + if (stream_id != 0) + uses_non_zero_stream = true; + + return visit_continue; + } + + bool error() + { + return invalid_stream_id != 0; + } + + const char *error_func() + { + return invalid_stream_id_from_emit_vertex ? + "EmitStreamVertex" : "EndStreamPrimitive"; + } + + int error_stream() + { + return invalid_stream_id; + } + + bool uses_streams() + { + return uses_non_zero_stream; + } + + bool uses_end_primitive() + { + return end_primitive_found; + } + +private: + int max_stream_allowed; + int invalid_stream_id; + bool invalid_stream_id_from_emit_vertex; + bool end_primitive_found; + bool uses_non_zero_stream; +}; + +/* Class that finds array derefs and check if indexes are dynamic. */ +class dynamic_sampler_array_indexing_visitor : public ir_hierarchical_visitor +{ +public: + dynamic_sampler_array_indexing_visitor() : + dynamic_sampler_array_indexing(false) + { + } + + ir_visitor_status visit_enter(ir_dereference_array *ir) + { + if (!ir->variable_referenced()) + return visit_continue; + + if (!ir->variable_referenced()->type->contains_sampler()) + return visit_continue; + + if (!ir->array_index->constant_expression_value()) { + dynamic_sampler_array_indexing = true; + return visit_stop; + } + return visit_continue; + } + + bool uses_dynamic_sampler_array_indexing() + { + return dynamic_sampler_array_indexing; + } + +private: + bool dynamic_sampler_array_indexing; +}; + +} /* anonymous namespace */ + +void +linker_error(gl_shader_program *prog, const char *fmt, ...) +{ + va_list ap; + + ralloc_strcat(&prog->InfoLog, "error: "); + va_start(ap, fmt); + ralloc_vasprintf_append(&prog->InfoLog, fmt, ap); + va_end(ap); + + prog->LinkStatus = false; +} + + +void +linker_warning(gl_shader_program *prog, const char *fmt, ...) +{ + va_list ap; + + ralloc_strcat(&prog->InfoLog, "warning: "); + va_start(ap, fmt); + ralloc_vasprintf_append(&prog->InfoLog, fmt, ap); + va_end(ap); + +} + + +/** + * Given a string identifying a program resource, break it into a base name + * and an optional array index in square brackets. + * + * If an array index is present, \c out_base_name_end is set to point to the + * "[" that precedes the array index, and the array index itself is returned + * as a long. 
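+ *
+ * For example (illustrative), given the name "lights[7]", the base name is
+ * "lights": \c out_base_name_end is left pointing at the "[" and 7 is
+ * returned.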
+ *
+ * If no array index is present (or if the array index is negative or
+ * malformed), \c out_base_name_end is set to point to the null terminator
+ * at the end of the input string, and -1 is returned.
+ *
+ * Only the final array index is parsed; if the string contains other array
+ * indices (or structure field accesses), they are left in the base name.
+ *
+ * No attempt is made to check that the base name is properly formed;
+ * typically the caller will look up the base name in a hash table, so
+ * ill-formed base names simply turn into hash table lookup failures.
+ */
+long
+parse_program_resource_name(const GLchar *name,
+                            const GLchar **out_base_name_end)
+{
+   /* Section 7.3.1 ("Program Interfaces") of the OpenGL 4.3 spec says:
+    *
+    *     "When an integer array element or block instance number is part of
+    *     the name string, it will be specified in decimal form without a "+"
+    *     or "-" sign or any extra leading zeroes. Additionally, the name
+    *     string will not include white space anywhere in the string."
+    */
+
+   const size_t len = strlen(name);
+   *out_base_name_end = name + len;
+
+   if (len == 0 || name[len-1] != ']')
+      return -1;
+
+   /* Walk backwards over the string looking for a non-digit character.  This
+    * had better be the opening bracket for an array index.
+    *
+    * Initially, i specifies the location of the ']'.  Since the string may
+    * contain only the ']' character, walk backwards very carefully.
+    */
+   unsigned i;
+   for (i = len - 1; (i > 0) && isdigit(name[i-1]); --i)
+      /* empty */ ;
+
+   if ((i == 0) || name[i-1] != '[')
+      return -1;
+
+   long array_index = strtol(&name[i], NULL, 10);
+   if (array_index < 0)
+      return -1;
+
+   /* Check for leading zero */
+   if (name[i] == '0' && name[i+1] != ']')
+      return -1;
+
+   *out_base_name_end = name + (i - 1);
+   return array_index;
+}
+
+
+void
+link_invalidate_variable_locations(exec_list *ir)
+{
+   foreach_in_list(ir_instruction, node, ir) {
+      ir_variable *const var = node->as_variable();
+
+      if (var == NULL)
+         continue;
+
+      /* Only assign locations for variables that lack an explicit location.
+       * Explicit locations are set for all built-in variables, generic vertex
+       * shader inputs (via layout(location=...)), and generic fragment shader
+       * outputs (also via layout(location=...)).
+       */
+      if (!var->data.explicit_location) {
+         var->data.location = -1;
+         var->data.location_frac = 0;
+      }
+
+      /* ir_variable::is_unmatched_generic_inout is used by the linker while
+       * connecting outputs from one stage to inputs of the next stage.
+       */
+      if (var->data.explicit_location &&
+          var->data.location < VARYING_SLOT_VAR0) {
+         var->data.is_unmatched_generic_inout = 0;
+      } else {
+         var->data.is_unmatched_generic_inout = 1;
+      }
+   }
+}
+
+
+/**
+ * Set clip_distance_array_size based on the given shader.
+ *
+ * Also check for errors based on incorrect usage of gl_ClipVertex and
+ * gl_ClipDistance.
+ *
+ * Emits a linker error if a problem is found.
+ */
+static void
+analyze_clip_usage(struct gl_shader_program *prog,
+                   struct gl_shader *shader,
+                   GLuint *clip_distance_array_size)
+{
+   *clip_distance_array_size = 0;
+
+   if (!prog->IsES && prog->Version >= 130) {
+      /* From section 7.1 (Vertex Shader Special Variables) of the
+       * GLSL 1.30 spec:
+       *
+       *   "It is an error for a shader to statically write both
+       *   gl_ClipVertex and gl_ClipDistance."
+       *
+       * This does not apply to GLSL ES shaders, since GLSL ES defines neither
+       * gl_ClipVertex nor gl_ClipDistance.
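+       *
+       * For example (illustrative GLSL), a single vertex shader containing
+       * both of these statements must fail to link:
+       *
+       *    gl_ClipVertex = gl_ModelViewMatrix * gl_Vertex;
+       *    gl_ClipDistance[0] = 1.0;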
+       */
+      find_assignment_visitor clip_vertex("gl_ClipVertex");
+      find_assignment_visitor clip_distance("gl_ClipDistance");
+
+      clip_vertex.run(shader->ir);
+      clip_distance.run(shader->ir);
+      if (clip_vertex.variable_found() && clip_distance.variable_found()) {
+         linker_error(prog, "%s shader writes to both `gl_ClipVertex' "
+                      "and `gl_ClipDistance'\n",
+                      _mesa_shader_stage_to_string(shader->Stage));
+         return;
+      }
+
+      if (clip_distance.variable_found()) {
+         ir_variable *clip_distance_var =
+            shader->symbols->get_variable("gl_ClipDistance");
+
+         assert(clip_distance_var);
+         *clip_distance_array_size = clip_distance_var->type->length;
+      }
+   }
+}
+
+
+/**
+ * Verify that a vertex shader executable meets all semantic requirements.
+ *
+ * Also sets prog->Vert.ClipDistanceArraySize as a side effect.
+ *
+ * \param shader  Vertex shader executable to be verified
+ */
+void
+validate_vertex_shader_executable(struct gl_shader_program *prog,
+                                  struct gl_shader *shader)
+{
+   if (shader == NULL)
+      return;
+
+   /* From the GLSL 1.10 spec, page 48:
+    *
+    *     "The variable gl_Position is available only in the vertex
+    *      language and is intended for writing the homogeneous vertex
+    *      position. All executions of a well-formed vertex shader
+    *      executable must write a value into this variable. [...] The
+    *      variable gl_Position is available only in the vertex
+    *      language and is intended for writing the homogeneous vertex
+    *      position. All executions of a well-formed vertex shader
+    *      executable must write a value into this variable."
+    *
+    * while in GLSL 1.40 this text is changed to:
+    *
+    *     "The variable gl_Position is available only in the vertex
+    *      language and is intended for writing the homogeneous vertex
+    *      position. It can be written at any time during shader
+    *      execution. It may also be read back by a vertex shader
+    *      after being written. This value will be used by primitive
+    *      assembly, clipping, culling, and other fixed functionality
+    *      operations, if present, that operate on primitives after
+    *      vertex processing has occurred. Its value is undefined if
+    *      the vertex shader executable does not write gl_Position."
+    *
+    * All GLSL ES Versions are similar to GLSL 1.40--failing to write to
+    * gl_Position is not an error.
+    */
+   if (prog->Version < (prog->IsES ? 300 : 140)) {
+      find_assignment_visitor find("gl_Position");
+      find.run(shader->ir);
+      if (!find.variable_found()) {
+         if (prog->IsES) {
+            linker_warning(prog,
+                           "vertex shader does not write to `gl_Position'. "
+                           "Its value is undefined.\n");
+         } else {
+            linker_error(prog,
+                         "vertex shader does not write to `gl_Position'."
\n"); + } + return; + } + } + + analyze_clip_usage(prog, shader, &prog->Vert.ClipDistanceArraySize); +} + +void +validate_tess_eval_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return; + + analyze_clip_usage(prog, shader, &prog->TessEval.ClipDistanceArraySize); +} + + +/** + * Verify that a fragment shader executable meets all semantic requirements + * + * \param shader Fragment shader executable to be verified + */ +void +validate_fragment_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return; + + find_assignment_visitor frag_color("gl_FragColor"); + find_assignment_visitor frag_data("gl_FragData"); + + frag_color.run(shader->ir); + frag_data.run(shader->ir); + + if (frag_color.variable_found() && frag_data.variable_found()) { + linker_error(prog, "fragment shader writes to both " + "`gl_FragColor' and `gl_FragData'\n"); + } +} + +/** + * Verify that a geometry shader executable meets all semantic requirements + * + * Also sets prog->Geom.VerticesIn, and prog->Geom.ClipDistanceArraySize as + * a side effect. + * + * \param shader Geometry shader executable to be verified + */ +void +validate_geometry_shader_executable(struct gl_shader_program *prog, + struct gl_shader *shader) +{ + if (shader == NULL) + return; + + unsigned num_vertices = vertices_per_prim(prog->Geom.InputType); + prog->Geom.VerticesIn = num_vertices; + + analyze_clip_usage(prog, shader, &prog->Geom.ClipDistanceArraySize); +} + +/** + * Check if geometry shaders emit to non-zero streams and do corresponding + * validations. + */ +static void +validate_geometry_shader_emissions(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] != NULL) { + find_emit_vertex_visitor emit_vertex(ctx->Const.MaxVertexStreams - 1); + emit_vertex.run(prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->ir); + if (emit_vertex.error()) { + linker_error(prog, "Invalid call %s(%d). Accepted values for the " + "stream parameter are in the range [0, %d].\n", + emit_vertex.error_func(), + emit_vertex.error_stream(), + ctx->Const.MaxVertexStreams - 1); + } + prog->Geom.UsesStreams = emit_vertex.uses_streams(); + prog->Geom.UsesEndPrimitive = emit_vertex.uses_end_primitive(); + + /* From the ARB_gpu_shader5 spec: + * + * "Multiple vertex streams are supported only if the output primitive + * type is declared to be "points". A program will fail to link if it + * contains a geometry shader calling EmitStreamVertex() or + * EndStreamPrimitive() if its output primitive type is not "points". + * + * However, in the same spec: + * + * "The function EmitVertex() is equivalent to calling EmitStreamVertex() + * with set to zero." + * + * And: + * + * "The function EndPrimitive() is equivalent to calling + * EndStreamPrimitive() with set to zero." + * + * Since we can call EmitVertex() and EndPrimitive() when we output + * primitives other than points, calling EmitStreamVertex(0) or + * EmitEndPrimitive(0) should not produce errors. This it also what Nvidia + * does. Currently we only set prog->Geom.UsesStreams to TRUE when + * EmitStreamVertex() or EmitEndPrimitive() are called with a non-zero + * stream. 
+ */ + if (prog->Geom.UsesStreams && prog->Geom.OutputType != GL_POINTS) { + linker_error(prog, "EmitStreamVertex(n) and EndStreamPrimitive(n) " + "with n>0 requires point output\n"); + } + } +} + +bool +validate_intrastage_arrays(struct gl_shader_program *prog, + ir_variable *const var, + ir_variable *const existing) +{ + /* Consider the types to be "the same" if both types are arrays + * of the same type and one of the arrays is implicitly sized. + * In addition, set the type of the linked variable to the + * explicitly sized array. + */ + if (var->type->is_array() && existing->type->is_array()) { + if ((var->type->fields.array == existing->type->fields.array) && + ((var->type->length == 0)|| (existing->type->length == 0))) { + if (var->type->length != 0) { + if (var->type->length <= existing->data.max_array_access) { + linker_error(prog, "%s `%s' declared as type " + "`%s' but outermost dimension has an index" + " of `%i'\n", + mode_string(var), + var->name, var->type->name, + existing->data.max_array_access); + } + existing->type = var->type; + return true; + } else if (existing->type->length != 0) { + if(existing->type->length <= var->data.max_array_access && + !existing->data.from_ssbo_unsized_array) { + linker_error(prog, "%s `%s' declared as type " + "`%s' but outermost dimension has an index" + " of `%i'\n", + mode_string(var), + var->name, existing->type->name, + var->data.max_array_access); + } + return true; + } + } else { + /* The arrays of structs could have different glsl_type pointers but + * they are actually the same type. Use record_compare() to check that. + */ + if (existing->type->fields.array->is_record() && + var->type->fields.array->is_record() && + existing->type->fields.array->record_compare(var->type->fields.array)) + return true; + } + } + return false; +} + + +/** + * Perform validation of global variables used across multiple shaders + */ +void +cross_validate_globals(struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders, + bool uniforms_only) +{ + /* Examine all of the uniforms in all of the shaders and cross validate + * them. + */ + glsl_symbol_table variables; + for (unsigned i = 0; i < num_shaders; i++) { + if (shader_list[i] == NULL) + continue; + + foreach_in_list(ir_instruction, node, shader_list[i]->ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL) + continue; + + if (uniforms_only && (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage)) + continue; + + /* don't cross validate subroutine uniforms */ + if (var->type->contains_subroutine()) + continue; + + /* Don't cross validate temporaries that are at global scope. These + * will eventually get pulled into the shaders 'main'. + */ + if (var->data.mode == ir_var_temporary) + continue; + + /* If a global with this name has already been seen, verify that the + * new instance has the same type. In addition, if the globals have + * initializers, the values of the initializers must be the same. + */ + ir_variable *const existing = variables.get_variable(var->name); + if (existing != NULL) { + /* Check if types match. Interface blocks have some special + * rules so we handle those elsewhere. 
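+             *
+             * For example (illustrative GLSL), these two declarations of the
+             * same name in different compilation units must fail to link:
+             *
+             *    uniform vec3 color;   // shader A
+             *    uniform vec4 color;   // shader B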
+             */
+            if (var->type != existing->type &&
+                !var->is_interface_instance()) {
+               if (!validate_intrastage_arrays(prog, var, existing)) {
+                  if (var->type->is_record() && existing->type->is_record()
+                      && existing->type->record_compare(var->type)) {
+                     existing->type = var->type;
+                  } else {
+                     /* If it is an unsized array in a Shader Storage Block,
+                      * two different shaders can access different elements.
+                      * Because of that, they might have been converted to
+                      * differently sized arrays; check that they are
+                      * compatible, ignoring the array size.
+                      */
+                     if (!(var->data.mode == ir_var_shader_storage &&
+                           var->data.from_ssbo_unsized_array &&
+                           existing->data.mode == ir_var_shader_storage &&
+                           existing->data.from_ssbo_unsized_array &&
+                           var->type->gl_type == existing->type->gl_type)) {
+                        linker_error(prog, "%s `%s' declared as type "
+                                     "`%s' and type `%s'\n",
+                                     mode_string(var),
+                                     var->name, var->type->name,
+                                     existing->type->name);
+                        return;
+                     }
+                  }
+               }
+            }
+
+            if (var->data.explicit_location) {
+               if (existing->data.explicit_location
+                   && (var->data.location != existing->data.location)) {
+                  linker_error(prog, "explicit locations for %s "
+                               "`%s' have differing values\n",
+                               mode_string(var), var->name);
+                  return;
+               }
+
+               existing->data.location = var->data.location;
+               existing->data.explicit_location = true;
+            } else {
+               /* Check if a uniform with an implicit location was marked
+                * explicit by an earlier shader stage.  If so, mark it
+                * explicit in this stage too to make sure later processing
+                * does not treat it as an implicit one.
+                */
+               if (existing->data.explicit_location) {
+                  var->data.location = existing->data.location;
+                  var->data.explicit_location = true;
+               }
+            }
+
+            /* From the GLSL 4.20 specification:
+             * "A link error will result if two compilation units in a program
+             *  specify different integer-constant bindings for the same
+             *  opaque-uniform name.  However, it is not an error to specify a
+             *  binding on some but not all declarations for the same name"
+             */
+            if (var->data.explicit_binding) {
+               if (existing->data.explicit_binding &&
+                   var->data.binding != existing->data.binding) {
+                  linker_error(prog, "explicit bindings for %s "
+                               "`%s' have differing values\n",
+                               mode_string(var), var->name);
+                  return;
+               }
+
+               existing->data.binding = var->data.binding;
+               existing->data.explicit_binding = true;
+            }
+
+            if (var->type->contains_atomic() &&
+                var->data.offset != existing->data.offset) {
+               linker_error(prog, "offset specifications for %s "
+                            "`%s' have differing values\n",
+                            mode_string(var), var->name);
+               return;
+            }
+
+            /* Validate layout qualifiers for gl_FragDepth.
+             *
+             * From the AMD/ARB_conservative_depth specs:
+             *
+             *    "If gl_FragDepth is redeclared in any fragment shader in a
+             *    program, it must be redeclared in all fragment shaders in
+             *    that program that have static assignments to
+             *    gl_FragDepth. All redeclarations of gl_FragDepth in all
+             *    fragment shaders in a single program must have the same set
+             *    of qualifiers."
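+             *
+             * For example (illustrative GLSL), if one fragment shader
+             * redeclares
+             *
+             *    layout(depth_greater) out float gl_FragDepth;
+             *
+             * and another that also assigns gl_FragDepth redeclares it with
+             * layout(depth_any), the program must fail to link.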
+ */ + if (strcmp(var->name, "gl_FragDepth") == 0) { + bool layout_declared = var->data.depth_layout != ir_depth_layout_none; + bool layout_differs = + var->data.depth_layout != existing->data.depth_layout; + + if (layout_declared && layout_differs) { + linker_error(prog, + "All redeclarations of gl_FragDepth in all " + "fragment shaders in a single program must have " + "the same set of qualifiers.\n"); + } + + if (var->data.used && layout_differs) { + linker_error(prog, + "If gl_FragDepth is redeclared with a layout " + "qualifier in any fragment shader, it must be " + "redeclared with the same layout qualifier in " + "all fragment shaders that have assignments to " + "gl_FragDepth\n"); + } + } + + /* Page 35 (page 41 of the PDF) of the GLSL 4.20 spec says: + * + * "If a shared global has multiple initializers, the + * initializers must all be constant expressions, and they + * must all have the same value. Otherwise, a link error will + * result. (A shared global having only one initializer does + * not require that initializer to be a constant expression.)" + * + * Previous to 4.20 the GLSL spec simply said that initializers + * must have the same value. In this case of non-constant + * initializers, this was impossible to determine. As a result, + * no vendor actually implemented that behavior. The 4.20 + * behavior matches the implemented behavior of at least one other + * vendor, so we'll implement that for all GLSL versions. + */ + if (var->constant_initializer != NULL) { + if (existing->constant_initializer != NULL) { + if (!var->constant_initializer->has_value(existing->constant_initializer)) { + linker_error(prog, "initializers for %s " + "`%s' have differing values\n", + mode_string(var), var->name); + return; + } + } else { + /* If the first-seen instance of a particular uniform did not + * have an initializer but a later instance does, copy the + * initializer to the version stored in the symbol table. + */ + /* FINISHME: This is wrong. The constant_value field should + * FINISHME: not be modified! Imagine a case where a shader + * FINISHME: without an initializer is linked in two different + * FINISHME: programs with shaders that have differing + * FINISHME: initializers. Linking with the first will + * FINISHME: modify the shader, and linking with the second + * FINISHME: will fail. + */ + existing->constant_initializer = + var->constant_initializer->clone(ralloc_parent(existing), + NULL); + } + } + + if (var->data.has_initializer) { + if (existing->data.has_initializer + && (var->constant_initializer == NULL + || existing->constant_initializer == NULL)) { + linker_error(prog, + "shared global variable `%s' has multiple " + "non-constant initializers.\n", + var->name); + return; + } + + /* Some instance had an initializer, so keep track of that. In + * this location, all sorts of initializers (constant or + * otherwise) will propagate the existence to the variable + * stored in the symbol table. 
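+                *
+                * For example (illustrative GLSL), with
+                *
+                *    float gain = 2.0;   // shader A
+                *    float gain;         // shader B
+                *
+                * the linked variable is still treated as having an
+                * initializer.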
+                */
+               existing->data.has_initializer = true;
+            }
+
+            if (existing->data.invariant != var->data.invariant) {
+               linker_error(prog, "declarations for %s `%s' have "
+                            "mismatching invariant qualifiers\n",
+                            mode_string(var), var->name);
+               return;
+            }
+            if (existing->data.centroid != var->data.centroid) {
+               linker_error(prog, "declarations for %s `%s' have "
+                            "mismatching centroid qualifiers\n",
+                            mode_string(var), var->name);
+               return;
+            }
+            if (existing->data.sample != var->data.sample) {
+               linker_error(prog, "declarations for %s `%s' have "
+                            "mismatching sample qualifiers\n",
+                            mode_string(var), var->name);
+               return;
+            }
+            if (existing->data.image_format != var->data.image_format) {
+               linker_error(prog, "declarations for %s `%s' have "
+                            "mismatching image format qualifiers\n",
+                            mode_string(var), var->name);
+               return;
+            }
+         } else
+            variables.add_variable(var);
+      }
+   }
+}
+
+
+/**
+ * Perform validation of uniforms used across multiple shader stages
+ */
+void
+cross_validate_uniforms(struct gl_shader_program *prog)
+{
+   cross_validate_globals(prog, prog->_LinkedShaders,
+                          MESA_SHADER_STAGES, true);
+}
+
+/**
+ * Accumulates the array of prog->BufferInterfaceBlocks and checks that all
+ * definitions of blocks agree on their contents.
+ */
+static bool
+interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
+{
+   unsigned max_num_uniform_blocks = 0;
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      if (prog->_LinkedShaders[i])
+         max_num_uniform_blocks += prog->_LinkedShaders[i]->NumBufferInterfaceBlocks;
+   }
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+      struct gl_shader *sh = prog->_LinkedShaders[i];
+
+      prog->InterfaceBlockStageIndex[i] = ralloc_array(prog, int,
+                                                       max_num_uniform_blocks);
+      for (unsigned int j = 0; j < max_num_uniform_blocks; j++)
+         prog->InterfaceBlockStageIndex[i][j] = -1;
+
+      if (sh == NULL)
+         continue;
+
+      for (unsigned int j = 0; j < sh->NumBufferInterfaceBlocks; j++) {
+         int index = link_cross_validate_uniform_block(prog,
+                                                       &prog->BufferInterfaceBlocks,
+                                                       &prog->NumBufferInterfaceBlocks,
+                                                       &sh->BufferInterfaceBlocks[j]);
+
+         if (index == -1) {
+            linker_error(prog, "uniform block `%s' has mismatching definitions\n",
+                         sh->BufferInterfaceBlocks[j].Name);
+            return false;
+         }
+
+         prog->InterfaceBlockStageIndex[i][index] = j;
+      }
+   }
+
+   return true;
+}
+
+
+/**
+ * Populates a shader's symbol table with all global declarations
+ */
+static void
+populate_symbol_table(gl_shader *sh)
+{
+   sh->symbols = new(sh) glsl_symbol_table;
+
+   foreach_in_list(ir_instruction, inst, sh->ir) {
+      ir_variable *var;
+      ir_function *func;
+
+      if ((func = inst->as_function()) != NULL) {
+         sh->symbols->add_function(func);
+      } else if ((var = inst->as_variable()) != NULL) {
+         if (var->data.mode != ir_var_temporary)
+            sh->symbols->add_variable(var);
+      }
+   }
+}
+
+
+/**
+ * Remap variables referenced in an instruction tree
+ *
+ * This is used when instruction trees are cloned from one shader and placed in
+ * another.  These trees will contain references to \c ir_variable nodes that
+ * do not exist in the target shader.  This function finds these \c ir_variable
+ * references and replaces the references with matching variables in the target
+ * shader.
+ *
+ * If there is no matching variable in the target shader, a clone of the
+ * \c ir_variable is made and added to the target shader.  The new variable is
+ * added to \b both the instruction stream and the symbol table.
+ *
+ * \param inst   IR tree that is to be processed.
+ * \param symbols Symbol table containing global scope symbols in the + * linked shader. + * \param instructions Instruction stream where new variable declarations + * should be added. + */ +void +remap_variables(ir_instruction *inst, struct gl_shader *target, + hash_table *temps) +{ + class remap_visitor : public ir_hierarchical_visitor { + public: + remap_visitor(struct gl_shader *target, + hash_table *temps) + { + this->target = target; + this->symbols = target->symbols; + this->instructions = target->ir; + this->temps = temps; + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + if (ir->var->data.mode == ir_var_temporary) { + ir_variable *var = (ir_variable *) hash_table_find(temps, ir->var); + + assert(var != NULL); + ir->var = var; + return visit_continue; + } + + ir_variable *const existing = + this->symbols->get_variable(ir->var->name); + if (existing != NULL) + ir->var = existing; + else { + ir_variable *copy = ir->var->clone(this->target, NULL); + + this->symbols->add_variable(copy); + this->instructions->push_head(copy); + ir->var = copy; + } + + return visit_continue; + } + + private: + struct gl_shader *target; + glsl_symbol_table *symbols; + exec_list *instructions; + hash_table *temps; + }; + + remap_visitor v(target, temps); + + inst->accept(&v); +} + + +/** + * Move non-declarations from one instruction stream to another + * + * The intended usage pattern of this function is to pass the pointer to the + * head sentinel of a list (i.e., a pointer to the list cast to an \c exec_node + * pointer) for \c last and \c false for \c make_copies on the first + * call. Successive calls pass the return value of the previous call for + * \c last and \c true for \c make_copies. + * + * \param instructions Source instruction stream + * \param last Instruction after which new instructions should be + * inserted in the target instruction stream + * \param make_copies Flag selecting whether instructions in \c instructions + * should be copied (via \c ir_instruction::clone) into the + * target list or moved. + * + * \return + * The new "last" instruction in the target instruction stream. This pointer + * is suitable for use as the \c last parameter of a later call to this + * function. + */ +exec_node * +move_non_declarations(exec_list *instructions, exec_node *last, + bool make_copies, gl_shader *target) +{ + hash_table *temps = NULL; + + if (make_copies) + temps = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + + foreach_in_list_safe(ir_instruction, inst, instructions) { + if (inst->as_function()) + continue; + + ir_variable *var = inst->as_variable(); + if ((var != NULL) && (var->data.mode != ir_var_temporary)) + continue; + + assert(inst->as_assignment() + || inst->as_call() + || inst->as_if() /* for initializers with the ?: operator */ + || ((var != NULL) && (var->data.mode == ir_var_temporary))); + + if (make_copies) { + inst = inst->clone(target, NULL); + + if (var != NULL) + hash_table_insert(temps, inst, var); + else + remap_variables(inst, target, temps); + } else { + inst->remove(); + } + + last->insert_after(inst); + last = inst; + } + + if (make_copies) + hash_table_dtor(temps); + + return last; +} + + +/** + * This class is only used in link_intrastage_shaders() below but declaring + * it inside that function leads to compiler warnings with some versions of + * gcc. 
+ */ +class array_sizing_visitor : public ir_hierarchical_visitor { +public: + array_sizing_visitor() + : mem_ctx(ralloc_context(NULL)), + unnamed_interfaces(hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare)) + { + } + + ~array_sizing_visitor() + { + hash_table_dtor(this->unnamed_interfaces); + ralloc_free(this->mem_ctx); + } + + virtual ir_visitor_status visit(ir_variable *var) + { + const glsl_type *type_without_array; + fixup_type(&var->type, var->data.max_array_access, + var->data.from_ssbo_unsized_array); + type_without_array = var->type->without_array(); + if (var->type->is_interface()) { + if (interface_contains_unsized_arrays(var->type)) { + const glsl_type *new_type = + resize_interface_members(var->type, + var->get_max_ifc_array_access(), + var->is_in_shader_storage_block()); + var->type = new_type; + var->change_interface_type(new_type); + } + } else if (type_without_array->is_interface()) { + if (interface_contains_unsized_arrays(type_without_array)) { + const glsl_type *new_type = + resize_interface_members(type_without_array, + var->get_max_ifc_array_access(), + var->is_in_shader_storage_block()); + var->change_interface_type(new_type); + var->type = update_interface_members_array(var->type, new_type); + } + } else if (const glsl_type *ifc_type = var->get_interface_type()) { + /* Store a pointer to the variable in the unnamed_interfaces + * hashtable. + */ + ir_variable **interface_vars = (ir_variable **) + hash_table_find(this->unnamed_interfaces, ifc_type); + if (interface_vars == NULL) { + interface_vars = rzalloc_array(mem_ctx, ir_variable *, + ifc_type->length); + hash_table_insert(this->unnamed_interfaces, interface_vars, + ifc_type); + } + unsigned index = ifc_type->field_index(var->name); + assert(index < ifc_type->length); + assert(interface_vars[index] == NULL); + interface_vars[index] = var; + } + return visit_continue; + } + + /** + * For each unnamed interface block that was discovered while running the + * visitor, adjust the interface type to reflect the newly assigned array + * sizes, and fix up the ir_variable nodes to point to the new interface + * type. + */ + void fixup_unnamed_interface_types() + { + hash_table_call_foreach(this->unnamed_interfaces, + fixup_unnamed_interface_type, NULL); + } + +private: + /** + * If the type pointed to by \c type represents an unsized array, replace + * it with a sized array whose size is determined by max_array_access. + */ + static void fixup_type(const glsl_type **type, unsigned max_array_access, + bool from_ssbo_unsized_array) + { + if (!from_ssbo_unsized_array && (*type)->is_unsized_array()) { + *type = glsl_type::get_array_instance((*type)->fields.array, + max_array_access + 1); + assert(*type != NULL); + } + } + + static const glsl_type * + update_interface_members_array(const glsl_type *type, + const glsl_type *new_interface_type) + { + const glsl_type *element_type = type->fields.array; + if (element_type->is_array()) { + const glsl_type *new_array_type = + update_interface_members_array(element_type, new_interface_type); + return glsl_type::get_array_instance(new_array_type, type->length); + } else { + return glsl_type::get_array_instance(new_interface_type, + type->length); + } + } + + /** + * Determine whether the given interface type contains unsized arrays (if + * it doesn't, array_sizing_visitor doesn't need to process it). 
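+    *
+    * For example (illustrative GLSL), a block member left unsized, as in
+    *
+    *    out Data { float values[]; } data;
+    *
+    * makes this return true, and the member is later sized from its
+    * maximum recorded access.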
+ */ + static bool interface_contains_unsized_arrays(const glsl_type *type) + { + for (unsigned i = 0; i < type->length; i++) { + const glsl_type *elem_type = type->fields.structure[i].type; + if (elem_type->is_unsized_array()) + return true; + } + return false; + } + + /** + * Create a new interface type based on the given type, with unsized arrays + * replaced by sized arrays whose size is determined by + * max_ifc_array_access. + */ + static const glsl_type * + resize_interface_members(const glsl_type *type, + const unsigned *max_ifc_array_access, + bool is_ssbo) + { + unsigned num_fields = type->length; + glsl_struct_field *fields = new glsl_struct_field[num_fields]; + memcpy(fields, type->fields.structure, + num_fields * sizeof(*fields)); + for (unsigned i = 0; i < num_fields; i++) { + /* If SSBO last member is unsized array, we don't replace it by a sized + * array. + */ + if (is_ssbo && i == (num_fields - 1)) + fixup_type(&fields[i].type, max_ifc_array_access[i], + true); + else + fixup_type(&fields[i].type, max_ifc_array_access[i], + false); + } + glsl_interface_packing packing = + (glsl_interface_packing) type->interface_packing; + const glsl_type *new_ifc_type = + glsl_type::get_interface_instance(fields, num_fields, + packing, type->name); + delete [] fields; + return new_ifc_type; + } + + static void fixup_unnamed_interface_type(const void *key, void *data, + void *) + { + const glsl_type *ifc_type = (const glsl_type *) key; + ir_variable **interface_vars = (ir_variable **) data; + unsigned num_fields = ifc_type->length; + glsl_struct_field *fields = new glsl_struct_field[num_fields]; + memcpy(fields, ifc_type->fields.structure, + num_fields * sizeof(*fields)); + bool interface_type_changed = false; + for (unsigned i = 0; i < num_fields; i++) { + if (interface_vars[i] != NULL && + fields[i].type != interface_vars[i]->type) { + fields[i].type = interface_vars[i]->type; + interface_type_changed = true; + } + } + if (!interface_type_changed) { + delete [] fields; + return; + } + glsl_interface_packing packing = + (glsl_interface_packing) ifc_type->interface_packing; + const glsl_type *new_ifc_type = + glsl_type::get_interface_instance(fields, num_fields, packing, + ifc_type->name); + delete [] fields; + for (unsigned i = 0; i < num_fields; i++) { + if (interface_vars[i] != NULL) + interface_vars[i]->change_interface_type(new_ifc_type); + } + } + + /** + * Memory context used to allocate the data in \c unnamed_interfaces. + */ + void *mem_ctx; + + /** + * Hash table from const glsl_type * to an array of ir_variable *'s + * pointing to the ir_variables constituting each unnamed interface block. + */ + hash_table *unnamed_interfaces; +}; + + +/** + * Performs the cross-validation of tessellation control shader vertices and + * layout qualifiers for the attached tessellation control shaders, + * and propagates them to the linked TCS and linked shader program. + */ +static void +link_tcs_out_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + linked_shader->TessCtrl.VerticesOut = 0; + + if (linked_shader->Stage != MESA_SHADER_TESS_CTRL) + return; + + /* From the GLSL 4.0 spec (chapter 4.3.8.2): + * + * "All tessellation control shader layout declarations in a program + * must specify the same output patch vertex count. 
There must be at + * least one layout qualifier specifying an output patch vertex count + * in any program containing tessellation control shaders; however, + * such a declaration is not required in all tessellation control + * shaders." + */ + + for (unsigned i = 0; i < num_shaders; i++) { + struct gl_shader *shader = shader_list[i]; + + if (shader->TessCtrl.VerticesOut != 0) { + if (linked_shader->TessCtrl.VerticesOut != 0 && + linked_shader->TessCtrl.VerticesOut != shader->TessCtrl.VerticesOut) { + linker_error(prog, "tessellation control shader defined with " + "conflicting output vertex count (%d and %d)\n", + linked_shader->TessCtrl.VerticesOut, + shader->TessCtrl.VerticesOut); + return; + } + linked_shader->TessCtrl.VerticesOut = shader->TessCtrl.VerticesOut; + } + } + + /* Just do the intrastage -> interstage propagation right now, + * since we already know we're in the right type of shader program + * for doing it. + */ + if (linked_shader->TessCtrl.VerticesOut == 0) { + linker_error(prog, "tessellation control shader didn't declare " + "vertices out layout qualifier\n"); + return; + } + prog->TessCtrl.VerticesOut = linked_shader->TessCtrl.VerticesOut; +} + + +/** + * Performs the cross-validation of tessellation evaluation shader + * primitive type, vertex spacing, ordering and point_mode layout qualifiers + * for the attached tessellation evaluation shaders, and propagates them + * to the linked TES and linked shader program. + */ +static void +link_tes_in_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + linked_shader->TessEval.PrimitiveMode = PRIM_UNKNOWN; + linked_shader->TessEval.Spacing = 0; + linked_shader->TessEval.VertexOrder = 0; + linked_shader->TessEval.PointMode = -1; + + if (linked_shader->Stage != MESA_SHADER_TESS_EVAL) + return; + + /* From the GLSL 4.0 spec (chapter 4.3.8.1): + * + * "At least one tessellation evaluation shader (compilation unit) in + * a program must declare a primitive mode in its input layout. + * Declaration vertex spacing, ordering, and point mode identifiers is + * optional. It is not required that all tessellation evaluation + * shaders in a program declare a primitive mode. If spacing or + * vertex ordering declarations are omitted, the tessellation + * primitive generator will use equal spacing or counter-clockwise + * vertex ordering, respectively. If a point mode declaration is + * omitted, the tessellation primitive generator will produce lines or + * triangles according to the primitive mode." 
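+    *
+    * For example (illustrative GLSL), a program whose only TES layout
+    * declaration is
+    *
+    *    layout(triangles) in;
+    *
+    * still links; spacing and ordering then default to equal_spacing and
+    * ccw below.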
+ */ + + for (unsigned i = 0; i < num_shaders; i++) { + struct gl_shader *shader = shader_list[i]; + + if (shader->TessEval.PrimitiveMode != PRIM_UNKNOWN) { + if (linked_shader->TessEval.PrimitiveMode != PRIM_UNKNOWN && + linked_shader->TessEval.PrimitiveMode != shader->TessEval.PrimitiveMode) { + linker_error(prog, "tessellation evaluation shader defined with " + "conflicting input primitive modes.\n"); + return; + } + linked_shader->TessEval.PrimitiveMode = shader->TessEval.PrimitiveMode; + } + + if (shader->TessEval.Spacing != 0) { + if (linked_shader->TessEval.Spacing != 0 && + linked_shader->TessEval.Spacing != shader->TessEval.Spacing) { + linker_error(prog, "tessellation evaluation shader defined with " + "conflicting vertex spacing.\n"); + return; + } + linked_shader->TessEval.Spacing = shader->TessEval.Spacing; + } + + if (shader->TessEval.VertexOrder != 0) { + if (linked_shader->TessEval.VertexOrder != 0 && + linked_shader->TessEval.VertexOrder != shader->TessEval.VertexOrder) { + linker_error(prog, "tessellation evaluation shader defined with " + "conflicting ordering.\n"); + return; + } + linked_shader->TessEval.VertexOrder = shader->TessEval.VertexOrder; + } + + if (shader->TessEval.PointMode != -1) { + if (linked_shader->TessEval.PointMode != -1 && + linked_shader->TessEval.PointMode != shader->TessEval.PointMode) { + linker_error(prog, "tessellation evaluation shader defined with " + "conflicting point modes.\n"); + return; + } + linked_shader->TessEval.PointMode = shader->TessEval.PointMode; + } + + } + + /* Just do the intrastage -> interstage propagation right now, + * since we already know we're in the right type of shader program + * for doing it. + */ + if (linked_shader->TessEval.PrimitiveMode == PRIM_UNKNOWN) { + linker_error(prog, + "tessellation evaluation shader didn't declare input " + "primitive modes.\n"); + return; + } + prog->TessEval.PrimitiveMode = linked_shader->TessEval.PrimitiveMode; + + if (linked_shader->TessEval.Spacing == 0) + linked_shader->TessEval.Spacing = GL_EQUAL; + prog->TessEval.Spacing = linked_shader->TessEval.Spacing; + + if (linked_shader->TessEval.VertexOrder == 0) + linked_shader->TessEval.VertexOrder = GL_CCW; + prog->TessEval.VertexOrder = linked_shader->TessEval.VertexOrder; + + if (linked_shader->TessEval.PointMode == -1) + linked_shader->TessEval.PointMode = GL_FALSE; + prog->TessEval.PointMode = linked_shader->TessEval.PointMode; +} + + +/** + * Performs the cross-validation of layout qualifiers specified in + * redeclaration of gl_FragCoord for the attached fragment shaders, + * and propagates them to the linked FS and linked shader program. + */ +static void +link_fs_input_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + linked_shader->redeclares_gl_fragcoord = false; + linked_shader->uses_gl_fragcoord = false; + linked_shader->origin_upper_left = false; + linked_shader->pixel_center_integer = false; + + if (linked_shader->Stage != MESA_SHADER_FRAGMENT || + (prog->Version < 150 && !prog->ARB_fragment_coord_conventions_enable)) + return; + + for (unsigned i = 0; i < num_shaders; i++) { + struct gl_shader *shader = shader_list[i]; + /* From the GLSL 1.50 spec, page 39: + * + * "If gl_FragCoord is redeclared in any fragment shader in a program, + * it must be redeclared in all the fragment shaders in that program + * that have a static use gl_FragCoord." 
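+       *
+       * For example (illustrative GLSL), if one fragment shader redeclares
+       *
+       *    layout(origin_upper_left) in vec4 gl_FragCoord;
+       *
+       * every other fragment shader in the program that statically uses
+       * gl_FragCoord must contain the same redeclaration.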
+ */ + if ((linked_shader->redeclares_gl_fragcoord + && !shader->redeclares_gl_fragcoord + && shader->uses_gl_fragcoord) + || (shader->redeclares_gl_fragcoord + && !linked_shader->redeclares_gl_fragcoord + && linked_shader->uses_gl_fragcoord)) { + linker_error(prog, "fragment shader defined with conflicting " + "layout qualifiers for gl_FragCoord\n"); + } + + /* From the GLSL 1.50 spec, page 39: + * + * "All redeclarations of gl_FragCoord in all fragment shaders in a + * single program must have the same set of qualifiers." + */ + if (linked_shader->redeclares_gl_fragcoord && shader->redeclares_gl_fragcoord + && (shader->origin_upper_left != linked_shader->origin_upper_left + || shader->pixel_center_integer != linked_shader->pixel_center_integer)) { + linker_error(prog, "fragment shader defined with conflicting " + "layout qualifiers for gl_FragCoord\n"); + } + + /* Update the linked shader state. Note that uses_gl_fragcoord should + * accumulate the results. The other values should replace. If there + * are multiple redeclarations, all the fields except uses_gl_fragcoord + * are already known to be the same. + */ + if (shader->redeclares_gl_fragcoord || shader->uses_gl_fragcoord) { + linked_shader->redeclares_gl_fragcoord = + shader->redeclares_gl_fragcoord; + linked_shader->uses_gl_fragcoord = linked_shader->uses_gl_fragcoord + || shader->uses_gl_fragcoord; + linked_shader->origin_upper_left = shader->origin_upper_left; + linked_shader->pixel_center_integer = shader->pixel_center_integer; + } + + linked_shader->EarlyFragmentTests |= shader->EarlyFragmentTests; + } +} + +/** + * Performs the cross-validation of geometry shader max_vertices and + * primitive type layout qualifiers for the attached geometry shaders, + * and propagates them to the linked GS and linked shader program. + */ +static void +link_gs_inout_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + linked_shader->Geom.VerticesOut = 0; + linked_shader->Geom.Invocations = 0; + linked_shader->Geom.InputType = PRIM_UNKNOWN; + linked_shader->Geom.OutputType = PRIM_UNKNOWN; + + /* No in/out qualifiers defined for anything but GLSL 1.50+ + * geometry shaders so far. + */ + if (linked_shader->Stage != MESA_SHADER_GEOMETRY || prog->Version < 150) + return; + + /* From the GLSL 1.50 spec, page 46: + * + * "All geometry shader output layout declarations in a program + * must declare the same layout and same value for + * max_vertices. There must be at least one geometry output + * layout declaration somewhere in a program, but not all + * geometry shaders (compilation units) are required to + * declare it." 
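
Once the geometry shader's input primitive type is linked, it also fixes the size of unsized per-vertex input arrays; link_intrastage_shaders later resizes them using vertices_per_prim(prog->Geom.InputType). A sketch of that mapping, with an illustrative enum standing in for Mesa's primitive tokens:

    #include <assert.h>

    enum prim { POINTS, LINES, LINES_ADJACENCY, TRIANGLES, TRIANGLES_ADJACENCY };

    /* Vertices presented to a geometry shader per input primitive. */
    static unsigned vertices_per_input_prim(enum prim p)
    {
       switch (p) {
       case POINTS:              return 1;
       case LINES:               return 2;
       case LINES_ADJACENCY:     return 4;
       case TRIANGLES:           return 3;
       case TRIANGLES_ADJACENCY: return 6;
       }
       assert(!"unknown input primitive");
       return 0;
    }
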
+ */ + + for (unsigned i = 0; i < num_shaders; i++) { + struct gl_shader *shader = shader_list[i]; + + if (shader->Geom.InputType != PRIM_UNKNOWN) { + if (linked_shader->Geom.InputType != PRIM_UNKNOWN && + linked_shader->Geom.InputType != shader->Geom.InputType) { + linker_error(prog, "geometry shader defined with conflicting " + "input types\n"); + return; + } + linked_shader->Geom.InputType = shader->Geom.InputType; + } + + if (shader->Geom.OutputType != PRIM_UNKNOWN) { + if (linked_shader->Geom.OutputType != PRIM_UNKNOWN && + linked_shader->Geom.OutputType != shader->Geom.OutputType) { + linker_error(prog, "geometry shader defined with conflicting " + "output types\n"); + return; + } + linked_shader->Geom.OutputType = shader->Geom.OutputType; + } + + if (shader->Geom.VerticesOut != 0) { + if (linked_shader->Geom.VerticesOut != 0 && + linked_shader->Geom.VerticesOut != shader->Geom.VerticesOut) { + linker_error(prog, "geometry shader defined with conflicting " + "output vertex count (%d and %d)\n", + linked_shader->Geom.VerticesOut, + shader->Geom.VerticesOut); + return; + } + linked_shader->Geom.VerticesOut = shader->Geom.VerticesOut; + } + + if (shader->Geom.Invocations != 0) { + if (linked_shader->Geom.Invocations != 0 && + linked_shader->Geom.Invocations != shader->Geom.Invocations) { + linker_error(prog, "geometry shader defined with conflicting " + "invocation count (%d and %d)\n", + linked_shader->Geom.Invocations, + shader->Geom.Invocations); + return; + } + linked_shader->Geom.Invocations = shader->Geom.Invocations; + } + } + + /* Just do the intrastage -> interstage propagation right now, + * since we already know we're in the right type of shader program + * for doing it. + */ + if (linked_shader->Geom.InputType == PRIM_UNKNOWN) { + linker_error(prog, + "geometry shader didn't declare primitive input type\n"); + return; + } + prog->Geom.InputType = linked_shader->Geom.InputType; + + if (linked_shader->Geom.OutputType == PRIM_UNKNOWN) { + linker_error(prog, + "geometry shader didn't declare primitive output type\n"); + return; + } + prog->Geom.OutputType = linked_shader->Geom.OutputType; + + if (linked_shader->Geom.VerticesOut == 0) { + linker_error(prog, + "geometry shader didn't declare max_vertices\n"); + return; + } + prog->Geom.VerticesOut = linked_shader->Geom.VerticesOut; + + if (linked_shader->Geom.Invocations == 0) + linked_shader->Geom.Invocations = 1; + + prog->Geom.Invocations = linked_shader->Geom.Invocations; +} + + +/** + * Perform cross-validation of compute shader local_size_{x,y,z} layout + * qualifiers for the attached compute shaders, and propagate them to the + * linked CS and linked shader program. + */ +static void +link_cs_input_layout_qualifiers(struct gl_shader_program *prog, + struct gl_shader *linked_shader, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + for (int i = 0; i < 3; i++) + linked_shader->Comp.LocalSize[i] = 0; + + /* This function is called for all shader stages, but it only has an effect + * for compute shaders. + */ + if (linked_shader->Stage != MESA_SHADER_COMPUTE) + return; + + /* From the ARB_compute_shader spec, in the section describing local size + * declarations: + * + * If multiple compute shaders attached to a single program object + * declare local work-group size, the declarations must be identical; + * otherwise a link-time error results. 
Furthermore, if a program + * object contains any compute shaders, at least one must contain an + * input layout qualifier specifying the local work sizes of the + * program, or a link-time error will occur. + */ + for (unsigned sh = 0; sh < num_shaders; sh++) { + struct gl_shader *shader = shader_list[sh]; + + if (shader->Comp.LocalSize[0] != 0) { + if (linked_shader->Comp.LocalSize[0] != 0) { + for (int i = 0; i < 3; i++) { + if (linked_shader->Comp.LocalSize[i] != + shader->Comp.LocalSize[i]) { + linker_error(prog, "compute shader defined with conflicting " + "local sizes\n"); + return; + } + } + } + for (int i = 0; i < 3; i++) + linked_shader->Comp.LocalSize[i] = shader->Comp.LocalSize[i]; + } + } + + /* Just do the intrastage -> interstage propagation right now, + * since we already know we're in the right type of shader program + * for doing it. + */ + if (linked_shader->Comp.LocalSize[0] == 0) { + linker_error(prog, "compute shader didn't declare local size\n"); + return; + } + for (int i = 0; i < 3; i++) + prog->Comp.LocalSize[i] = linked_shader->Comp.LocalSize[i]; +} + + +/** + * Combine a group of shaders for a single stage to generate a linked shader + * + * \note + * If this function is supplied a single shader, it is cloned, and the new + * shader is returned. + */ +static struct gl_shader * +link_intrastage_shaders(void *mem_ctx, + struct gl_context *ctx, + struct gl_shader_program *prog, + struct gl_shader **shader_list, + unsigned num_shaders) +{ + struct gl_uniform_block *uniform_blocks = NULL; + + /* Check that global variables defined in multiple shaders are consistent. + */ + cross_validate_globals(prog, shader_list, num_shaders, false); + if (!prog->LinkStatus) + return NULL; + + /* Check that interface blocks defined in multiple shaders are consistent. + */ + validate_intrastage_interface_blocks(prog, (const gl_shader **)shader_list, + num_shaders); + if (!prog->LinkStatus) + return NULL; + + /* Link up uniform blocks defined within this stage. */ + const unsigned num_uniform_blocks = + link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders, + &uniform_blocks); + if (!prog->LinkStatus) + return NULL; + + /* Check that there is only a single definition of each function signature + * across all shaders. + */ + for (unsigned i = 0; i < (num_shaders - 1); i++) { + foreach_in_list(ir_instruction, node, shader_list[i]->ir) { + ir_function *const f = node->as_function(); + + if (f == NULL) + continue; + + for (unsigned j = i + 1; j < num_shaders; j++) { + ir_function *const other = + shader_list[j]->symbols->get_function(f->name); + + /* If the other shader has no function (and therefore no function + * signatures) with the same name, skip to the next shader. + */ + if (other == NULL) + continue; + + foreach_in_list(ir_function_signature, sig, &f->signatures) { + if (!sig->is_defined || sig->is_builtin()) + continue; + + ir_function_signature *other_sig = + other->exact_matching_signature(NULL, &sig->parameters); + + if ((other_sig != NULL) && other_sig->is_defined + && !other_sig->is_builtin()) { + linker_error(prog, "function `%s' is multiply defined\n", + f->name); + return NULL; + } + } + } + } + } + + /* Find the shader that defines main, and make a clone of it. + * + * Starting with the clone, search for undefined references. If one is + * found, find the shader that defines it. Clone the reference and add + * it to the shader. Repeat until there are no undefined references or + * until a reference cannot be resolved. 
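
In other words, linking is a fixed-point worklist over undefined references. A self-contained toy of the loop described above (the one-callee-per-function model and all names are illustrative only, not Mesa's IR walk):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Toy model: each "function" has a name and at most one callee. */
    struct func { const char *name; const char *callee; };

    static const struct func units[] = {   /* definitions across all units */
       { "main", "helper" }, { "helper", "leaf" }, { "leaf", NULL },
    };

    int main(void)
    {
       const struct func *linked[8] = { &units[0] };  /* start from main */
       unsigned n = 1;
       bool progress = true;

       while (progress) {                   /* repeat until a fixed point */
          progress = false;
          for (unsigned i = 0; i < n; i++) {
             const char *need = linked[i]->callee;
             bool have = (need == NULL);
             for (unsigned j = 0; !have && j < n; j++)
                have = strcmp(linked[j]->name, need) == 0;
             if (have)
                continue;
             for (unsigned k = 0; k < 3; k++) {  /* pull in the definition */
                if (strcmp(units[k].name, need) == 0) {
                   linked[n++] = &units[k];
                   progress = true;
                }
             }
          }
       }
       printf("linked %u functions\n", n);  /* prints: linked 3 functions */
       return 0;
    }
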
+ */ + gl_shader *main = NULL; + for (unsigned i = 0; i < num_shaders; i++) { + if (_mesa_get_main_function_signature(shader_list[i]) != NULL) { + main = shader_list[i]; + break; + } + } + + if (main == NULL) { + linker_error(prog, "%s shader lacks `main'\n", + _mesa_shader_stage_to_string(shader_list[0]->Stage)); + return NULL; + } + + gl_shader *linked = ctx->Driver.NewShader(NULL, 0, main->Type); + linked->ir = new(linked) exec_list; + clone_ir_list(mem_ctx, linked->ir, main->ir); + + linked->BufferInterfaceBlocks = uniform_blocks; + linked->NumBufferInterfaceBlocks = num_uniform_blocks; + ralloc_steal(linked, linked->BufferInterfaceBlocks); + + link_fs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); + link_tcs_out_layout_qualifiers(prog, linked, shader_list, num_shaders); + link_tes_in_layout_qualifiers(prog, linked, shader_list, num_shaders); + link_gs_inout_layout_qualifiers(prog, linked, shader_list, num_shaders); + link_cs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); + + populate_symbol_table(linked); + + /* The pointer to the main function in the final linked shader (i.e., the + * copy of the original shader that contained the main function). + */ + ir_function_signature *const main_sig = + _mesa_get_main_function_signature(linked); + + /* Move any instructions other than variable declarations or function + * declarations into main. + */ + exec_node *insertion_point = + move_non_declarations(linked->ir, (exec_node *) &main_sig->body, false, + linked); + + for (unsigned i = 0; i < num_shaders; i++) { + if (shader_list[i] == main) + continue; + + insertion_point = move_non_declarations(shader_list[i]->ir, + insertion_point, true, linked); + } + + /* Check if any shader needs built-in functions. */ + bool need_builtins = false; + for (unsigned i = 0; i < num_shaders; i++) { + if (shader_list[i]->uses_builtin_functions) { + need_builtins = true; + break; + } + } + + bool ok; + if (need_builtins) { + /* Make a temporary array one larger than shader_list, which will hold + * the built-in function shader as well. + */ + gl_shader **linking_shaders = (gl_shader **) + calloc(num_shaders + 1, sizeof(gl_shader *)); + + ok = linking_shaders != NULL; + + if (ok) { + memcpy(linking_shaders, shader_list, num_shaders * sizeof(gl_shader *)); + linking_shaders[num_shaders] = _mesa_glsl_get_builtin_function_shader(); + + ok = link_function_calls(prog, linked, linking_shaders, num_shaders + 1); + + free(linking_shaders); + } else { + _mesa_error_no_memory(__func__); + } + } else { + ok = link_function_calls(prog, linked, shader_list, num_shaders); + } + + + if (!ok) { + _mesa_delete_shader(ctx, linked); + return NULL; + } + + /* At this point linked should contain all of the linked IR, so + * validate it to make sure nothing went wrong. 
+ */ + validate_ir_tree(linked->ir); + + /* Set the size of geometry shader input arrays */ + if (linked->Stage == MESA_SHADER_GEOMETRY) { + unsigned num_vertices = vertices_per_prim(prog->Geom.InputType); + geom_array_resize_visitor input_resize_visitor(num_vertices, prog); + foreach_in_list(ir_instruction, ir, linked->ir) { + ir->accept(&input_resize_visitor); + } + } + + if (ctx->Const.VertexID_is_zero_based) + lower_vertex_id(linked); + + /* Validate correct usage of barrier() in the tess control shader */ + if (linked->Stage == MESA_SHADER_TESS_CTRL) { + barrier_use_visitor visitor(prog); + foreach_in_list(ir_instruction, ir, linked->ir) { + ir->accept(&visitor); + } + } + + /* Make a pass over all variable declarations to ensure that arrays with + * unspecified sizes have a size specified. The size is inferred from the + * max_array_access field. + */ + array_sizing_visitor v; + v.run(linked->ir); + v.fixup_unnamed_interface_types(); + + return linked; +} + +/** + * Update the sizes of linked shader uniform arrays to the maximum + * array index used. + * + * From page 81 (page 95 of the PDF) of the OpenGL 2.1 spec: + * + * If one or more elements of an array are active, + * GetActiveUniform will return the name of the array in name, + * subject to the restrictions listed above. The type of the array + * is returned in type. The size parameter contains the highest + * array element index used, plus one. The compiler or linker + * determines the highest index used. There will be only one + * active uniform reported by the GL per uniform array. + + */ +static void +update_array_sizes(struct gl_shader_program *prog) +{ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) { + ir_variable *const var = node->as_variable(); + + if ((var == NULL) || (var->data.mode != ir_var_uniform) || + !var->type->is_array()) + continue; + + /* GL_ARB_uniform_buffer_object says that std140 uniforms + * will not be eliminated. Since we always do std140, just + * don't resize arrays in UBOs. + * + * Atomic counters are supposed to get deterministic + * locations assigned based on the declaration ordering and + * sizes, array compaction would mess that up. + * + * Subroutine uniforms are not removed. + */ + if (var->is_in_buffer_block() || var->type->contains_atomic() || + var->type->contains_subroutine()) + continue; + + unsigned int size = var->data.max_array_access; + for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { + if (prog->_LinkedShaders[j] == NULL) + continue; + + foreach_in_list(ir_instruction, node2, prog->_LinkedShaders[j]->ir) { + ir_variable *other_var = node2->as_variable(); + if (!other_var) + continue; + + if (strcmp(var->name, other_var->name) == 0 && + other_var->data.max_array_access > size) { + size = other_var->data.max_array_access; + } + } + } + + if (size + 1 != var->type->length) { + /* If this is a built-in uniform (i.e., it's backed by some + * fixed-function state), adjust the number of state slots to + * match the new array size. The number of slots per array entry + * is not known. It seems safe to assume that the total number of + * slots is an integer multiple of the number of array elements. + * Determine the number of slots per array element by dividing by + * the old (total) size. 
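
Concretely: a built-in uniform array of length 8 backed by 32 state slots has 32 / 8 = 4 slots per element, so if max_array_access is 2 the array is cut to size + 1 = 3 elements and the slot count to 3 * 4 = 12.
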
+ */
+ const unsigned num_slots = var->get_num_state_slots();
+ if (num_slots > 0) {
+ var->set_num_state_slots((size + 1)
+ * (num_slots / var->type->length));
+ }
+
+ var->type = glsl_type::get_array_instance(var->type->fields.array,
+ size + 1);
+ /* FINISHME: We should update the types of array
+ * dereferences of this variable now.
+ */
+ }
+ }
+ }
+}
+
+/**
+ * Resize tessellation evaluation per-vertex inputs to the size of
+ * tessellation control per-vertex outputs.
+ */
+static void
+resize_tes_inputs(struct gl_context *ctx,
+ struct gl_shader_program *prog)
+{
+ if (prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] == NULL)
+ return;
+
+ gl_shader *const tcs = prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
+ gl_shader *const tes = prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
+
+ /* If no control shader is present, then the TES inputs are statically
+ * sized to MaxPatchVertices; the actual size of the arrays won't be
+ * known until draw time.
+ */
+ const int num_vertices = tcs
+ ? tcs->TessCtrl.VerticesOut
+ : ctx->Const.MaxPatchVertices;
+
+ tess_eval_array_resize_visitor input_resize_visitor(num_vertices, prog);
+ foreach_in_list(ir_instruction, ir, tes->ir) {
+ ir->accept(&input_resize_visitor);
+ }
+
+ if (tcs) {
+ /* Convert the gl_PatchVerticesIn system value into a constant, since
+ * the value is known at this point.
+ */
+ foreach_in_list(ir_instruction, ir, tes->ir) {
+ ir_variable *var = ir->as_variable();
+ if (var && var->data.mode == ir_var_system_value &&
+ var->data.location == SYSTEM_VALUE_VERTICES_IN) {
+ void *mem_ctx = ralloc_parent(var);
+ var->data.mode = ir_var_auto;
+ var->data.location = 0;
+ var->constant_value = new(mem_ctx) ir_constant(num_vertices);
+ }
+ }
+ }
+}
+
+/**
+ * Find a contiguous set of available bits in a bitmask.
+ *
+ * \param used_mask Bits representing used (1) and unused (0) locations
+ * \param needed_count Number of contiguous bits needed.
+ *
+ * \return
+ * Base location of the available bits on success or -1 on failure.
+ */
+int
+find_available_slots(unsigned used_mask, unsigned needed_count)
+{
+ unsigned needed_mask = (1 << needed_count) - 1;
+ const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count;
+
+ /* The comparison to 32 is redundant, but without it GCC emits "warning:
+ * cannot optimize possibly infinite loops" for the loop below.
+ */
+ if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32))
+ return -1;
+
+ for (int i = 0; i <= max_bit_to_test; i++) {
+ if ((needed_mask & ~used_mask) == needed_mask)
+ return i;
+
+ needed_mask <<= 1;
+ }
+
+ return -1;
+}
+
+
+/**
+ * Assign locations for either VS inputs or FS outputs
+ *
+ * \param prog Shader program whose variables need locations assigned
+ * \param constants Driver specific constant values for the program.
+ * \param target_index Selector for the program target to receive location
+ * assignments. Must be either \c MESA_SHADER_VERTEX or
+ * \c MESA_SHADER_FRAGMENT.
+ *
+ * \return
+ * If locations are successfully assigned, true is returned. Otherwise an
+ * error is emitted to the shader link log and false is returned.
+ */
+bool
+assign_attribute_or_color_locations(gl_shader_program *prog,
+ struct gl_constants *constants,
+ unsigned target_index)
+{
+ /* Maximum number of generic locations. This corresponds to either the
+ * maximum number of draw buffers or the maximum number of generic
+ * attributes.
+ */
+ unsigned max_index = (target_index == MESA_SHADER_VERTEX) ?
+ constants->Program[target_index].MaxAttribs :
+ MAX2(constants->MaxDrawBuffers, constants->MaxDualSourceDrawBuffers);
+
+ /* Mark invalid locations as being used.
+ */
+ unsigned used_locations = (max_index >= 32)
+ ? ~0 : ~((1 << max_index) - 1);
+ unsigned double_storage_locations = 0;
+
+ assert((target_index == MESA_SHADER_VERTEX)
+ || (target_index == MESA_SHADER_FRAGMENT));
+
+ gl_shader *const sh = prog->_LinkedShaders[target_index];
+ if (sh == NULL)
+ return true;
+
+ /* Operate in a total of four passes.
+ *
+ * 1. Invalidate the location assignments for all vertex shader inputs.
+ *
+ * 2. Assign locations for inputs that have user-defined locations (via
+ * glBindAttribLocation) and outputs that have user-defined locations
+ * (via glBindFragDataLocation).
+ *
+ * 3. Sort the attributes without assigned locations by number of slots
+ * required in decreasing order. Fragmentation caused by attribute
+ * locations assigned by the application may prevent large attributes
+ * from having enough contiguous space.
+ *
+ * 4. Assign locations to any inputs without assigned locations.
+ */
+
+ const int generic_base = (target_index == MESA_SHADER_VERTEX)
+ ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0;
+
+ const enum ir_variable_mode direction =
+ (target_index == MESA_SHADER_VERTEX)
+ ? ir_var_shader_in : ir_var_shader_out;
+
+
+ /* Temporary storage for the set of attributes that need locations assigned.
+ */
+ struct temp_attr {
+ unsigned slots;
+ ir_variable *var;
+
+ /* Used below in the call to qsort. */
+ static int compare(const void *a, const void *b)
+ {
+ const temp_attr *const l = (const temp_attr *) a;
+ const temp_attr *const r = (const temp_attr *) b;
+
+ /* Reversed because we want a descending order sort below. */
+ return r->slots - l->slots;
+ }
+ } to_assign[16];
+
+ unsigned num_attr = 0;
+
+ foreach_in_list(ir_instruction, node, sh->ir) {
+ ir_variable *const var = node->as_variable();
+
+ if ((var == NULL) || (var->data.mode != (unsigned) direction))
+ continue;
+
+ if (var->data.explicit_location) {
+ var->data.is_unmatched_generic_inout = 0;
+ if ((var->data.location >= (int)(max_index + generic_base))
+ || (var->data.location < 0)) {
+ linker_error(prog,
+ "invalid explicit location %d specified for `%s'\n",
+ (var->data.location < 0)
+ ? var->data.location
+ : var->data.location - generic_base,
+ var->name);
+ return false;
+ }
+ } else if (target_index == MESA_SHADER_VERTEX) {
+ unsigned binding;
+
+ if (prog->AttributeBindings->get(binding, var->name)) {
+ assert(binding >= VERT_ATTRIB_GENERIC0);
+ var->data.location = binding;
+ var->data.is_unmatched_generic_inout = 0;
+ }
+ } else if (target_index == MESA_SHADER_FRAGMENT) {
+ unsigned binding;
+ unsigned index;
+
+ if (prog->FragDataBindings->get(binding, var->name)) {
+ assert(binding >= FRAG_RESULT_DATA0);
+ var->data.location = binding;
+ var->data.is_unmatched_generic_inout = 0;
+
+ if (prog->FragDataIndexBindings->get(index, var->name)) {
+ var->data.index = index;
+ }
+ }
+ }
+
+ /* From the GL 4.5 core spec, section 15.2 (Shader Execution):
+ *
+ * "Output binding assignments will cause LinkProgram to fail:
+ * ...
+ * If the program has an active output assigned to a location greater
+ * than or equal to the value of MAX_DUAL_SOURCE_DRAW_BUFFERS and has
+ * an active output assigned an index greater than or equal to one;"
+ */
+ if (target_index == MESA_SHADER_FRAGMENT && var->data.index >= 1 &&
+ var->data.location - generic_base >=
+ (int) constants->MaxDualSourceDrawBuffers) {
+ linker_error(prog,
+ "output location %d >= GL_MAX_DUAL_SOURCE_DRAW_BUFFERS "
+ "with index %u for %s\n",
+ var->data.location - generic_base, var->data.index,
+ var->name);
+ return false;
+ }
+
+ const unsigned slots = var->type->count_attribute_slots(target_index == MESA_SHADER_VERTEX);
+
+ /* If the variable is not a built-in and has a location statically
+ * assigned in the shader (presumably via a layout qualifier), make sure
+ * that it doesn't collide with other assigned locations. Otherwise,
+ * add it to the list of variables that need linker-assigned locations.
+ */
+ if (var->data.location != -1) {
+ if (var->data.location >= generic_base && var->data.index < 1) {
+ /* From page 61 of the OpenGL 4.0 spec:
+ *
+ * "LinkProgram will fail if the attribute bindings assigned
+ * by BindAttribLocation do not leave enough space to
+ * assign a location for an active matrix attribute or an
+ * active attribute array, both of which require multiple
+ * contiguous generic attributes."
+ *
+ * I think the above text prohibits the aliasing of explicit and
+ * automatic assignments. But aliasing is allowed in manual
+ * assignments of attribute locations. See the comments below for
+ * details.
+ *
+ * From OpenGL 4.0 spec, page 61:
+ *
+ * "It is possible for an application to bind more than one
+ * attribute name to the same location. This is referred to as
+ * aliasing. This will only work if only one of the aliased
+ * attributes is active in the executable program, or if no
+ * path through the shader consumes more than one attribute of
+ * a set of attributes aliased to the same location. A link
+ * error can occur if the linker determines that every path
+ * through the shader consumes multiple aliased attributes,
+ * but implementations are not required to generate an error
+ * in this case."
+ *
+ * From GLSL 4.30 spec, page 54:
+ *
+ * "A program will fail to link if any two non-vertex shader
+ * input variables are assigned to the same location. For
+ * vertex shaders, multiple input variables may be assigned
+ * to the same location using either layout qualifiers or via
+ * the OpenGL API. However, such aliasing is intended only to
+ * support vertex shaders where each execution path accesses
+ * at most one input per each location. Implementations are
+ * permitted, but not required, to generate link-time errors
+ * if they detect that every path through the vertex shader
+ * executable accesses multiple inputs assigned to any single
+ * location. For all shader types, a program will fail to link
+ * if explicit location assignments leave the linker unable
+ * to find space for other variables without explicit
+ * assignments."
+ *
+ * From OpenGL ES 3.0 spec, page 56:
+ *
+ * "Binding more than one attribute name to the same location
+ * is referred to as aliasing, and is not permitted in OpenGL
+ * ES Shading Language 3.00 vertex shaders. LinkProgram will
+ * fail when this condition exists. However, aliasing is
+ * possible in OpenGL ES Shading Language 1.00 vertex shaders.
+ * This will only work if only one of the aliased attributes
+ * is active in the executable program, or if no path through
+ * the shader consumes more than one attribute of a set of
+ * attributes aliased to the same location. A link error can
+ * occur if the linker determines that every path through the
+ * shader consumes multiple aliased attributes, but implemen-
+ * tations are not required to generate an error in this case."
+ *
+ * After looking at the above references from OpenGL, OpenGL ES and
+ * GLSL specifications, we allow aliasing of vertex input variables
+ * in OpenGL 2.0 (and above) and OpenGL ES 2.0.
+ *
+ * NOTE: This is not required by the spec but it's worth mentioning
+ * here that we're not doing anything to make sure that no path
+ * through the vertex shader executable accesses multiple inputs
+ * assigned to any single location.
+ */
+
+ /* Mask representing the contiguous slots that will be used by
+ * this attribute.
+ */
+ const unsigned attr = var->data.location - generic_base;
+ const unsigned use_mask = (1 << slots) - 1;
+ const char *const string = (target_index == MESA_SHADER_VERTEX)
+ ? "vertex shader input" : "fragment shader output";
+
+ /* Generate a link error if the requested locations for this
+ * attribute exceed the maximum allowed attribute location.
+ */
+ if (attr + slots > max_index) {
+ linker_error(prog,
+ "insufficient contiguous locations "
+ "available for %s `%s' %d %d %d\n", string,
+ var->name, used_locations, use_mask, attr);
+ return false;
+ }
+
+ /* Generate a link error if the set of bits requested for this
+ * attribute overlaps any previously allocated bits.
+ */
+ if ((~(use_mask << attr) & used_locations) != used_locations) {
+ if (target_index == MESA_SHADER_FRAGMENT ||
+ (prog->IsES && prog->Version >= 300)) {
+ linker_error(prog,
+ "overlapping location is assigned "
+ "to %s `%s' %d %d %d\n", string,
+ var->name, used_locations, use_mask, attr);
+ return false;
+ } else {
+ linker_warning(prog,
+ "overlapping location is assigned "
+ "to %s `%s' %d %d %d\n", string,
+ var->name, used_locations, use_mask, attr);
+ }
+ }
+
+ used_locations |= (use_mask << attr);
+
+ /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
+ *
+ * "A program with more than the value of MAX_VERTEX_ATTRIBS
+ * active attribute variables may fail to link, unless
+ * device-dependent optimizations are able to make the program
+ * fit within available hardware resources. For the purposes
+ * of this test, attribute variables of the type dvec3, dvec4,
+ * dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
+ * count as consuming twice as many attributes as equivalent
+ * single-precision types. While these types use the same number
+ * of generic attributes as their single-precision equivalents,
+ * implementations are permitted to consume two single-precision
+ * vectors of internal storage for each three- or four-component
+ * double-precision vector."
+ *
+ * Mark this attribute slot as taking up twice as much space
+ * so we can count it properly against limits. According to
+ * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
+ * is optional behavior, but it seems preferable.
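
For example, a vertex shader with vec4 a at location 0 and dvec4 b at location 1 sets bits 0 and 1 in used_locations, and is_dual_slot_double() additionally records bit 1 in double_storage_locations, so total_attribs_size below becomes popcount(0b11) + popcount(0b10) = 3 slots charged against MaxAttribs.
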
+ */
+ if (var->type->without_array()->is_dual_slot_double())
+ double_storage_locations |= (use_mask << attr);
+ }
+
+ continue;
+ }
+
+ to_assign[num_attr].slots = slots;
+ to_assign[num_attr].var = var;
+ num_attr++;
+ }
+
+ if (target_index == MESA_SHADER_VERTEX) {
+ unsigned total_attribs_size =
+ _mesa_bitcount(used_locations & ((1 << max_index) - 1)) +
+ _mesa_bitcount(double_storage_locations);
+ if (total_attribs_size > max_index) {
+ linker_error(prog,
+ "attempt to use %d vertex attribute slots, but only %d are available\n",
+ total_attribs_size, max_index);
+ return false;
+ }
+ }
+
+ /* If all of the attributes were assigned locations by the application (or
+ * are built-in attributes with fixed locations), return early. This should
+ * be the common case.
+ */
+ if (num_attr == 0)
+ return true;
+
+ qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare);
+
+ if (target_index == MESA_SHADER_VERTEX) {
+ /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS. It can
+ * only be explicitly assigned via glBindAttribLocation. Mark it as
+ * reserved to prevent it from being automatically allocated below.
+ */
+ find_deref_visitor find("gl_Vertex");
+ find.run(sh->ir);
+ if (find.variable_found())
+ used_locations |= (1 << 0);
+ }
+
+ for (unsigned i = 0; i < num_attr; i++) {
+ /* Mask representing the contiguous slots that will be used by this
+ * attribute.
+ */
+ const unsigned use_mask = (1 << to_assign[i].slots) - 1;
+
+ int location = find_available_slots(used_locations, to_assign[i].slots);
+
+ if (location < 0) {
+ const char *const string = (target_index == MESA_SHADER_VERTEX)
+ ? "vertex shader input" : "fragment shader output";
+
+ linker_error(prog,
+ "insufficient contiguous locations "
+ "available for %s `%s'\n",
+ string, to_assign[i].var->name);
+ return false;
+ }
+
+ to_assign[i].var->data.location = generic_base + location;
+ to_assign[i].var->data.is_unmatched_generic_inout = 0;
+ used_locations |= (use_mask << location);
+ }
+
+ return true;
+}
+
+/**
+ * Match explicit locations of outputs to inputs and deactivate the
+ * unmatched flag on a match so we don't optimise them away.
+ */
+static void
+match_explicit_outputs_to_inputs(struct gl_shader_program *prog,
+ gl_shader *producer,
+ gl_shader *consumer)
+{
+ glsl_symbol_table parameters;
+ ir_variable *explicit_locations[MAX_VARYING] = { NULL };
+
+ /* Find all shader outputs in the "producer" stage.
+ */
+ foreach_in_list(ir_instruction, node, producer->ir) {
+ ir_variable *const var = node->as_variable();
+
+ if ((var == NULL) || (var->data.mode != ir_var_shader_out))
+ continue;
+
+ if (var->data.explicit_location &&
+ var->data.location >= VARYING_SLOT_VAR0) {
+ const unsigned idx = var->data.location - VARYING_SLOT_VAR0;
+ if (explicit_locations[idx] == NULL)
+ explicit_locations[idx] = var;
+ }
+ }
+
+ /* Match inputs to outputs */
+ foreach_in_list(ir_instruction, node, consumer->ir) {
+ ir_variable *const input = node->as_variable();
+
+ if ((input == NULL) || (input->data.mode != ir_var_shader_in))
+ continue;
+
+ ir_variable *output = NULL;
+ if (input->data.explicit_location
+ && input->data.location >= VARYING_SLOT_VAR0) {
+ output = explicit_locations[input->data.location - VARYING_SLOT_VAR0];
+
+ if (output != NULL) {
+ input->data.is_unmatched_generic_inout = 0;
+ output->data.is_unmatched_generic_inout = 0;
+ }
+ }
+ }
+}
+
+/**
+ * Store the gl_FragDepth layout in the gl_shader_program struct.
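
The layouts recorded below correspond to fragment shader redeclarations of the form layout(depth_greater) out float gl_FragDepth; introduced by ARB_conservative_depth, which promise the implementation a bound on the written depth so that early depth optimizations can stay enabled.
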
+ */ +static void +store_fragdepth_layout(struct gl_shader_program *prog) +{ + if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) { + return; + } + + struct exec_list *ir = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->ir; + + /* We don't look up the gl_FragDepth symbol directly because if + * gl_FragDepth is not used in the shader, it's removed from the IR. + * However, the symbol won't be removed from the symbol table. + * + * We're only interested in the cases where the variable is NOT removed + * from the IR. + */ + foreach_in_list(ir_instruction, node, ir) { + ir_variable *const var = node->as_variable(); + + if (var == NULL || var->data.mode != ir_var_shader_out) { + continue; + } + + if (strcmp(var->name, "gl_FragDepth") == 0) { + switch (var->data.depth_layout) { + case ir_depth_layout_none: + prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; + return; + case ir_depth_layout_any: + prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; + return; + case ir_depth_layout_greater: + prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; + return; + case ir_depth_layout_less: + prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; + return; + case ir_depth_layout_unchanged: + prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; + return; + default: + assert(0); + return; + } + } + } +} + +/** + * Validate the resources used by a program versus the implementation limits + */ +static void +check_resources(struct gl_context *ctx, struct gl_shader_program *prog) +{ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh == NULL) + continue; + + if (sh->num_samplers > ctx->Const.Program[i].MaxTextureImageUnits) { + linker_error(prog, "Too many %s shader texture samplers\n", + _mesa_shader_stage_to_string(i)); + } + + if (sh->num_uniform_components > + ctx->Const.Program[i].MaxUniformComponents) { + if (ctx->Const.GLSLSkipStrictMaxUniformLimitCheck) { + linker_warning(prog, "Too many %s shader default uniform block " + "components, but the driver will try to optimize " + "them out; this is non-portable out-of-spec " + "behavior\n", + _mesa_shader_stage_to_string(i)); + } else { + linker_error(prog, "Too many %s shader default uniform block " + "components\n", + _mesa_shader_stage_to_string(i)); + } + } + + if (sh->num_combined_uniform_components > + ctx->Const.Program[i].MaxCombinedUniformComponents) { + if (ctx->Const.GLSLSkipStrictMaxUniformLimitCheck) { + linker_warning(prog, "Too many %s shader uniform components, " + "but the driver will try to optimize them out; " + "this is non-portable out-of-spec behavior\n", + _mesa_shader_stage_to_string(i)); + } else { + linker_error(prog, "Too many %s shader uniform components\n", + _mesa_shader_stage_to_string(i)); + } + } + } + + unsigned blocks[MESA_SHADER_STAGES] = {0}; + unsigned total_uniform_blocks = 0; + unsigned shader_blocks[MESA_SHADER_STAGES] = {0}; + unsigned total_shader_storage_blocks = 0; + + for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { + /* Don't check SSBOs for Uniform Block Size */ + if (!prog->BufferInterfaceBlocks[i].IsShaderStorage && + prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) { + linker_error(prog, "Uniform block %s too big (%d/%d)\n", + prog->BufferInterfaceBlocks[i].Name, + prog->BufferInterfaceBlocks[i].UniformBufferSize, + ctx->Const.MaxUniformBlockSize); + } + + if (prog->BufferInterfaceBlocks[i].IsShaderStorage && + prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) { + 
linker_error(prog, "Shader storage block %s too big (%d/%d)\n", + prog->BufferInterfaceBlocks[i].Name, + prog->BufferInterfaceBlocks[i].UniformBufferSize, + ctx->Const.MaxShaderStorageBlockSize); + } + + for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { + if (prog->InterfaceBlockStageIndex[j][i] != -1) { + struct gl_shader *sh = prog->_LinkedShaders[j]; + int stage_index = prog->InterfaceBlockStageIndex[j][i]; + if (sh && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) { + shader_blocks[j]++; + total_shader_storage_blocks++; + } else { + blocks[j]++; + total_uniform_blocks++; + } + } + } + + if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) { + linker_error(prog, "Too many combined uniform blocks (%d/%d)\n", + total_uniform_blocks, + ctx->Const.MaxCombinedUniformBlocks); + } else { + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + const unsigned max_uniform_blocks = + ctx->Const.Program[i].MaxUniformBlocks; + if (blocks[i] > max_uniform_blocks) { + linker_error(prog, "Too many %s uniform blocks (%d/%d)\n", + _mesa_shader_stage_to_string(i), + blocks[i], + max_uniform_blocks); + break; + } + } + } + + if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) { + linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n", + total_shader_storage_blocks, + ctx->Const.MaxCombinedShaderStorageBlocks); + } else { + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + const unsigned max_shader_storage_blocks = + ctx->Const.Program[i].MaxShaderStorageBlocks; + if (shader_blocks[i] > max_shader_storage_blocks) { + linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n", + _mesa_shader_stage_to_string(i), + shader_blocks[i], + max_shader_storage_blocks); + break; + } + } + } + } +} + +static void +link_calculate_subroutine_compat(struct gl_shader_program *prog) +{ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + int count; + if (!sh) + continue; + + for (unsigned j = 0; j < sh->NumSubroutineUniformRemapTable; j++) { + struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[j]; + + if (!uni) + continue; + + count = 0; + for (unsigned f = 0; f < sh->NumSubroutineFunctions; f++) { + struct gl_subroutine_function *fn = &sh->SubroutineFunctions[f]; + for (int k = 0; k < fn->num_compat_types; k++) { + if (fn->types[k] == uni->type) { + count++; + break; + } + } + } + uni->num_compatible_subroutines = count; + } + } +} + +static void +check_subroutine_resources(struct gl_shader_program *prog) +{ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + if (sh) { + if (sh->NumSubroutineUniformRemapTable > MAX_SUBROUTINE_UNIFORM_LOCATIONS) + linker_error(prog, "Too many %s shader subroutine uniforms\n", + _mesa_shader_stage_to_string(i)); + } + } +} +/** + * Validate shader image resources. 
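
Besides the per-stage image limits, the function below enforces the shared GL_MAX_COMBINED_SHADER_OUTPUT_RESOURCES budget: for example, a program using 4 image uniforms, 2 shader storage blocks and 3 fragment color outputs counts 4 + 2 + 3 = 9 combined output resources.
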
+ */
+static void
+check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+ unsigned total_image_units = 0;
+ unsigned fragment_outputs = 0;
+ unsigned total_shader_storage_blocks = 0;
+
+ if (!ctx->Extensions.ARB_shader_image_load_store)
+ return;
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct gl_shader *sh = prog->_LinkedShaders[i];
+
+ if (sh) {
+ if (sh->NumImages > ctx->Const.Program[i].MaxImageUniforms)
+ linker_error(prog, "Too many %s shader image uniforms (%u > %u)\n",
+ _mesa_shader_stage_to_string(i), sh->NumImages,
+ ctx->Const.Program[i].MaxImageUniforms);
+
+ total_image_units += sh->NumImages;
+
+ for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
+ int stage_index = prog->InterfaceBlockStageIndex[i][j];
+ if (stage_index != -1 && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage)
+ total_shader_storage_blocks++;
+ }
+
+ if (i == MESA_SHADER_FRAGMENT) {
+ foreach_in_list(ir_instruction, node, sh->ir) {
+ ir_variable *var = node->as_variable();
+ if (var && var->data.mode == ir_var_shader_out)
+ /* since there are no double fs outputs - pass false */
+ fragment_outputs += var->type->count_attribute_slots(false);
+ }
+ }
+ }
+ }
+
+ if (total_image_units > ctx->Const.MaxCombinedImageUniforms)
+ linker_error(prog, "Too many combined image uniforms\n");
+
+ if (total_image_units + fragment_outputs + total_shader_storage_blocks >
+ ctx->Const.MaxCombinedShaderOutputResources)
+ linker_error(prog, "Too many combined image uniforms, shader storage "
+ "buffers and fragment outputs\n");
+}
+
+
+/**
+ * Initializes explicit location slots to INACTIVE_UNIFORM_EXPLICIT_LOCATION
+ * for a variable and checks for overlaps with other uniforms that use
+ * explicit locations.
+ */
+static bool
+reserve_explicit_locations(struct gl_shader_program *prog,
+ string_to_uint_map *map, ir_variable *var)
+{
+ unsigned slots = var->type->uniform_locations();
+ unsigned max_loc = var->data.location + slots - 1;
+
+ /* Resize remap table if locations do not fit in the current one. */
+ if (max_loc + 1 > prog->NumUniformRemapTable) {
+ prog->UniformRemapTable =
+ reralloc(prog, prog->UniformRemapTable,
+ gl_uniform_storage *,
+ max_loc + 1);
+
+ if (!prog->UniformRemapTable) {
+ linker_error(prog, "Out of memory during linking.\n");
+ return false;
+ }
+
+ /* Initialize allocated space. */
+ for (unsigned i = prog->NumUniformRemapTable; i < max_loc + 1; i++)
+ prog->UniformRemapTable[i] = NULL;
+
+ prog->NumUniformRemapTable = max_loc + 1;
+ }
+
+ for (unsigned i = 0; i < slots; i++) {
+ unsigned loc = var->data.location + i;
+
+ /* Check if location is already used. */
+ if (prog->UniformRemapTable[loc] == INACTIVE_UNIFORM_EXPLICIT_LOCATION) {
+
+ /* Possibly same uniform from a different stage, this is ok. */
+ unsigned hash_loc;
+ if (map->get(hash_loc, var->name) && hash_loc == loc - i)
+ continue;
+
+ /* ARB_explicit_uniform_location specification states:
+ *
+ * "No two default-block uniform variables in the program can have
+ * the same location, even if they are unused, otherwise a compiler
+ * or linker error will be generated."
+ */
+ linker_error(prog,
+ "location qualifier for uniform %s overlaps "
+ "previously used location\n",
+ var->name);
+ return false;
+ }
+
+ /* Initialize location as inactive before optimization
+ * rounds and location assignment.
+ */
+ prog->UniformRemapTable[loc] = INACTIVE_UNIFORM_EXPLICIT_LOCATION;
+ }
+
+ /* Note that the base location is used for arrays.
*/ + map->put(var->data.location, var->name); + + return true; +} + +static bool +reserve_subroutine_explicit_locations(struct gl_shader_program *prog, + struct gl_shader *sh, + ir_variable *var) +{ + unsigned slots = var->type->uniform_locations(); + unsigned max_loc = var->data.location + slots - 1; + + /* Resize remap table if locations do not fit in the current one. */ + if (max_loc + 1 > sh->NumSubroutineUniformRemapTable) { + sh->SubroutineUniformRemapTable = + reralloc(sh, sh->SubroutineUniformRemapTable, + gl_uniform_storage *, + max_loc + 1); + + if (!sh->SubroutineUniformRemapTable) { + linker_error(prog, "Out of memory during linking.\n"); + return false; + } + + /* Initialize allocated space. */ + for (unsigned i = sh->NumSubroutineUniformRemapTable; i < max_loc + 1; i++) + sh->SubroutineUniformRemapTable[i] = NULL; + + sh->NumSubroutineUniformRemapTable = max_loc + 1; + } + + for (unsigned i = 0; i < slots; i++) { + unsigned loc = var->data.location + i; + + /* Check if location is already used. */ + if (sh->SubroutineUniformRemapTable[loc] == INACTIVE_UNIFORM_EXPLICIT_LOCATION) { + + /* ARB_explicit_uniform_location specification states: + * "No two subroutine uniform variables can have the same location + * in the same shader stage, otherwise a compiler or linker error + * will be generated." + */ + linker_error(prog, + "location qualifier for uniform %s overlaps " + "previously used location\n", + var->name); + return false; + } + + /* Initialize location as inactive before optimization + * rounds and location assignment. + */ + sh->SubroutineUniformRemapTable[loc] = INACTIVE_UNIFORM_EXPLICIT_LOCATION; + } + + return true; +} +/** + * Check and reserve all explicit uniform locations, called before + * any optimizations happen to handle also inactive uniforms and + * inactive array elements that may get trimmed away. + */ +static void +check_explicit_uniform_locations(struct gl_context *ctx, + struct gl_shader_program *prog) +{ + if (!ctx->Extensions.ARB_explicit_uniform_location) + return; + + /* This map is used to detect if overlapping explicit locations + * occur with the same uniform (from different stage) or a different one. + */ + string_to_uint_map *uniform_map = new string_to_uint_map; + + if (!uniform_map) { + linker_error(prog, "Out of memory during linking.\n"); + return; + } + + unsigned entries_total = 0; + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + struct gl_shader *sh = prog->_LinkedShaders[i]; + + if (!sh) + continue; + + foreach_in_list(ir_instruction, node, sh->ir) { + ir_variable *var = node->as_variable(); + if (!var || var->data.mode != ir_var_uniform) + continue; + + entries_total += var->type->uniform_locations(); + + if (var->data.explicit_location) { + bool ret; + if (var->type->without_array()->is_subroutine()) + ret = reserve_subroutine_explicit_locations(prog, sh, var); + else + ret = reserve_explicit_locations(prog, uniform_map, var); + if (!ret) { + delete uniform_map; + return; + } + } + } + } + + /* Verify that total amount of entries for explicit and implicit locations + * is less than MAX_UNIFORM_LOCATIONS. 
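
For example, layout(location = 2) uniform vec4 u[3]; reserves locations 2, 3 and 4; another uniform later declared with location 3 in any stage fails the UniformRemapTable check above, while a redeclaration of u itself at location 2 in a second stage is accepted because the map->get lookup sees the same base location.
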
+ */
+ if (entries_total >= ctx->Const.MaxUserAssignableUniformLocations) {
+ linker_error(prog, "count of uniform locations >= MAX_UNIFORM_LOCATIONS "
+ "(%u >= %u)", entries_total,
+ ctx->Const.MaxUserAssignableUniformLocations);
+ }
+ delete uniform_map;
+}
+
+static bool
+should_add_buffer_variable(struct gl_shader_program *shProg,
+ GLenum type, const char *name)
+{
+ bool found_interface = false;
+ unsigned block_name_len = 0;
+ const char *block_name_dot = strchr(name, '.');
+
+ /* These rules only apply to buffer variables, so we return
+ * true for all other types.
+ */
+ if (type != GL_BUFFER_VARIABLE)
+ return true;
+
+ for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+ const char *block_name = shProg->BufferInterfaceBlocks[i].Name;
+ block_name_len = strlen(block_name);
+
+ const char *block_square_bracket = strchr(block_name, '[');
+ if (block_square_bracket) {
+ /* The block is part of an array of named interfaces,
+ * for the name comparison we ignore the "[x]" part.
+ */
+ block_name_len -= strlen(block_square_bracket);
+ }
+
+ if (block_name_dot) {
+ /* Check if the variable name starts with the interface
+ * name. The interface name (if present) should have the
+ * same length as the interface block name we are comparing to.
+ */
+ unsigned len = strlen(name) - strlen(block_name_dot);
+ if (len != block_name_len)
+ continue;
+ }
+
+ if (strncmp(block_name, name, block_name_len) == 0) {
+ found_interface = true;
+ break;
+ }
+ }
+
+ /* We remove the interface name from the buffer variable name,
+ * including the dot that follows it.
+ */
+ if (found_interface)
+ name = name + block_name_len + 1;
+
+ /* From: ARB_program_interface_query extension:
+ *
+ * "For an active shader storage block member declared as an array, an
+ * entry will be generated only for the first array element, regardless
+ * of its type. For arrays of aggregate types, the enumeration rules are
+ * applied recursively for the single enumerated array element."
+ */
+ const char *struct_first_dot = strchr(name, '.');
+ const char *first_square_bracket = strchr(name, '[');
+
+ /* The buffer variable is on top level and it is not an array */
+ if (!first_square_bracket) {
+ return true;
+ /* The shader storage block member is a struct, so generate the entry */
+ } else if (struct_first_dot && struct_first_dot < first_square_bracket) {
+ return true;
+ } else {
+ /* The shader storage block member is an array; only generate an entry
+ * for the first array element.
+ */
+ if (strncmp(first_square_bracket, "[0]", 3) == 0)
+ return true;
+ }
+
+ return false;
+}
+
+static bool
+add_program_resource(struct gl_shader_program *prog, GLenum type,
+ const void *data, uint8_t stages)
+{
+ assert(data);
+
+ /* If resource already exists, do not add it again. */
+ for (unsigned i = 0; i < prog->NumProgramResourceList; i++)
+ if (prog->ProgramResourceList[i].Data == data)
+ return true;
+
+ prog->ProgramResourceList =
+ reralloc(prog,
+ prog->ProgramResourceList,
+ gl_program_resource,
+ prog->NumProgramResourceList + 1);
+
+ if (!prog->ProgramResourceList) {
+ linker_error(prog, "Out of memory during linking.\n");
+ return false;
+ }
+
+ struct gl_program_resource *res =
+ &prog->ProgramResourceList[prog->NumProgramResourceList];
+
+ res->Type = type;
+ res->Data = data;
+ res->StageReferences = stages;
+
+ prog->NumProgramResourceList++;
+
+ return true;
+}
+
+/* Checks whether a variable is a packed varying and whether the given
+ * name is part of the packed varying's list.
+ *
+ * If a variable is a packed varying, it has a name like
+ * 'packed:a,b,c' where a, b and c are separate variables.
+ */
+static bool
+included_in_packed_varying(ir_variable *var, const char *name)
+{
+ if (strncmp(var->name, "packed:", 7) != 0)
+ return false;
+
+ char *list = strdup(var->name + 7);
+ assert(list);
+
+ bool found = false;
+ char *saveptr;
+ char *token = strtok_r(list, ",", &saveptr);
+ while (token) {
+ if (strcmp(token, name) == 0) {
+ found = true;
+ break;
+ }
+ token = strtok_r(NULL, ",", &saveptr);
+ }
+ free(list);
+ return found;
+}
+
+/**
+ * Build a stage reference bitmask from a variable name.
+ */
+static uint8_t
+build_stageref(struct gl_shader_program *shProg, const char *name,
+ unsigned mode)
+{
+ uint8_t stages = 0;
+
+ /* Note that we assume at most 8 stages; if more stages are added, the
+ * type used for the reference mask in gl_program_resource will need to
+ * be changed.
+ */
+ assert(MESA_SHADER_STAGES < 8);
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct gl_shader *sh = shProg->_LinkedShaders[i];
+ if (!sh)
+ continue;
+
+ /* Shader symbol table may contain variables that have
+ * been optimized away. Search IR for the variable instead.
+ */
+ foreach_in_list(ir_instruction, node, sh->ir) {
+ ir_variable *var = node->as_variable();
+ if (var) {
+ unsigned baselen = strlen(var->name);
+
+ if (included_in_packed_varying(var, name)) {
+ stages |= (1 << i);
+ break;
+ }
+
+ /* The mode needs to match if specified, otherwise we might
+ * pick a variable with the same name but a different interface.
+ */
+ if (var->data.mode != mode)
+ continue;
+
+ if (strncmp(var->name, name, baselen) == 0) {
+ /* Check for exact name matches but also check for arrays and
+ * structs.
+ */
+ if (name[baselen] == '\0' ||
+ name[baselen] == '[' ||
+ name[baselen] == '.') {
+ stages |= (1 << i);
+ break;
+ }
+ }
+ }
+ }
+ }
+ return stages;
+}
+
+/**
+ * Create a gl_shader_variable from an ir_variable.
+ */
+static gl_shader_variable *
+create_shader_variable(struct gl_shader_program *shProg, const ir_variable *in)
+{
+ gl_shader_variable *out = ralloc(shProg, struct gl_shader_variable);
+ if (!out)
+ return NULL;
+
+ out->type = in->type;
+ out->name = ralloc_strdup(shProg, in->name);
+
+ if (!out->name)
+ return NULL;
+
+ out->location = in->data.location;
+ out->index = in->data.index;
+ out->patch = in->data.patch;
+ out->mode = in->data.mode;
+
+ return out;
+}
+
+static bool
+add_interface_variables(struct gl_shader_program *shProg,
+ exec_list *ir, GLenum programInterface)
+{
+ foreach_in_list(ir_instruction, node, ir) {
+ ir_variable *var = node->as_variable();
+ uint8_t mask = 0;
+
+ if (!var)
+ continue;
+
+ switch (var->data.mode) {
+ /* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes):
+ * "For GetActiveAttrib, all active vertex shader input variables
+ * are enumerated, including the special built-in inputs gl_VertexID
+ * and gl_InstanceID."
+ */
+ case ir_var_system_value:
+ if (var->data.location != SYSTEM_VALUE_VERTEX_ID &&
+ var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE &&
+ var->data.location != SYSTEM_VALUE_INSTANCE_ID)
+ continue;
+ /* Mark special built-in inputs referenced by the vertex stage so
+ * that they are considered active by the shader queries.
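
Stage references are plain per-stage bitfields: with MESA_SHADER_VERTEX = 0 and MESA_SHADER_FRAGMENT = 4, a name referenced by both stages yields (1 << 0) | (1 << 4) = 0x11, which is why the uint8_t mask in build_stageref above asserts MESA_SHADER_STAGES < 8.
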
+ */
+ mask = (1 << (MESA_SHADER_VERTEX));
+ /* FALLTHROUGH */
+ case ir_var_shader_in:
+ if (programInterface != GL_PROGRAM_INPUT)
+ continue;
+ break;
+ case ir_var_shader_out:
+ if (programInterface != GL_PROGRAM_OUTPUT)
+ continue;
+ break;
+ default:
+ continue;
+ };
+
+ /* Skip packed varyings, packed varyings are handled separately
+ * by add_packed_varyings.
+ */
+ if (strncmp(var->name, "packed:", 7) == 0)
+ continue;
+
+ /* Skip fragdata arrays, these are handled separately
+ * by add_fragdata_arrays.
+ */
+ if (strncmp(var->name, "gl_out_FragData", 15) == 0)
+ continue;
+
+ gl_shader_variable *sha_v = create_shader_variable(shProg, var);
+ if (!sha_v)
+ return false;
+
+ if (!add_program_resource(shProg, programInterface, sha_v,
+ build_stageref(shProg, sha_v->name,
+ sha_v->mode) | mask))
+ return false;
+ }
+ return true;
+}
+
+static bool
+add_packed_varyings(struct gl_shader_program *shProg, int stage, GLenum type)
+{
+ struct gl_shader *sh = shProg->_LinkedShaders[stage];
+ GLenum iface;
+
+ if (!sh || !sh->packed_varyings)
+ return true;
+
+ foreach_in_list(ir_instruction, node, sh->packed_varyings) {
+ ir_variable *var = node->as_variable();
+ if (var) {
+ switch (var->data.mode) {
+ case ir_var_shader_in:
+ iface = GL_PROGRAM_INPUT;
+ break;
+ case ir_var_shader_out:
+ iface = GL_PROGRAM_OUTPUT;
+ break;
+ default:
+ unreachable("unexpected type");
+ }
+
+ if (type == iface) {
+ gl_shader_variable *sha_v = create_shader_variable(shProg, var);
+ if (!sha_v)
+ return false;
+ if (!add_program_resource(shProg, iface, sha_v,
+ build_stageref(shProg, sha_v->name,
+ sha_v->mode)))
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+static bool
+add_fragdata_arrays(struct gl_shader_program *shProg)
+{
+ struct gl_shader *sh = shProg->_LinkedShaders[MESA_SHADER_FRAGMENT];
+
+ if (!sh || !sh->fragdata_arrays)
+ return true;
+
+ foreach_in_list(ir_instruction, node, sh->fragdata_arrays) {
+ ir_variable *var = node->as_variable();
+ if (var) {
+ assert(var->data.mode == ir_var_shader_out);
+ gl_shader_variable *sha_v = create_shader_variable(shProg, var);
+ if (!sha_v)
+ return false;
+ if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, sha_v,
+ 1 << MESA_SHADER_FRAGMENT))
+ return false;
+ }
+ }
+ return true;
+}
+
+static char*
+get_top_level_name(const char *name)
+{
+ const char *first_dot = strchr(name, '.');
+ const char *first_square_bracket = strchr(name, '[');
+ int name_size = 0;
+ /* From ARB_program_interface_query spec:
+ *
+ * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer identifying the
+ * number of active array elements of the top-level shader storage block
+ * member containing the active variable is written to <params>. If the
+ * top-level block member is not declared as an array, the value one is
+ * written to <params>. If the top-level block member is an array with no
+ * declared size, the value zero is written to <params>."
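
For a buffer variable named "Block.member[2].x", get_top_level_name() below returns "Block" (the text before the first '.'); get_var_name() strips that prefix to "member[2].x", and a second get_top_level_name() pass on the result yields "member", the top-level block member these properties describe.
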
+ */
+
+ /* The buffer variable is on top level. */
+ if (!first_square_bracket && !first_dot)
+ name_size = strlen(name);
+ else if ((!first_square_bracket ||
+ (first_dot && first_dot < first_square_bracket)))
+ name_size = first_dot - name;
+ else
+ name_size = first_square_bracket - name;
+
+ return strndup(name, name_size);
+}
+
+static char*
+get_var_name(const char *name)
+{
+ const char *first_dot = strchr(name, '.');
+
+ if (!first_dot)
+ return strdup(name);
+
+ return strndup(first_dot+1, strlen(first_dot) - 1);
+}
+
+static bool
+is_top_level_shader_storage_block_member(const char* name,
+ const char* interface_name,
+ const char* field_name)
+{
+ bool result = false;
+
+ /* If the given variable is already a top-level shader storage
+ * block member, then return array_size = 1.
+ * There are two possibilities: an instanced shader storage
+ * block or a non-instanced one.
+ *
+ * For the first, we build the name as it would appear at top level
+ * and compare it with the real name. If they are the same, then
+ * the variable is already at top level.
+ *
+ * The full instanced name is: interface name + '.' + var name +
+ * NULL character
+ */
+ int name_length = strlen(interface_name) + 1 + strlen(field_name) + 1;
+ char *full_instanced_name = (char *) calloc(name_length, sizeof(char));
+ if (!full_instanced_name) {
+ fprintf(stderr, "%s: Cannot allocate space for name\n", __func__);
+ return false;
+ }
+
+ snprintf(full_instanced_name, name_length, "%s.%s",
+ interface_name, field_name);
+
+ /* Check if it is a top-level shader storage block member of an
+ * instanced interface block, or of an unnamed interface block.
+ */
+ if (strcmp(name, full_instanced_name) == 0 ||
+ strcmp(name, field_name) == 0)
+ result = true;
+
+ free(full_instanced_name);
+ return result;
+}
+
+static int
+get_array_size(struct gl_uniform_storage *uni, const glsl_struct_field *field,
+ char *interface_name, char *var_name)
+{
+ /* From GL_ARB_program_interface_query spec:
+ *
+ * "For the property TOP_LEVEL_ARRAY_SIZE, a single integer
+ * identifying the number of active array elements of the top-level
+ * shader storage block member containing the active variable is
+ * written to <params>. If the top-level block member is not
+ * declared as an array, the value one is written to <params>. If
+ * the top-level block member is an array with no declared size,
+ * the value zero is written to <params>."
+ */
+ if (is_top_level_shader_storage_block_member(uni->name,
+ interface_name,
+ var_name))
+ return 1;
+ else if (field->type->is_unsized_array())
+ return 0;
+ else if (field->type->is_array())
+ return field->type->length;
+
+ return 1;
+}
+
+static int
+get_array_stride(struct gl_uniform_storage *uni, const glsl_type *interface,
+ const glsl_struct_field *field, char *interface_name,
+ char *var_name)
+{
+ /* From GL_ARB_program_interface_query:
+ *
+ * "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer
+ * identifying the stride between array elements of the top-level
+ * shader storage block member containing the active variable is
+ * written to <params>. For top-level block members declared as
+ * arrays, the value written is the difference, in basic machine
+ * units, between the offsets of the active variable for
+ * consecutive elements in the top-level array. For top-level
+ * block members not declared as an array, zero is written to
+ * <params>."
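
For instance, for a top-level member declared as float data[]; the std430 path below returns the natural 4-byte stride, while the std140 rules round the array stride up to a vec4 multiple, so MAX2(4, 16) = 16 bytes.
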
+ */ + if (field->type->is_array()) { + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(field->matrix_layout); + bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; + const glsl_type *array_type = field->type->fields.array; + + if (is_top_level_shader_storage_block_member(uni->name, + interface_name, + var_name)) + return 0; + + if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) { + if (array_type->is_record() || array_type->is_array()) + return glsl_align(array_type->std140_size(row_major), 16); + else + return MAX2(array_type->std140_base_alignment(row_major), 16); + } else { + return array_type->std430_array_stride(row_major); + } + } + return 0; +} + +static void +calculate_array_size_and_stride(struct gl_shader_program *shProg, + struct gl_uniform_storage *uni) +{ + int block_index = uni->block_index; + int array_size = -1; + int array_stride = -1; + char *var_name = get_top_level_name(uni->name); + char *interface_name = + get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name); + + if (strcmp(var_name, interface_name) == 0) { + /* Deal with instanced array of SSBOs */ + char *temp_name = get_var_name(uni->name); + if (!temp_name) { + linker_error(shProg, "Out of memory during linking.\n"); + goto write_top_level_array_size_and_stride; + } + free(var_name); + var_name = get_top_level_name(temp_name); + free(temp_name); + if (!var_name) { + linker_error(shProg, "Out of memory during linking.\n"); + goto write_top_level_array_size_and_stride; + } + } + + for (unsigned i = 0; i < shProg->NumShaders; i++) { + if (shProg->Shaders[i] == NULL) + continue; + + const gl_shader *stage = shProg->Shaders[i]; + foreach_in_list(ir_instruction, node, stage->ir) { + ir_variable *var = node->as_variable(); + if (!var || !var->get_interface_type() || + var->data.mode != ir_var_shader_storage) + continue; + + const glsl_type *interface = var->get_interface_type(); + + if (strcmp(interface_name, interface->name) != 0) + continue; + + for (unsigned i = 0; i < interface->length; i++) { + const glsl_struct_field *field = &interface->fields.structure[i]; + if (strcmp(field->name, var_name) != 0) + continue; + + array_stride = get_array_stride(uni, interface, field, + interface_name, var_name); + array_size = get_array_size(uni, field, interface_name, var_name); + goto write_top_level_array_size_and_stride; + } + } + } +write_top_level_array_size_and_stride: + free(interface_name); + free(var_name); + uni->top_level_array_stride = array_stride; + uni->top_level_array_size = array_size; +} + +/** + * Builds up a list of program resources that point to existing + * resource data. + */ +void +build_program_resource_list(struct gl_shader_program *shProg) +{ + /* Rebuild resource list. */ + if (shProg->ProgramResourceList) { + ralloc_free(shProg->ProgramResourceList); + shProg->ProgramResourceList = NULL; + shProg->NumProgramResourceList = 0; + } + + int input_stage = MESA_SHADER_STAGES, output_stage = 0; + + /* Determine first input and final output stage. These are used to + * detect which variables should be enumerated in the resource list + * for GL_PROGRAM_INPUT and GL_PROGRAM_OUTPUT. + */ + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (!shProg->_LinkedShaders[i]) + continue; + if (input_stage == MESA_SHADER_STAGES) + input_stage = i; + output_stage = i; + } + + /* Empty shader, no resources. */ + if (input_stage == MESA_SHADER_STAGES && output_stage == 0) + return; + + /* Program interface needs to expose varyings in case of SSO. 
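+ * Otherwise the packed inputs of the first stage and the packed outputs
+ * of the last stage of a separable program would be missing from
+ * GL_PROGRAM_INPUT / GL_PROGRAM_OUTPUT queries, since
+ * add_interface_variables skips "packed:" variables.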
+ */
+ if (shProg->SeparateShader) {
+ if (!add_packed_varyings(shProg, input_stage, GL_PROGRAM_INPUT))
+ return;
+
+ if (!add_packed_varyings(shProg, output_stage, GL_PROGRAM_OUTPUT))
+ return;
+ }
+
+ if (!add_fragdata_arrays(shProg))
+ return;
+
+ /* Add inputs and outputs to the resource list. */
+ if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir,
+ GL_PROGRAM_INPUT))
+ return;
+
+ if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage]->ir,
+ GL_PROGRAM_OUTPUT))
+ return;
+
+ /* Add transform feedback varyings. */
+ if (shProg->LinkedTransformFeedback.NumVarying > 0) {
+ for (int i = 0; i < shProg->LinkedTransformFeedback.NumVarying; i++) {
+ if (!add_program_resource(shProg, GL_TRANSFORM_FEEDBACK_VARYING,
+ &shProg->LinkedTransformFeedback.Varyings[i],
+ 0))
+ return;
+ }
+ }
+
+ /* Add uniforms from uniform storage. */
+ for (unsigned i = 0; i < shProg->NumUniformStorage; i++) {
+ /* Do not add uniforms internally used by Mesa. */
+ if (shProg->UniformStorage[i].hidden)
+ continue;
+
+ uint8_t stageref =
+ build_stageref(shProg, shProg->UniformStorage[i].name,
+ ir_var_uniform);
+
+ /* Add stage references for uniforms in a uniform block. */
+ int block_index = shProg->UniformStorage[i].block_index;
+ if (block_index != -1) {
+ for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
+ if (shProg->InterfaceBlockStageIndex[j][block_index] != -1)
+ stageref |= (1 << j);
+ }
+ }
+
+ bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage;
+ GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
+ if (!should_add_buffer_variable(shProg, type,
+ shProg->UniformStorage[i].name))
+ continue;
+
+ if (is_shader_storage) {
+ calculate_array_size_and_stride(shProg, &shProg->UniformStorage[i]);
+ }
+
+ if (!add_program_resource(shProg, type,
+ &shProg->UniformStorage[i], stageref))
+ return;
+ }
+
+ /* Add program uniform blocks and shader storage blocks. */
+ for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+ bool is_shader_storage = shProg->BufferInterfaceBlocks[i].IsShaderStorage;
+ GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK;
+ if (!add_program_resource(shProg, type,
+ &shProg->BufferInterfaceBlocks[i], 0))
+ return;
+ }
+
+ /* Add atomic counter buffers.
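+ * Each buffer becomes a GL_ATOMIC_COUNTER_BUFFER resource; no stage
+ * mask is attached here, so 0 is passed for the stage reference.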
+ */
+ for (unsigned i = 0; i < shProg->NumAtomicBuffers; i++) {
+ if (!add_program_resource(shProg, GL_ATOMIC_COUNTER_BUFFER,
+ &shProg->AtomicBuffers[i], 0))
+ return;
+ }
+
+ for (unsigned i = 0; i < shProg->NumUniformStorage; i++) {
+ GLenum type;
+ if (!shProg->UniformStorage[i].hidden)
+ continue;
+
+ for (int j = MESA_SHADER_VERTEX; j < MESA_SHADER_STAGES; j++) {
+ if (!shProg->UniformStorage[i].opaque[j].active ||
+ !shProg->UniformStorage[i].type->is_subroutine())
+ continue;
+
+ type = _mesa_shader_stage_to_subroutine_uniform((gl_shader_stage)j);
+ /* add shader subroutines */
+ if (!add_program_resource(shProg, type, &shProg->UniformStorage[i], 0))
+ return;
+ }
+ }
+
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ struct gl_shader *sh = shProg->_LinkedShaders[i];
+ GLuint type;
+
+ if (!sh)
+ continue;
+
+ type = _mesa_shader_stage_to_subroutine((gl_shader_stage)i);
+ for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) {
+ if (!add_program_resource(shProg, type, &sh->SubroutineFunctions[j], 0))
+ return;
+ }
+ }
+}
+
+/**
+ * This check is done to make sure we allow only constant expression
+ * indexing and "constant-index-expression" (indexing with an expression
+ * that includes a loop induction variable).
+ */
+static bool
+validate_sampler_array_indexing(struct gl_context *ctx,
+ struct gl_shader_program *prog)
+{
+ dynamic_sampler_array_indexing_visitor v;
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (prog->_LinkedShaders[i] == NULL)
+ continue;
+
+ bool no_dynamic_indexing =
+ ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler;
+
+ /* Search for array derefs in shader. */
+ v.run(prog->_LinkedShaders[i]->ir);
+ if (v.uses_dynamic_sampler_array_indexing()) {
+ const char *msg = "sampler arrays indexed with non-constant "
+ "expressions are forbidden in GLSL %s %u";
+ /* Backend has indicated that it has no dynamic indexing support. */
+ if (no_dynamic_indexing) {
+ linker_error(prog, msg, prog->IsES ? "ES" : "", prog->Version);
+ return false;
+ } else {
+ linker_warning(prog, msg, prog->IsES ? "ES" : "", prog->Version);
+ }
+ }
+ }
+ return true;
+}
+
+static void
+link_assign_subroutine_types(struct gl_shader_program *prog)
+{
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ gl_shader *sh = prog->_LinkedShaders[i];
+
+ if (sh == NULL)
+ continue;
+
+ foreach_in_list(ir_instruction, node, sh->ir) {
+ ir_function *fn = node->as_function();
+ if (!fn)
+ continue;
+
+ if (fn->is_subroutine)
+ sh->NumSubroutineUniformTypes++;
+
+ if (!fn->num_subroutine_types)
+ continue;
+
+ sh->SubroutineFunctions = reralloc(sh, sh->SubroutineFunctions,
+ struct gl_subroutine_function,
+ sh->NumSubroutineFunctions + 1);
+ sh->SubroutineFunctions[sh->NumSubroutineFunctions].name = ralloc_strdup(sh, fn->name);
+ sh->SubroutineFunctions[sh->NumSubroutineFunctions].num_compat_types = fn->num_subroutine_types;
+ sh->SubroutineFunctions[sh->NumSubroutineFunctions].types =
+ ralloc_array(sh, const struct glsl_type *,
+ fn->num_subroutine_types);
+
+ /* From Section 4.4.4 (Subroutine Function Layout Qualifiers) of the
+ * GLSL 4.5 spec:
+ *
+ * "Each subroutine with an index qualifier in the shader must be
+ * given a unique index, otherwise a compile or link error will be
+ * generated."
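+ *
+ * For example (illustrative GLSL, not from the spec):
+ *
+ * subroutine void func_type();
+ * layout(index = 1) subroutine(func_type) void f1() { ... }
+ * layout(index = 1) subroutine(func_type) void f2() { ... } // link error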
+ */
+ for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) {
+ if (sh->SubroutineFunctions[j].index != -1 &&
+ sh->SubroutineFunctions[j].index == fn->subroutine_index) {
+ linker_error(prog, "each subroutine index qualifier in the "
+ "shader must be unique\n");
+ return;
+ }
+ }
+ sh->SubroutineFunctions[sh->NumSubroutineFunctions].index =
+ fn->subroutine_index;
+
+ for (int j = 0; j < fn->num_subroutine_types; j++)
+ sh->SubroutineFunctions[sh->NumSubroutineFunctions].types[j] = fn->subroutine_types[j];
+ sh->NumSubroutineFunctions++;
+ }
+
+ /* Assign indices for subroutines without an explicit index. */
+ int index = 0;
+ for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) {
+ while (sh->SubroutineFunctions[j].index == -1) {
+ for (unsigned k = 0; k < sh->NumSubroutineFunctions; k++) {
+ if (sh->SubroutineFunctions[k].index == index)
+ break;
+ else if (k == sh->NumSubroutineFunctions - 1)
+ sh->SubroutineFunctions[j].index = index;
+ }
+ index++;
+ }
+ }
+ }
+}
+
+static void
+split_ubos_and_ssbos(void *mem_ctx,
+ struct gl_uniform_block *blocks,
+ unsigned num_blocks,
+ struct gl_uniform_block ***ubos,
+ unsigned *num_ubos,
+ unsigned **ubo_interface_block_indices,
+ struct gl_uniform_block ***ssbos,
+ unsigned *num_ssbos,
+ unsigned **ssbo_interface_block_indices)
+{
+ unsigned num_ubo_blocks = 0;
+ unsigned num_ssbo_blocks = 0;
+
+ for (unsigned i = 0; i < num_blocks; i++) {
+ if (blocks[i].IsShaderStorage)
+ num_ssbo_blocks++;
+ else
+ num_ubo_blocks++;
+ }
+
+ *ubos = ralloc_array(mem_ctx, gl_uniform_block *, num_ubo_blocks);
+ *num_ubos = 0;
+
+ *ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks);
+ *num_ssbos = 0;
+
+ if (ubo_interface_block_indices)
+ *ubo_interface_block_indices =
+ ralloc_array(mem_ctx, unsigned, num_ubo_blocks);
+
+ if (ssbo_interface_block_indices)
+ *ssbo_interface_block_indices =
+ ralloc_array(mem_ctx, unsigned, num_ssbo_blocks);
+
+ for (unsigned i = 0; i < num_blocks; i++) {
+ if (blocks[i].IsShaderStorage) {
+ (*ssbos)[*num_ssbos] = &blocks[i];
+ if (ssbo_interface_block_indices)
+ (*ssbo_interface_block_indices)[*num_ssbos] = i;
+ (*num_ssbos)++;
+ } else {
+ (*ubos)[*num_ubos] = &blocks[i];
+ if (ubo_interface_block_indices)
+ (*ubo_interface_block_indices)[*num_ubos] = i;
+ (*num_ubos)++;
+ }
+ }
+
+ assert(*num_ubos + *num_ssbos == num_blocks);
+}
+
+static void
+set_always_active_io(exec_list *ir, ir_variable_mode io_mode)
+{
+ assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
+
+ foreach_in_list(ir_instruction, node, ir) {
+ ir_variable *const var = node->as_variable();
+
+ if (var == NULL || var->data.mode != io_mode)
+ continue;
+
+ /* Don't set always active on builtins that haven't been redeclared */
+ if (var->data.how_declared == ir_var_declared_implicitly)
+ continue;
+
+ var->data.always_active_io = true;
+ }
+}
+
+/**
+ * When separate shader programs are enabled, only inputs/outputs between
+ * the stages of a multi-stage separate program can be safely removed
+ * from the shader interface. Other inputs/outputs must remain active.
+ */
+static void
+disable_varying_optimizations_for_sso(struct gl_shader_program *prog)
+{
+ unsigned first, last;
+ assert(prog->SeparateShader);
+
+ first = MESA_SHADER_STAGES;
+ last = 0;
+
+ /* Determine first and last stage, excluding the compute stage. */
+ for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) {
+ if (!prog->_LinkedShaders[i])
+ continue;
+ if (first == MESA_SHADER_STAGES)
+ first = i;
+ last = i;
+ }
+
+ if (first == MESA_SHADER_STAGES)
+ return;
+
+ for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+ gl_shader *sh = prog->_LinkedShaders[stage];
+ if (!sh)
+ continue;
+
+ if (first == last) {
+ /* For a single shader program only allow inputs to the vertex shader
+ * and outputs from the fragment shader to be removed.
+ */
+ if (stage != MESA_SHADER_VERTEX)
+ set_always_active_io(sh->ir, ir_var_shader_in);
+ if (stage != MESA_SHADER_FRAGMENT)
+ set_always_active_io(sh->ir, ir_var_shader_out);
+ } else {
+ /* For multi-stage separate shader programs only allow inputs and
+ * outputs between the shader stages to be removed as well as inputs
+ * to the vertex shader and outputs from the fragment shader.
+ */
+ if (stage == first && stage != MESA_SHADER_VERTEX)
+ set_always_active_io(sh->ir, ir_var_shader_in);
+ else if (stage == last && stage != MESA_SHADER_FRAGMENT)
+ set_always_active_io(sh->ir, ir_var_shader_out);
+ }
+ }
+}
+
+void
+link_shaders(struct gl_context *ctx, struct gl_shader_program *prog)
+{
+ tfeedback_decl *tfeedback_decls = NULL;
+ unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying;
+
+ void *mem_ctx = ralloc_context(NULL); // temporary linker context
+
+ prog->LinkStatus = true; /* All error paths will set this to false */
+ prog->Validated = false;
+ prog->_Used = false;
+
+ prog->ARB_fragment_coord_conventions_enable = false;
+
+ /* Separate the shaders into groups based on their type.
+ */
+ struct gl_shader **shader_list[MESA_SHADER_STAGES];
+ unsigned num_shaders[MESA_SHADER_STAGES];
+
+ for (int i = 0; i < MESA_SHADER_STAGES; i++) {
+ shader_list[i] = (struct gl_shader **)
+ calloc(prog->NumShaders, sizeof(struct gl_shader *));
+ num_shaders[i] = 0;
+ }
+
+ unsigned min_version = UINT_MAX;
+ unsigned max_version = 0;
+ const bool is_es_prog =
+ prog->NumShaders > 0 && prog->Shaders[0]->IsES;
+ for (unsigned i = 0; i < prog->NumShaders; i++) {
+ min_version = MIN2(min_version, prog->Shaders[i]->Version);
+ max_version = MAX2(max_version, prog->Shaders[i]->Version);
+
+ if (prog->Shaders[i]->IsES != is_es_prog) {
+ linker_error(prog, "all shaders must use the same shading "
+ "language version\n");
+ goto done;
+ }
+
+ if (prog->Shaders[i]->ARB_fragment_coord_conventions_enable) {
+ prog->ARB_fragment_coord_conventions_enable = true;
+ }
+
+ gl_shader_stage shader_type = prog->Shaders[i]->Stage;
+ shader_list[shader_type][num_shaders[shader_type]] = prog->Shaders[i];
+ num_shaders[shader_type]++;
+ }
+
+ /* In desktop GLSL, different shader versions may be linked together. In
+ * GLSL ES, all shader versions must be the same.
+ */
+ if (is_es_prog && min_version != max_version) {
+ linker_error(prog, "all shaders must use the same shading "
+ "language version\n");
+ goto done;
+ }
+
+ prog->Version = max_version;
+ prog->IsES = is_es_prog;
+
+ /* From OpenGL 4.5 Core specification (7.3 Program Objects):
+ * "Linking can fail for a variety of reasons as specified in the OpenGL
+ * Shading Language Specification, as well as any of the following
+ * reasons:
+ *
+ * * No shader objects are attached to program.
+ *
+ * ..."
+ *
+ * The same rule applies for OpenGL ES >= 3.1.
+ */ + + if (prog->NumShaders == 0 && + ((ctx->API == API_OPENGL_CORE && ctx->Version >= 45) || + (ctx->API == API_OPENGLES2 && ctx->Version >= 31))) { + linker_error(prog, "No shader objects are attached to program.\n"); + goto done; + } + + /* Some shaders have to be linked with some other shaders present. + */ + if (num_shaders[MESA_SHADER_GEOMETRY] > 0 && + num_shaders[MESA_SHADER_VERTEX] == 0 && + !prog->SeparateShader) { + linker_error(prog, "Geometry shader must be linked with " + "vertex shader\n"); + goto done; + } + if (num_shaders[MESA_SHADER_TESS_EVAL] > 0 && + num_shaders[MESA_SHADER_VERTEX] == 0 && + !prog->SeparateShader) { + linker_error(prog, "Tessellation evaluation shader must be linked with " + "vertex shader\n"); + goto done; + } + if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 && + num_shaders[MESA_SHADER_VERTEX] == 0 && + !prog->SeparateShader) { + linker_error(prog, "Tessellation control shader must be linked with " + "vertex shader\n"); + goto done; + } + + /* The spec is self-contradictory here. It allows linking without a tess + * eval shader, but that can only be used with transform feedback and + * rasterization disabled. However, transform feedback isn't allowed + * with GL_PATCHES, so it can't be used. + * + * More investigation showed that the idea of transform feedback after + * a tess control shader was dropped, because some hw vendors couldn't + * support tessellation without a tess eval shader, but the linker section + * wasn't updated to reflect that. + * + * All specifications (ARB_tessellation_shader, GL 4.0-4.5) have this + * spec bug. + * + * Do what's reasonable and always require a tess eval shader if a tess + * control shader is present. + */ + if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 && + num_shaders[MESA_SHADER_TESS_EVAL] == 0 && + !prog->SeparateShader) { + linker_error(prog, "Tessellation control shader must be linked with " + "tessellation evaluation shader\n"); + goto done; + } + + /* Compute shaders have additional restrictions. */ + if (num_shaders[MESA_SHADER_COMPUTE] > 0 && + num_shaders[MESA_SHADER_COMPUTE] != prog->NumShaders) { + linker_error(prog, "Compute shaders may not be linked with any other " + "type of shader\n"); + } + + for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] != NULL) + _mesa_delete_shader(ctx, prog->_LinkedShaders[i]); + + prog->_LinkedShaders[i] = NULL; + } + + /* Link all shaders for a particular stage and validate the result. 
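+ *
+ * (Intrastage linking combines all compiled shader objects of one stage
+ * into a single linked gl_shader; the per-stage
+ * validate_*_shader_executable checks below then run on that result.)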
+ */
+ for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
+ if (num_shaders[stage] > 0) {
+ gl_shader *const sh =
+ link_intrastage_shaders(mem_ctx, ctx, prog, shader_list[stage],
+ num_shaders[stage]);
+
+ if (!prog->LinkStatus) {
+ if (sh)
+ _mesa_delete_shader(ctx, sh);
+ goto done;
+ }
+
+ switch (stage) {
+ case MESA_SHADER_VERTEX:
+ validate_vertex_shader_executable(prog, sh);
+ break;
+ case MESA_SHADER_TESS_CTRL:
+ /* nothing to be done */
+ break;
+ case MESA_SHADER_TESS_EVAL:
+ validate_tess_eval_shader_executable(prog, sh);
+ break;
+ case MESA_SHADER_GEOMETRY:
+ validate_geometry_shader_executable(prog, sh);
+ break;
+ case MESA_SHADER_FRAGMENT:
+ validate_fragment_shader_executable(prog, sh);
+ break;
+ }
+ if (!prog->LinkStatus) {
+ if (sh)
+ _mesa_delete_shader(ctx, sh);
+ goto done;
+ }
+
+ _mesa_reference_shader(ctx, &prog->_LinkedShaders[stage], sh);
+ }
+ }
+
+ if (num_shaders[MESA_SHADER_GEOMETRY] > 0)
+ prog->LastClipDistanceArraySize = prog->Geom.ClipDistanceArraySize;
+ else if (num_shaders[MESA_SHADER_TESS_EVAL] > 0)
+ prog->LastClipDistanceArraySize = prog->TessEval.ClipDistanceArraySize;
+ else if (num_shaders[MESA_SHADER_VERTEX] > 0)
+ prog->LastClipDistanceArraySize = prog->Vert.ClipDistanceArraySize;
+ else
+ prog->LastClipDistanceArraySize = 0; /* Not used */
+
+ /* Here begins the inter-stage linking phase. Some initial validation is
+ * performed, then locations are assigned for uniforms, attributes, and
+ * varyings.
+ */
+ cross_validate_uniforms(prog);
+ if (!prog->LinkStatus)
+ goto done;
+
+ unsigned first, last, prev;
+
+ first = MESA_SHADER_STAGES;
+ last = 0;
+
+ /* Determine first and last stage. */
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (!prog->_LinkedShaders[i])
+ continue;
+ if (first == MESA_SHADER_STAGES)
+ first = i;
+ last = i;
+ }
+
+ check_explicit_uniform_locations(ctx, prog);
+ link_assign_subroutine_types(prog);
+
+ if (!prog->LinkStatus)
+ goto done;
+
+ resize_tes_inputs(ctx, prog);
+
+ /* Validate the inputs of each stage against the outputs of the preceding
+ * stage.
+ */
+ prev = first;
+ for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) {
+ if (prog->_LinkedShaders[i] == NULL)
+ continue;
+
+ validate_interstage_inout_blocks(prog, prog->_LinkedShaders[prev],
+ prog->_LinkedShaders[i]);
+ if (!prog->LinkStatus)
+ goto done;
+
+ cross_validate_outputs_to_inputs(prog,
+ prog->_LinkedShaders[prev],
+ prog->_LinkedShaders[i]);
+ if (!prog->LinkStatus)
+ goto done;
+
+ prev = i;
+ }
+
+ /* Cross-validate uniform blocks between shader stages */
+ validate_interstage_uniform_blocks(prog, prog->_LinkedShaders,
+ MESA_SHADER_STAGES);
+ if (!prog->LinkStatus)
+ goto done;
+
+ for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (prog->_LinkedShaders[i] != NULL)
+ lower_named_interface_blocks(mem_ctx, prog->_LinkedShaders[i]);
+ }
+
+ /* Implement the GLSL 1.30+ rule for discard vs infinite loops. Do
+ * it before optimization because we want most of the checks to get
+ * dropped thanks to constant propagation.
+ *
+ * This rule also applies to GLSL ES 3.00.
+ */
+ if (max_version >= (is_es_prog ? 300 : 130)) {
+ struct gl_shader *sh = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+ if (sh) {
+ lower_discard_flow(sh->ir);
+ }
+ }
+
+ if (prog->SeparateShader)
+ disable_varying_optimizations_for_sso(prog);
+
+ if (!interstage_cross_validate_uniform_blocks(prog))
+ goto done;
+
+ /* Do common optimization before assigning storage for attributes,
+ * uniforms, and varyings. Later optimization could possibly make
+ * some of that unused.
+ */
+ for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
+ if (prog->_LinkedShaders[i] == NULL)
+ continue;
+
+ detect_recursion_linked(prog, prog->_LinkedShaders[i]->ir);
+ if (!prog->LinkStatus)
+ goto done;
+
+ if (ctx->Const.ShaderCompilerOptions[i].LowerClipDistance) {
+ lower_clip_distance(prog->_LinkedShaders[i]);
+ }
+
+ if (ctx->Const.LowerTessLevel) {
+ lower_tess_level(prog->_LinkedShaders[i]);
+ }
+
+ while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, false,
+ &ctx->Const.ShaderCompilerOptions[i],
+ ctx->Const.NativeIntegers))
+ ;
+
+ lower_const_arrays_to_uniforms(prog->_LinkedShaders[i]->ir);
+ }
+
+ /* Validation for special cases where we allow sampler array indexing
+ * with a loop induction variable. This check emits a warning or error
+ * depending on whether the backend can handle dynamic indexing.
+ */
+ if ((!prog->IsES && prog->Version < 130) ||
+ (prog->IsES && prog->Version < 300)) {
+ if (!validate_sampler_array_indexing(ctx, prog))
+ goto done;
+ }
+
+ /* Check and validate stream emissions in geometry shaders */
+ validate_geometry_shader_emissions(ctx, prog);
+
+ /* Mark all generic shader inputs and outputs as unpaired. */
+ for (unsigned i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
+ if (prog->_LinkedShaders[i] != NULL) {
+ link_invalidate_variable_locations(prog->_LinkedShaders[i]->ir);
+ }
+ }
+
+ prev = first;
+ for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) {
+ if (prog->_LinkedShaders[i] == NULL)
+ continue;
+
+ match_explicit_outputs_to_inputs(prog, prog->_LinkedShaders[prev],
+ prog->_LinkedShaders[i]);
+ prev = i;
+ }
+
+ if (!assign_attribute_or_color_locations(prog, &ctx->Const,
+ MESA_SHADER_VERTEX)) {
+ goto done;
+ }
+
+ if (!assign_attribute_or_color_locations(prog, &ctx->Const,
+ MESA_SHADER_FRAGMENT)) {
+ goto done;
+ }
+
+ if (num_tfeedback_decls != 0) {
+ /* From GL_EXT_transform_feedback:
+ * A program will fail to link if:
+ *
+ * * the <count> specified by TransformFeedbackVaryingsEXT is
+ * non-zero, but the program object has no vertex or geometry
+ * shader;
+ */
+ if (first == MESA_SHADER_FRAGMENT) {
+ linker_error(prog, "Transform feedback varyings specified, but "
+ "no vertex or geometry shader is present.\n");
+ goto done;
+ }
+
+ tfeedback_decls = ralloc_array(mem_ctx, tfeedback_decl,
+ prog->TransformFeedback.NumVarying);
+ if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
+ prog->TransformFeedback.VaryingNames,
+ tfeedback_decls))
+ goto done;
+ }
+
+ /* Linking the stages in the opposite order (from fragment to vertex)
+ * ensures that inter-shader outputs written to in an earlier stage are
+ * eliminated if they are (transitively) not used in a later stage.
+ */
+ int next;
+
+ if (first < MESA_SHADER_FRAGMENT) {
+ gl_shader *const sh = prog->_LinkedShaders[last];
+
+ if (first != MESA_SHADER_VERTEX) {
+ /* There was no vertex shader, but we still have to assign varying
+ * locations for use by tessellation/geometry shader inputs in SSO.
+ *
+ * If the shader is not separable (i.e., prog->SeparateShader is
+ * false), linking will have already failed when first is not
+ * MESA_SHADER_VERTEX.
+ */
+ if (!assign_varying_locations(ctx, mem_ctx, prog,
+ NULL, prog->_LinkedShaders[first],
+ num_tfeedback_decls, tfeedback_decls))
+ goto done;
+ }
+
+ if (last != MESA_SHADER_FRAGMENT &&
+ (num_tfeedback_decls != 0 || prog->SeparateShader)) {
+ /* There was no fragment shader, but we still have to assign varying
+ * locations for use by transform feedback.
+ */
+ if (!assign_varying_locations(ctx, mem_ctx, prog,
+ sh, NULL,
+ num_tfeedback_decls, tfeedback_decls))
+ goto done;
+ }
+
+ do_dead_builtin_varyings(ctx, sh, NULL,
+ num_tfeedback_decls, tfeedback_decls);
+
+ remove_unused_shader_inputs_and_outputs(prog->SeparateShader, sh,
+ ir_var_shader_out);
+ }
+ else if (first == MESA_SHADER_FRAGMENT) {
+ /* If the program only contains a fragment shader...
+ */
+ gl_shader *const sh = prog->_LinkedShaders[first];
+
+ do_dead_builtin_varyings(ctx, NULL, sh,
+ num_tfeedback_decls, tfeedback_decls);
+
+ if (prog->SeparateShader) {
+ if (!assign_varying_locations(ctx, mem_ctx, prog,
+ NULL /* producer */,
+ sh /* consumer */,
+ 0 /* num_tfeedback_decls */,
+ NULL /* tfeedback_decls */))
+ goto done;
+ } else {
+ remove_unused_shader_inputs_and_outputs(false, sh,
+ ir_var_shader_in);
+ }
+ }
+
+ next = last;
+ for (int i = next - 1; i >= 0; i--) {
+ if (prog->_LinkedShaders[i] == NULL)
+ continue;
+
+ gl_shader *const sh_i = prog->_LinkedShaders[i];
+ gl_shader *const sh_next = prog->_LinkedShaders[next];
+
+ if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
+ next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
+ tfeedback_decls))
+ goto done;
+
+ do_dead_builtin_varyings(ctx, sh_i, sh_next,
+ next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
+ tfeedback_decls);
+
+ /* This must be done after all dead varyings are eliminated. */
+ if (!check_against_output_limit(ctx, prog, sh_i))
+ goto done;
+ if (!check_against_input_limit(ctx, prog, sh_next))
+ goto done;
+
+ next = i;
+ }
+
+ if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls))
+ goto done;
+
+ update_array_sizes(prog);
+ link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue);
+ link_assign_atomic_counter_resources(ctx, prog);
+ store_fragdepth_layout(prog);
+
+ link_calculate_subroutine_compat(prog);
+ check_resources(ctx, prog);
+ check_subroutine_resources(prog);
+ check_image_resources(ctx, prog);
+ link_check_atomic_counter_resources(ctx, prog);
+
+ if (!prog->LinkStatus)
+ goto done;
+
+ /* OpenGL ES requires that a vertex shader and a fragment shader both be
+ * present in a linked program. GL_ARB_ES2_compatibility doesn't say
+ * anything about shader linking when one of the shaders (vertex or
+ * fragment shader) is absent. So, the extension shouldn't change the
+ * behavior specified in the GLSL specification.
+ */
+ if (!prog->SeparateShader && ctx->API == API_OPENGLES2) {
+ /* With ES < 3.1, a vertex and a fragment shader must always both be
+ * present. */
+ if (ctx->Version < 31) {
+ if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) {
+ linker_error(prog, "program lacks a vertex shader\n");
+ } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) {
+ linker_error(prog, "program lacks a fragment shader\n");
+ }
+ } else {
+ /* From OpenGL ES 3.1 specification (7.3 Program Objects):
+ * "Linking can fail for a variety of reasons as specified in the
+ * OpenGL ES Shading Language Specification, as well as any of the
+ * following reasons:
+ *
+ * ...
+ * + * * program contains objects to form either a vertex shader or + * fragment shader, and program is not separable, and does not + * contain objects to form both a vertex shader and fragment + * shader." + */ + if (!!prog->_LinkedShaders[MESA_SHADER_VERTEX] ^ + !!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) { + linker_error(prog, "Program needs to contain both vertex and " + "fragment shaders.\n"); + } + } + } + + /* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks + * for gl_shader_program and gl_shader, so that drivers that need separate + * index spaces for each set can have that. + */ + for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] != NULL) { + gl_shader *sh = prog->_LinkedShaders[i]; + split_ubos_and_ssbos(sh, + sh->BufferInterfaceBlocks, + sh->NumBufferInterfaceBlocks, + &sh->UniformBlocks, + &sh->NumUniformBlocks, + NULL, + &sh->ShaderStorageBlocks, + &sh->NumShaderStorageBlocks, + NULL); + } + } + + split_ubos_and_ssbos(prog, + prog->BufferInterfaceBlocks, + prog->NumBufferInterfaceBlocks, + &prog->UniformBlocks, + &prog->NumUniformBlocks, + &prog->UboInterfaceBlockIndex, + &prog->ShaderStorageBlocks, + &prog->NumShaderStorageBlocks, + &prog->SsboInterfaceBlockIndex); + + /* FINISHME: Assign fragment shader output locations. */ + + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + if (prog->_LinkedShaders[i] == NULL) + continue; + + if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks) + lower_ubo_reference(prog->_LinkedShaders[i]); + + if (ctx->Const.ShaderCompilerOptions[i].LowerShaderSharedVariables) + lower_shared_reference(prog->_LinkedShaders[i], + &prog->Comp.SharedSize); + + lower_vector_derefs(prog->_LinkedShaders[i]); + } + +done: + for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { + free(shader_list[i]); + if (prog->_LinkedShaders[i] == NULL) + continue; + + /* Do a final validation step to make sure that the IR wasn't + * invalidated by any modifications performed after intrastage linking. + */ + validate_ir_tree(prog->_LinkedShaders[i]->ir); + + /* Retain any live IR, but trash the rest. */ + reparent_ir(prog->_LinkedShaders[i]->ir, prog->_LinkedShaders[i]->ir); + + /* The symbol table in the linked shaders may contain references to + * variables that were removed (e.g., unused uniforms). Since it may + * contain junk, there is no possible valid use. Delete it and set the + * pointer to NULL. + */ + delete prog->_LinkedShaders[i]->symbols; + prog->_LinkedShaders[i]->symbols = NULL; + } + + ralloc_free(mem_ctx); +} diff --git a/src/compiler/glsl/linker.h b/src/compiler/glsl/linker.h new file mode 100644 index 00000000000..c80be1c7e22 --- /dev/null +++ b/src/compiler/glsl/linker.h @@ -0,0 +1,205 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef GLSL_LINKER_H
+#define GLSL_LINKER_H
+
+extern bool
+link_function_calls(gl_shader_program *prog, gl_shader *main,
+ gl_shader **shader_list, unsigned num_shaders);
+
+extern void
+link_invalidate_variable_locations(exec_list *ir);
+
+extern void
+link_assign_uniform_locations(struct gl_shader_program *prog,
+ unsigned int boolean_true);
+
+extern void
+link_set_uniform_initializers(struct gl_shader_program *prog,
+ unsigned int boolean_true);
+
+extern int
+link_cross_validate_uniform_block(void *mem_ctx,
+ struct gl_uniform_block **linked_blocks,
+ unsigned int *num_linked_blocks,
+ struct gl_uniform_block *new_block);
+
+extern bool
+link_uniform_blocks_are_compatible(const gl_uniform_block *a,
+ const gl_uniform_block *b);
+
+extern unsigned
+link_uniform_blocks(void *mem_ctx,
+ struct gl_context *ctx,
+ struct gl_shader_program *prog,
+ struct gl_shader **shader_list,
+ unsigned num_shaders,
+ struct gl_uniform_block **blocks_ret);
+
+bool
+validate_intrastage_arrays(struct gl_shader_program *prog,
+ ir_variable *const var,
+ ir_variable *const existing);
+
+void
+validate_intrastage_interface_blocks(struct gl_shader_program *prog,
+ const gl_shader **shader_list,
+ unsigned num_shaders);
+
+void
+validate_interstage_inout_blocks(struct gl_shader_program *prog,
+ const gl_shader *producer,
+ const gl_shader *consumer);
+
+void
+validate_interstage_uniform_blocks(struct gl_shader_program *prog,
+ gl_shader **stages, int num_stages);
+
+extern void
+link_assign_atomic_counter_resources(struct gl_context *ctx,
+ struct gl_shader_program *prog);
+
+extern void
+link_check_atomic_counter_resources(struct gl_context *ctx,
+ struct gl_shader_program *prog);
+
+/**
+ * Class for processing all of the leaf fields of a variable that corresponds
+ * to a program resource.
+ *
+ * The leaf fields are all the parts of the variable that the application
+ * could query using \c glGetProgramResourceIndex (or that could be returned
+ * by \c glGetProgramResourceName).
+ *
+ * Classes may derive from this class to implement specific functionality.
+ * This class only provides the mechanism to iterate over the leaves. Derived
+ * classes must implement \c ::visit_field and may override \c ::process.
+ */
+class program_resource_visitor {
+public:
+ /**
+ * Begin processing a variable
+ *
+ * Classes that overload this function should call \c ::process from the
+ * base class to start the recursive processing of the variable.
+ *
+ * \param var The variable that is to be processed
+ *
+ * Calls \c ::visit_field for each leaf of the variable.
+ *
+ * \warning
+ * When processing a uniform block, this entry should only be used in cases
+ * where the row / column ordering of matrices in the block does not
+ * matter. For example, enumerating the names of members of the block, but
+ * not for determining the offsets of members.
+ */
+ void process(ir_variable *var);
+
+ /**
+ * Begin processing a variable of a structured type.
+ *
+ * This flavor of \c process should be used to handle structured types
+ * (i.e., structures, interfaces, or arrays thereof) that need special
+ * name handling. A common usage is to handle cases where the block name
+ * (instead of the instance name) is used for an interface block.
+ *
+ * \param type Type that is to be processed, associated with \c name
+ * \param name Base name of the structured variable being processed
+ *
+ * \note
+ * \c type must be \c GLSL_TYPE_RECORD, \c GLSL_TYPE_INTERFACE, or an array
+ * thereof.
+ */
+ void process(const glsl_type *type, const char *name);
+
+protected:
+ /**
+ * Method invoked for each leaf of the variable
+ *
+ * \param type Type of the field.
+ * \param name Fully qualified name of the field.
+ * \param row_major For a matrix type, is it stored row-major.
+ * \param record_type Type of the record containing the field.
+ * \param last_field Set if \c name is the last field of the structure
+ * containing it. This will always be false for items
+ * not contained in a structure or interface block.
+ *
+ * The default implementation just calls the other \c visit_field method.
+ */
+ virtual void visit_field(const glsl_type *type, const char *name,
+ bool row_major, const glsl_type *record_type,
+ const unsigned packing,
+ bool last_field);
+
+ /**
+ * Method invoked for each leaf of the variable
+ *
+ * \param type Type of the field.
+ * \param name Fully qualified name of the field.
+ * \param row_major For a matrix type, is it stored row-major.
+ */
+ virtual void visit_field(const glsl_type *type, const char *name,
+ bool row_major) = 0;
+
+ /**
+ * Visit a record before visiting its fields
+ *
+ * For structures-of-structures or interfaces-of-structures, this visits
+ * the inner structure before visiting its fields.
+ *
+ * The default implementation does nothing.
+ */
+ virtual void visit_field(const glsl_struct_field *field);
+
+ virtual void enter_record(const glsl_type *type, const char *name,
+ bool row_major, const unsigned packing);
+
+ virtual void leave_record(const glsl_type *type, const char *name,
+ bool row_major, const unsigned packing);
+
+ virtual void set_record_array_count(unsigned record_array_count);
+
+private:
+ /**
+ * \param name_length Length of the current name \b not including the
+ * terminating \c NUL character.
+ * \param last_field Set if \c name is the last field of the structure
+ * containing it. This will always be false for items
+ * not contained in a structure or interface block.
+ */
+ void recursion(const glsl_type *t, char **name, size_t name_length,
+ bool row_major, const glsl_type *record_type,
+ const unsigned packing,
+ bool last_field, unsigned record_array_count);
+};
+
+void
+linker_error(gl_shader_program *prog, const char *fmt, ...);
+
+void
+linker_warning(gl_shader_program *prog, const char *fmt, ...);
+
+#endif /* GLSL_LINKER_H */
diff --git a/src/compiler/glsl/list.h b/src/compiler/glsl/list.h
new file mode 100644
index 00000000000..a1c4d82b017
--- /dev/null
+++ b/src/compiler/glsl/list.h
@@ -0,0 +1,700 @@
+/*
+ * Copyright © 2008, 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file list.h
+ * \brief Doubly-linked list abstract container type.
+ *
+ * Each doubly-linked list has a sentinel head and tail node. These nodes
+ * contain no data. The head sentinel can be identified by its \c prev
+ * pointer being \c NULL. The tail sentinel can be identified by its
+ * \c next pointer being \c NULL.
+ *
+ * A list is empty if either the head sentinel's \c next pointer points to the
+ * tail sentinel or the tail sentinel's \c prev pointer points to the head
+ * sentinel.
+ *
+ * Instead of tracking two separate \c node structures and a \c list structure
+ * that points to them, the sentinel nodes are in a single structure. Noting
+ * that each sentinel node always has one \c NULL pointer, the \c NULL
+ * pointers occupy the same memory location. The \c list structure
+ * thus contains the following:
+ *
+ * - A \c head pointer that represents the \c next pointer of the
+ * head sentinel node.
+ * - A \c tail pointer that represents the \c prev pointer of the head
+ * sentinel node and the \c next pointer of the tail sentinel node. This
+ * pointer is \b always \c NULL.
+ * - A \c tail_pred pointer that represents the \c prev pointer of the
+ * tail sentinel node.
+ *
+ * Therefore, if \c head->next is \c NULL or \c tail_pred->prev is \c NULL,
+ * the list is empty.
+ *
+ * Do note that this means that the list nodes will contain pointers into the
+ * list structure itself and as a result you may not \c realloc() an \c
+ * exec_list or any structure in which an \c exec_list is embedded.
+ *
+ * To anyone familiar with "exec lists" on the Amiga, this structure should
+ * be immediately recognizable. See the following link for the original Amiga
+ * operating system documentation on the subject.
+ *
+ * http://www.natami.net/dev/Libraries_Manual_guide/node02D7.html
+ *
+ * \author Ian Romanick
+ */
+
+#pragma once
+#ifndef LIST_CONTAINER_H
+#define LIST_CONTAINER_H
+
+#ifndef __cplusplus
+#include <stddef.h>
+#endif
+#include <assert.h>
+
+#include "util/ralloc.h"
+
+struct exec_node {
+ struct exec_node *next;
+ struct exec_node *prev;
+
+#ifdef __cplusplus
+ DECLARE_RALLOC_CXX_OPERATORS(exec_node)
+
+ exec_node() : next(NULL), prev(NULL)
+ {
+ /* empty */
+ }
+
+ const exec_node *get_next() const;
+ exec_node *get_next();
+
+ const exec_node *get_prev() const;
+ exec_node *get_prev();
+
+ void remove();
+
+ /**
+ * Link a node with itself
+ *
+ * This creates a sort of degenerate list that is occasionally useful.
+ */
+ void self_link();
+
+ /**
+ * Insert a node in the list after the current node
+ */
+ void insert_after(exec_node *after);
+ /**
+ * Insert a node in the list before the current node
+ */
+ void insert_before(exec_node *before);
+
+ /**
+ * Insert another list in the list before the current node
+ */
+ void insert_before(struct exec_list *before);
+
+ /**
+ * Replace the current node with the given node.
+ */
+ void replace_with(exec_node *replacement);
+
+ /**
+ * Is this the sentinel at the tail of the list?
+ */
+ bool is_tail_sentinel() const;
+
+ /**
+ * Is this the sentinel at the head of the list?
+ */
+ bool is_head_sentinel() const;
+#endif
+};
+
+static inline void
+exec_node_init(struct exec_node *n)
+{
+ n->next = NULL;
+ n->prev = NULL;
+}
+
+static inline const struct exec_node *
+exec_node_get_next_const(const struct exec_node *n)
+{
+ return n->next;
+}
+
+static inline struct exec_node *
+exec_node_get_next(struct exec_node *n)
+{
+ return n->next;
+}
+
+static inline const struct exec_node *
+exec_node_get_prev_const(const struct exec_node *n)
+{
+ return n->prev;
+}
+
+static inline struct exec_node *
+exec_node_get_prev(struct exec_node *n)
+{
+ return n->prev;
+}
+
+static inline void
+exec_node_remove(struct exec_node *n)
+{
+ n->next->prev = n->prev;
+ n->prev->next = n->next;
+ n->next = NULL;
+ n->prev = NULL;
+}
+
+static inline void
+exec_node_self_link(struct exec_node *n)
+{
+ n->next = n;
+ n->prev = n;
+}
+
+static inline void
+exec_node_insert_after(struct exec_node *n, struct exec_node *after)
+{
+ after->next = n->next;
+ after->prev = n;
+
+ n->next->prev = after;
+ n->next = after;
+}
+
+static inline void
+exec_node_insert_node_before(struct exec_node *n, struct exec_node *before)
+{
+ before->next = n;
+ before->prev = n->prev;
+
+ n->prev->next = before;
+ n->prev = before;
+}
+
+static inline void
+exec_node_replace_with(struct exec_node *n, struct exec_node *replacement)
+{
+ replacement->prev = n->prev;
+ replacement->next = n->next;
+
+ n->prev->next = replacement;
+ n->next->prev = replacement;
+}
+
+static inline bool
+exec_node_is_tail_sentinel(const struct exec_node *n)
+{
+ return n->next == NULL;
+}
+
+static inline bool
+exec_node_is_head_sentinel(const struct exec_node *n)
+{
+ return n->prev == NULL;
+}
+
+#ifdef __cplusplus
+inline const exec_node *exec_node::get_next() const
+{
+ return exec_node_get_next_const(this);
+}
+
+inline exec_node *exec_node::get_next()
+{
+ return exec_node_get_next(this);
+}
+
+inline const exec_node *exec_node::get_prev() const
+{
+ return exec_node_get_prev_const(this);
+}
+
+inline exec_node *exec_node::get_prev()
+{
+ return exec_node_get_prev(this);
+}
+
+inline void exec_node::remove()
+{
+ exec_node_remove(this);
+}
+
+inline void exec_node::self_link()
+{
+ exec_node_self_link(this);
+}
+
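+/* A minimal usage sketch (illustrative only, not part of this header):
+ * embed an exec_node as a member of your own struct, link instances into
+ * an exec_list, and recover the containing struct with exec_node_data().
+ * Here "item" and "it" are hypothetical names:
+ *
+ *    struct item { struct exec_node node; int value; };
+ *
+ *    struct exec_list list;
+ *    exec_list_make_empty(&list);
+ *    exec_list_push_tail(&list, &it->node);
+ *
+ *    for (struct exec_node *n = list.head; n->next != NULL; n = n->next) {
+ *       struct item *i = exec_node_data(struct item, n, node);
+ *       ...
+ *    }
+ */
+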
+inline void exec_node::insert_after(exec_node *after)
+{
+ exec_node_insert_after(this, after);
+}
+
+inline void exec_node::insert_before(exec_node *before)
+{
+ exec_node_insert_node_before(this, before);
+}
+
+inline void exec_node::replace_with(exec_node *replacement)
+{
+ exec_node_replace_with(this, replacement);
+}
+
+inline bool exec_node::is_tail_sentinel() const
+{
+ return exec_node_is_tail_sentinel(this);
+}
+
+inline bool exec_node::is_head_sentinel() const
+{
+ return exec_node_is_head_sentinel(this);
+}
+#endif
+
+#ifdef __cplusplus
+/* This macro will not work correctly if `t' uses virtual inheritance. If you
+ * are using virtual inheritance, you deserve a slow and painful death. Enjoy!
+ */
+#define exec_list_offsetof(t, f, p) \
+ (((char *) &((t *) p)->f) - ((char *) p))
+#else
+#define exec_list_offsetof(t, f, p) offsetof(t, f)
+#endif
+
+/**
+ * Get a pointer to the structure containing an exec_node
+ *
+ * Given a pointer to an \c exec_node embedded in a structure, get a pointer to
+ * the containing structure.
+ *
+ * \param type Base type of the structure containing the node
+ * \param node Pointer to the \c exec_node
+ * \param field Name of the field in \c type that is the embedded \c exec_node
+ */
+#define exec_node_data(type, node, field) \
+ ((type *) (((char *) node) - exec_list_offsetof(type, field, node)))
+
+#ifdef __cplusplus
+struct exec_node;
+#endif
+
+struct exec_list {
+ struct exec_node *head;
+ struct exec_node *tail;
+ struct exec_node *tail_pred;
+
+#ifdef __cplusplus
+ DECLARE_RALLOC_CXX_OPERATORS(exec_list)
+
+ exec_list()
+ {
+ make_empty();
+ }
+
+ void make_empty();
+
+ bool is_empty() const;
+
+ const exec_node *get_head() const;
+ exec_node *get_head();
+
+ const exec_node *get_tail() const;
+ exec_node *get_tail();
+
+ unsigned length() const;
+
+ void push_head(exec_node *n);
+ void push_tail(exec_node *n);
+ void push_degenerate_list_at_head(exec_node *n);
+
+ /**
+ * Remove the first node from a list and return it
+ *
+ * \return
+ * The first node in the list or \c NULL if the list is empty.
+ *
+ * \sa exec_list::get_head
+ */
+ exec_node *pop_head();
+
+ /**
+ * Move all of the nodes from this list to the target list
+ */
+ void move_nodes_to(exec_list *target);
+
+ /**
+ * Append all nodes from the source list to the end of the target list
+ */
+ void append_list(exec_list *source);
+
+ /**
+ * Prepend all nodes from the source list to the beginning of the target
+ * list
+ */
+ void prepend_list(exec_list *source);
+#endif
+};
+
+static inline void
+exec_list_make_empty(struct exec_list *list)
+{
+ list->head = (struct exec_node *) & list->tail;
+ list->tail = NULL;
+ list->tail_pred = (struct exec_node *) & list->head;
+}
+
+static inline bool
+exec_list_is_empty(const struct exec_list *list)
+{
+ /* There are three ways to test whether a list is empty or not.
+ *
+ * - Check to see if the \c head points to the \c tail.
+ * - Check to see if the \c tail_pred points to the \c head.
+ * - Check to see if the \c head is the sentinel node by testing whether
+ * its \c next pointer is \c NULL.
+ *
+ * The first two methods tend to generate better code on modern systems
+ * because they save a pointer dereference.
+ */
+ return list->head == (struct exec_node *) &list->tail;
+}
+
+static inline const struct exec_node *
+exec_list_get_head_const(const struct exec_list *list)
+{
+ return !exec_list_is_empty(list) ? list->head : NULL;
+}
+
+static inline struct exec_node *
+exec_list_get_head(struct exec_list *list)
+{
+ return !exec_list_is_empty(list) ? list->head : NULL;
+}
+
+static inline const struct exec_node *
+exec_list_get_tail_const(const struct exec_list *list)
+{
+ return !exec_list_is_empty(list) ? list->tail_pred : NULL;
+}
+
+static inline struct exec_node *
+exec_list_get_tail(struct exec_list *list)
+{
+ return !exec_list_is_empty(list) ? list->tail_pred : NULL;
+}
+
+static inline unsigned
+exec_list_length(const struct exec_list *list)
+{
+ unsigned size = 0;
+ struct exec_node *node;
+
+ for (node = list->head; node->next != NULL; node = node->next) {
+ size++;
+ }
+
+ return size;
+}
+
+static inline void
+exec_list_push_head(struct exec_list *list, struct exec_node *n)
+{
+ n->next = list->head;
+ n->prev = (struct exec_node *) &list->head;
+
+ n->next->prev = n;
+ list->head = n;
+}
+
+static inline void
+exec_list_push_tail(struct exec_list *list, struct exec_node *n)
+{
+ n->next = (struct exec_node *) &list->tail;
+ n->prev = list->tail_pred;
+
+ n->prev->next = n;
+ list->tail_pred = n;
+}
+
+static inline void
+exec_list_push_degenerate_list_at_head(struct exec_list *list, struct exec_node *n)
+{
+ assert(n->prev->next == n);
+
+ n->prev->next = list->head;
+ list->head->prev = n->prev;
+ n->prev = (struct exec_node *) &list->head;
+ list->head = n;
+}
+
+static inline struct exec_node *
+exec_list_pop_head(struct exec_list *list)
+{
+ struct exec_node *const n = exec_list_get_head(list);
+ if (n != NULL)
+ exec_node_remove(n);
+
+ return n;
+}
+
+static inline void
+exec_list_move_nodes_to(struct exec_list *list, struct exec_list *target)
+{
+ if (exec_list_is_empty(list)) {
+ exec_list_make_empty(target);
+ } else {
+ target->head = list->head;
+ target->tail = NULL;
+ target->tail_pred = list->tail_pred;
+
+ target->head->prev = (struct exec_node *) &target->head;
+ target->tail_pred->next = (struct exec_node *) &target->tail;
+
+ exec_list_make_empty(list);
+ }
+}
+
+static inline void
+exec_list_append(struct exec_list *list, struct exec_list *source)
+{
+ if (exec_list_is_empty(source))
+ return;
+
+ /* Link the first node of the source with the last node of the target list.
+ */
+ list->tail_pred->next = source->head;
+ source->head->prev = list->tail_pred;
+
+ /* Make the tail of the source list be the tail of the target list.
+ */
+ list->tail_pred = source->tail_pred;
+ list->tail_pred->next = (struct exec_node *) &list->tail;
+
+ /* Make the source list empty for good measure.
+ */
+ exec_list_make_empty(source);
+}
+
+static inline void
+exec_list_prepend(struct exec_list *list, struct exec_list *source)
+{
+ exec_list_append(source, list);
+ exec_list_move_nodes_to(source, list);
+}
+
+static inline void
+exec_node_insert_list_before(struct exec_node *n, struct exec_list *before)
+{
+ if (exec_list_is_empty(before))
+ return;
+
+ before->tail_pred->next = n;
+ before->head->prev = n->prev;
+
+ n->prev->next = before->head;
+ n->prev = before->tail_pred;
+
+ exec_list_make_empty(before);
+}
+
+static inline void
+exec_list_validate(const struct exec_list *list)
+{
+ const struct exec_node *node;
+
+ assert(list->head->prev == (const struct exec_node *) &list->head);
+ assert(list->tail == NULL);
+ assert(list->tail_pred->next == (const struct exec_node *) &list->tail);
+
+ /* We could try to use one of the iterators below for this but they all
+ * either require C++ or assume the exec_node is embedded in a structure
+ * which is not the case for this function.
+ */ + for (node = list->head; node->next != NULL; node = node->next) { + assert(node->next->prev == node); + assert(node->prev->next == node); + } +} + +#ifdef __cplusplus +inline void exec_list::make_empty() +{ + exec_list_make_empty(this); +} + +inline bool exec_list::is_empty() const +{ + return exec_list_is_empty(this); +} + +inline const exec_node *exec_list::get_head() const +{ + return exec_list_get_head_const(this); +} + +inline exec_node *exec_list::get_head() +{ + return exec_list_get_head(this); +} + +inline const exec_node *exec_list::get_tail() const +{ + return exec_list_get_tail_const(this); +} + +inline exec_node *exec_list::get_tail() +{ + return exec_list_get_tail(this); +} + +inline unsigned exec_list::length() const +{ + return exec_list_length(this); +} + +inline void exec_list::push_head(exec_node *n) +{ + exec_list_push_head(this, n); +} + +inline void exec_list::push_tail(exec_node *n) +{ + exec_list_push_tail(this, n); +} + +inline void exec_list::push_degenerate_list_at_head(exec_node *n) +{ + exec_list_push_degenerate_list_at_head(this, n); +} + +inline exec_node *exec_list::pop_head() +{ + return exec_list_pop_head(this); +} + +inline void exec_list::move_nodes_to(exec_list *target) +{ + exec_list_move_nodes_to(this, target); +} + +inline void exec_list::append_list(exec_list *source) +{ + exec_list_append(this, source); +} + +inline void exec_list::prepend_list(exec_list *source) +{ + exec_list_prepend(this, source); +} + +inline void exec_node::insert_before(exec_list *before) +{ + exec_node_insert_list_before(this, before); +} +#endif + +#define foreach_in_list(__type, __inst, __list) \ + for (__type *(__inst) = (__type *)(__list)->head; \ + !(__inst)->is_tail_sentinel(); \ + (__inst) = (__type *)(__inst)->next) + +#define foreach_in_list_reverse(__type, __inst, __list) \ + for (__type *(__inst) = (__type *)(__list)->tail_pred; \ + !(__inst)->is_head_sentinel(); \ + (__inst) = (__type *)(__inst)->prev) + +/** + * This version is safe even if the current node is removed. + */ +#define foreach_in_list_safe(__type, __node, __list) \ + for (__type *__node = (__type *)(__list)->head, \ + *__next = (__type *)__node->next; \ + __next != NULL; \ + __node = __next, __next = (__type *)__next->next) + +#define foreach_in_list_reverse_safe(__type, __node, __list) \ + for (__type *__node = (__type *)(__list)->tail_pred, \ + *__prev = (__type *)__node->prev; \ + __prev != NULL; \ + __node = __prev, __prev = (__type *)__prev->prev) + +#define foreach_in_list_use_after(__type, __inst, __list) \ + __type *(__inst); \ + for ((__inst) = (__type *)(__list)->head; \ + !(__inst)->is_tail_sentinel(); \ + (__inst) = (__type *)(__inst)->next) +/** + * Iterate through two lists at once. Stops at the end of the shorter list. + * + * This is safe against either current node being removed or replaced. 
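+ *
+ * A typical (illustrative) use is walking two related lists in parallel,
+ * e.g. the formal and actual parameters of a call ("sig" and "call" are
+ * hypothetical names here):
+ *
+ *    foreach_two_lists(formal, &sig->parameters,
+ *                      actual, &call->actual_parameters) {
+ *       ...
+ *    }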
+ */ +#define foreach_two_lists(__node1, __list1, __node2, __list2) \ + for (struct exec_node * __node1 = (__list1)->head, \ + * __node2 = (__list2)->head, \ + * __next1 = __node1->next, \ + * __next2 = __node2->next \ + ; __next1 != NULL && __next2 != NULL \ + ; __node1 = __next1, \ + __node2 = __next2, \ + __next1 = __next1->next, \ + __next2 = __next2->next) + +#define foreach_list_typed(__type, __node, __field, __list) \ + for (__type * __node = \ + exec_node_data(__type, (__list)->head, __field); \ + (__node)->__field.next != NULL; \ + (__node) = exec_node_data(__type, (__node)->__field.next, __field)) + +#define foreach_list_typed_reverse(__type, __node, __field, __list) \ + for (__type * __node = \ + exec_node_data(__type, (__list)->tail_pred, __field); \ + (__node)->__field.prev != NULL; \ + (__node) = exec_node_data(__type, (__node)->__field.prev, __field)) + +#define foreach_list_typed_safe(__type, __node, __field, __list) \ + for (__type * __node = \ + exec_node_data(__type, (__list)->head, __field), \ + * __next = \ + exec_node_data(__type, (__node)->__field.next, __field); \ + (__node)->__field.next != NULL; \ + __node = __next, __next = \ + exec_node_data(__type, (__next)->__field.next, __field)) + +#define foreach_list_typed_reverse_safe(__type, __node, __field, __list) \ + for (__type * __node = \ + exec_node_data(__type, (__list)->tail_pred, __field), \ + * __prev = \ + exec_node_data(__type, (__node)->__field.prev, __field); \ + (__node)->__field.prev != NULL; \ + __node = __prev, __prev = \ + exec_node_data(__type, (__prev)->__field.prev, __field)) + +#endif /* LIST_CONTAINER_H */ diff --git a/src/compiler/glsl/loop_analysis.cpp b/src/compiler/glsl/loop_analysis.cpp new file mode 100644 index 00000000000..096a80abb34 --- /dev/null +++ b/src/compiler/glsl/loop_analysis.cpp @@ -0,0 +1,640 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "compiler/glsl_types.h" +#include "loop_analysis.h" +#include "ir_hierarchical_visitor.h" + +static bool is_loop_terminator(ir_if *ir); + +static bool all_expression_operands_are_loop_constant(ir_rvalue *, + hash_table *); + +static ir_rvalue *get_basic_induction_increment(ir_assignment *, hash_table *); + + +/** + * Record the fact that the given loop variable was referenced inside the loop. + * + * \arg in_assignee is true if the reference was on the LHS of an assignment. 
+ * + * \arg in_conditional_code_or_nested_loop is true if the reference occurred + * inside an if statement or a nested loop. + * + * \arg current_assignment is the ir_assignment node that the loop variable is + * on the LHS of, if any (ignored if \c in_assignee is false). + */ +void +loop_variable::record_reference(bool in_assignee, + bool in_conditional_code_or_nested_loop, + ir_assignment *current_assignment) +{ + if (in_assignee) { + assert(current_assignment != NULL); + + if (in_conditional_code_or_nested_loop || + current_assignment->condition != NULL) { + this->conditional_or_nested_assignment = true; + } + + if (this->first_assignment == NULL) { + assert(this->num_assignments == 0); + + this->first_assignment = current_assignment; + } + + this->num_assignments++; + } else if (this->first_assignment == current_assignment) { + /* This catches the case where the variable is used in the RHS of an + * assignment where it is also in the LHS. + */ + this->read_before_write = true; + } +} + + +loop_state::loop_state() +{ + this->ht = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + this->mem_ctx = ralloc_context(NULL); + this->loop_found = false; +} + + +loop_state::~loop_state() +{ + hash_table_dtor(this->ht); + ralloc_free(this->mem_ctx); +} + + +loop_variable_state * +loop_state::insert(ir_loop *ir) +{ + loop_variable_state *ls = new(this->mem_ctx) loop_variable_state; + + hash_table_insert(this->ht, ls, ir); + this->loop_found = true; + + return ls; +} + + +loop_variable_state * +loop_state::get(const ir_loop *ir) +{ + return (loop_variable_state *) hash_table_find(this->ht, ir); +} + + +loop_variable * +loop_variable_state::get(const ir_variable *ir) +{ + return (loop_variable *) hash_table_find(this->var_hash, ir); +} + + +loop_variable * +loop_variable_state::insert(ir_variable *var) +{ + void *mem_ctx = ralloc_parent(this); + loop_variable *lv = rzalloc(mem_ctx, loop_variable); + + lv->var = var; + + hash_table_insert(this->var_hash, lv, lv->var); + this->variables.push_tail(lv); + + return lv; +} + + +loop_terminator * +loop_variable_state::insert(ir_if *if_stmt) +{ + void *mem_ctx = ralloc_parent(this); + loop_terminator *t = new(mem_ctx) loop_terminator(); + + t->ir = if_stmt; + this->terminators.push_tail(t); + + return t; +} + + +/** + * If the given variable already is recorded in the state for this loop, + * return the corresponding loop_variable object that records information + * about it. + * + * Otherwise, create a new loop_variable object to record information about + * the variable, and set its \c read_before_write field appropriately based on + * \c in_assignee. + * + * \arg in_assignee is true if this variable was encountered on the LHS of an + * assignment. 
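+ *
+ * For example, if the first reference to \c x seen inside the loop is the
+ * read in "y = x + 1;", then \c in_assignee is false and \c x is marked
+ * read-before-write; if the first reference is the write in "x = 0;", the
+ * flag is left clear.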
+ */ +loop_variable * +loop_variable_state::get_or_insert(ir_variable *var, bool in_assignee) +{ + loop_variable *lv = this->get(var); + + if (lv == NULL) { + lv = this->insert(var); + lv->read_before_write = !in_assignee; + } + + return lv; +} + + +namespace { + +class loop_analysis : public ir_hierarchical_visitor { +public: + loop_analysis(loop_state *loops); + + virtual ir_visitor_status visit(ir_loop_jump *); + virtual ir_visitor_status visit(ir_dereference_variable *); + + virtual ir_visitor_status visit_enter(ir_call *); + + virtual ir_visitor_status visit_enter(ir_loop *); + virtual ir_visitor_status visit_leave(ir_loop *); + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_leave(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_if *); + virtual ir_visitor_status visit_leave(ir_if *); + + loop_state *loops; + + int if_statement_depth; + + ir_assignment *current_assignment; + + exec_list state; +}; + +} /* anonymous namespace */ + +loop_analysis::loop_analysis(loop_state *loops) + : loops(loops), if_statement_depth(0), current_assignment(NULL) +{ + /* empty */ +} + + +ir_visitor_status +loop_analysis::visit(ir_loop_jump *ir) +{ + (void) ir; + + assert(!this->state.is_empty()); + + loop_variable_state *const ls = + (loop_variable_state *) this->state.get_head(); + + ls->num_loop_jumps++; + + return visit_continue; +} + + +ir_visitor_status +loop_analysis::visit_enter(ir_call *) +{ + /* Mark every loop that we're currently analyzing as containing an ir_call + * (even those at outer nesting levels). + */ + foreach_in_list(loop_variable_state, ls, &this->state) { + ls->contains_calls = true; + } + + return visit_continue_with_parent; +} + + +ir_visitor_status +loop_analysis::visit(ir_dereference_variable *ir) +{ + /* If we're not somewhere inside a loop, there's nothing to do. + */ + if (this->state.is_empty()) + return visit_continue; + + bool nested = false; + + foreach_in_list(loop_variable_state, ls, &this->state) { + ir_variable *var = ir->variable_referenced(); + loop_variable *lv = ls->get_or_insert(var, this->in_assignee); + + lv->record_reference(this->in_assignee, + nested || this->if_statement_depth > 0, + this->current_assignment); + nested = true; + } + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_enter(ir_loop *ir) +{ + loop_variable_state *ls = this->loops->insert(ir); + this->state.push_head(ls); + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_leave(ir_loop *ir) +{ + loop_variable_state *const ls = + (loop_variable_state *) this->state.pop_head(); + + /* Function calls may contain side effects. These could alter any of our + * variables in ways that cannot be known, and may even terminate shader + * execution (say, calling discard in the fragment shader). So we can't + * rely on any of our analysis about assignments to variables. + * + * We could perform some conservative analysis (prove there's no statically + * possible assignment, etc.) but it isn't worth it for now; function + * inlining will allow us to unroll loops anyway. + */ + if (ls->contains_calls) + return visit_continue; + + foreach_in_list(ir_instruction, node, &ir->body_instructions) { + /* Skip over declarations at the start of a loop. 
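+       * Declarations are ir_variable nodes; the scan below stops at the
+       * first instruction that is neither a declaration nor an
+       * if-statement of the loop-terminator form.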
+ */ + if (node->as_variable()) + continue; + + ir_if *if_stmt = ((ir_instruction *) node)->as_if(); + + if ((if_stmt != NULL) && is_loop_terminator(if_stmt)) + ls->insert(if_stmt); + else + break; + } + + + foreach_in_list_safe(loop_variable, lv, &ls->variables) { + /* Move variables that are already marked as being loop constant to + * a separate list. These trivially don't need to be tested. + */ + if (lv->is_loop_constant()) { + lv->remove(); + ls->constants.push_tail(lv); + } + } + + /* Each variable assigned in the loop that isn't already marked as being loop + * constant might still be loop constant. The requirements at this point + * are: + * + * - Variable is written before it is read. + * + * - Only one assignment to the variable. + * + * - All operands on the RHS of the assignment are also loop constants. + * + * The last requirement is the reason for the progress loop. A variable + * marked as a loop constant on one pass may allow other variables to be + * marked as loop constant on following passes. + */ + bool progress; + do { + progress = false; + + foreach_in_list_safe(loop_variable, lv, &ls->variables) { + if (lv->conditional_or_nested_assignment || (lv->num_assignments > 1)) + continue; + + /* Process the RHS of the assignment. If all of the variables + * accessed there are loop constants, then add this + */ + ir_rvalue *const rhs = lv->first_assignment->rhs; + if (all_expression_operands_are_loop_constant(rhs, ls->var_hash)) { + lv->rhs_clean = true; + + if (lv->is_loop_constant()) { + progress = true; + + lv->remove(); + ls->constants.push_tail(lv); + } + } + } + } while (progress); + + /* The remaining variables that are not loop invariant might be loop + * induction variables. + */ + foreach_in_list_safe(loop_variable, lv, &ls->variables) { + /* If there is more than one assignment to a variable, it cannot be a + * loop induction variable. This isn't strictly true, but this is a + * very simple induction variable detector, and it can't handle more + * complex cases. + */ + if (lv->num_assignments > 1) + continue; + + /* All of the variables with zero assignments in the loop are loop + * invariant, and they should have already been filtered out. + */ + assert(lv->num_assignments == 1); + assert(lv->first_assignment != NULL); + + /* The assignment to the variable in the loop must be unconditional and + * not inside a nested loop. + */ + if (lv->conditional_or_nested_assignment) + continue; + + /* Basic loop induction variables have a single assignment in the loop + * that has the form 'VAR = VAR + i' or 'VAR = VAR - i' where i is a + * loop invariant. + */ + ir_rvalue *const inc = + get_basic_induction_increment(lv->first_assignment, ls->var_hash); + if (inc != NULL) { + lv->increment = inc; + + lv->remove(); + ls->induction_variables.push_tail(lv); + } + } + + /* Search the loop terminating conditions for those of the form 'i < c' + * where i is a loop induction variable, c is a constant, and < is any + * relative operator. From each of these we can infer an iteration count. + * Also figure out which terminator (if any) produces the smallest + * iteration count--this is the limiting terminator. + */ + foreach_in_list(loop_terminator, t, &ls->terminators) { + ir_if *if_stmt = t->ir; + + /* If-statements can be either 'if (expr)' or 'if (deref)'. We only care + * about the former here. 
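+       * For example, "if (i < 4) break;" gives an ir_expression condition
+       * and is analyzed below, while "if (done) break;" dereferences a
+       * boolean variable directly and is skipped.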
+ */ + ir_expression *cond = if_stmt->condition->as_expression(); + if (cond == NULL) + continue; + + switch (cond->operation) { + case ir_binop_less: + case ir_binop_greater: + case ir_binop_lequal: + case ir_binop_gequal: { + /* The expressions that we care about will either be of the form + * 'counter < limit' or 'limit < counter'. Figure out which is + * which. + */ + ir_rvalue *counter = cond->operands[0]->as_dereference_variable(); + ir_constant *limit = cond->operands[1]->as_constant(); + enum ir_expression_operation cmp = cond->operation; + + if (limit == NULL) { + counter = cond->operands[1]->as_dereference_variable(); + limit = cond->operands[0]->as_constant(); + + switch (cmp) { + case ir_binop_less: cmp = ir_binop_greater; break; + case ir_binop_greater: cmp = ir_binop_less; break; + case ir_binop_lequal: cmp = ir_binop_gequal; break; + case ir_binop_gequal: cmp = ir_binop_lequal; break; + default: assert(!"Should not get here."); + } + } + + if ((counter == NULL) || (limit == NULL)) + break; + + ir_variable *var = counter->variable_referenced(); + + ir_rvalue *init = find_initial_value(ir, var); + + loop_variable *lv = ls->get(var); + if (lv != NULL && lv->is_induction_var()) { + t->iterations = calculate_iterations(init, limit, lv->increment, + cmp); + + if (t->iterations >= 0 && + (ls->limiting_terminator == NULL || + t->iterations < ls->limiting_terminator->iterations)) { + ls->limiting_terminator = t; + } + } + break; + } + + default: + break; + } + } + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_enter(ir_if *ir) +{ + (void) ir; + + if (!this->state.is_empty()) + this->if_statement_depth++; + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_leave(ir_if *ir) +{ + (void) ir; + + if (!this->state.is_empty()) + this->if_statement_depth--; + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_enter(ir_assignment *ir) +{ + /* If we're not somewhere inside a loop, there's nothing to do. + */ + if (this->state.is_empty()) + return visit_continue_with_parent; + + this->current_assignment = ir; + + return visit_continue; +} + +ir_visitor_status +loop_analysis::visit_leave(ir_assignment *ir) +{ + /* Since the visit_enter exits with visit_continue_with_parent for this + * case, the loop state stack should never be empty here. + */ + assert(!this->state.is_empty()); + + assert(this->current_assignment == ir); + this->current_assignment = NULL; + + return visit_continue; +} + + +class examine_rhs : public ir_hierarchical_visitor { +public: + examine_rhs(hash_table *loop_variables) + { + this->only_uses_loop_constants = true; + this->loop_variables = loop_variables; + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + loop_variable *lv = + (loop_variable *) hash_table_find(this->loop_variables, ir->var); + + assert(lv != NULL); + + if (lv->is_loop_constant()) { + return visit_continue; + } else { + this->only_uses_loop_constants = false; + return visit_stop; + } + } + + hash_table *loop_variables; + bool only_uses_loop_constants; +}; + + +bool +all_expression_operands_are_loop_constant(ir_rvalue *ir, hash_table *variables) +{ + examine_rhs v(variables); + + ir->accept(&v); + + return v.only_uses_loop_constants; +} + + +ir_rvalue * +get_basic_induction_increment(ir_assignment *ir, hash_table *var_hash) +{ + /* The RHS must be a binary expression. 
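+    * Only "var = var + n", "var = n + var" and "var = var - n" qualify;
+    * "var = n - var" is rejected below because for subtraction the
+    * variable must be the left operand.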
+ */ + ir_expression *const rhs = ir->rhs->as_expression(); + if ((rhs == NULL) + || ((rhs->operation != ir_binop_add) + && (rhs->operation != ir_binop_sub))) + return NULL; + + /* One of the of operands of the expression must be the variable assigned. + * If the operation is subtraction, the variable in question must be the + * "left" operand. + */ + ir_variable *const var = ir->lhs->variable_referenced(); + + ir_variable *const op0 = rhs->operands[0]->variable_referenced(); + ir_variable *const op1 = rhs->operands[1]->variable_referenced(); + + if (((op0 != var) && (op1 != var)) + || ((op1 == var) && (rhs->operation == ir_binop_sub))) + return NULL; + + ir_rvalue *inc = (op0 == var) ? rhs->operands[1] : rhs->operands[0]; + + if (inc->as_constant() == NULL) { + ir_variable *const inc_var = inc->variable_referenced(); + if (inc_var != NULL) { + loop_variable *lv = + (loop_variable *) hash_table_find(var_hash, inc_var); + + if (lv == NULL || !lv->is_loop_constant()) { + assert(lv != NULL); + inc = NULL; + } + } else + inc = NULL; + } + + if ((inc != NULL) && (rhs->operation == ir_binop_sub)) { + void *mem_ctx = ralloc_parent(ir); + + inc = new(mem_ctx) ir_expression(ir_unop_neg, + inc->type, + inc->clone(mem_ctx, NULL), + NULL); + } + + return inc; +} + + +/** + * Detect whether an if-statement is a loop terminating condition + * + * Detects if-statements of the form + * + * (if (expression bool ...) (break)) + */ +bool +is_loop_terminator(ir_if *ir) +{ + if (!ir->else_instructions.is_empty()) + return false; + + ir_instruction *const inst = + (ir_instruction *) ir->then_instructions.get_head(); + if (inst == NULL) + return false; + + if (inst->ir_type != ir_type_loop_jump) + return false; + + ir_loop_jump *const jump = (ir_loop_jump *) inst; + if (jump->mode != ir_loop_jump::jump_break) + return false; + + return true; +} + + +loop_state * +analyze_loop_variables(exec_list *instructions) +{ + loop_state *loops = new loop_state; + loop_analysis v(loops); + + v.run(instructions); + return v.loops; +} diff --git a/src/compiler/glsl/loop_analysis.h b/src/compiler/glsl/loop_analysis.h new file mode 100644 index 00000000000..3b1971d7edc --- /dev/null +++ b/src/compiler/glsl/loop_analysis.h @@ -0,0 +1,259 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +#pragma once +#ifndef LOOP_ANALYSIS_H +#define LOOP_ANALYSIS_H + +#include "ir.h" +#include "program/hash_table.h" + +/** + * Analyze and classify all variables used in all loops in the instruction list + */ +extern class loop_state * +analyze_loop_variables(exec_list *instructions); + + +/** + * Fill in loop control fields + * + * Based on analysis of loop variables, this function tries to remove + * redundant sequences in the loop of the form + * + * (if (expression bool ...) (break)) + * + * For example, if it is provable that one loop exit condition will + * always be satisfied before another, the unnecessary exit condition will be + * removed. + */ +extern bool +set_loop_controls(exec_list *instructions, loop_state *ls); + + +extern bool +unroll_loops(exec_list *instructions, loop_state *ls, + const struct gl_shader_compiler_options *options); + +ir_rvalue * +find_initial_value(ir_loop *loop, ir_variable *var); + +int +calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, + enum ir_expression_operation op); + + +/** + * Tracking for all variables used in a loop + */ +class loop_variable_state : public exec_node { +public: + class loop_variable *get(const ir_variable *); + class loop_variable *insert(ir_variable *); + class loop_variable *get_or_insert(ir_variable *, bool in_assignee); + class loop_terminator *insert(ir_if *); + + + /** + * Variables that have not yet been classified + */ + exec_list variables; + + /** + * Variables whose values are constant within the body of the loop + * + * This list contains \c loop_variable objects. + */ + exec_list constants; + + /** + * Induction variables for this loop + * + * This list contains \c loop_variable objects. + */ + exec_list induction_variables; + + /** + * Simple if-statements that lead to the termination of the loop + * + * This list contains \c loop_terminator objects. + * + * \sa is_loop_terminator + */ + exec_list terminators; + + /** + * If any of the terminators in \c terminators leads to termination of the + * loop after a constant number of iterations, this is the terminator that + * leads to termination after the smallest number of iterations. Otherwise + * NULL. + */ + loop_terminator *limiting_terminator; + + /** + * Hash table containing all variables accessed in this loop + */ + hash_table *var_hash; + + /** + * Number of ir_loop_jump instructions that operate on this loop + */ + unsigned num_loop_jumps; + + /** + * Whether this loop contains any function calls. + */ + bool contains_calls; + + loop_variable_state() + { + this->num_loop_jumps = 0; + this->contains_calls = false; + this->var_hash = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + this->limiting_terminator = NULL; + } + + ~loop_variable_state() + { + hash_table_dtor(this->var_hash); + } + + DECLARE_RALLOC_CXX_OPERATORS(loop_variable_state) +}; + + +class loop_variable : public exec_node { +public: + /** The variable in question. */ + ir_variable *var; + + /** Is the variable read in the loop before it is written? */ + bool read_before_write; + + /** Are all variables in the RHS of the assignment loop constants? */ + bool rhs_clean; + + /** + * Is there an assignment to the variable that is conditional, or inside a + * nested loop? + */ + bool conditional_or_nested_assignment; + + /** Reference to the first assignment to the variable in the loop body. */ + ir_assignment *first_assignment; + + /** Number of assignments to the variable in the loop body. 
*/ + unsigned num_assignments; + + /** + * Increment value for a loop induction variable + * + * If this is a loop induction variable, the amount by which the variable + * is incremented on each iteration through the loop. + * + * If this is not a loop induction variable, NULL. + */ + ir_rvalue *increment; + + + inline bool is_induction_var() const + { + /* Induction variables always have a non-null increment, and vice + * versa. + */ + return this->increment != NULL; + } + + + inline bool is_loop_constant() const + { + const bool is_const = (this->num_assignments == 0) + || (((this->num_assignments == 1) + && !this->conditional_or_nested_assignment + && !this->read_before_write + && this->rhs_clean) || this->var->data.read_only); + + /* If the RHS of *the* assignment is clean, then there must be exactly + * one assignment of the variable. + */ + assert((this->rhs_clean && (this->num_assignments == 1)) + || !this->rhs_clean); + + return is_const; + } + + void record_reference(bool in_assignee, + bool in_conditional_code_or_nested_loop, + ir_assignment *current_assignment); +}; + + +class loop_terminator : public exec_node { +public: + loop_terminator() + : ir(NULL), iterations(-1) + { + } + + /** + * Statement which terminates the loop. + */ + ir_if *ir; + + /** + * The number of iterations after which the terminator is known to + * terminate the loop (if that is a fixed value). Otherwise -1. + */ + int iterations; +}; + + +class loop_state { +public: + ~loop_state(); + + /** + * Get the loop variable state data for a particular loop + */ + loop_variable_state *get(const ir_loop *); + + loop_variable_state *insert(ir_loop *ir); + + bool loop_found; + +private: + loop_state(); + + /** + * Hash table containing all loops that have been analyzed. + */ + hash_table *ht; + + void *mem_ctx; + + friend loop_state *analyze_loop_variables(exec_list *instructions); +}; + +#endif /* LOOP_ANALYSIS_H */ diff --git a/src/compiler/glsl/loop_controls.cpp b/src/compiler/glsl/loop_controls.cpp new file mode 100644 index 00000000000..c717605ec74 --- /dev/null +++ b/src/compiler/glsl/loop_controls.cpp @@ -0,0 +1,246 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+#include <limits.h>
+#include "main/compiler.h"
+#include "compiler/glsl_types.h"
+#include "loop_analysis.h"
+#include "ir_hierarchical_visitor.h"
+
+/**
+ * Find an initializer of a variable outside a loop
+ *
+ * Works backwards from the loop to find the pre-loop value of the variable.
+ * This is used, for example, to find the initial value of loop induction
+ * variables.
+ *
+ * \param loop Loop where \c var is an induction variable
+ * \param var Variable whose initializer is to be found
+ *
+ * \return
+ * The \c ir_rvalue assigned to the variable outside the loop. May return
+ * \c NULL if no initializer can be found.
+ */
+ir_rvalue *
+find_initial_value(ir_loop *loop, ir_variable *var)
+{
+   for (exec_node *node = loop->prev;
+        !node->is_head_sentinel();
+        node = node->prev) {
+      ir_instruction *ir = (ir_instruction *) node;
+
+      switch (ir->ir_type) {
+      case ir_type_call:
+      case ir_type_loop:
+      case ir_type_loop_jump:
+      case ir_type_return:
+      case ir_type_if:
+         return NULL;
+
+      case ir_type_function:
+      case ir_type_function_signature:
+         assert(!"Should not get here.");
+         return NULL;
+
+      case ir_type_assignment: {
+         ir_assignment *assign = ir->as_assignment();
+         ir_variable *assignee = assign->lhs->whole_variable_referenced();
+
+         if (assignee == var)
+            return (assign->condition != NULL) ? NULL : assign->rhs;
+
+         break;
+      }
+
+      default:
+         break;
+      }
+   }
+
+   return NULL;
+}
+
+
+int
+calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment,
+                     enum ir_expression_operation op)
+{
+   if (from == NULL || to == NULL || increment == NULL)
+      return -1;
+
+   void *mem_ctx = ralloc_context(NULL);
+
+   ir_expression *const sub =
+      new(mem_ctx) ir_expression(ir_binop_sub, from->type, to, from);
+
+   ir_expression *const div =
+      new(mem_ctx) ir_expression(ir_binop_div, sub->type, sub, increment);
+
+   ir_constant *iter = div->constant_expression_value();
+
+   if (iter == NULL)
+      return -1;
+
+   if (!iter->type->is_integer()) {
+      const ir_expression_operation op = iter->type->is_double()
+         ? ir_unop_d2i : ir_unop_f2i;
+      ir_rvalue *cast =
+         new(mem_ctx) ir_expression(op, glsl_type::int_type, iter, NULL);
+
+      iter = cast->constant_expression_value();
+   }
+
+   int iter_value = iter->get_int_component(0);
+
+   /* Make sure that the calculated number of iterations satisfies the exit
+    * condition. This is needed to catch off-by-one errors and some types of
+    * ill-formed loops. For example, we need to detect that the following
+    * loop does not have a maximum iteration count.
+    *
+    *    for (float x = 0.0; x != 0.9; x += 0.2)
+    *       ;
+    */
+   const int bias[] = { -1, 0, 1 };
+   bool valid_loop = false;
+
+   for (unsigned i = 0; i < ARRAY_SIZE(bias); i++) {
+      /* Increment may be of type int, uint or float.
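+       * The biased candidate count is rebuilt as an ir_constant of the
+       * increment's own type, e.g. unsigned(iter_value + bias[i]) for a
+       * uint counter, so that the constant folder below compares operands
+       * of matching types.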
*/ + switch (increment->type->base_type) { + case GLSL_TYPE_INT: + iter = new(mem_ctx) ir_constant(iter_value + bias[i]); + break; + case GLSL_TYPE_UINT: + iter = new(mem_ctx) ir_constant(unsigned(iter_value + bias[i])); + break; + case GLSL_TYPE_FLOAT: + iter = new(mem_ctx) ir_constant(float(iter_value + bias[i])); + break; + case GLSL_TYPE_DOUBLE: + iter = new(mem_ctx) ir_constant(double(iter_value + bias[i])); + break; + default: + unreachable("Unsupported type for loop iterator."); + } + + ir_expression *const mul = + new(mem_ctx) ir_expression(ir_binop_mul, increment->type, iter, + increment); + + ir_expression *const add = + new(mem_ctx) ir_expression(ir_binop_add, mul->type, mul, from); + + ir_expression *const cmp = + new(mem_ctx) ir_expression(op, glsl_type::bool_type, add, to); + + ir_constant *const cmp_result = cmp->constant_expression_value(); + + assert(cmp_result != NULL); + if (cmp_result->get_bool_component(0)) { + iter_value += bias[i]; + valid_loop = true; + break; + } + } + + ralloc_free(mem_ctx); + return (valid_loop) ? iter_value : -1; +} + +namespace { + +class loop_control_visitor : public ir_hierarchical_visitor { +public: + loop_control_visitor(loop_state *state) + { + this->state = state; + this->progress = false; + } + + virtual ir_visitor_status visit_leave(ir_loop *ir); + + loop_state *state; + + bool progress; +}; + +} /* anonymous namespace */ + +ir_visitor_status +loop_control_visitor::visit_leave(ir_loop *ir) +{ + loop_variable_state *const ls = this->state->get(ir); + + /* If we've entered a loop that hasn't been analyzed, something really, + * really bad has happened. + */ + if (ls == NULL) { + assert(ls != NULL); + return visit_continue; + } + + if (ls->limiting_terminator != NULL) { + /* If the limiting terminator has an iteration count of zero, then we've + * proven that the loop cannot run, so delete it. + */ + int iterations = ls->limiting_terminator->iterations; + if (iterations == 0) { + ir->remove(); + this->progress = true; + return visit_continue; + } + } + + /* Remove the conditional break statements associated with all terminators + * that are associated with a fixed iteration count, except for the one + * associated with the limiting terminator--that one needs to stay, since + * it terminates the loop. Exception: if the loop still has a normative + * bound, then that terminates the loop, so we don't even need the limiting + * terminator. 
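+    *
+    * For example, if two terminators prove exits after 4 and 7 iterations,
+    * the break for 7 is deleted here and only the limiting break for 4
+    * survives.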
+ */ + foreach_in_list(loop_terminator, t, &ls->terminators) { + if (t->iterations < 0) + continue; + + if (t != ls->limiting_terminator) { + t->ir->remove(); + + assert(ls->num_loop_jumps > 0); + ls->num_loop_jumps--; + + this->progress = true; + } + } + + return visit_continue; +} + + +bool +set_loop_controls(exec_list *instructions, loop_state *ls) +{ + loop_control_visitor v(ls); + + v.run(instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/loop_unroll.cpp b/src/compiler/glsl/loop_unroll.cpp new file mode 100644 index 00000000000..aea2743cdb1 --- /dev/null +++ b/src/compiler/glsl/loop_unroll.cpp @@ -0,0 +1,432 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "compiler/glsl_types.h" +#include "loop_analysis.h" +#include "ir_hierarchical_visitor.h" + +#include "main/mtypes.h" + +namespace { + +class loop_unroll_visitor : public ir_hierarchical_visitor { +public: + loop_unroll_visitor(loop_state *state, + const struct gl_shader_compiler_options *options) + { + this->state = state; + this->progress = false; + this->options = options; + } + + virtual ir_visitor_status visit_leave(ir_loop *ir); + void simple_unroll(ir_loop *ir, int iterations); + void complex_unroll(ir_loop *ir, int iterations, + bool continue_from_then_branch); + void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest); + + loop_state *state; + + bool progress; + const struct gl_shader_compiler_options *options; +}; + +} /* anonymous namespace */ + +static bool +is_break(ir_instruction *ir) +{ + return ir != NULL && ir->ir_type == ir_type_loop_jump + && ((ir_loop_jump *) ir)->is_break(); +} + +class loop_unroll_count : public ir_hierarchical_visitor { +public: + int nodes; + bool unsupported_variable_indexing; + bool array_indexed_by_induction_var_with_exact_iterations; + /* If there are nested loops, the node count will be inaccurate. 
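+    * Each inner-loop node is counted once here even though it executes
+    * once per inner iteration, so rather than trust the estimate, the
+    * unroll decision below declines nested loops (unless unrolling is
+    * forced by unsupported variable indexing).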
*/ + bool nested_loop; + + loop_unroll_count(exec_list *list, loop_variable_state *ls, + const struct gl_shader_compiler_options *options) + : ls(ls), options(options) + { + nodes = 0; + nested_loop = false; + unsupported_variable_indexing = false; + array_indexed_by_induction_var_with_exact_iterations = false; + + run(list); + } + + virtual ir_visitor_status visit_enter(ir_assignment *) + { + nodes++; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_expression *) + { + nodes++; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_loop *) + { + nested_loop = true; + return visit_continue; + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *ir) + { + /* Force unroll in case of dynamic indexing with sampler arrays + * when EmitNoIndirectSampler is set. + */ + if (options->EmitNoIndirectSampler) { + if ((ir->array->type->is_array() && + ir->array->type->contains_sampler()) && + !ir->array_index->constant_expression_value()) { + unsupported_variable_indexing = true; + return visit_continue; + } + } + + /* Check for arrays variably-indexed by a loop induction variable. + * Unrolling the loop may convert that access into constant-indexing. + * + * Many drivers don't support particular kinds of variable indexing, + * and have to resort to using lower_variable_index_to_cond_assign to + * handle it. This results in huge amounts of horrible code, so we'd + * like to avoid that if possible. Here, we just note that it will + * happen. + */ + if ((ir->array->type->is_array() || ir->array->type->is_matrix()) && + !ir->array_index->as_constant()) { + ir_variable *array = ir->array->variable_referenced(); + loop_variable *lv = ls->get(ir->array_index->variable_referenced()); + if (array && lv && lv->is_induction_var()) { + /* If an array is indexed by a loop induction variable, and the + * array size is exactly the number of loop iterations, this is + * probably a simple for-loop trying to access each element in + * turn; the application may expect it to be unrolled. + */ + if (int(array->type->length) == ls->limiting_terminator->iterations) + array_indexed_by_induction_var_with_exact_iterations = true; + + switch (array->data.mode) { + case ir_var_auto: + case ir_var_temporary: + case ir_var_const_in: + case ir_var_function_in: + case ir_var_function_out: + case ir_var_function_inout: + if (options->EmitNoIndirectTemp) + unsupported_variable_indexing = true; + break; + case ir_var_uniform: + case ir_var_shader_storage: + if (options->EmitNoIndirectUniform) + unsupported_variable_indexing = true; + break; + case ir_var_shader_in: + if (options->EmitNoIndirectInput) + unsupported_variable_indexing = true; + break; + case ir_var_shader_out: + if (options->EmitNoIndirectOutput) + unsupported_variable_indexing = true; + break; + } + } + } + return visit_continue; + } + +private: + loop_variable_state *ls; + const struct gl_shader_compiler_options *options; +}; + + +/** + * Unroll a loop which does not contain any jumps. For example, if the input + * is: + * + * (loop (...) ...instrs...) + * + * And the iteration count is 3, the output will be: + * + * ...instrs... ...instrs... ...instrs... 
+ */ +void +loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) +{ + void *const mem_ctx = ralloc_parent(ir); + + for (int i = 0; i < iterations; i++) { + exec_list copy_list; + + copy_list.make_empty(); + clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); + + ir->insert_before(©_list); + } + + /* The loop has been replaced by the unrolled copies. Remove the original + * loop from the IR sequence. + */ + ir->remove(); + + this->progress = true; +} + + +/** + * Unroll a loop whose last statement is an ir_if. If \c + * continue_from_then_branch is true, the loop is repeated only when the + * "then" branch of the if is taken; otherwise it is repeated only when the + * "else" branch of the if is taken. + * + * For example, if the input is: + * + * (loop (...) + * ...body... + * (if (cond) + * (...then_instrs...) + * (...else_instrs...))) + * + * And the iteration count is 3, and \c continue_from_then_branch is true, + * then the output will be: + * + * ...body... + * (if (cond) + * (...then_instrs... + * ...body... + * (if (cond) + * (...then_instrs... + * ...body... + * (if (cond) + * (...then_instrs...) + * (...else_instrs...))) + * (...else_instrs...))) + * (...else_instrs)) + */ +void +loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations, + bool continue_from_then_branch) +{ + void *const mem_ctx = ralloc_parent(ir); + ir_instruction *ir_to_replace = ir; + + for (int i = 0; i < iterations; i++) { + exec_list copy_list; + + copy_list.make_empty(); + clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); + + ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if(); + assert(ir_if != NULL); + + ir_to_replace->insert_before(©_list); + ir_to_replace->remove(); + + /* placeholder that will be removed in the next iteration */ + ir_to_replace = + new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue); + + exec_list *const list = (continue_from_then_branch) + ? &ir_if->then_instructions : &ir_if->else_instructions; + + list->push_tail(ir_to_replace); + } + + ir_to_replace->remove(); + + this->progress = true; +} + + +/** + * Move all of the instructions which follow \c ir_if to the end of + * \c splice_dest. + * + * For example, in the code snippet: + * + * (if (cond) + * (...then_instructions... + * break) + * (...else_instructions...)) + * ...post_if_instructions... + * + * If \c ir_if points to the "if" instruction, and \c splice_dest points to + * (...else_instructions...), the code snippet is transformed into: + * + * (if (cond) + * (...then_instructions... + * break) + * (...else_instructions... + * ...post_if_instructions...)) + */ +void +loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if, + exec_list *splice_dest) +{ + while (!ir_if->get_next()->is_tail_sentinel()) { + ir_instruction *move_ir = (ir_instruction *) ir_if->get_next(); + + move_ir->remove(); + splice_dest->push_tail(move_ir); + } +} + + +ir_visitor_status +loop_unroll_visitor::visit_leave(ir_loop *ir) +{ + loop_variable_state *const ls = this->state->get(ir); + int iterations; + + /* If we've entered a loop that hasn't been analyzed, something really, + * really bad has happened. + */ + if (ls == NULL) { + assert(ls != NULL); + return visit_continue; + } + + /* Don't try to unroll loops where the number of iterations is not known + * at compile-time. 
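+    * That is the case whenever the analysis pass found no limiting
+    * terminator of the "counter cmp constant" form.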
+ */ + if (ls->limiting_terminator == NULL) + return visit_continue; + + iterations = ls->limiting_terminator->iterations; + + const int max_iterations = options->MaxUnrollIterations; + + /* Don't try to unroll loops that have zillions of iterations either. + */ + if (iterations > max_iterations) + return visit_continue; + + /* Don't try to unroll nested loops and loops with a huge body. + */ + loop_unroll_count count(&ir->body_instructions, ls, options); + + bool loop_too_large = + count.nested_loop || count.nodes * iterations > max_iterations * 5; + + if (loop_too_large && !count.unsupported_variable_indexing && + !count.array_indexed_by_induction_var_with_exact_iterations) + return visit_continue; + + /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps. + * We'll be removing the limiting terminator before we unroll. + */ + assert(ls->num_loop_jumps > 0); + unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1; + + if (predicted_num_loop_jumps > 1) + return visit_continue; + + if (predicted_num_loop_jumps == 0) { + ls->limiting_terminator->ir->remove(); + simple_unroll(ir, iterations); + return visit_continue; + } + + ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail(); + assert(last_ir != NULL); + + if (is_break(last_ir)) { + /* If the only loop-jump is a break at the end of the loop, the loop + * will execute exactly once. Remove the break and use the simple + * unroller with an iteration count of 1. + */ + last_ir->remove(); + + ls->limiting_terminator->ir->remove(); + simple_unroll(ir, 1); + return visit_continue; + } + + /* recognize loops in the form produced by ir_lower_jumps */ + foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) { + /* Skip the limiting terminator, since it will go away when we + * unroll. + */ + if (cur_ir == ls->limiting_terminator->ir) + continue; + + ir_if *ir_if = cur_ir->as_if(); + if (ir_if != NULL) { + /* Determine which if-statement branch, if any, ends with a + * break. The branch that did *not* have the break will get a + * temporary continue inserted in each iteration of the loop + * unroll. + * + * Note that since ls->num_loop_jumps is <= 1, it is impossible + * for both branches to end with a break. + */ + ir_instruction *ir_if_last = + (ir_instruction *) ir_if->then_instructions.get_tail(); + + if (is_break(ir_if_last)) { + ls->limiting_terminator->ir->remove(); + splice_post_if_instructions(ir_if, &ir_if->else_instructions); + ir_if_last->remove(); + complex_unroll(ir, iterations, false); + return visit_continue; + } else { + ir_if_last = + (ir_instruction *) ir_if->else_instructions.get_tail(); + + if (is_break(ir_if_last)) { + ls->limiting_terminator->ir->remove(); + splice_post_if_instructions(ir_if, &ir_if->then_instructions); + ir_if_last->remove(); + complex_unroll(ir, iterations, true); + return visit_continue; + } + } + } + } + + /* Did not find the break statement. It must be in a complex if-nesting, + * so don't try to unroll. 
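+    * (e.g. the break sits inside "if (a) { if (b) break; }", which matches
+    * neither unroll shape handled above)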
+ */ + return visit_continue; +} + + +bool +unroll_loops(exec_list *instructions, loop_state *ls, + const struct gl_shader_compiler_options *options) +{ + loop_unroll_visitor v(ls, options); + + v.run(instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_buffer_access.cpp b/src/compiler/glsl/lower_buffer_access.cpp new file mode 100644 index 00000000000..f8c8d140ea8 --- /dev/null +++ b/src/compiler/glsl/lower_buffer_access.cpp @@ -0,0 +1,490 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_buffer_access.cpp + * + * Helper for IR lowering pass to replace dereferences of buffer object based + * shader variables with intrinsic function calls. + * + * This helper is used by lowering passes for UBOs, SSBOs and compute shader + * shared variables. + */ + +#include "lower_buffer_access.h" +#include "ir_builder.h" +#include "main/macros.h" +#include "util/list.h" +#include "glsl_parser_extras.h" + +using namespace ir_builder; + +namespace lower_buffer_access { + +static inline int +writemask_for_size(unsigned n) +{ + return ((1 << n) - 1); +} + +/** + * Takes a deref and recursively calls itself to break the deref down to the + * point that the reads or writes generated are contiguous scalars or vectors. + */ +void +lower_buffer_access::emit_access(void *mem_ctx, + bool is_write, + ir_dereference *deref, + ir_variable *base_offset, + unsigned int deref_offset, + bool row_major, + int matrix_columns, + unsigned int packing, + unsigned int write_mask) +{ + if (deref->type->is_record()) { + unsigned int field_offset = 0; + + for (unsigned i = 0; i < deref->type->length; i++) { + const struct glsl_struct_field *field = + &deref->type->fields.structure[i]; + ir_dereference *field_deref = + new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), + field->name); + + field_offset = + glsl_align(field_offset, + field->type->std140_base_alignment(row_major)); + + emit_access(mem_ctx, is_write, field_deref, base_offset, + deref_offset + field_offset, + row_major, 1, packing, + writemask_for_size(field_deref->type->vector_elements)); + + field_offset += field->type->std140_size(row_major); + } + return; + } + + if (deref->type->is_array()) { + unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ? 
+ deref->type->fields.array->std430_array_stride(row_major) : + glsl_align(deref->type->fields.array->std140_size(row_major), 16); + + for (unsigned i = 0; i < deref->type->length; i++) { + ir_constant *element = new(mem_ctx) ir_constant(i); + ir_dereference *element_deref = + new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), + element); + emit_access(mem_ctx, is_write, element_deref, base_offset, + deref_offset + i * array_stride, + row_major, 1, packing, + writemask_for_size(element_deref->type->vector_elements)); + } + return; + } + + if (deref->type->is_matrix()) { + for (unsigned i = 0; i < deref->type->matrix_columns; i++) { + ir_constant *col = new(mem_ctx) ir_constant(i); + ir_dereference *col_deref = + new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col); + + if (row_major) { + /* For a row-major matrix, the next column starts at the next + * element. + */ + int size_mul = deref->type->is_double() ? 8 : 4; + emit_access(mem_ctx, is_write, col_deref, base_offset, + deref_offset + i * size_mul, + row_major, deref->type->matrix_columns, packing, + writemask_for_size(col_deref->type->vector_elements)); + } else { + int size_mul; + + /* std430 doesn't round up vec2 size to a vec4 size */ + if (packing == GLSL_INTERFACE_PACKING_STD430 && + deref->type->vector_elements == 2 && + !deref->type->is_double()) { + size_mul = 8; + } else { + /* std140 always rounds the stride of arrays (and matrices) to a + * vec4, so matrices are always 16 between columns/rows. With + * doubles, they will be 32 apart when there are more than 2 rows. + * + * For both std140 and std430, if the member is a + * three-'component vector with components consuming N basic + * machine units, the base alignment is 4N. For vec4, base + * alignment is 4N. + */ + size_mul = (deref->type->is_double() && + deref->type->vector_elements > 2) ? 32 : 16; + } + + emit_access(mem_ctx, is_write, col_deref, base_offset, + deref_offset + i * size_mul, + row_major, deref->type->matrix_columns, packing, + writemask_for_size(col_deref->type->vector_elements)); + } + } + return; + } + + assert(deref->type->is_scalar() || deref->type->is_vector()); + + if (!row_major) { + ir_rvalue *offset = + add(base_offset, new(mem_ctx) ir_constant(deref_offset)); + unsigned mask = + is_write ? write_mask : (1 << deref->type->vector_elements) - 1; + insert_buffer_access(mem_ctx, deref, deref->type, offset, mask, -1); + } else { + unsigned N = deref->type->is_double() ? 8 : 4; + + /* We're dereffing a column out of a row-major matrix, so we + * gather the vector from each stored row. + */ + assert(deref->type->base_type == GLSL_TYPE_FLOAT || + deref->type->base_type == GLSL_TYPE_DOUBLE); + /* Matrices, row_major or not, are stored as if they were + * arrays of vectors of the appropriate size in std140. + * Arrays have their strides rounded up to a vec4, so the + * matrix stride is always 16. However a double matrix may either be 16 + * or 32 depending on the number of columns. + */ + assert(matrix_columns <= 4); + unsigned matrix_stride = 0; + /* Matrix stride for std430 mat2xY matrices are not rounded up to + * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform + * Block Layout": + * + * "2. If the member is a two- or four-component vector with components + * consuming N basic machine units, the base alignment is 2N or 4N, + * respectively." [...] + * "4. 
If the member is an array of scalars or vectors, the base alignment + * and array stride are set to match the base alignment of a single array + * element, according to rules (1), (2), and (3), and rounded up to the + * base alignment of a vec4." [...] + * "7. If the member is a row-major matrix with C columns and R rows, the + * matrix is stored identically to an array of R row vectors with C + * components each, according to rule (4)." [...] + * "When using the std430 storage layout, shader storage blocks will be + * laid out in buffer storage identically to uniform and shader storage + * blocks using the std140 layout, except that the base alignment and + * stride of arrays of scalars and vectors in rule 4 and of structures in + * rule 9 are not rounded up a multiple of the base alignment of a vec4." + */ + if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2) + matrix_stride = 2 * N; + else + matrix_stride = glsl_align(matrix_columns * N, 16); + + const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ? + glsl_type::float_type : glsl_type::double_type; + + for (unsigned i = 0; i < deref->type->vector_elements; i++) { + ir_rvalue *chan_offset = + add(base_offset, + new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); + if (!is_write || ((1U << i) & write_mask)) + insert_buffer_access(mem_ctx, deref, deref_type, chan_offset, + (1U << i), i); + } + } +} + +/** + * Determine if a thing being dereferenced is row-major + * + * There is some trickery here. + * + * If the thing being dereferenced is a member of uniform block \b without an + * instance name, then the name of the \c ir_variable is the field name of an + * interface type. If this field is row-major, then the thing referenced is + * row-major. + * + * If the thing being dereferenced is a member of uniform block \b with an + * instance name, then the last dereference in the tree will be an + * \c ir_dereference_record. If that record field is row-major, then the + * thing referenced is row-major. 
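+ *
+ * For example, given "layout(row_major) uniform B { mat4 m; } b;", the
+ * dereference tree for "b.m[1]" contains an \c ir_dereference_record whose
+ * field was resolved to GLSL_MATRIX_LAYOUT_ROW_MAJOR at HIR time, so this
+ * function returns true.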
+ */ +bool +lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref) +{ + bool matrix = false; + const ir_rvalue *ir = deref; + + while (true) { + matrix = matrix || ir->type->without_array()->is_matrix(); + + switch (ir->ir_type) { + case ir_type_dereference_array: { + const ir_dereference_array *const array_deref = + (const ir_dereference_array *) ir; + + ir = array_deref->array; + break; + } + + case ir_type_dereference_record: { + const ir_dereference_record *const record_deref = + (const ir_dereference_record *) ir; + + ir = record_deref->record; + + const int idx = ir->type->field_index(record_deref->field); + assert(idx >= 0); + + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout); + + switch (matrix_layout) { + case GLSL_MATRIX_LAYOUT_INHERITED: + break; + case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: + return false; + case GLSL_MATRIX_LAYOUT_ROW_MAJOR: + return matrix || deref->type->without_array()->is_record(); + } + + break; + } + + case ir_type_dereference_variable: { + const ir_dereference_variable *const var_deref = + (const ir_dereference_variable *) ir; + + const enum glsl_matrix_layout matrix_layout = + glsl_matrix_layout(var_deref->var->data.matrix_layout); + + switch (matrix_layout) { + case GLSL_MATRIX_LAYOUT_INHERITED: { + /* For interface block matrix variables we handle inherited + * layouts at HIR generation time, but we don't do that for shared + * variables, which are always column-major + */ + ir_variable *var = deref->variable_referenced(); + assert((var->is_in_buffer_block() && !matrix) || + var->data.mode == ir_var_shader_shared); + return false; + } + case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: + return false; + case GLSL_MATRIX_LAYOUT_ROW_MAJOR: + return matrix || deref->type->without_array()->is_record(); + } + + unreachable("invalid matrix layout"); + break; + } + + default: + return false; + } + } + + /* The tree must have ended with a dereference that wasn't an + * ir_dereference_variable. That is invalid, and it should be impossible. + */ + unreachable("invalid dereference tree"); + return false; +} + +/** + * This function initializes various values that will be used later by + * emit_access when actually emitting loads or stores. + * + * Note: const_offset is an input as well as an output, clients must + * initialize it to the offset of the variable in the underlying block, and + * this function will adjust it by adding the constant offset of the member + * being accessed into that variable. + */ +void +lower_buffer_access::setup_buffer_access(void *mem_ctx, + ir_variable *var, + ir_rvalue *deref, + ir_rvalue **offset, + unsigned *const_offset, + bool *row_major, + int *matrix_columns, + unsigned packing) +{ + *offset = new(mem_ctx) ir_constant(0u); + *row_major = is_dereferenced_thing_row_major(deref); + *matrix_columns = 1; + + /* Calculate the offset to the start of the region of the UBO + * dereferenced by *rvalue. This may be a variable offset if an + * array dereference has a variable index. + */ + while (deref) { + switch (deref->ir_type) { + case ir_type_dereference_variable: { + deref = NULL; + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *deref_array = (ir_dereference_array *) deref; + unsigned array_stride; + if (deref_array->array->type->is_vector()) { + /* We get this when storing or loading a component out of a vector + * with a non-constant index. This happens for v[i] = f where v is + * a vector (or m[i][j] = f where m is a matrix). 
If we don't + * lower that here, it gets turned into v = vector_insert(v, i, + * f), which loads the entire vector, modifies one component and + * then write the entire thing back. That breaks if another + * thread or SIMD channel is modifying the same vector. + */ + array_stride = 4; + if (deref_array->array->type->is_double()) + array_stride *= 2; + } else if (deref_array->array->type->is_matrix() && *row_major) { + /* When loading a vector out of a row major matrix, the + * step between the columns (vectors) is the size of a + * float, while the step between the rows (elements of a + * vector) is handled below in emit_ubo_loads. + */ + array_stride = 4; + if (deref_array->array->type->is_double()) + array_stride *= 2; + *matrix_columns = deref_array->array->type->matrix_columns; + } else if (deref_array->type->without_array()->is_interface()) { + /* We're processing an array dereference of an interface instance + * array. The thing being dereferenced *must* be a variable + * dereference because interfaces cannot be embedded in other + * types. In terms of calculating the offsets for the lowering + * pass, we don't care about the array index. All elements of an + * interface instance array will have the same offsets relative to + * the base of the block that backs them. + */ + deref = deref_array->array->as_dereference(); + break; + } else { + /* Whether or not the field is row-major (because it might be a + * bvec2 or something) does not affect the array itself. We need + * to know whether an array element in its entirety is row-major. + */ + const bool array_row_major = + is_dereferenced_thing_row_major(deref_array); + + /* The array type will give the correct interface packing + * information + */ + if (packing == GLSL_INTERFACE_PACKING_STD430) { + array_stride = deref_array->type->std430_array_stride(array_row_major); + } else { + array_stride = deref_array->type->std140_size(array_row_major); + array_stride = glsl_align(array_stride, 16); + } + } + + ir_rvalue *array_index = deref_array->array_index; + if (array_index->type->base_type == GLSL_TYPE_INT) + array_index = i2u(array_index); + + ir_constant *const_index = + array_index->constant_expression_value(NULL); + if (const_index) { + *const_offset += array_stride * const_index->value.u[0]; + } else { + *offset = add(*offset, + mul(array_index, + new(mem_ctx) ir_constant(array_stride))); + } + deref = deref_array->array->as_dereference(); + break; + } + + case ir_type_dereference_record: { + ir_dereference_record *deref_record = (ir_dereference_record *) deref; + const glsl_type *struct_type = deref_record->record->type; + unsigned intra_struct_offset = 0; + + for (unsigned int i = 0; i < struct_type->length; i++) { + const glsl_type *type = struct_type->fields.structure[i].type; + + ir_dereference_record *field_deref = new(mem_ctx) + ir_dereference_record(deref_record->record, + struct_type->fields.structure[i].name); + const bool field_row_major = + is_dereferenced_thing_row_major(field_deref); + + ralloc_free(field_deref); + + unsigned field_align = 0; + + if (packing == GLSL_INTERFACE_PACKING_STD430) + field_align = type->std430_base_alignment(field_row_major); + else + field_align = type->std140_base_alignment(field_row_major); + + intra_struct_offset = glsl_align(intra_struct_offset, field_align); + + if (strcmp(struct_type->fields.structure[i].name, + deref_record->field) == 0) + break; + + if (packing == GLSL_INTERFACE_PACKING_STD430) + intra_struct_offset += type->std430_size(field_row_major); + else + intra_struct_offset 
+= type->std140_size(field_row_major); + + /* If the field just examined was itself a structure, apply rule + * #9: + * + * "The structure may have padding at the end; the base offset + * of the member following the sub-structure is rounded up to + * the next multiple of the base alignment of the structure." + */ + if (type->without_array()->is_record()) { + intra_struct_offset = glsl_align(intra_struct_offset, + field_align); + + } + } + + *const_offset += intra_struct_offset; + deref = deref_record->record->as_dereference(); + break; + } + + case ir_type_swizzle: { + ir_swizzle *deref_swizzle = (ir_swizzle *) deref; + + assert(deref_swizzle->mask.num_components == 1); + + *const_offset += deref_swizzle->mask.x * sizeof(int); + deref = deref_swizzle->val->as_dereference(); + break; + } + + default: + assert(!"not reached"); + deref = NULL; + break; + } + } +} + +} /* namespace lower_buffer_access */ diff --git a/src/compiler/glsl/lower_buffer_access.h b/src/compiler/glsl/lower_buffer_access.h new file mode 100644 index 00000000000..cc4614e9792 --- /dev/null +++ b/src/compiler/glsl/lower_buffer_access.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_buffer_access.h + * + * Helper for IR lowering pass to replace dereferences of buffer object based + * shader variables with intrinsic function calls. + * + * This helper is used by lowering passes for UBOs, SSBOs and compute shader + * shared variables. 
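+ *
+ * In outline (a sketch only; the surrounding pass supplies var, deref,
+ * packing and the write mask), a subclass overrides insert_buffer_access()
+ * and then drives the helper per dereference roughly as:
+ *
+ *    setup_buffer_access(mem_ctx, var, deref, &offset, &const_offset,
+ *                        &row_major, &matrix_columns, packing);
+ *    emit_access(mem_ctx, is_write, deref, base_offset, const_offset,
+ *                row_major, matrix_columns, packing, write_mask);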
+ */ + +#pragma once +#ifndef LOWER_BUFFER_ACCESS_H +#define LOWER_BUFFER_ACCESS_H + +#include "ir.h" +#include "ir_rvalue_visitor.h" + +namespace lower_buffer_access { + +class lower_buffer_access : public ir_rvalue_enter_visitor { +public: + virtual void + insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel) = 0; + + void emit_access(void *mem_ctx, bool is_write, ir_dereference *deref, + ir_variable *base_offset, unsigned int deref_offset, + bool row_major, int matrix_columns, + unsigned int packing, unsigned int write_mask); + + bool is_dereferenced_thing_row_major(const ir_rvalue *deref); + + void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref, + ir_rvalue **offset, unsigned *const_offset, + bool *row_major, int *matrix_columns, + unsigned packing); +}; + +} /* namespace lower_buffer_access */ + +#endif /* LOWER_BUFFER_ACCESS_H */ diff --git a/src/compiler/glsl/lower_clip_distance.cpp b/src/compiler/glsl/lower_clip_distance.cpp new file mode 100644 index 00000000000..1ada215796c --- /dev/null +++ b/src/compiler/glsl/lower_clip_distance.cpp @@ -0,0 +1,574 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_clip_distance.cpp + * + * This pass accounts for the difference between the way + * gl_ClipDistance is declared in standard GLSL (as an array of + * floats), and the way it is frequently implemented in hardware (as + * a pair of vec4s, with four clip distances packed into each). + * + * The declaration of gl_ClipDistance is replaced with a declaration + * of gl_ClipDistanceMESA, and any references to gl_ClipDistance are + * translated to refer to gl_ClipDistanceMESA with the appropriate + * swizzling of array indices. For instance: + * + * gl_ClipDistance[i] + * + * is translated into: + * + * gl_ClipDistanceMESA[i>>2][i&3] + * + * Since some hardware may not internally represent gl_ClipDistance as a pair + * of vec4's, this lowering pass is optional. To enable it, set the + * LowerClipDistance flag in gl_shader_compiler_options to true. 
+ */ + +#include "glsl_symbol_table.h" +#include "ir_rvalue_visitor.h" +#include "ir.h" +#include "program/prog_instruction.h" /* For WRITEMASK_* */ + +namespace { + +class lower_clip_distance_visitor : public ir_rvalue_visitor { +public: + explicit lower_clip_distance_visitor(gl_shader_stage shader_stage) + : progress(false), old_clip_distance_out_var(NULL), + old_clip_distance_in_var(NULL), new_clip_distance_out_var(NULL), + new_clip_distance_in_var(NULL), shader_stage(shader_stage) + { + } + + virtual ir_visitor_status visit(ir_variable *); + void create_indices(ir_rvalue*, ir_rvalue *&, ir_rvalue *&); + bool is_clip_distance_vec8(ir_rvalue *ir); + ir_rvalue *lower_clip_distance_vec8(ir_rvalue *ir); + virtual ir_visitor_status visit_leave(ir_assignment *); + void visit_new_assignment(ir_assignment *ir); + virtual ir_visitor_status visit_leave(ir_call *); + + virtual void handle_rvalue(ir_rvalue **rvalue); + + void fix_lhs(ir_assignment *); + + bool progress; + + /** + * Pointer to the declaration of gl_ClipDistance, if found. + * + * Note: + * + * - the in_var is for geometry and both tessellation shader inputs only. + * + * - since gl_ClipDistance is available in tessellation control, + * tessellation evaluation and geometry shaders as both an input + * and an output, it's possible for both old_clip_distance_out_var + * and old_clip_distance_in_var to be non-null. + */ + ir_variable *old_clip_distance_out_var; + ir_variable *old_clip_distance_in_var; + + /** + * Pointer to the newly-created gl_ClipDistanceMESA variable. + */ + ir_variable *new_clip_distance_out_var; + ir_variable *new_clip_distance_in_var; + + /** + * Type of shader we are compiling (e.g. MESA_SHADER_VERTEX) + */ + const gl_shader_stage shader_stage; +}; + +} /* anonymous namespace */ + +/** + * Replace any declaration of gl_ClipDistance as an array of floats with a + * declaration of gl_ClipDistanceMESA as an array of vec4's. + */ +ir_visitor_status +lower_clip_distance_visitor::visit(ir_variable *ir) +{ + ir_variable **old_var; + ir_variable **new_var; + + if (!ir->name || strcmp(ir->name, "gl_ClipDistance") != 0) + return visit_continue; + assert (ir->type->is_array()); + + if (ir->data.mode == ir_var_shader_out) { + if (this->old_clip_distance_out_var) + return visit_continue; + old_var = &old_clip_distance_out_var; + new_var = &new_clip_distance_out_var; + } else if (ir->data.mode == ir_var_shader_in) { + if (this->old_clip_distance_in_var) + return visit_continue; + old_var = &old_clip_distance_in_var; + new_var = &new_clip_distance_in_var; + } else { + unreachable("not reached"); + } + + this->progress = true; + + if (!ir->type->fields.array->is_array()) { + /* gl_ClipDistance (used for vertex, tessellation evaluation and + * geometry output, and fragment input). 
+ */ + assert((ir->data.mode == ir_var_shader_in && + this->shader_stage == MESA_SHADER_FRAGMENT) || + (ir->data.mode == ir_var_shader_out && + (this->shader_stage == MESA_SHADER_VERTEX || + this->shader_stage == MESA_SHADER_TESS_EVAL || + this->shader_stage == MESA_SHADER_GEOMETRY))); + + *old_var = ir; + assert (ir->type->fields.array == glsl_type::float_type); + unsigned new_size = (ir->type->array_size() + 3) / 4; + + /* Clone the old var so that we inherit all of its properties */ + *new_var = ir->clone(ralloc_parent(ir), NULL); + + /* And change the properties that we need to change */ + (*new_var)->name = ralloc_strdup(*new_var, "gl_ClipDistanceMESA"); + (*new_var)->type = glsl_type::get_array_instance(glsl_type::vec4_type, + new_size); + (*new_var)->data.max_array_access = ir->data.max_array_access / 4; + + ir->replace_with(*new_var); + } else { + /* 2D gl_ClipDistance (used for tessellation control, tessellation + * evaluation and geometry input, and tessellation control output). + */ + assert((ir->data.mode == ir_var_shader_in && + (this->shader_stage == MESA_SHADER_GEOMETRY || + this->shader_stage == MESA_SHADER_TESS_EVAL)) || + this->shader_stage == MESA_SHADER_TESS_CTRL); + + *old_var = ir; + assert (ir->type->fields.array->fields.array == glsl_type::float_type); + unsigned new_size = (ir->type->fields.array->array_size() + 3) / 4; + + /* Clone the old var so that we inherit all of its properties */ + *new_var = ir->clone(ralloc_parent(ir), NULL); + + /* And change the properties that we need to change */ + (*new_var)->name = ralloc_strdup(*new_var, "gl_ClipDistanceMESA"); + (*new_var)->type = glsl_type::get_array_instance( + glsl_type::get_array_instance(glsl_type::vec4_type, + new_size), + ir->type->array_size()); + (*new_var)->data.max_array_access = ir->data.max_array_access / 4; + + ir->replace_with(*new_var); + } + + return visit_continue; +} + + +/** + * Create the necessary GLSL rvalues to index into gl_ClipDistanceMESA based + * on the rvalue previously used to index into gl_ClipDistance. + * + * \param array_index Selects one of the vec4's in gl_ClipDistanceMESA + * \param swizzle_index Selects a component within the vec4 selected by + * array_index. + */ +void +lower_clip_distance_visitor::create_indices(ir_rvalue *old_index, + ir_rvalue *&array_index, + ir_rvalue *&swizzle_index) +{ + void *ctx = ralloc_parent(old_index); + + /* Make sure old_index is a signed int so that the bitwise "shift" and + * "and" operations below type check properly. + */ + if (old_index->type != glsl_type::int_type) { + assert (old_index->type == glsl_type::uint_type); + old_index = new(ctx) ir_expression(ir_unop_u2i, old_index); + } + + ir_constant *old_index_constant = old_index->constant_expression_value(); + if (old_index_constant) { + /* gl_ClipDistance is being accessed via a constant index. Don't bother + * creating expressions to calculate the lowered indices. Just create + * constants. + */ + int const_val = old_index_constant->get_int_component(0); + array_index = new(ctx) ir_constant(const_val / 4); + swizzle_index = new(ctx) ir_constant(const_val % 4); + } else { + /* Create a variable to hold the value of old_index (so that we + * don't compute it twice). 
+ */ + ir_variable *old_index_var = new(ctx) ir_variable( + glsl_type::int_type, "clip_distance_index", ir_var_temporary); + this->base_ir->insert_before(old_index_var); + this->base_ir->insert_before(new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(old_index_var), old_index)); + + /* Create the expression clip_distance_index / 4. Do this as a bit + * shift because that's likely to be more efficient. + */ + array_index = new(ctx) ir_expression( + ir_binop_rshift, new(ctx) ir_dereference_variable(old_index_var), + new(ctx) ir_constant(2)); + + /* Create the expression clip_distance_index % 4. Do this as a bitwise + * AND because that's likely to be more efficient. + */ + swizzle_index = new(ctx) ir_expression( + ir_binop_bit_and, new(ctx) ir_dereference_variable(old_index_var), + new(ctx) ir_constant(3)); + } +} + + +/** + * Determine whether the given rvalue describes an array of 8 floats that + * needs to be lowered to an array of 2 vec4's; that is, determine whether it + * matches one of the following patterns: + * + * - gl_ClipDistance (if gl_ClipDistance is 1D) + * - gl_ClipDistance[i] (if gl_ClipDistance is 2D) + */ +bool +lower_clip_distance_visitor::is_clip_distance_vec8(ir_rvalue *ir) +{ + /* Note that geometry shaders contain gl_ClipDistance both as an input + * (which is a 2D array) and an output (which is a 1D array), so it's + * possible for both this->old_clip_distance_out_var and + * this->old_clip_distance_in_var to be non-NULL in the same shader. + */ + + if (!ir->type->is_array()) + return false; + if (ir->type->fields.array != glsl_type::float_type) + return false; + + if (this->old_clip_distance_out_var) { + if (ir->variable_referenced() == this->old_clip_distance_out_var) + return true; + } + if (this->old_clip_distance_in_var) { + assert(this->shader_stage == MESA_SHADER_TESS_CTRL || + this->shader_stage == MESA_SHADER_TESS_EVAL || + this->shader_stage == MESA_SHADER_GEOMETRY || + this->shader_stage == MESA_SHADER_FRAGMENT); + + if (ir->variable_referenced() == this->old_clip_distance_in_var) + return true; + } + return false; +} + + +/** + * If the given ir satisfies is_clip_distance_vec8(), return new ir + * representing its lowered equivalent. That is, map: + * + * - gl_ClipDistance => gl_ClipDistanceMESA (if gl_ClipDistance is 1D) + * - gl_ClipDistance[i] => gl_ClipDistanceMESA[i] (if gl_ClipDistance is 2D) + * + * Otherwise return NULL. 
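The create_indices() arithmetic above is a plain quotient/remainder split. A standalone C++ check (an illustration, not IR code) that the shift and mask forms match division for the non-negative indices involved, and that the (n + 3) / 4 resize used by visit(ir_variable *) is a ceiling division:

    #include <cassert>

    int main() {
       for (int i = 0; i < 64; i++) {
          assert((i >> 2) == i / 4);   // which vec4 the float lives in
          assert((i & 3) == i % 4);    // which component of that vec4
       }
       // Resizing gl_ClipDistance[n] to gl_ClipDistanceMESA[(n + 3) / 4]:
       assert((8 + 3) / 4 == 2);   // 8 floats -> 2 vec4s
       assert((6 + 3) / 4 == 2);   // 6 floats still need 2 vec4s
       return 0;
    }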
+ */ +ir_rvalue * +lower_clip_distance_visitor::lower_clip_distance_vec8(ir_rvalue *ir) +{ + if (!ir->type->is_array()) + return NULL; + if (ir->type->fields.array != glsl_type::float_type) + return NULL; + + ir_variable **new_var = NULL; + if (this->old_clip_distance_out_var) { + if (ir->variable_referenced() == this->old_clip_distance_out_var) + new_var = &this->new_clip_distance_out_var; + } + if (this->old_clip_distance_in_var) { + if (ir->variable_referenced() == this->old_clip_distance_in_var) + new_var = &this->new_clip_distance_in_var; + } + if (new_var == NULL) + return NULL; + + if (ir->as_dereference_variable()) { + return new(ralloc_parent(ir)) ir_dereference_variable(*new_var); + } else { + ir_dereference_array *array_ref = ir->as_dereference_array(); + assert(array_ref); + assert(array_ref->array->as_dereference_variable()); + + return new(ralloc_parent(ir)) + ir_dereference_array(*new_var, array_ref->array_index); + } +} + + +void +lower_clip_distance_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL) + return; + + ir_dereference_array *const array_deref = (*rv)->as_dereference_array(); + if (array_deref == NULL) + return; + + /* Replace any expression that indexes one of the floats in gl_ClipDistance + * with an expression that indexes into one of the vec4's in + * gl_ClipDistanceMESA and accesses the appropriate component. + */ + ir_rvalue *lowered_vec8 = + this->lower_clip_distance_vec8(array_deref->array); + if (lowered_vec8 != NULL) { + this->progress = true; + ir_rvalue *array_index; + ir_rvalue *swizzle_index; + this->create_indices(array_deref->array_index, array_index, swizzle_index); + void *mem_ctx = ralloc_parent(array_deref); + + ir_dereference_array *const new_array_deref = + new(mem_ctx) ir_dereference_array(lowered_vec8, array_index); + + ir_expression *const expr = + new(mem_ctx) ir_expression(ir_binop_vector_extract, + new_array_deref, + swizzle_index); + + *rv = expr; + } +} + +void +lower_clip_distance_visitor::fix_lhs(ir_assignment *ir) +{ + if (ir->lhs->ir_type == ir_type_expression) { + void *mem_ctx = ralloc_parent(ir); + ir_expression *const expr = (ir_expression *) ir->lhs; + + /* The expression must be of the form: + * + * (vector_extract gl_ClipDistanceMESA[i], j). + */ + assert(expr->operation == ir_binop_vector_extract); + assert(expr->operands[0]->ir_type == ir_type_dereference_array); + assert(expr->operands[0]->type == glsl_type::vec4_type); + + ir_dereference *const new_lhs = (ir_dereference *) expr->operands[0]; + ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, + glsl_type::vec4_type, + new_lhs->clone(mem_ctx, NULL), + ir->rhs, + expr->operands[1]); + ir->set_lhs(new_lhs); + ir->write_mask = WRITEMASK_XYZW; + } +} + +/** + * Replace any assignment having the 1D gl_ClipDistance (undereferenced) as + * its LHS or RHS with a sequence of assignments, one for each component of + * the array. Each of these assignments is lowered to refer to + * gl_ClipDistanceMESA as appropriate. + * + * We need to do a similar replacement for 2D gl_ClipDistance, however since + * it's an input, the only case we need to address is where a 1D slice of it + * is the entire RHS of an assignment, e.g.: + * + * foo = gl_in[i].gl_ClipDistance + */ +ir_visitor_status +lower_clip_distance_visitor::visit_leave(ir_assignment *ir) +{ + /* First invoke the base class visitor. This causes handle_rvalue() to be + * called on ir->rhs and ir->condition. 
+ */
+ ir_rvalue_visitor::visit_leave(ir);
+
+ if (this->is_clip_distance_vec8(ir->lhs) ||
+ this->is_clip_distance_vec8(ir->rhs)) {
+ /* LHS or RHS of the assignment is the entire 1D gl_ClipDistance array
+ * (or a 1D slice of a 2D gl_ClipDistance input array). Since we are
+ * reshaping gl_ClipDistance from an array of floats to an array of
+ * vec4's, this isn't going to work as a bulk assignment anymore, so
+ * unroll it to element-by-element assignments and lower each of them.
+ *
+ * Note: to unroll into element-by-element assignments, we need to make
+ * clones of the LHS and RHS. This is safe because expressions and
+ * l-values are side-effect free.
+ */
+ void *ctx = ralloc_parent(ir);
+ int array_size = ir->lhs->type->array_size();
+ for (int i = 0; i < array_size; ++i) {
+ ir_dereference_array *new_lhs = new(ctx) ir_dereference_array(
+ ir->lhs->clone(ctx, NULL), new(ctx) ir_constant(i));
+ ir_dereference_array *new_rhs = new(ctx) ir_dereference_array(
+ ir->rhs->clone(ctx, NULL), new(ctx) ir_constant(i));
+ this->handle_rvalue((ir_rvalue **) &new_rhs);
+
+ /* Handle the LHS after creating the new assignment. This must
+ * happen in this order because handle_rvalue may replace the old LHS
+ * with an ir_expression of ir_binop_vector_extract. Since this is
+ * not a valid l-value, this will cause an assertion in the
+ * ir_assignment constructor to fail.
+ *
+ * If this occurs, replace the mangled LHS with a dereference of the
+ * vector, and replace the RHS with an ir_triop_vector_insert.
+ */
+ ir_assignment *const assign = new(ctx) ir_assignment(new_lhs, new_rhs);
+ this->handle_rvalue((ir_rvalue **) &assign->lhs);
+ this->fix_lhs(assign);
+
+ this->base_ir->insert_before(assign);
+ }
+ ir->remove();
+
+ return visit_continue;
+ }
+
+ /* Handle the LHS as if it were an r-value. Normally
+ * rvalue_visit(ir_assignment *) only visits the RHS, but we need to lower
+ * expressions in the LHS as well.
+ *
+ * This may cause the LHS to get replaced with an ir_expression of
+ * ir_binop_vector_extract. If this occurs, replace it with a dereference
+ * of the vector, and replace the RHS with an ir_triop_vector_insert.
+ */
+ handle_rvalue((ir_rvalue **)&ir->lhs);
+ this->fix_lhs(ir);
+
+ return rvalue_visit(ir);
+}
+
+
+/**
+ * Set up base_ir properly and call visit_leave() on a newly created
+ * ir_assignment node. This is used in cases where we have to insert an
+ * ir_assignment in a place where we know the hierarchical visitor won't see
+ * it.
+ */
+void
+lower_clip_distance_visitor::visit_new_assignment(ir_assignment *ir)
+{
+ ir_instruction *old_base_ir = this->base_ir;
+ this->base_ir = ir;
+ ir->accept(this);
+ this->base_ir = old_base_ir;
+}
+
+
+/**
+ * If a 1D gl_ClipDistance variable appears as an argument in an ir_call
+ * expression, replace it with a temporary variable, and make sure the ir_call
+ * is preceded and/or followed by assignments that copy the contents of the
+ * temporary variable to and/or from gl_ClipDistance. Each of these
+ * assignments is then lowered to refer to gl_ClipDistanceMESA.
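As a concrete model of the element-by-element unrolling in visit_leave(ir_assignment) above, here is a small host-side C++ analogy (hypothetical types, not the IR transformation itself): each scalar element of the old array lands at [i >> 2][i & 3] of the vec4-pair representation:

    #include <cassert>

    struct vec4 { float v[4]; };

    int main() {
       float clip[8] = {0, 1, 2, 3, 4, 5, 6, 7};   // 1D gl_ClipDistance analogue
       vec4 mesa[2] = {};                          // gl_ClipDistanceMESA analogue

       // A bulk copy "mesa = clip" is impossible once the types diverge, so the
       // pass emits one lowered assignment per element instead.
       for (int i = 0; i < 8; i++)
          mesa[i >> 2].v[i & 3] = clip[i];

       assert(mesa[0].v[3] == 3.0f && mesa[1].v[0] == 4.0f);
       return 0;
    }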
+ * + * We need to do a similar replacement for 2D gl_ClipDistance, however since + * it's an input, the only case we need to address is where a 1D slice of it + * is passed as an "in" parameter to an ir_call, e.g.: + * + * foo(gl_in[i].gl_ClipDistance) + */ +ir_visitor_status +lower_clip_distance_visitor::visit_leave(ir_call *ir) +{ + void *ctx = ralloc_parent(ir); + + const exec_node *formal_param_node = ir->callee->parameters.head; + const exec_node *actual_param_node = ir->actual_parameters.head; + while (!actual_param_node->is_tail_sentinel()) { + ir_variable *formal_param = (ir_variable *) formal_param_node; + ir_rvalue *actual_param = (ir_rvalue *) actual_param_node; + + /* Advance formal_param_node and actual_param_node now so that we can + * safely replace actual_param with another node, if necessary, below. + */ + formal_param_node = formal_param_node->next; + actual_param_node = actual_param_node->next; + + if (this->is_clip_distance_vec8(actual_param)) { + /* User is trying to pass the whole 1D gl_ClipDistance array (or a 1D + * slice of a 2D gl_ClipDistance array) to a function call. Since we + * are reshaping gl_ClipDistance from an array of floats to an array + * of vec4's, this isn't going to work anymore, so use a temporary + * array instead. + */ + ir_variable *temp_clip_distance = new(ctx) ir_variable( + actual_param->type, "temp_clip_distance", ir_var_temporary); + this->base_ir->insert_before(temp_clip_distance); + actual_param->replace_with( + new(ctx) ir_dereference_variable(temp_clip_distance)); + if (formal_param->data.mode == ir_var_function_in + || formal_param->data.mode == ir_var_function_inout) { + /* Copy from gl_ClipDistance to the temporary before the call. + * Since we are going to insert this copy before the current + * instruction, we need to visit it afterwards to make sure it + * gets lowered. + */ + ir_assignment *new_assignment = new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(temp_clip_distance), + actual_param->clone(ctx, NULL)); + this->base_ir->insert_before(new_assignment); + this->visit_new_assignment(new_assignment); + } + if (formal_param->data.mode == ir_var_function_out + || formal_param->data.mode == ir_var_function_inout) { + /* Copy from the temporary to gl_ClipDistance after the call. + * Since visit_list_elements() has already decided which + * instruction it's going to visit next, we need to visit + * afterwards to make sure it gets lowered. 
+ */ + ir_assignment *new_assignment = new(ctx) ir_assignment( + actual_param->clone(ctx, NULL), + new(ctx) ir_dereference_variable(temp_clip_distance)); + this->base_ir->insert_after(new_assignment); + this->visit_new_assignment(new_assignment); + } + } + } + + return rvalue_visit(ir); +} + + +bool +lower_clip_distance(gl_shader *shader) +{ + lower_clip_distance_visitor v(shader->Stage); + + visit_list_elements(&v, shader->ir); + + if (v.new_clip_distance_out_var) + shader->symbols->add_variable(v.new_clip_distance_out_var); + if (v.new_clip_distance_in_var) + shader->symbols->add_variable(v.new_clip_distance_in_var); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp b/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp new file mode 100644 index 00000000000..2d024d4b78c --- /dev/null +++ b/src/compiler/glsl/lower_const_arrays_to_uniforms.cpp @@ -0,0 +1,109 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_const_arrays_to_uniforms.cpp + * + * Lower constant arrays to uniform arrays. + * + * Some driver backends (such as i965 and nouveau) don't handle constant arrays + * gracefully, instead treating them as ordinary writable temporary arrays. + * Since arrays can be large, this often means spilling them to scratch memory, + * which usually involves a large number of instructions. + * + * This must be called prior to link_set_uniform_initializers(); we need the + * linker to process our new uniform's constant initializer. + * + * This should be called after optimizations, since those can result in + * splitting and removing arrays that are indexed by constant expressions. 
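A rough host-side analogy for the motivation above (an illustration only, not what the pass emits): data in a writable local must be re-materialized per invocation, while hoisting it to read-only storage with an initializer, like the hidden uniform this pass creates, lets it be stored once and only read:

    #include <cassert>

    // Before lowering: a "constant" array the backend must treat as a writable
    // temporary, re-initialized on every invocation.
    static int lookup_temp(int i) {
       int table[4] = {10, 20, 30, 40};
       return table[i];
    }

    // After lowering: the data lives in read-only storage with an initializer,
    // analogous to the hidden, read-only uniform the pass creates.
    static const int table_uniform[4] = {10, 20, 30, 40};
    static int lookup_uniform(int i) { return table_uniform[i]; }

    int main() {
       for (int i = 0; i < 4; i++)
          assert(lookup_temp(i) == lookup_uniform(i));
       return 0;
    }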
+ */ +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "compiler/glsl_types.h" + +namespace { +class lower_const_array_visitor : public ir_rvalue_visitor { +public: + lower_const_array_visitor(exec_list *insts) + { + instructions = insts; + progress = false; + } + + bool run() + { + visit_list_elements(this, instructions); + return progress; + } + + void handle_rvalue(ir_rvalue **rvalue); + +private: + exec_list *instructions; + bool progress; +}; + +void +lower_const_array_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference_array *dra = (*rvalue)->as_dereference_array(); + if (!dra) + return; + + ir_constant *con = dra->array->as_constant(); + if (!con || !con->type->is_array()) + return; + + void *mem_ctx = ralloc_parent(con); + + char *uniform_name = ralloc_asprintf(mem_ctx, "constarray__%p", dra); + + ir_variable *uni = + new(mem_ctx) ir_variable(con->type, uniform_name, ir_var_uniform); + uni->constant_initializer = con; + uni->constant_value = con; + uni->data.has_initializer = true; + uni->data.how_declared = ir_var_hidden; + uni->data.read_only = true; + /* Assume the whole thing is accessed. */ + uni->data.max_array_access = uni->type->length - 1; + instructions->push_head(uni); + + ir_dereference_variable *varref = new(mem_ctx) ir_dereference_variable(uni); + *rvalue = new(mem_ctx) ir_dereference_array(varref, dra->array_index); + + progress = true; +} + +} /* anonymous namespace */ + +bool +lower_const_arrays_to_uniforms(exec_list *instructions) +{ + lower_const_array_visitor v(instructions); + return v.run(); +} diff --git a/src/compiler/glsl/lower_discard.cpp b/src/compiler/glsl/lower_discard.cpp new file mode 100644 index 00000000000..b62eb20dcb4 --- /dev/null +++ b/src/compiler/glsl/lower_discard.cpp @@ -0,0 +1,201 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_discard.cpp + * + * This pass moves discards out of if-statements. 
+ * + * Case 1: The "then" branch contains a conditional discard: + * --------------------------------------------------------- + * + * if (cond1) { + * s1; + * discard cond2; + * s2; + * } else { + * s3; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * temp = cond2; + * s2; + * } else { + * s3; + * } + * discard temp; + * + * Case 2: The "else" branch contains a conditional discard: + * --------------------------------------------------------- + * + * if (cond1) { + * s1; + * } else { + * s2; + * discard cond2; + * s3; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * } else { + * s2; + * temp = cond2; + * s3; + * } + * discard temp; + * + * Case 3: Both branches contain a conditional discard: + * ---------------------------------------------------- + * + * if (cond1) { + * s1; + * discard cond2; + * s2; + * } else { + * s3; + * discard cond3; + * s4; + * } + * + * becomes: + * + * temp = false; + * if (cond1) { + * s1; + * temp = cond2; + * s2; + * } else { + * s3; + * temp = cond3; + * s4; + * } + * discard temp; + * + * If there are multiple conditional discards, we need only deal with one of + * them. Repeatedly applying this pass will take care of the others. + * + * Unconditional discards are treated as having a condition of "true". + */ + +#include "compiler/glsl_types.h" +#include "ir.h" + +namespace { + +class lower_discard_visitor : public ir_hierarchical_visitor { +public: + lower_discard_visitor() + { + this->progress = false; + } + + ir_visitor_status visit_leave(ir_if *); + + bool progress; +}; + +} /* anonymous namespace */ + +bool +lower_discard(exec_list *instructions) +{ + lower_discard_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} + + +static ir_discard * +find_discard(exec_list &instructions) +{ + foreach_in_list(ir_instruction, node, &instructions) { + ir_discard *ir = node->as_discard(); + if (ir != NULL) + return ir; + } + return NULL; +} + + +static void +replace_discard(void *mem_ctx, ir_variable *var, ir_discard *ir) +{ + ir_rvalue *condition = ir->condition; + + /* For unconditional discards, use "true" as the condition. */ + if (condition == NULL) + condition = new(mem_ctx) ir_constant(true); + + ir_assignment *assignment = + new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(var), + condition, NULL); + + ir->replace_with(assignment); +} + + +ir_visitor_status +lower_discard_visitor::visit_leave(ir_if *ir) +{ + ir_discard *then_discard = find_discard(ir->then_instructions); + ir_discard *else_discard = find_discard(ir->else_instructions); + + if (then_discard == NULL && else_discard == NULL) + return visit_continue; + + void *mem_ctx = ralloc_parent(ir); + + ir_variable *temp = new(mem_ctx) ir_variable(glsl_type::bool_type, + "discard_cond_temp", + ir_var_temporary); + ir_assignment *temp_initializer = + new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(temp), + new(mem_ctx) ir_constant(false), NULL); + + ir->insert_before(temp); + ir->insert_before(temp_initializer); + + if (then_discard != NULL) + replace_discard(mem_ctx, temp, then_discard); + + if (else_discard != NULL) + replace_discard(mem_ctx, temp, else_discard); + + ir_discard *discard = then_discard != NULL ? 
then_discard : else_discard; + discard->condition = new(mem_ctx) ir_dereference_variable(temp); + ir->insert_after(discard); + + this->progress = true; + + return visit_continue; +} diff --git a/src/compiler/glsl/lower_discard_flow.cpp b/src/compiler/glsl/lower_discard_flow.cpp new file mode 100644 index 00000000000..9d0a56b230d --- /dev/null +++ b/src/compiler/glsl/lower_discard_flow.cpp @@ -0,0 +1,155 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** @file lower_discard_flow.cpp + * + * Implements the GLSL 1.30 revision 9 rule for fragment shader + * discard handling: + * + * "Control flow exits the shader, and subsequent implicit or + * explicit derivatives are undefined when this control flow is + * non-uniform (meaning different fragments within the primitive + * take different control paths)." + * + * There seem to be two conflicting things here. "Control flow exits + * the shader" sounds like the discarded fragments should effectively + * jump to the end of the shader, but that breaks derivatives in the + * case of uniform control flow and causes rendering failure in the + * bushes in Unigine Tropics. + * + * The question, then, is whether the intent was "loops stop at the + * point that the only active channels left are discarded pixels" or + * "discarded pixels become inactive at the point that control flow + * returns to the top of a loop". This implements the second + * interpretation. 
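A scalar C++ model of the chosen interpretation (a sketch of the control flow only, from one fragment's point of view): discard merely sets a flag, and the flag is tested where control would return to the top of a loop:

    #include <cassert>

    int main() {
       bool discarded = false;   // initialized at the top of main() by the pass
       int iterations = 0;

       for (int i = 0; i < 10; i++) {
          iterations++;
          if (i == 3)
             discarded = true;   // "discard" becomes an assignment to the flag
          if (discarded)         // generate_discard_break() at the loop bottom
             break;
       }

       // The iteration that executed the discard still ran to the bottom of the
       // loop body; the loop stopped only when control came back around.
       assert(discarded && iterations == 4);
       return 0;
    }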
+ */ + +#include "compiler/glsl_types.h" +#include "ir.h" +#include "program/hash_table.h" + +namespace { + +class lower_discard_flow_visitor : public ir_hierarchical_visitor { +public: + lower_discard_flow_visitor(ir_variable *discarded) + : discarded(discarded) + { + mem_ctx = ralloc_parent(discarded); + } + + ~lower_discard_flow_visitor() + { + } + + ir_visitor_status visit_enter(ir_discard *ir); + ir_visitor_status visit_enter(ir_loop_jump *ir); + ir_visitor_status visit_enter(ir_loop *ir); + ir_visitor_status visit_enter(ir_function_signature *ir); + + ir_if *generate_discard_break(); + + ir_variable *discarded; + void *mem_ctx; +}; + +} /* anonymous namespace */ + +ir_visitor_status +lower_discard_flow_visitor::visit_enter(ir_loop_jump *ir) +{ + if (ir->mode != ir_loop_jump::jump_continue) + return visit_continue; + + ir->insert_before(generate_discard_break()); + + return visit_continue; +} + +ir_visitor_status +lower_discard_flow_visitor::visit_enter(ir_discard *ir) +{ + ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(discarded); + ir_rvalue *rhs; + if (ir->condition) { + /* discarded <- condition, use (var_ref discarded) as the condition */ + rhs = ir->condition; + ir->condition = new(mem_ctx) ir_dereference_variable(discarded); + } else { + rhs = new(mem_ctx) ir_constant(true); + } + ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, rhs); + ir->insert_before(assign); + + return visit_continue; +} + +ir_visitor_status +lower_discard_flow_visitor::visit_enter(ir_loop *ir) +{ + ir->body_instructions.push_tail(generate_discard_break()); + + return visit_continue; +} + +ir_visitor_status +lower_discard_flow_visitor::visit_enter(ir_function_signature *ir) +{ + if (strcmp(ir->function_name(), "main") != 0) + return visit_continue; + + ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(discarded); + ir_rvalue *rhs = new(mem_ctx) ir_constant(false); + ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, rhs); + ir->body.push_head(assign); + + return visit_continue; +} + +ir_if * +lower_discard_flow_visitor::generate_discard_break() +{ + ir_rvalue *if_condition = new(mem_ctx) ir_dereference_variable(discarded); + ir_if *if_inst = new(mem_ctx) ir_if(if_condition); + + ir_instruction *br = new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break); + if_inst->then_instructions.push_tail(br); + + return if_inst; +} + +void +lower_discard_flow(exec_list *ir) +{ + void *mem_ctx = ir; + + ir_variable *var = new(mem_ctx) ir_variable(glsl_type::bool_type, + "discarded", + ir_var_temporary); + + ir->push_head(var); + + lower_discard_flow_visitor v(var); + + visit_list_elements(&v, ir); +} diff --git a/src/compiler/glsl/lower_if_to_cond_assign.cpp b/src/compiler/glsl/lower_if_to_cond_assign.cpp new file mode 100644 index 00000000000..6a7034794b2 --- /dev/null +++ b/src/compiler/glsl/lower_if_to_cond_assign.cpp @@ -0,0 +1,252 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the 
+ * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_if_to_cond_assign.cpp + * + * This attempts to flatten if-statements to conditional assignments for + * GPUs with limited or no flow control support. + * + * It can't handle other control flow being inside of its block, such + * as calls or loops. Hopefully loop unrolling and inlining will take + * care of those. + * + * Drivers for GPUs with no control flow support should simply call + * + * lower_if_to_cond_assign(instructions) + * + * to attempt to flatten all if-statements. + * + * Some GPUs (such as i965 prior to gen6) do support control flow, but have a + * maximum nesting depth N. Drivers for such hardware can call + * + * lower_if_to_cond_assign(instructions, N) + * + * to attempt to flatten any if-statements appearing at depth > N. + */ + +#include "compiler/glsl_types.h" +#include "ir.h" +#include "program/hash_table.h" + +namespace { + +class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor { +public: + ir_if_to_cond_assign_visitor(unsigned max_depth) + { + this->progress = false; + this->max_depth = max_depth; + this->depth = 0; + + this->condition_variables = hash_table_ctor(0, hash_table_pointer_hash, + hash_table_pointer_compare); + } + + ~ir_if_to_cond_assign_visitor() + { + hash_table_dtor(this->condition_variables); + } + + ir_visitor_status visit_enter(ir_if *); + ir_visitor_status visit_leave(ir_if *); + + bool progress; + unsigned max_depth; + unsigned depth; + + struct hash_table *condition_variables; +}; + +} /* anonymous namespace */ + +bool +lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth) +{ + if (max_depth == UINT_MAX) + return false; + + ir_if_to_cond_assign_visitor v(max_depth); + + visit_list_elements(&v, instructions); + + return v.progress; +} + +void +check_control_flow(ir_instruction *ir, void *data) +{ + bool *found_control_flow = (bool *)data; + switch (ir->ir_type) { + case ir_type_call: + case ir_type_discard: + case ir_type_loop: + case ir_type_loop_jump: + case ir_type_return: + *found_control_flow = true; + break; + default: + break; + } +} + +void +move_block_to_cond_assign(void *mem_ctx, + ir_if *if_ir, ir_rvalue *cond_expr, + exec_list *instructions, + struct hash_table *ht) +{ + foreach_in_list_safe(ir_instruction, ir, instructions) { + if (ir->ir_type == ir_type_assignment) { + ir_assignment *assign = (ir_assignment *)ir; + + if (hash_table_find(ht, assign) == NULL) { + hash_table_insert(ht, assign, assign); + + /* If the LHS of the assignment is a condition variable that was + * previously added, insert an additional assignment of false to + * the variable. 
+ */
+ const bool assign_to_cv =
+ hash_table_find(ht, assign->lhs->variable_referenced()) != NULL;
+
+ if (!assign->condition) {
+ if (assign_to_cv) {
+ assign->rhs =
+ new(mem_ctx) ir_expression(ir_binop_logic_and,
+ glsl_type::bool_type,
+ cond_expr->clone(mem_ctx, NULL),
+ assign->rhs);
+ } else {
+ assign->condition = cond_expr->clone(mem_ctx, NULL);
+ }
+ } else {
+ assign->condition =
+ new(mem_ctx) ir_expression(ir_binop_logic_and,
+ glsl_type::bool_type,
+ cond_expr->clone(mem_ctx, NULL),
+ assign->condition);
+ }
+ }
+ }
+
+ /* Now, move from the if block to the block surrounding it. */
+ ir->remove();
+ if_ir->insert_before(ir);
+ }
+}
+
+ir_visitor_status
+ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
+{
+ (void) ir;
+ this->depth++;
+
+ return visit_continue;
+}
+
+ir_visitor_status
+ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
+{
+ /* Only flatten when beyond the GPU's maximum supported nesting depth. */
+ if (this->depth-- <= this->max_depth)
+ return visit_continue;
+
+ bool found_control_flow = false;
+ ir_assignment *assign;
+
+ /* Check that both blocks don't contain anything we can't support. */
+ foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
+ visit_tree(then_ir, check_control_flow, &found_control_flow);
+ }
+ foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
+ visit_tree(else_ir, check_control_flow, &found_control_flow);
+ }
+ if (found_control_flow)
+ return visit_continue;
+
+ void *mem_ctx = ralloc_parent(ir);
+
+ /* Store the condition to a variable. Move all of the instructions from
+ * the then-clause of the if-statement. Use the condition variable as a
+ * condition for all assignments.
+ */
+ ir_variable *const then_var =
+ new(mem_ctx) ir_variable(glsl_type::bool_type,
+ "if_to_cond_assign_then",
+ ir_var_temporary);
+ ir->insert_before(then_var);
+
+ ir_dereference_variable *then_cond =
+ new(mem_ctx) ir_dereference_variable(then_var);
+
+ assign = new(mem_ctx) ir_assignment(then_cond, ir->condition);
+ ir->insert_before(assign);
+
+ move_block_to_cond_assign(mem_ctx, ir, then_cond,
+ &ir->then_instructions,
+ this->condition_variables);
+
+ /* Add the new condition variable to the hash table. This allows us to
+ * find this variable when lowering other (enclosing) if-statements.
+ */
+ hash_table_insert(this->condition_variables, then_var, then_var);
+
+ /* If there are instructions in the else-clause, store the inverse of the
+ * condition to a variable. Move all of the instructions from the
+ * else-clause of the if-statement. Use the (inverse) condition variable
+ * as a condition for all assignments.
+ */
+ if (!ir->else_instructions.is_empty()) {
+ ir_variable *const else_var =
+ new(mem_ctx) ir_variable(glsl_type::bool_type,
+ "if_to_cond_assign_else",
+ ir_var_temporary);
+ ir->insert_before(else_var);
+
+ ir_dereference_variable *else_cond =
+ new(mem_ctx) ir_dereference_variable(else_var);
+
+ ir_rvalue *inverse =
+ new(mem_ctx) ir_expression(ir_unop_logic_not,
+ then_cond->clone(mem_ctx, NULL));
+
+ assign = new(mem_ctx) ir_assignment(else_cond, inverse);
+ ir->insert_before(assign);
+
+ move_block_to_cond_assign(mem_ctx, ir, else_cond,
+ &ir->else_instructions,
+ this->condition_variables);
+
+ /* Add the new condition variable to the hash table. This allows us to
+ * find this variable when lowering other (enclosing) if-statements.
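A scalar C++ model of the flattening (a hedged sketch; it ignores the hash-table bookkeeping for nested conditions): both clauses execute unconditionally, and every write is predicated on the stored condition variable:

    #include <cassert>

    // Scalar model of the flattening: both clauses run, and every write is
    // guarded by the condition variable the pass stores before removing the ir_if.
    static int flattened(bool cond, int a, int b) {
       bool then_cond = cond;           // "if_to_cond_assign_then"
       bool else_cond = !then_cond;     // "if_to_cond_assign_else"
       int out = 0;
       out = then_cond ? a : out;       // then-clause assignment, predicated
       out = else_cond ? b : out;       // else-clause assignment, predicated
       return out;
    }

    int main() {
       assert(flattened(true, 1, 2) == 1);
       assert(flattened(false, 1, 2) == 2);
       return 0;
    }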
+ */ + hash_table_insert(this->condition_variables, else_var, else_var); + } + + ir->remove(); + + this->progress = true; + + return visit_continue; +} diff --git a/src/compiler/glsl/lower_instructions.cpp b/src/compiler/glsl/lower_instructions.cpp new file mode 100644 index 00000000000..1875149b7a6 --- /dev/null +++ b/src/compiler/glsl/lower_instructions.cpp @@ -0,0 +1,1061 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_instructions.cpp + * + * Many GPUs lack native instructions for certain expression operations, and + * must replace them with some other expression tree. This pass lowers some + * of the most common cases, allowing the lowering code to be implemented once + * rather than in each driver backend. + * + * Currently supported transformations: + * - SUB_TO_ADD_NEG + * - DIV_TO_MUL_RCP + * - INT_DIV_TO_MUL_RCP + * - EXP_TO_EXP2 + * - POW_TO_EXP2 + * - LOG_TO_LOG2 + * - MOD_TO_FLOOR + * - LDEXP_TO_ARITH + * - DFREXP_TO_ARITH + * - CARRY_TO_ARITH + * - BORROW_TO_ARITH + * - SAT_TO_CLAMP + * - DOPS_TO_DFRAC + * + * SUB_TO_ADD_NEG: + * --------------- + * Breaks an ir_binop_sub expression down to add(op0, neg(op1)) + * + * This simplifies expression reassociation, and for many backends + * there is no subtract operation separate from adding the negation. + * For backends with native subtract operations, they will probably + * want to recognize add(op0, neg(op1)) or the other way around to + * produce a subtract anyway. + * + * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP: + * -------------------------------------- + * Breaks an ir_binop_div expression down to op0 * (rcp(op1)). + * + * Many GPUs don't have a divide instruction (945 and 965 included), + * but they do have an RCP instruction to compute an approximate + * reciprocal. By breaking the operation down, constant reciprocals + * can get constant folded. + * + * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP + * handles the integer case, converting to and from floating point so that + * RCP is possible. + * + * EXP_TO_EXP2 and LOG_TO_LOG2: + * ---------------------------- + * Many GPUs don't have a base e log or exponent instruction, but they + * do have base 2 versions, so this pass converts exp and log to exp2 + * and log2 operations. + * + * POW_TO_EXP2: + * ----------- + * Many older GPUs don't have an x**y instruction. For these GPUs, convert + * x**y to 2**(y * log2(x)). 
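The three base-2 rewrites rest on ordinary logarithm identities, which a quick host-side check confirms within floating-point tolerance (standalone C++; log2(e) is computed rather than taken from M_LOG2E for portability):

    #include <cassert>
    #include <cmath>

    int main() {
       const double log2_e = std::log2(std::exp(1.0));   // M_LOG2E
       const double x = 2.7, y = 1.3;
       // EXP_TO_EXP2: e^x == 2^(x * log2(e))
       assert(std::fabs(std::exp(x) - std::exp2(x * log2_e)) < 1e-9);
       // LOG_TO_LOG2: ln(x) == log2(x) * (1 / log2(e))
       assert(std::fabs(std::log(x) - std::log2(x) * (1.0 / log2_e)) < 1e-9);
       // POW_TO_EXP2: x^y == 2^(y * log2(x)), valid for x > 0
       assert(std::fabs(std::pow(x, y) - std::exp2(y * std::log2(x))) < 1e-9);
       return 0;
    }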
+ * + * MOD_TO_FLOOR: + * ------------- + * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1)) + * + * Many GPUs don't have a MOD instruction (945 and 965 included), and + * if we have to break it down like this anyway, it gives an + * opportunity to do things like constant fold the (1.0 / op1) easily. + * + * Note: before we used to implement this as op1 * fract(op / op1) but this + * implementation had significant precision errors. + * + * LDEXP_TO_ARITH: + * ------------- + * Converts ir_binop_ldexp to arithmetic and bit operations for float sources. + * + * DFREXP_DLDEXP_TO_ARITH: + * --------------- + * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to + * arithmetic and bit ops for double arguments. + * + * CARRY_TO_ARITH: + * --------------- + * Converts ir_carry into (x + y) < x. + * + * BORROW_TO_ARITH: + * ---------------- + * Converts ir_borrow into (x < y). + * + * SAT_TO_CLAMP: + * ------------- + * Converts ir_unop_saturate into min(max(x, 0.0), 1.0) + * + * DOPS_TO_DFRAC: + * -------------- + * Converts double trunc, ceil, floor, round to fract + */ + +#include "c99_math.h" +#include "program/prog_instruction.h" /* for swizzle */ +#include "compiler/glsl_types.h" +#include "ir.h" +#include "ir_builder.h" +#include "ir_optimization.h" + +using namespace ir_builder; + +namespace { + +class lower_instructions_visitor : public ir_hierarchical_visitor { +public: + lower_instructions_visitor(unsigned lower) + : progress(false), lower(lower) { } + + ir_visitor_status visit_leave(ir_expression *); + + bool progress; + +private: + unsigned lower; /** Bitfield of which operations to lower */ + + void sub_to_add_neg(ir_expression *); + void div_to_mul_rcp(ir_expression *); + void int_div_to_mul_rcp(ir_expression *); + void mod_to_floor(ir_expression *); + void exp_to_exp2(ir_expression *); + void pow_to_exp2(ir_expression *); + void log_to_log2(ir_expression *); + void ldexp_to_arith(ir_expression *); + void dldexp_to_arith(ir_expression *); + void dfrexp_sig_to_arith(ir_expression *); + void dfrexp_exp_to_arith(ir_expression *); + void carry_to_arith(ir_expression *); + void borrow_to_arith(ir_expression *); + void sat_to_clamp(ir_expression *); + void double_dot_to_fma(ir_expression *); + void double_lrp(ir_expression *); + void dceil_to_dfrac(ir_expression *); + void dfloor_to_dfrac(ir_expression *); + void dround_even_to_dfrac(ir_expression *); + void dtrunc_to_dfrac(ir_expression *); + void dsign_to_csel(ir_expression *); +}; + +} /* anonymous namespace */ + +/** + * Determine if a particular type of lowering should occur + */ +#define lowering(x) (this->lower & x) + +bool +lower_instructions(exec_list *instructions, unsigned what_to_lower) +{ + lower_instructions_visitor v(what_to_lower); + + visit_list_elements(&v, instructions); + return v.progress; +} + +void +lower_instructions_visitor::sub_to_add_neg(ir_expression *ir) +{ + ir->operation = ir_binop_add; + ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type, + ir->operands[1], NULL); + this->progress = true; +} + +void +lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir) +{ + assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double()); + + /* New expression for the 1.0 / op1 */ + ir_rvalue *expr; + expr = new(ir) ir_expression(ir_unop_rcp, + ir->operands[1]->type, + ir->operands[1]); + + /* op0 / op1 -> op0 * (1.0 / op1) */ + ir->operation = ir_binop_mul; + ir->operands[1] = expr; + + this->progress = true; +} + +void 
+lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir) +{ + assert(ir->operands[1]->type->is_integer()); + + /* Be careful with integer division -- we need to do it as a + * float and re-truncate, since rcp(n > 1) of an integer would + * just be 0. + */ + ir_rvalue *op0, *op1; + const struct glsl_type *vec_type; + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->operands[1]->type->vector_elements, + ir->operands[1]->type->matrix_columns); + + if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) + op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL); + else + op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL); + + op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL); + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->operands[0]->type->vector_elements, + ir->operands[0]->type->matrix_columns); + + if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) + op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL); + else + op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL); + + vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, + ir->type->vector_elements, + ir->type->matrix_columns); + + op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); + + if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) { + ir->operation = ir_unop_f2i; + ir->operands[0] = op0; + } else { + ir->operation = ir_unop_i2u; + ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0); + } + ir->operands[1] = NULL; + + this->progress = true; +} + +void +lower_instructions_visitor::exp_to_exp2(ir_expression *ir) +{ + ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E)); + + ir->operation = ir_unop_exp2; + ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type, + ir->operands[0], log2_e); + this->progress = true; +} + +void +lower_instructions_visitor::pow_to_exp2(ir_expression *ir) +{ + ir_expression *const log2_x = + new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type, + ir->operands[0]); + + ir->operation = ir_unop_exp2; + ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type, + ir->operands[1], log2_x); + ir->operands[1] = NULL; + this->progress = true; +} + +void +lower_instructions_visitor::log_to_log2(ir_expression *ir) +{ + ir->operation = ir_binop_mul; + ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type, + ir->operands[0], NULL); + ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E)); + this->progress = true; +} + +void +lower_instructions_visitor::mod_to_floor(ir_expression *ir) +{ + ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x", + ir_var_temporary); + ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y", + ir_var_temporary); + this->base_ir->insert_before(x); + this->base_ir->insert_before(y); + + ir_assignment *const assign_x = + new(ir) ir_assignment(new(ir) ir_dereference_variable(x), + ir->operands[0], NULL); + ir_assignment *const assign_y = + new(ir) ir_assignment(new(ir) ir_dereference_variable(y), + ir->operands[1], NULL); + + this->base_ir->insert_before(assign_x); + this->base_ir->insert_before(assign_y); + + ir_expression *const div_expr = + new(ir) ir_expression(ir_binop_div, x->type, + new(ir) ir_dereference_variable(x), + new(ir) ir_dereference_variable(y)); + + /* Don't generate new IR that would need to be lowered in an additional + * pass. 
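mod_to_floor() above assembles the floored-modulus identity mod(x, y) = x - y * floor(x / y); a standalone C++ check of that identity, including the sign-follows-y behavior GLSL's mod() requires:

    #include <cassert>
    #include <cmath>

    // mod(x, y) == x - y * floor(x / y), matching GLSL mod() semantics:
    // the result takes the sign of y.
    static double glsl_mod(double x, double y) {
       return x - y * std::floor(x / y);
    }

    int main() {
       assert(std::fabs(glsl_mod(5.5, 2.0) - 1.5) < 1e-12);
       assert(std::fabs(glsl_mod(-5.5, 2.0) - 0.5) < 1e-12);  // sign follows y
       return 0;
    }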
+ */
+ if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
+ div_to_mul_rcp(div_expr);
+
+ ir_expression *const floor_expr =
+ new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
+
+ if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
+ dfloor_to_dfrac(floor_expr);
+
+ ir_expression *const mul_expr =
+ new(ir) ir_expression(ir_binop_mul,
+ new(ir) ir_dereference_variable(y),
+ floor_expr);
+
+ ir->operation = ir_binop_sub;
+ ir->operands[0] = new(ir) ir_dereference_variable(x);
+ ir->operands[1] = mul_expr;
+ this->progress = true;
+}
+
+void
+lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
+{
+ /* Translates
+ * ir_binop_ldexp x exp
+ * into
+ *
+ * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
+ * resulting_biased_exp = extracted_biased_exp + exp;
+ *
+ * if (resulting_biased_exp < 1) {
+ * return copysign(0.0, x);
+ * }
+ *
+ * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+ * lshift(i2u(resulting_biased_exp), exp_shift));
+ *
+ * which we can't actually implement as such, since the GLSL IR doesn't
+ * have vectorized if-statements. We actually implement it without branches
+ * using conditional-select:
+ *
+ * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
+ * resulting_biased_exp = extracted_biased_exp + exp;
+ *
+ * is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
+ * x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
+ * resulting_biased_exp = csel(is_not_zero_or_underflow,
+ * resulting_biased_exp, 0);
+ *
+ * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
+ * lshift(i2u(resulting_biased_exp), exp_shift));
+ */
+
+ const unsigned vec_elem = ir->type->vector_elements;
+
+ /* Types */
+ const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
+ const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
+
+ /* Constants */
+ ir_constant *zeroi = ir_constant::zero(ir, ivec);
+
+ ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
+
+ ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
+ ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);
+
+ /* Temporary variables */
+ ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
+ ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
+
+ ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
+ ir_var_temporary);
+
+ ir_variable *extracted_biased_exp =
+ new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
+ ir_variable *resulting_biased_exp =
+ new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
+
+ ir_variable *is_not_zero_or_underflow =
+ new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
+
+ ir_instruction &i = *base_ir;
+
+ /* Copy <x> and <exp> arguments. */
+ i.insert_before(x);
+ i.insert_before(assign(x, ir->operands[0]));
+ i.insert_before(exp);
+ i.insert_before(assign(exp, ir->operands[1]));
+
+ /* Extract the biased exponent from <x>. */
+ i.insert_before(extracted_biased_exp);
+ i.insert_before(assign(extracted_biased_exp,
+ rshift(bitcast_f2i(abs(x)), exp_shift)));
+
+ i.insert_before(resulting_biased_exp);
+ i.insert_before(assign(resulting_biased_exp,
+ add(extracted_biased_exp, exp)));
+
+ /* Test if result is ±0.0, subnormal, or underflow by checking if the
+ * resulting biased exponent would be less than 0x1. If so, the result is
+ * 0.0 with the sign of x. (Actually, invert the conditions so that
+ * immediate values are the second arguments, which is better for i965)
+ */
+ i.insert_before(zero_sign_x);
+ i.insert_before(assign(zero_sign_x,
+ bitcast_u2f(bit_and(bitcast_f2u(x), sign_mask))));
+
+ i.insert_before(is_not_zero_or_underflow);
+ i.insert_before(assign(is_not_zero_or_underflow,
+ gequal(resulting_biased_exp,
+ new(ir) ir_constant(0x1, vec_elem))));
+ i.insert_before(assign(x, csel(is_not_zero_or_underflow,
+ x, zero_sign_x)));
+ i.insert_before(assign(resulting_biased_exp,
+ csel(is_not_zero_or_underflow,
+ resulting_biased_exp, zeroi)));
+
+ /* We could test for overflows by checking if the resulting biased exponent
+ * would be greater than 0xFE. Turns out we don't need to because the GLSL
+ * spec says:
+ *
+ * "If this product is too large to be represented in the
+ * floating-point type, the result is undefined."
+ */
+
+ ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL);
+ ir->operation = ir_unop_bitcast_i2f;
+ ir->operands[0] = bitfield_insert(bitcast_f2i(x), resulting_biased_exp,
+ exp_shift_clone, exp_width);
+ ir->operands[1] = NULL;
+
+ this->progress = true;
+}
+
+void
+lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
+{
+ /* See ldexp_to_arith for structure. Uses frexp_exp to extract the exponent
+ * from the significand.
+ */
+
+ const unsigned vec_elem = ir->type->vector_elements;
+
+ /* Types */
+ const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
+ const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
+
+ /* Constants */
+ ir_constant *zeroi = ir_constant::zero(ir, ivec);
+
+ ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
+
+ ir_constant *exp_shift = new(ir) ir_constant(20u);
+ ir_constant *exp_width = new(ir) ir_constant(11u);
+ ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
+
+ /* Temporary variables */
+ ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
+ ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
+
+ ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
+ ir_var_temporary);
+
+ ir_variable *extracted_biased_exp =
+ new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
+ ir_variable *resulting_biased_exp =
+ new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
+
+ ir_variable *is_not_zero_or_underflow =
+ new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
+
+ ir_instruction &i = *base_ir;
+
+ /* Copy <x> and <exp> arguments. */
+ i.insert_before(x);
+ i.insert_before(assign(x, ir->operands[0]));
+ i.insert_before(exp);
+ i.insert_before(assign(exp, ir->operands[1]));
+
+ ir_expression *frexp_exp = expr(ir_unop_frexp_exp, x);
+ if (lowering(DFREXP_DLDEXP_TO_ARITH))
+ dfrexp_exp_to_arith(frexp_exp);
+
+ /* Extract the biased exponent from <x>. */
+ i.insert_before(extracted_biased_exp);
+ i.insert_before(assign(extracted_biased_exp, add(frexp_exp, exp_bias)));
+
+ i.insert_before(resulting_biased_exp);
+ i.insert_before(assign(resulting_biased_exp,
+ add(extracted_biased_exp, exp)));
+
+ /* Test if result is ±0.0, subnormal, or underflow by checking if the
+ * resulting biased exponent would be less than 0x1. If so, the result is
+ * 0.0 with the sign of x. (Actually, invert the conditions so that
+ * immediate values are the second arguments, which is better for i965)
+ * TODO: Implement in a vector fashion.
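The branch-free construction that ldexp_to_arith() describes can be mirrored bit-for-bit on the host. A scalar, float-only C++ sketch (it ignores the overflow case, which the GLSL spec leaves undefined, exactly as the pass does); the mask-and-or at the end plays the role of bitfield_insert:

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static uint32_t f2u(float f) { uint32_t u; std::memcpy(&u, &f, 4); return u; }
    static float u2f(uint32_t u) { float f; std::memcpy(&f, &u, 4); return f; }

    static float ldexp_arith(float x, int exp) {
       // extracted_biased_exp = rshift(bitcast_f2i(abs(x)), 23)
       int extracted_biased_exp = (int)(f2u(std::fabs(x)) >> 23);
       int resulting_biased_exp = extracted_biased_exp + exp;

       // Underflow/zero: keep only the sign of x, zero everything else (the csel).
       bool is_not_zero_or_underflow = resulting_biased_exp >= 1;
       uint32_t zero_sign_x = f2u(x) & 0x80000000u;
       uint32_t bits = is_not_zero_or_underflow ? f2u(x) : zero_sign_x;
       int new_exp = is_not_zero_or_underflow ? resulting_biased_exp : 0;

       // bitfield_insert of the new exponent into bits [30:23].
       bits = (bits & ~(0xffu << 23)) | ((uint32_t)new_exp << 23);
       return u2f(bits);
    }

    int main() {
       assert(ldexp_arith(1.5f, 4) == std::ldexp(1.5f, 4));   // 24.0
       assert(ldexp_arith(-3.0f, -1) == -1.5f);
       assert(ldexp_arith(1.0f, -200) == 0.0f);               // underflow -> ±0
       return 0;
    }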
+ */ + i.insert_before(zero_sign_x); + for (unsigned elem = 0; elem < vec_elem; elem++) { + ir_variable *unpacked = + new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); + i.insert_before(unpacked); + i.insert_before( + assign(unpacked, + expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1)))); + i.insert_before(assign(unpacked, bit_and(swizzle_y(unpacked), sign_mask->clone(ir, NULL)), + WRITEMASK_Y)); + i.insert_before(assign(unpacked, ir_constant::zero(ir, glsl_type::uint_type), WRITEMASK_X)); + i.insert_before(assign(zero_sign_x, + expr(ir_unop_pack_double_2x32, unpacked), + 1 << elem)); + } + i.insert_before(is_not_zero_or_underflow); + i.insert_before(assign(is_not_zero_or_underflow, + gequal(resulting_biased_exp, + new(ir) ir_constant(0x1, vec_elem)))); + i.insert_before(assign(x, csel(is_not_zero_or_underflow, + x, zero_sign_x))); + i.insert_before(assign(resulting_biased_exp, + csel(is_not_zero_or_underflow, + resulting_biased_exp, zeroi))); + + /* We could test for overflows by checking if the resulting biased exponent + * would be greater than 0xFE. Turns out we don't need to because the GLSL + * spec says: + * + * "If this product is too large to be represented in the + * floating-point type, the result is undefined." + */ + + ir_rvalue *results[4] = {NULL}; + for (unsigned elem = 0; elem < vec_elem; elem++) { + ir_variable *unpacked = + new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); + i.insert_before(unpacked); + i.insert_before( + assign(unpacked, + expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1)))); + + ir_expression *bfi = bitfield_insert( + swizzle_y(unpacked), + i2u(swizzle(resulting_biased_exp, elem, 1)), + exp_shift->clone(ir, NULL), + exp_width->clone(ir, NULL)); + + i.insert_before(assign(unpacked, bfi, WRITEMASK_Y)); + + results[elem] = expr(ir_unop_pack_double_2x32, unpacked); + } + + ir->operation = ir_quadop_vector; + ir->operands[0] = results[0]; + ir->operands[1] = results[1]; + ir->operands[2] = results[2]; + ir->operands[3] = results[3]; + + /* Don't generate new IR that would need to be lowered in an additional + * pass. + */ + + this->progress = true; +} + +void +lower_instructions_visitor::dfrexp_sig_to_arith(ir_expression *ir) +{ + const unsigned vec_elem = ir->type->vector_elements; + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); + + /* Double-precision floating-point values are stored as + * 1 sign bit; + * 11 exponent bits; + * 52 mantissa bits. + * + * We're just extracting the significand here, so we only need to modify + * the upper 32-bit uint. Unfortunately we must extract each double + * independently as there is no vector version of unpackDouble. + */ + + ir_instruction &i = *base_ir; + + ir_variable *is_not_zero = + new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary); + ir_rvalue *results[4] = {NULL}; + + ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem); + i.insert_before(is_not_zero); + i.insert_before( + assign(is_not_zero, + nequal(abs(ir->operands[0]->clone(ir, NULL)), dzero))); + + /* TODO: Remake this as more vector-friendly when int64 support is + * available. + */ + for (unsigned elem = 0; elem < vec_elem; elem++) { + ir_constant *zero = new(ir) ir_constant(0u, 1); + ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x800fffffu, 1); + + /* Exponent of double floating-point values in the range [0.5, 1.0). 
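+ * (0x3fe00000 is the high dword of such a double: biased exponent
+ * 0x3fe = 1022, i.e. an unbiased exponent of -1.)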
*/ + ir_constant *exponent_value = new(ir) ir_constant(0x3fe00000u, 1); + + ir_variable *bits = + new(ir) ir_variable(glsl_type::uint_type, "bits", ir_var_temporary); + ir_variable *unpacked = + new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); + + ir_rvalue *x = swizzle(ir->operands[0]->clone(ir, NULL), elem, 1); + + i.insert_before(bits); + i.insert_before(unpacked); + i.insert_before(assign(unpacked, expr(ir_unop_unpack_double_2x32, x))); + + /* Manipulate the high uint to remove the exponent and replace it with + * either the default exponent or zero. + */ + i.insert_before(assign(bits, swizzle_y(unpacked))); + i.insert_before(assign(bits, bit_and(bits, sign_mantissa_mask))); + i.insert_before(assign(bits, bit_or(bits, + csel(swizzle(is_not_zero, elem, 1), + exponent_value, + zero)))); + i.insert_before(assign(unpacked, bits, WRITEMASK_Y)); + results[elem] = expr(ir_unop_pack_double_2x32, unpacked); + } + + /* Put the dvec back together */ + ir->operation = ir_quadop_vector; + ir->operands[0] = results[0]; + ir->operands[1] = results[1]; + ir->operands[2] = results[2]; + ir->operands[3] = results[3]; + + this->progress = true; +} + +void +lower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir) +{ + const unsigned vec_elem = ir->type->vector_elements; + const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); + const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1); + + /* Double-precision floating-point values are stored as + * 1 sign bit; + * 11 exponent bits; + * 52 mantissa bits. + * + * We're just extracting the exponent here, so we only care about the upper + * 32-bit uint. + */ + + ir_instruction &i = *base_ir; + + ir_variable *is_not_zero = + new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary); + ir_variable *high_words = + new(ir) ir_variable(uvec, "high_words", ir_var_temporary); + ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem); + ir_constant *izero = new(ir) ir_constant(0, vec_elem); + + ir_rvalue *absval = abs(ir->operands[0]); + + i.insert_before(is_not_zero); + i.insert_before(high_words); + i.insert_before(assign(is_not_zero, nequal(absval->clone(ir, NULL), dzero))); + + /* Extract all of the upper uints. */ + for (unsigned elem = 0; elem < vec_elem; elem++) { + ir_rvalue *x = swizzle(absval->clone(ir, NULL), elem, 1); + + i.insert_before(assign(high_words, + swizzle_y(expr(ir_unop_unpack_double_2x32, x)), + 1 << elem)); + + } + ir_constant *exponent_shift = new(ir) ir_constant(20, vec_elem); + ir_constant *exponent_bias = new(ir) ir_constant(-1022, vec_elem); + + /* For non-zero inputs, shift the exponent down and apply bias. 
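+ * For example, x == 1.0 has high word 0x3ff00000, so this computes
+ * (0x3ff00000 >> 20) - 1022 = 1, matching frexp(1.0) == 0.5 * 2^1.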
*/ + ir->operation = ir_triop_csel; + ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero); + ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift))); + ir->operands[2] = izero; + + this->progress = true; +} + +void +lower_instructions_visitor::carry_to_arith(ir_expression *ir) +{ + /* Translates + * ir_binop_carry x y + * into + * sum = ir_binop_add x y + * bcarry = ir_binop_less sum x + * carry = ir_unop_b2i bcarry + */ + + ir_rvalue *x_clone = ir->operands[0]->clone(ir, NULL); + ir->operation = ir_unop_i2u; + ir->operands[0] = b2i(less(add(ir->operands[0], ir->operands[1]), x_clone)); + ir->operands[1] = NULL; + + this->progress = true; +} + +void +lower_instructions_visitor::borrow_to_arith(ir_expression *ir) +{ + /* Translates + * ir_binop_borrow x y + * into + * bcarry = ir_binop_less x y + * carry = ir_unop_b2i bcarry + */ + + ir->operation = ir_unop_i2u; + ir->operands[0] = b2i(less(ir->operands[0], ir->operands[1])); + ir->operands[1] = NULL; + + this->progress = true; +} + +void +lower_instructions_visitor::sat_to_clamp(ir_expression *ir) +{ + /* Translates + * ir_unop_saturate x + * into + * ir_binop_min (ir_binop_max(x, 0.0), 1.0) + */ + + ir->operation = ir_binop_min; + ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type, + ir->operands[0], + new(ir) ir_constant(0.0f)); + ir->operands[1] = new(ir) ir_constant(1.0f); + + this->progress = true; +} + +void +lower_instructions_visitor::double_dot_to_fma(ir_expression *ir) +{ + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type->get_base_type(), "dot_res", + ir_var_temporary); + this->base_ir->insert_before(temp); + + int nc = ir->operands[0]->type->components(); + for (int i = nc - 1; i >= 1; i--) { + ir_assignment *assig; + if (i == (nc - 1)) { + assig = assign(temp, mul(swizzle(ir->operands[0]->clone(ir, NULL), i, 1), + swizzle(ir->operands[1]->clone(ir, NULL), i, 1))); + } else { + assig = assign(temp, fma(swizzle(ir->operands[0]->clone(ir, NULL), i, 1), + swizzle(ir->operands[1]->clone(ir, NULL), i, 1), + temp)); + } + this->base_ir->insert_before(assig); + } + + ir->operation = ir_triop_fma; + ir->operands[0] = swizzle(ir->operands[0], 0, 1); + ir->operands[1] = swizzle(ir->operands[1], 0, 1); + ir->operands[2] = new(ir) ir_dereference_variable(temp); + + this->progress = true; + +} + +void +lower_instructions_visitor::double_lrp(ir_expression *ir) +{ + int swizval; + ir_rvalue *op0 = ir->operands[0], *op2 = ir->operands[2]; + ir_constant *one = new(ir) ir_constant(1.0, op2->type->vector_elements); + + switch (op2->type->vector_elements) { + case 1: + swizval = SWIZZLE_XXXX; + break; + default: + assert(op0->type->vector_elements == op2->type->vector_elements); + swizval = SWIZZLE_XYZW; + break; + } + + ir->operation = ir_triop_fma; + ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements); + ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0); + + this->progress = true; +} + +void +lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir) +{ + /* + * frtemp = frac(x); + * temp = sub(x, frtemp); + * result = temp + ((frtemp != 0.0) ? 
1.0 : 0.0); + */ + ir_instruction &i = *base_ir; + ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); + ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", + ir_var_temporary); + + i.insert_before(frtemp); + i.insert_before(assign(frtemp, fract(ir->operands[0]))); + + ir->operation = ir_binop_add; + ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp); + ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL)); + + this->progress = true; +} + +void +lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir) +{ + /* + * frtemp = frac(x); + * result = sub(x, frtemp); + */ + ir->operation = ir_binop_sub; + ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL)); + + this->progress = true; +} +void +lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir) +{ + /* + * insane but works + * temp = x + 0.5; + * frtemp = frac(temp); + * t2 = sub(temp, frtemp); + * if (frac(x) == 0.5) + * result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1; + * else + * result = t2; + + */ + ir_instruction &i = *base_ir; + ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", + ir_var_temporary); + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", + ir_var_temporary); + ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2", + ir_var_temporary); + ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements); + ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); + ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); + + i.insert_before(temp); + i.insert_before(assign(temp, add(ir->operands[0], p5))); + + i.insert_before(frtemp); + i.insert_before(assign(frtemp, fract(temp))); + + i.insert_before(t2); + i.insert_before(assign(t2, sub(temp, frtemp))); + + ir->operation = ir_triop_csel; + ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)), + p5->clone(ir, NULL)); + ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))), + zero), + t2, + sub(t2, one)); + ir->operands[2] = new(ir) ir_dereference_variable(t2); + + this->progress = true; +} + +void +lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir) +{ + /* + * frtemp = frac(x); + * temp = sub(x, frtemp); + * result = x >= 0 ? temp : temp + (frtemp == 0.0) ? 0 : 1; + */ + ir_rvalue *arg = ir->operands[0]; + ir_instruction &i = *base_ir; + + ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements); + ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements); + ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp", + ir_var_temporary); + ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", + ir_var_temporary); + + i.insert_before(frtemp); + i.insert_before(assign(frtemp, fract(arg))); + i.insert_before(temp); + i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp))); + + ir->operation = ir_triop_csel; + ir->operands[0] = gequal(arg->clone(ir, NULL), zero); + ir->operands[1] = new (ir) ir_dereference_variable(temp); + ir->operands[2] = add(temp, + csel(equal(frtemp, zero->clone(ir, NULL)), + zero->clone(ir, NULL), + one)); + + this->progress = true; +} + +void +lower_instructions_visitor::dsign_to_csel(ir_expression *ir) +{ + /* + * temp = x > 0.0 ? 1.0 : 0.0; + * result = x < 0.0 ? 
-1.0 : temp; + */ + ir_rvalue *arg = ir->operands[0]; + ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements); + ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements); + ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements); + + ir->operation = ir_triop_csel; + ir->operands[0] = less(arg->clone(ir, NULL), + zero->clone(ir, NULL)); + ir->operands[1] = neg_one; + ir->operands[2] = csel(greater(arg, zero), + one, + zero->clone(ir, NULL)); + + this->progress = true; +} + +ir_visitor_status +lower_instructions_visitor::visit_leave(ir_expression *ir) +{ + switch (ir->operation) { + case ir_binop_dot: + if (ir->operands[0]->type->is_double()) + double_dot_to_fma(ir); + break; + case ir_triop_lrp: + if (ir->operands[0]->type->is_double()) + double_lrp(ir); + break; + case ir_binop_sub: + if (lowering(SUB_TO_ADD_NEG)) + sub_to_add_neg(ir); + break; + + case ir_binop_div: + if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP)) + int_div_to_mul_rcp(ir); + else if ((ir->operands[1]->type->is_float() || + ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP)) + div_to_mul_rcp(ir); + break; + + case ir_unop_exp: + if (lowering(EXP_TO_EXP2)) + exp_to_exp2(ir); + break; + + case ir_unop_log: + if (lowering(LOG_TO_LOG2)) + log_to_log2(ir); + break; + + case ir_binop_mod: + if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double())) + mod_to_floor(ir); + break; + + case ir_binop_pow: + if (lowering(POW_TO_EXP2)) + pow_to_exp2(ir); + break; + + case ir_binop_ldexp: + if (lowering(LDEXP_TO_ARITH) && ir->type->is_float()) + ldexp_to_arith(ir); + if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->type->is_double()) + dldexp_to_arith(ir); + break; + + case ir_unop_frexp_exp: + if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double()) + dfrexp_exp_to_arith(ir); + break; + + case ir_unop_frexp_sig: + if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double()) + dfrexp_sig_to_arith(ir); + break; + + case ir_binop_carry: + if (lowering(CARRY_TO_ARITH)) + carry_to_arith(ir); + break; + + case ir_binop_borrow: + if (lowering(BORROW_TO_ARITH)) + borrow_to_arith(ir); + break; + + case ir_unop_saturate: + if (lowering(SAT_TO_CLAMP)) + sat_to_clamp(ir); + break; + + case ir_unop_trunc: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dtrunc_to_dfrac(ir); + break; + + case ir_unop_ceil: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dceil_to_dfrac(ir); + break; + + case ir_unop_floor: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dfloor_to_dfrac(ir); + break; + + case ir_unop_round_even: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dround_even_to_dfrac(ir); + break; + + case ir_unop_sign: + if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) + dsign_to_csel(ir); + break; + default: + return visit_continue; + } + + return visit_continue; +} diff --git a/src/compiler/glsl/lower_jumps.cpp b/src/compiler/glsl/lower_jumps.cpp new file mode 100644 index 00000000000..3cfa2e00ae8 --- /dev/null +++ b/src/compiler/glsl/lower_jumps.cpp @@ -0,0 +1,1022 @@ +/* + * Copyright © 2010 Luca Barbieri + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit 
persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_jumps.cpp
+ *
+ * This pass lowers jumps (break, continue, and return) to if/else structures.
+ *
+ * It can be asked to:
+ * 1. Pull jumps out of ifs where possible
+ * 2. Remove all "continue"s, replacing them with an "execute flag"
+ * 3. Replace all "break" with a single conditional one at the end of the loop
+ * 4. Replace all "return"s with a single return at the end of the function,
+ * for the main function and/or other functions
+ *
+ * Applying this pass gives several benefits:
+ * 1. All functions can be inlined.
+ * 2. nv40 and other pre-DX10 chips without "continue" can be supported
+ * 3. nv30 and other pre-DX10 chips with no control flow at all are better
+ * supported
+ *
+ * Continues are lowered by adding a per-loop "execute flag", initialized to
+ * true, that when cleared inhibits all execution until the end of the loop.
+ *
+ * Breaks are lowered to continues, plus setting a "break flag" that is checked
+ * at the end of the loop, and triggers the unique "break".
+ *
+ * Returns are lowered to breaks/continues, plus adding a "return flag" that
+ * causes loops to break again out of their enclosing loops until all the
+ * loops are exited: then the "execute flag" logic will ignore everything
+ * until the end of the function.
+ *
+ * Note that "continue" and "return" can also be implemented by adding
+ * a dummy loop and using break.
+ * However, this is bad for hardware with limited nesting depth, and
+ * prevents further optimization, and thus is not currently performed.
+ */
+
+#include "compiler/glsl_types.h"
+#include <string.h>
+#include "ir.h"
+
+/**
+ * Enum recording the result of analyzing how control flow might exit
+ * an IR node.
+ *
+ * Each possible value of jump_strength indicates a strictly stronger
+ * guarantee on control flow than the previous value.
+ *
+ * The ordering of strengths roughly reflects the way jumps are
+ * lowered: jumps with higher strength tend to be lowered to jumps of
+ * lower strength. Accordingly, strength is used as a heuristic to
+ * determine which lowering to perform first.
+ *
+ * This enum is also used by get_jump_strength() to categorize
+ * instructions as either break, continue, return, or other. When
+ * used in this fashion, strength_always_clears_execute_flag is not
+ * used.
+ *
+ * The control flow analysis made by this optimization pass makes two
+ * simplifying assumptions:
+ *
+ * - It ignores discard instructions, since they are lowered by a
+ * separate pass (lower_discard.cpp).
+ *
+ * - It assumes it is always possible for control to flow from a loop
+ * to the instruction immediately following it. Technically, this
+ * is not true (since all execution paths through the loop might
+ * jump back to the top, or return from the function).
+ *
+ * Both of these simplifying assumptions are safe, since they can never
+ * cause reachable code to be incorrectly classified as unreachable;
+ * they can only do the opposite.
+ */
+enum jump_strength
+{
+ /**
+ * Analysis has produced no guarantee on how control flow might
+ * exit this IR node. It might fall out the bottom (with or
+ * without clearing the execute flag, if present), or it might
+ * continue to the top of the innermost enclosing loop, break out
+ * of it, or return from the function.
+ */
+ strength_none,
+
+ /**
+ * The only way control can fall out the bottom of this node is
+ * through a code path that clears the execute flag. It might also
+ * continue to the top of the innermost enclosing loop, break out
+ * of it, or return from the function.
+ */
+ strength_always_clears_execute_flag,
+
+ /**
+ * Control cannot fall out the bottom of this node. It might
+ * continue to the top of the innermost enclosing loop, break out
+ * of it, or return from the function.
+ */
+ strength_continue,
+
+ /**
+ * Control cannot fall out the bottom of this node, or continue to
+ * the top of the innermost enclosing loop. It can only break out
+ * of it or return from the function.
+ */
+ strength_break,
+
+ /**
+ * Control cannot fall out the bottom of this node, continue to the
+ * top of the innermost enclosing loop, or break out of it. It can
+ * only return from the function.
+ */
+ strength_return
+};
+
+namespace {
+
+struct block_record
+{
+ /* minimum jump strength (of lowered IR, not pre-lowering IR)
+ *
+ * If the block ends with a jump, this must be the strength of that
+ * jump (otherwise the jump would be dead and would have been deleted
+ * before).
+ *
+ * If the block doesn't end with a jump, it can be different from
+ * strength_none if all paths before it lead to some jump (e.g. an if
+ * with a return in one branch, and a break in the other, while not
+ * lowering them).
+ * Note that identical jumps are usually unified though.
+ */
+ jump_strength min_strength;
+
+ /* can anything clear the execute flag? */
+ bool may_clear_execute_flag;
+
+ block_record()
+ {
+ this->min_strength = strength_none;
+ this->may_clear_execute_flag = false;
+ }
+};
+
+struct loop_record
+{
+ ir_function_signature* signature;
+ ir_loop* loop;
+
+ /* used to avoid lowering the break used to represent lowered breaks */
+ unsigned nesting_depth;
+ bool in_if_at_the_end_of_the_loop;
+
+ bool may_set_return_flag;
+
+ ir_variable* break_flag;
+ ir_variable* execute_flag; /* cleared to emulate continue */
+
+ loop_record(ir_function_signature* p_signature = 0, ir_loop* p_loop = 0)
+ {
+ this->signature = p_signature;
+ this->loop = p_loop;
+ this->nesting_depth = 0;
+ this->in_if_at_the_end_of_the_loop = false;
+ this->may_set_return_flag = false;
+ this->break_flag = 0;
+ this->execute_flag = 0;
+ }
+
+ ir_variable* get_execute_flag()
+ {
+ /* also supported for the "function loop" */
+ if(!this->execute_flag) {
+ exec_list& list = this->loop ?
this->loop->body_instructions : signature->body; + this->execute_flag = new(this->signature) ir_variable(glsl_type::bool_type, "execute_flag", ir_var_temporary); + list.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(execute_flag), new(this->signature) ir_constant(true), 0)); + list.push_head(this->execute_flag); + } + return this->execute_flag; + } + + ir_variable* get_break_flag() + { + assert(this->loop); + if(!this->break_flag) { + this->break_flag = new(this->signature) ir_variable(glsl_type::bool_type, "break_flag", ir_var_temporary); + this->loop->insert_before(this->break_flag); + this->loop->insert_before(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(break_flag), new(this->signature) ir_constant(false), 0)); + } + return this->break_flag; + } +}; + +struct function_record +{ + ir_function_signature* signature; + ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */ + ir_variable* return_value; + bool lower_return; + unsigned nesting_depth; + + function_record(ir_function_signature* p_signature = 0, + bool lower_return = false) + { + this->signature = p_signature; + this->return_flag = 0; + this->return_value = 0; + this->nesting_depth = 0; + this->lower_return = lower_return; + } + + ir_variable* get_return_flag() + { + if(!this->return_flag) { + this->return_flag = new(this->signature) ir_variable(glsl_type::bool_type, "return_flag", ir_var_temporary); + this->signature->body.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(return_flag), new(this->signature) ir_constant(false), 0)); + this->signature->body.push_head(this->return_flag); + } + return this->return_flag; + } + + ir_variable* get_return_value() + { + if(!this->return_value) { + assert(!this->signature->return_type->is_void()); + return_value = new(this->signature) ir_variable(this->signature->return_type, "return_value", ir_var_temporary); + this->signature->body.push_head(this->return_value); + } + return this->return_value; + } +}; + +struct ir_lower_jumps_visitor : public ir_control_flow_visitor { + /* Postconditions: on exit of any visit() function: + * + * ANALYSIS: this->block.min_strength, + * this->block.may_clear_execute_flag, and + * this->loop.may_set_return_flag are updated to reflect the + * characteristics of the visited statement. + * + * DEAD_CODE_ELIMINATION: If this->block.min_strength is not + * strength_none, the visited node is at the end of its exec_list. + * In other words, any unreachable statements that follow the + * visited statement in its exec_list have been removed. + * + * CONTAINED_JUMPS_LOWERED: If the visited statement contains other + * statements, then should_lower_jump() is false for all of the + * return, break, or continue statements it contains. + * + * Note that visiting a jump does not lower it. That is the + * responsibility of the statement (or function signature) that + * contains the jump. 
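+ *
+ * As an illustrative sketch (assuming lower_continue is set), a
+ * loop body of the form
+ *
+ *    a; if (c) continue; b;
+ *
+ * is rewritten by this pass to
+ *
+ *    execute_flag = true; a;
+ *    if (c) execute_flag = false;
+ *    if (execute_flag) { b; }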
+ */ + + bool progress; + + struct function_record function; + struct loop_record loop; + struct block_record block; + + bool pull_out_jumps; + bool lower_continue; + bool lower_break; + bool lower_sub_return; + bool lower_main_return; + + ir_lower_jumps_visitor() + : progress(false), + pull_out_jumps(false), + lower_continue(false), + lower_break(false), + lower_sub_return(false), + lower_main_return(false) + { + } + + void truncate_after_instruction(exec_node *ir) + { + if (!ir) + return; + + while (!ir->get_next()->is_tail_sentinel()) { + ((ir_instruction *)ir->get_next())->remove(); + this->progress = true; + } + } + + void move_outer_block_inside(ir_instruction *ir, exec_list *inner_block) + { + while (!ir->get_next()->is_tail_sentinel()) { + ir_instruction *move_ir = (ir_instruction *)ir->get_next(); + + move_ir->remove(); + inner_block->push_tail(move_ir); + } + } + + /** + * Insert the instructions necessary to lower a return statement, + * before the given return instruction. + */ + void insert_lowered_return(ir_return *ir) + { + ir_variable* return_flag = this->function.get_return_flag(); + if(!this->function.signature->return_type->is_void()) { + ir_variable* return_value = this->function.get_return_value(); + ir->insert_before( + new(ir) ir_assignment( + new (ir) ir_dereference_variable(return_value), + ir->value)); + } + ir->insert_before( + new(ir) ir_assignment( + new (ir) ir_dereference_variable(return_flag), + new (ir) ir_constant(true))); + this->loop.may_set_return_flag = true; + } + + /** + * If the given instruction is a return, lower it to instructions + * that store the return value (if there is one), set the return + * flag, and then break. + * + * It is safe to pass NULL to this function. + */ + void lower_return_unconditionally(ir_instruction *ir) + { + if (get_jump_strength(ir) != strength_return) { + return; + } + insert_lowered_return((ir_return*)ir); + ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); + } + + /** + * Create the necessary instruction to replace a break instruction. + */ + ir_instruction *create_lowered_break() + { + void *ctx = this->function.signature; + return new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(this->loop.get_break_flag()), + new(ctx) ir_constant(true), + 0); + } + + /** + * If the given instruction is a break, lower it to an instruction + * that sets the break flag, without consulting + * should_lower_jump(). + * + * It is safe to pass NULL to this function. + */ + void lower_break_unconditionally(ir_instruction *ir) + { + if (get_jump_strength(ir) != strength_break) { + return; + } + ir->replace_with(create_lowered_break()); + } + + /** + * If the block ends in a conditional or unconditional break, lower + * it, even though should_lower_jump() says it needn't be lowered. + */ + void lower_final_breaks(exec_list *block) + { + ir_instruction *ir = (ir_instruction *) block->get_tail(); + lower_break_unconditionally(ir); + ir_if *ir_if = ir->as_if(); + if (ir_if) { + lower_break_unconditionally( + (ir_instruction *) ir_if->then_instructions.get_tail()); + lower_break_unconditionally( + (ir_instruction *) ir_if->else_instructions.get_tail()); + } + } + + virtual void visit(class ir_loop_jump * ir) + { + /* Eliminate all instructions after each one, since they are + * unreachable. This satisfies the DEAD_CODE_ELIMINATION + * postcondition. + */ + truncate_after_instruction(ir); + + /* Set this->block.min_strength based on this instruction. This + * satisfies the ANALYSIS postcondition. 
It is not necessary to + * update this->block.may_clear_execute_flag or + * this->loop.may_set_return_flag, because an unlowered jump + * instruction can't change any flags. + */ + this->block.min_strength = ir->is_break() ? strength_break : strength_continue; + + /* The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because jump statements can't contain other + * statements. + */ + } + + virtual void visit(class ir_return * ir) + { + /* Eliminate all instructions after each one, since they are + * unreachable. This satisfies the DEAD_CODE_ELIMINATION + * postcondition. + */ + truncate_after_instruction(ir); + + /* Set this->block.min_strength based on this instruction. This + * satisfies the ANALYSIS postcondition. It is not necessary to + * update this->block.may_clear_execute_flag or + * this->loop.may_set_return_flag, because an unlowered return + * instruction can't change any flags. + */ + this->block.min_strength = strength_return; + + /* The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because jump statements can't contain other + * statements. + */ + } + + virtual void visit(class ir_discard * ir) + { + /* Nothing needs to be done. The ANALYSIS and + * DEAD_CODE_ELIMINATION postconditions are already satisfied, + * because discard statements are ignored by this optimization + * pass. The CONTAINED_JUMPS_LOWERED postcondition is already + * satisfied, because discard statements can't contain other + * statements. + */ + (void) ir; + } + + enum jump_strength get_jump_strength(ir_instruction* ir) + { + if(!ir) + return strength_none; + else if(ir->ir_type == ir_type_loop_jump) { + if(((ir_loop_jump*)ir)->is_break()) + return strength_break; + else + return strength_continue; + } else if(ir->ir_type == ir_type_return) + return strength_return; + else + return strength_none; + } + + bool should_lower_jump(ir_jump* ir) + { + unsigned strength = get_jump_strength(ir); + bool lower; + switch(strength) + { + case strength_none: + lower = false; /* don't change this, code relies on it */ + break; + case strength_continue: + lower = lower_continue; + break; + case strength_break: + assert(this->loop.loop); + /* never lower "canonical break" */ + if(ir->get_next()->is_tail_sentinel() && (this->loop.nesting_depth == 0 + || (this->loop.nesting_depth == 1 && this->loop.in_if_at_the_end_of_the_loop))) + lower = false; + else + lower = lower_break; + break; + case strength_return: + /* never lower return at the end of a this->function */ + if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel()) + lower = false; + else + lower = this->function.lower_return; + break; + } + return lower; + } + + block_record visit_block(exec_list* list) + { + /* Note: since visiting a node may change that node's next + * pointer, we can't use visit_exec_list(), because + * visit_exec_list() caches the node's next pointer before + * visiting it. So we use foreach_in_list() instead. + * + * foreach_in_list() isn't safe if the node being visited gets + * removed, but fortunately this visitor doesn't do that. 
+ */ + + block_record saved_block = this->block; + this->block = block_record(); + foreach_in_list(ir_instruction, node, list) { + node->accept(this); + } + block_record ret = this->block; + this->block = saved_block; + return ret; + } + + virtual void visit(ir_if *ir) + { + if(this->loop.nesting_depth == 0 && ir->get_next()->is_tail_sentinel()) + this->loop.in_if_at_the_end_of_the_loop = true; + + ++this->function.nesting_depth; + ++this->loop.nesting_depth; + + block_record block_records[2]; + ir_jump* jumps[2]; + + /* Recursively lower nested jumps. This satisfies the + * CONTAINED_JUMPS_LOWERED postcondition, except in the case of + * unconditional jumps at the end of ir->then_instructions and + * ir->else_instructions, which are handled below. + */ + block_records[0] = visit_block(&ir->then_instructions); + block_records[1] = visit_block(&ir->else_instructions); + +retry: /* we get here if we put code after the if inside a branch */ + + /* Determine which of ir->then_instructions and + * ir->else_instructions end with an unconditional jump. + */ + for(unsigned i = 0; i < 2; ++i) { + exec_list& list = i ? ir->else_instructions : ir->then_instructions; + jumps[i] = 0; + if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail())) + jumps[i] = (ir_jump*)list.get_tail(); + } + + /* Loop until we have satisfied the CONTAINED_JUMPS_LOWERED + * postcondition by lowering jumps in both then_instructions and + * else_instructions. + */ + for(;;) { + /* Determine the types of the jumps that terminate + * ir->then_instructions and ir->else_instructions. + */ + jump_strength jump_strengths[2]; + + for(unsigned i = 0; i < 2; ++i) { + if(jumps[i]) { + jump_strengths[i] = block_records[i].min_strength; + assert(jump_strengths[i] == get_jump_strength(jumps[i])); + } else + jump_strengths[i] = strength_none; + } + + /* If both code paths end in a jump, and the jumps are the + * same, and we are pulling out jumps, replace them with a + * single jump that comes after the if instruction. The new + * jump will be visited next, and it will be lowered if + * necessary by the loop or conditional that encloses it. + */ + if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) { + bool unify = true; + if(jump_strengths[0] == strength_continue) + ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_continue)); + else if(jump_strengths[0] == strength_break) + ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); + /* FINISHME: unify returns with identical expressions */ + else if(jump_strengths[0] == strength_return && this->function.signature->return_type->is_void()) + ir->insert_after(new(ir) ir_return(NULL)); + else + unify = false; + + if(unify) { + jumps[0]->remove(); + jumps[1]->remove(); + this->progress = true; + + /* Update jumps[] to reflect the fact that the jumps + * are gone, and update block_records[] to reflect the + * fact that control can now flow to the next + * instruction. + */ + jumps[0] = 0; + jumps[1] = 0; + block_records[0].min_strength = strength_none; + block_records[1].min_strength = strength_none; + + /* The CONTAINED_JUMPS_LOWERED postcondition is now + * satisfied, so we can break out of the loop. 
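+ * (For instance, "if (c) break; else break;" becomes an empty
+ * if/else followed by a single unconditional break.)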
+ */
+ break;
+ }
+ }
+
+ /* lower a jump: if both need to be lowered, start with the strongest one, so that
+ * we might later unify the lowered version with the other one
+ */
+ bool should_lower[2];
+ for(unsigned i = 0; i < 2; ++i)
+ should_lower[i] = should_lower_jump(jumps[i]);
+
+ int lower;
+ if(should_lower[1] && should_lower[0])
+ lower = jump_strengths[1] > jump_strengths[0];
+ else if(should_lower[0])
+ lower = 0;
+ else if(should_lower[1])
+ lower = 1;
+ else
+ /* Neither code path ends in a jump that needs to be
+ * lowered, so the CONTAINED_JUMPS_LOWERED postcondition
+ * is satisfied and we can break out of the loop.
+ */
+ break;
+
+ if(jump_strengths[lower] == strength_return) {
+ /* To lower a return, we create a return flag (if the
+ * function doesn't have one already) and add instructions
+ * that: 1. store the return value (if this function has a
+ * non-void return) and 2. set the return flag
+ */
+ insert_lowered_return((ir_return*)jumps[lower]);
+ if(this->loop.loop) {
+ /* If we are in a loop, replace the return instruction
+ * with a break instruction, and then loop so that the
+ * break instruction can be lowered if necessary.
+ */
+ ir_loop_jump* lowered = 0;
+ lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+ /* Note: we must update block_records and jumps to
+ * reflect the fact that the control path has been
+ * altered from a return to a break.
+ */
+ block_records[lower].min_strength = strength_break;
+ jumps[lower]->replace_with(lowered);
+ jumps[lower] = lowered;
+ } else {
+ /* If we are not in a loop, we then proceed as we would
+ * for a continue statement (set the execute flag to
+ * false to prevent the rest of the function from
+ * executing).
+ */
+ goto lower_continue;
+ }
+ this->progress = true;
+ } else if(jump_strengths[lower] == strength_break) {
+ /* To lower a break, we create a break flag (if the loop
+ * doesn't have one already) and add an instruction that
+ * sets it.
+ *
+ * Then we proceed as we would for a continue statement
+ * (set the execute flag to false to prevent the rest of
+ * the loop body from executing).
+ *
+ * The visit() function for the loop will ensure that the
+ * break flag is checked after executing the loop body.
+ */
+ jumps[lower]->insert_before(create_lowered_break());
+ goto lower_continue;
+ } else if(jump_strengths[lower] == strength_continue) {
+lower_continue:
+ /* To lower a continue, we create an execute flag (if the
+ * loop doesn't have one already) and replace the continue
+ * with an instruction that clears it.
+ *
+ * Note that this code path gets exercised when lowering
+ * return statements that are not inside a loop, so
+ * this->loop must be initialized even outside of loops.
+ */
+ ir_variable* execute_flag = this->loop.get_execute_flag();
+ jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0));
+ /* Note: we must update block_records and jumps to reflect
+ * the fact that the control path has been altered to an
+ * instruction that clears the execute flag.
+ */
+ jumps[lower] = 0;
+ block_records[lower].min_strength = strength_always_clears_execute_flag;
+ block_records[lower].may_clear_execute_flag = true;
+ this->progress = true;
+
+ /* Let the loop run again, in case the other branch of the
+ * if needs to be lowered too.
+ */ + } + } + + /* move out a jump out if possible */ + if(pull_out_jumps) { + /* If one of the branches ends in a jump, and control cannot + * fall out the bottom of the other branch, then we can move + * the jump after the if. + * + * Set move_out to the branch we are moving a jump out of. + */ + int move_out = -1; + if(jumps[0] && block_records[1].min_strength >= strength_continue) + move_out = 0; + else if(jumps[1] && block_records[0].min_strength >= strength_continue) + move_out = 1; + + if(move_out >= 0) + { + jumps[move_out]->remove(); + ir->insert_after(jumps[move_out]); + /* Note: we must update block_records and jumps to reflect + * the fact that the jump has been moved out of the if. + */ + jumps[move_out] = 0; + block_records[move_out].min_strength = strength_none; + this->progress = true; + } + } + + /* Now satisfy the ANALYSIS postcondition by setting + * this->block.min_strength and + * this->block.may_clear_execute_flag based on the + * characteristics of the two branches. + */ + if(block_records[0].min_strength < block_records[1].min_strength) + this->block.min_strength = block_records[0].min_strength; + else + this->block.min_strength = block_records[1].min_strength; + this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag; + + /* Now we need to clean up the instructions that follow the + * if. + * + * If those instructions are unreachable, then satisfy the + * DEAD_CODE_ELIMINATION postcondition by eliminating them. + * Otherwise that postcondition is already satisfied. + */ + if(this->block.min_strength) + truncate_after_instruction(ir); + else if(this->block.may_clear_execute_flag) + { + /* If the "if" instruction might clear the execute flag, then + * we need to guard any instructions that follow so that they + * are only executed if the execute flag is set. + * + * If one of the branches of the "if" always clears the + * execute flag, and the other branch never clears it, then + * this is easy: just move all the instructions following the + * "if" into the branch that never clears it. + */ + int move_into = -1; + if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag) + move_into = 1; + else if(block_records[1].min_strength && !block_records[0].may_clear_execute_flag) + move_into = 0; + + if(move_into >= 0) { + assert(!block_records[move_into].min_strength && !block_records[move_into].may_clear_execute_flag); /* otherwise, we just truncated */ + + exec_list* list = move_into ? &ir->else_instructions : &ir->then_instructions; + exec_node* next = ir->get_next(); + if(!next->is_tail_sentinel()) { + move_outer_block_inside(ir, list); + + /* If any instructions moved, then we need to visit + * them (since they are now inside the "if"). Since + * block_records[move_into] is in its default state + * (see assertion above), we can safely replace + * block_records[move_into] with the result of this + * analysis. + */ + exec_list list; + list.head = next; + block_records[move_into] = visit_block(&list); + + /* + * Then we need to re-start our jump lowering, since one + * of the instructions we moved might be a jump that + * needs to be lowered. + */ + this->progress = true; + goto retry; + } + } else { + /* If we get here, then the simple case didn't apply; we + * need to actually guard the instructions that follow. 
+ *
+ * To avoid creating unnecessarily-deep nesting, first
+ * look through the instructions that follow and unwrap
+ * any instructions that are already wrapped in the
+ * appropriate guard.
+ */
+ ir_instruction* ir_after;
+ for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();)
+ {
+ ir_if* ir_if = ir_after->as_if();
+ if(ir_if && ir_if->else_instructions.is_empty()) {
+ ir_dereference_variable* ir_if_cond_deref = ir_if->condition->as_dereference_variable();
+ if(ir_if_cond_deref && ir_if_cond_deref->var == this->loop.execute_flag) {
+ ir_instruction* ir_next = (ir_instruction*)ir_after->get_next();
+ ir_after->insert_before(&ir_if->then_instructions);
+ ir_after->remove();
+ ir_after = ir_next;
+ continue;
+ }
+ }
+ ir_after = (ir_instruction*)ir_after->get_next();
+
+ /* only set this if we find any unprotected instruction */
+ this->progress = true;
+ }
+
+ /* Then, wrap all the instructions that follow in a single
+ * guard.
+ */
+ if(!ir->get_next()->is_tail_sentinel()) {
+ assert(this->loop.execute_flag);
+ ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag));
+ move_outer_block_inside(ir, &if_execute->then_instructions);
+ ir->insert_after(if_execute);
+ }
+ }
+ }
+ --this->loop.nesting_depth;
+ --this->function.nesting_depth;
+ }
+
+ virtual void visit(ir_loop *ir)
+ {
+ /* Visit the body of the loop, with a fresh data structure in
+ * this->loop so that the analysis we do here won't bleed into
+ * enclosing loops.
+ *
+ * We assume that all code after a loop is reachable from the
+ * loop (see comments on enum jump_strength), so the
+ * DEAD_CODE_ELIMINATION postcondition is automatically
+ * satisfied, as is the block.min_strength portion of the
+ * ANALYSIS postcondition.
+ *
+ * The block.may_clear_execute_flag portion of the ANALYSIS
+ * postcondition is automatically satisfied because execute
+ * flags do not propagate outside of loops.
+ *
+ * The loop.may_set_return_flag portion of the ANALYSIS
+ * postcondition is handled below.
+ */
+ ++this->function.nesting_depth;
+ loop_record saved_loop = this->loop;
+ this->loop = loop_record(this->function.signature, ir);
+
+ /* Recursively lower nested jumps. This satisfies the
+ * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
+ * an unconditional continue or return at the bottom of the
+ * loop, which are handled below.
+ */
+ block_record body = visit_block(&ir->body_instructions);
+
+ /* If the loop ends in an unconditional continue, eliminate it
+ * because it is redundant.
+ */
+ ir_instruction *ir_last
+ = (ir_instruction *) ir->body_instructions.get_tail();
+ if (get_jump_strength(ir_last) == strength_continue) {
+ ir_last->remove();
+ }
+
+ /* If the loop ends in an unconditional return, and we are
+ * lowering returns, lower it.
+ */
+ if (this->function.lower_return)
+ lower_return_unconditionally(ir_last);
+
+ if(body.min_strength >= strength_break) {
+ /* FINISHME: If the min_strength of the loop body is
+ * strength_break or strength_return, that means that it
+ * isn't a loop at all, since control flow always leaves the
+ * body of the loop via break or return. In principle the
+ * loop could be eliminated in this case. This optimization
+ * is not implemented yet.
+ */ + } + + if(this->loop.break_flag) { + /* We only get here if we are lowering breaks */ + assert (lower_break); + + /* If a break flag was generated while visiting the body of + * the loop, then at least one break was lowered, so we need + * to generate an if statement at the end of the loop that + * does a "break" if the break flag is set. The break we + * generate won't violate the CONTAINED_JUMPS_LOWERED + * postcondition, because should_lower_jump() always returns + * false for a break that happens at the end of a loop. + * + * However, if the loop already ends in a conditional or + * unconditional break, then we need to lower that break, + * because it won't be at the end of the loop anymore. + */ + lower_final_breaks(&ir->body_instructions); + + ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag)); + break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); + ir->body_instructions.push_tail(break_if); + } + + /* If the body of the loop may set the return flag, then at + * least one return was lowered to a break, so we need to ensure + * that the return flag is checked after the body of the loop is + * executed. + */ + if(this->loop.may_set_return_flag) { + assert(this->function.return_flag); + /* Generate the if statement to check the return flag */ + ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag)); + /* Note: we also need to propagate the knowledge that the + * return flag may get set to the outer context. This + * satisfies the loop.may_set_return_flag part of the + * ANALYSIS postcondition. + */ + saved_loop.may_set_return_flag = true; + if(saved_loop.loop) + /* If this loop is nested inside another one, then the if + * statement that we generated should break out of that + * loop if the return flag is set. Caller will lower that + * break statement if necessary. + */ + return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); + else + /* Otherwise, all we need to do is ensure that the + * instructions that follow are only executed if the + * return flag is clear. We can do that by moving those + * instructions into the else clause of the generated if + * statement. + */ + move_outer_block_inside(ir, &return_if->else_instructions); + ir->insert_after(return_if); + } + + this->loop = saved_loop; + --this->function.nesting_depth; + } + + virtual void visit(ir_function_signature *ir) + { + /* these are not strictly necessary */ + assert(!this->function.signature); + assert(!this->loop.loop); + + bool lower_return; + if (strcmp(ir->function_name(), "main") == 0) + lower_return = lower_main_return; + else + lower_return = lower_sub_return; + + function_record saved_function = this->function; + loop_record saved_loop = this->loop; + this->function = function_record(ir, lower_return); + this->loop = loop_record(ir); + + assert(!this->loop.loop); + + /* Visit the body of the function to lower any jumps that occur + * in it, except possibly an unconditional return statement at + * the end of it. + */ + visit_block(&ir->body); + + /* If the body ended in an unconditional return of non-void, + * then we don't need to lower it because it's the one canonical + * return. + * + * If the body ended in a return of void, eliminate it because + * it is redundant. 
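+ * (For non-void functions whose returns were lowered, the single
+ * canonical return of the return_value temporary is appended below.)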
+ */ + if (ir->return_type->is_void() && + get_jump_strength((ir_instruction *) ir->body.get_tail())) { + ir_jump *jump = (ir_jump *) ir->body.get_tail(); + assert (jump->ir_type == ir_type_return); + jump->remove(); + } + + if(this->function.return_value) + ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value))); + + this->loop = saved_loop; + this->function = saved_function; + } + + virtual void visit(class ir_function * ir) + { + visit_block(&ir->signatures); + } +}; + +} /* anonymous namespace */ + +bool +do_lower_jumps(exec_list *instructions, bool pull_out_jumps, bool lower_sub_return, bool lower_main_return, bool lower_continue, bool lower_break) +{ + ir_lower_jumps_visitor v; + v.pull_out_jumps = pull_out_jumps; + v.lower_continue = lower_continue; + v.lower_break = lower_break; + v.lower_sub_return = lower_sub_return; + v.lower_main_return = lower_main_return; + + bool progress_ever = false; + do { + v.progress = false; + visit_exec_list(instructions, &v); + progress_ever = v.progress || progress_ever; + } while (v.progress); + + return progress_ever; +} diff --git a/src/compiler/glsl/lower_mat_op_to_vec.cpp b/src/compiler/glsl/lower_mat_op_to_vec.cpp new file mode 100644 index 00000000000..266fdc6a250 --- /dev/null +++ b/src/compiler/glsl/lower_mat_op_to_vec.cpp @@ -0,0 +1,438 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_mat_op_to_vec.cpp + * + * Breaks matrix operation expressions down to a series of vector operations. + * + * Generally this is how we have to codegen matrix operations for a + * GPU, so this gives us the chance to constant fold operations on a + * column or row. 
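+ *
+ * As an illustrative sketch of the generated code, a mat2 * vec2
+ * multiply m * v is decomposed into
+ *
+ *    result = m[0] * v.x + m[1] * v.y;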
+ */ + +#include "ir.h" +#include "ir_expression_flattening.h" +#include "compiler/glsl_types.h" + +namespace { + +class ir_mat_op_to_vec_visitor : public ir_hierarchical_visitor { +public: + ir_mat_op_to_vec_visitor() + { + this->made_progress = false; + this->mem_ctx = NULL; + } + + ir_visitor_status visit_leave(ir_assignment *); + + ir_dereference *get_column(ir_dereference *val, int col); + ir_rvalue *get_element(ir_dereference *val, int col, int row); + + void do_mul_mat_mat(ir_dereference *result, + ir_dereference *a, ir_dereference *b); + void do_mul_mat_vec(ir_dereference *result, + ir_dereference *a, ir_dereference *b); + void do_mul_vec_mat(ir_dereference *result, + ir_dereference *a, ir_dereference *b); + void do_mul_mat_scalar(ir_dereference *result, + ir_dereference *a, ir_dereference *b); + void do_equal_mat_mat(ir_dereference *result, ir_dereference *a, + ir_dereference *b, bool test_equal); + + void *mem_ctx; + bool made_progress; +}; + +} /* anonymous namespace */ + +static bool +mat_op_to_vec_predicate(ir_instruction *ir) +{ + ir_expression *expr = ir->as_expression(); + unsigned int i; + + if (!expr) + return false; + + for (i = 0; i < expr->get_num_operands(); i++) { + if (expr->operands[i]->type->is_matrix()) + return true; + } + + return false; +} + +bool +do_mat_op_to_vec(exec_list *instructions) +{ + ir_mat_op_to_vec_visitor v; + + /* Pull out any matrix expression to a separate assignment to a + * temp. This will make our handling of the breakdown to + * operations on the matrix's vector components much easier. + */ + do_expression_flattening(instructions, mat_op_to_vec_predicate); + + visit_list_elements(&v, instructions); + + return v.made_progress; +} + +ir_rvalue * +ir_mat_op_to_vec_visitor::get_element(ir_dereference *val, int col, int row) +{ + val = get_column(val, col); + + return new(mem_ctx) ir_swizzle(val, row, 0, 0, 0, 1); +} + +ir_dereference * +ir_mat_op_to_vec_visitor::get_column(ir_dereference *val, int row) +{ + val = val->clone(mem_ctx, NULL); + + if (val->type->is_matrix()) { + val = new(mem_ctx) ir_dereference_array(val, + new(mem_ctx) ir_constant(row)); + } + + return val; +} + +void +ir_mat_op_to_vec_visitor::do_mul_mat_mat(ir_dereference *result, + ir_dereference *a, + ir_dereference *b) +{ + unsigned b_col, i; + ir_assignment *assign; + ir_expression *expr; + + for (b_col = 0; b_col < b->type->matrix_columns; b_col++) { + /* first column */ + expr = new(mem_ctx) ir_expression(ir_binop_mul, + get_column(a, 0), + get_element(b, b_col, 0)); + + /* following columns */ + for (i = 1; i < a->type->matrix_columns; i++) { + ir_expression *mul_expr; + + mul_expr = new(mem_ctx) ir_expression(ir_binop_mul, + get_column(a, i), + get_element(b, b_col, i)); + expr = new(mem_ctx) ir_expression(ir_binop_add, + expr, + mul_expr); + } + + assign = new(mem_ctx) ir_assignment(get_column(result, b_col), expr); + base_ir->insert_before(assign); + } +} + +void +ir_mat_op_to_vec_visitor::do_mul_mat_vec(ir_dereference *result, + ir_dereference *a, + ir_dereference *b) +{ + unsigned i; + ir_assignment *assign; + ir_expression *expr; + + /* first column */ + expr = new(mem_ctx) ir_expression(ir_binop_mul, + get_column(a, 0), + get_element(b, 0, 0)); + + /* following columns */ + for (i = 1; i < a->type->matrix_columns; i++) { + ir_expression *mul_expr; + + mul_expr = new(mem_ctx) ir_expression(ir_binop_mul, + get_column(a, i), + get_element(b, 0, i)); + expr = new(mem_ctx) ir_expression(ir_binop_add, expr, mul_expr); + } + + result = result->clone(mem_ctx, NULL); + 
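+ /* Write the accumulated sum of scaled matrix columns to the result
+ * vector.
+ */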
+   assign = new(mem_ctx) ir_assignment(result, expr);
+   base_ir->insert_before(assign);
+}
+
+void
+ir_mat_op_to_vec_visitor::do_mul_vec_mat(ir_dereference *result,
+                                         ir_dereference *a,
+                                         ir_dereference *b)
+{
+   unsigned i;
+
+   for (i = 0; i < b->type->matrix_columns; i++) {
+      ir_rvalue *column_result;
+      ir_expression *column_expr;
+      ir_assignment *column_assign;
+
+      column_result = result->clone(mem_ctx, NULL);
+      column_result = new(mem_ctx) ir_swizzle(column_result, i, 0, 0, 0, 1);
+
+      column_expr = new(mem_ctx) ir_expression(ir_binop_dot,
+                                               a->clone(mem_ctx, NULL),
+                                               get_column(b, i));
+
+      column_assign = new(mem_ctx) ir_assignment(column_result,
+                                                 column_expr);
+      base_ir->insert_before(column_assign);
+   }
+}
+
+void
+ir_mat_op_to_vec_visitor::do_mul_mat_scalar(ir_dereference *result,
+                                            ir_dereference *a,
+                                            ir_dereference *b)
+{
+   unsigned i;
+
+   for (i = 0; i < a->type->matrix_columns; i++) {
+      ir_expression *column_expr;
+      ir_assignment *column_assign;
+
+      column_expr = new(mem_ctx) ir_expression(ir_binop_mul,
+                                               get_column(a, i),
+                                               b->clone(mem_ctx, NULL));
+
+      column_assign = new(mem_ctx) ir_assignment(get_column(result, i),
+                                                 column_expr);
+      base_ir->insert_before(column_assign);
+   }
+}
+
+void
+ir_mat_op_to_vec_visitor::do_equal_mat_mat(ir_dereference *result,
+                                           ir_dereference *a,
+                                           ir_dereference *b,
+                                           bool test_equal)
+{
+   /* This essentially implements the following GLSL:
+    *
+    * bool equal(mat4 a, mat4 b)
+    * {
+    *   return !any(bvec4(a[0] != b[0],
+    *                     a[1] != b[1],
+    *                     a[2] != b[2],
+    *                     a[3] != b[3]));
+    * }
+    *
+    * bool nequal(mat4 a, mat4 b)
+    * {
+    *   return any(bvec4(a[0] != b[0],
+    *                    a[1] != b[1],
+    *                    a[2] != b[2],
+    *                    a[3] != b[3]));
+    * }
+    */
+   const unsigned columns = a->type->matrix_columns;
+   const glsl_type *const bvec_type =
+      glsl_type::get_instance(GLSL_TYPE_BOOL, columns, 1);
+
+   ir_variable *const tmp_bvec =
+      new(this->mem_ctx) ir_variable(bvec_type, "mat_cmp_bvec",
+                                     ir_var_temporary);
+   this->base_ir->insert_before(tmp_bvec);
+
+   for (unsigned i = 0; i < columns; i++) {
+      ir_expression *const cmp =
+         new(this->mem_ctx) ir_expression(ir_binop_any_nequal,
+                                          get_column(a, i),
+                                          get_column(b, i));
+
+      ir_dereference *const lhs =
+         new(this->mem_ctx) ir_dereference_variable(tmp_bvec);
+
+      ir_assignment *const assign =
+         new(this->mem_ctx) ir_assignment(lhs, cmp, NULL, (1U << i));
+
+      this->base_ir->insert_before(assign);
+   }
+
+   ir_rvalue *const val = new(this->mem_ctx) ir_dereference_variable(tmp_bvec);
+   uint8_t vec_elems = val->type->vector_elements;
+   ir_expression *any =
+      new(this->mem_ctx) ir_expression(ir_binop_any_nequal, val,
+                                       new(this->mem_ctx) ir_constant(false,
+                                                                      vec_elems));
+
+   if (test_equal)
+      any = new(this->mem_ctx) ir_expression(ir_unop_logic_not, any);
+
+   ir_assignment *const assign =
+      new(mem_ctx) ir_assignment(result->clone(mem_ctx, NULL), any);
+   base_ir->insert_before(assign);
+}
+
+static bool
+has_matrix_operand(const ir_expression *expr, unsigned &columns)
+{
+   for (unsigned i = 0; i < expr->get_num_operands(); i++) {
+      if (expr->operands[i]->type->is_matrix()) {
+         columns = expr->operands[i]->type->matrix_columns;
+         return true;
+      }
+   }
+
+   return false;
+}
+
+
+ir_visitor_status
+ir_mat_op_to_vec_visitor::visit_leave(ir_assignment *orig_assign)
+{
+   ir_expression *orig_expr = orig_assign->rhs->as_expression();
+   unsigned int i, matrix_columns = 1;
+   ir_dereference *op[2];
+
+   if (!orig_expr)
+      return visit_continue;
+
+   if (!has_matrix_operand(orig_expr, matrix_columns))
+      return visit_continue;
+
+   assert(orig_expr->get_num_operands() <= 2);
+
+   mem_ctx = ralloc_parent(orig_assign);
+
+   ir_dereference_variable *result =
+      orig_assign->lhs->as_dereference_variable();
+   assert(result);
+
+   /* Store the expression operands in temps so we can use them
+    * multiple times.
+    */
+   for (i = 0; i < orig_expr->get_num_operands(); i++) {
+      ir_assignment *assign;
+      ir_dereference *deref = orig_expr->operands[i]->as_dereference();
+
+      /* Avoid making a temporary when we don't need one, to avoid aliasing. */
+      if (deref &&
+          deref->variable_referenced() != result->variable_referenced()) {
+         op[i] = deref;
+         continue;
+      }
+
+      /* Otherwise, store the operand in a temporary: either it is not a
+       * plain dereference, or it aliases the result.
+       */
+      ir_variable *var = new(mem_ctx) ir_variable(orig_expr->operands[i]->type,
+                                                  "mat_op_to_vec",
+                                                  ir_var_temporary);
+      base_ir->insert_before(var);
+
+      /* Note that we use this dereference for the assignment.  That means
+       * that others that want to use op[i] have to clone the deref.
+       */
+      op[i] = new(mem_ctx) ir_dereference_variable(var);
+      assign = new(mem_ctx) ir_assignment(op[i], orig_expr->operands[i]);
+      base_ir->insert_before(assign);
+   }
+
+   /* OK, time to break down this matrix operation. */
+   switch (orig_expr->operation) {
+   case ir_unop_d2f:
+   case ir_unop_f2d:
+   case ir_unop_neg: {
+      /* Apply the operation to each column. */
+      for (i = 0; i < matrix_columns; i++) {
+         ir_expression *column_expr;
+         ir_assignment *column_assign;
+
+         column_expr = new(mem_ctx) ir_expression(orig_expr->operation,
+                                                  get_column(op[0], i));
+
+         column_assign = new(mem_ctx) ir_assignment(get_column(result, i),
+                                                    column_expr);
+         assert(column_assign->write_mask != 0);
+         base_ir->insert_before(column_assign);
+      }
+      break;
+   }
+   case ir_binop_add:
+   case ir_binop_sub:
+   case ir_binop_div:
+   case ir_binop_mod: {
+      /* For most operations, the matrix version simply walks column-wise
+       * through the operands and applies the operation to each pair of
+       * columns.
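+       *
+       * For example, with two hypothetical mat2 operands, "c = a + b"
+       * expands to:
+       *
+       *    c[0] = a[0] + b[0];
+       *    c[1] = a[1] + b[1];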
+ */ + for (i = 0; i < matrix_columns; i++) { + ir_expression *column_expr; + ir_assignment *column_assign; + + column_expr = new(mem_ctx) ir_expression(orig_expr->operation, + get_column(op[0], i), + get_column(op[1], i)); + + column_assign = new(mem_ctx) ir_assignment(get_column(result, i), + column_expr); + assert(column_assign->write_mask != 0); + base_ir->insert_before(column_assign); + } + break; + } + case ir_binop_mul: + if (op[0]->type->is_matrix()) { + if (op[1]->type->is_matrix()) { + do_mul_mat_mat(result, op[0], op[1]); + } else if (op[1]->type->is_vector()) { + do_mul_mat_vec(result, op[0], op[1]); + } else { + assert(op[1]->type->is_scalar()); + do_mul_mat_scalar(result, op[0], op[1]); + } + } else { + assert(op[1]->type->is_matrix()); + if (op[0]->type->is_vector()) { + do_mul_vec_mat(result, op[0], op[1]); + } else { + assert(op[0]->type->is_scalar()); + do_mul_mat_scalar(result, op[1], op[0]); + } + } + break; + + case ir_binop_all_equal: + case ir_binop_any_nequal: + do_equal_mat_mat(result, op[1], op[0], + (orig_expr->operation == ir_binop_all_equal)); + break; + + default: + printf("FINISHME: Handle matrix operation for %s\n", + orig_expr->operator_string()); + abort(); + } + orig_assign->remove(); + this->made_progress = true; + + return visit_continue; +} diff --git a/src/compiler/glsl/lower_named_interface_blocks.cpp b/src/compiler/glsl/lower_named_interface_blocks.cpp new file mode 100644 index 00000000000..f29eba4f75f --- /dev/null +++ b/src/compiler/glsl/lower_named_interface_blocks.cpp @@ -0,0 +1,280 @@ +/* + * Copyright (c) 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_named_interface_blocks.cpp + * + * This lowering pass converts all interface blocks with instance names + * into interface blocks without an instance name. + * + * For example, the following shader: + * + * out block { + * float block_var; + * } inst_name; + * + * main() + * { + * inst_name.block_var = 0.0; + * } + * + * Is rewritten to: + * + * out block { + * float block_var; + * }; + * + * main() + * { + * block_var = 0.0; + * } + * + * This takes place after the shader code has already been verified with + * the interface name in place. + * + * The linking phase will use the interface block name rather than the + * interface's instance name when linking interfaces. 
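+ *
+ * For example, the flattened variable generated for the block above is
+ * tracked internally under a key of the form "out block.inst_name.block_var"
+ * (mode, block name, instance name, field name), which matches the lookups
+ * performed while rewriting dereferences.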
+ *
+ * This modification to the IR allows the existing dead code elimination
+ * to work with interface blocks without changes.
+ */
+
+#include "glsl_symbol_table.h"
+#include "ir.h"
+#include "ir_optimization.h"
+#include "ir_rvalue_visitor.h"
+#include "program/hash_table.h"
+
+static const glsl_type *
+process_array_type(const glsl_type *type, unsigned idx)
+{
+   const glsl_type *element_type = type->fields.array;
+   if (element_type->is_array()) {
+      const glsl_type *new_array_type = process_array_type(element_type, idx);
+      return glsl_type::get_array_instance(new_array_type, type->length);
+   } else {
+      return glsl_type::get_array_instance(
+         element_type->fields.structure[idx].type, type->length);
+   }
+}
+
+static ir_rvalue *
+process_array_ir(void * const mem_ctx,
+                 ir_dereference_array *deref_array_prev,
+                 ir_rvalue *deref_var)
+{
+   ir_dereference_array *deref_array =
+      deref_array_prev->array->as_dereference_array();
+
+   if (deref_array == NULL) {
+      return new(mem_ctx) ir_dereference_array(deref_var,
+                                               deref_array_prev->array_index);
+   } else {
+      deref_array = (ir_dereference_array *) process_array_ir(mem_ctx,
+                                                              deref_array,
+                                                              deref_var);
+      return new(mem_ctx) ir_dereference_array(deref_array,
+                                               deref_array_prev->array_index);
+   }
+}
+
+namespace {
+
+class flatten_named_interface_blocks_declarations : public ir_rvalue_visitor
+{
+public:
+   void * const mem_ctx;
+   hash_table *interface_namespace;
+
+   flatten_named_interface_blocks_declarations(void *mem_ctx)
+      : mem_ctx(mem_ctx),
+        interface_namespace(NULL)
+   {
+   }
+
+   void run(exec_list *instructions);
+
+   virtual ir_visitor_status visit_leave(ir_assignment *);
+   virtual void handle_rvalue(ir_rvalue **rvalue);
+};
+
+} /* anonymous namespace */
+
+void
+flatten_named_interface_blocks_declarations::run(exec_list *instructions)
+{
+   interface_namespace = hash_table_ctor(0, hash_table_string_hash,
+                                         hash_table_string_compare);
+
+   /* First pass: adjust instance block variables with an instance name
+    * to not have an instance name.
+    *
+    * The interface block variables are stored in the interface_namespace
+    * hash table so they can be used in the second pass.
+    */
+   foreach_in_list_safe(ir_instruction, node, instructions) {
+      ir_variable *var = node->as_variable();
+      if (!var || !var->is_interface_instance())
+         continue;
+
+      /* It should be possible to handle uniforms during this pass,
+       * but this will require changes to the other uniform block
+       * support code.
+       */
+      if (var->data.mode == ir_var_uniform ||
+          var->data.mode == ir_var_shader_storage)
+         continue;
+
+      const glsl_type *iface_t = var->type->without_array();
+      exec_node *insert_pos = var;
+
+      assert(iface_t->is_interface());
+
+      for (unsigned i = 0; i < iface_t->length; i++) {
+         const char *field_name = iface_t->fields.structure[i].name;
+         char *iface_field_name =
+            ralloc_asprintf(mem_ctx, "%s %s.%s.%s",
+                            var->data.mode == ir_var_shader_in ?
+                            "in" : "out",
+                            iface_t->name, var->name, field_name);
+
+         ir_variable *found_var =
+            (ir_variable *) hash_table_find(interface_namespace,
+                                            iface_field_name);
+         if (!found_var) {
+            ir_variable *new_var;
+            char *var_name =
+               ralloc_strdup(mem_ctx, iface_t->fields.structure[i].name);
+            if (!var->type->is_array()) {
+               new_var =
+                  new(mem_ctx) ir_variable(iface_t->fields.structure[i].type,
+                                           var_name,
+                                           (ir_variable_mode) var->data.mode);
+               new_var->data.from_named_ifc_block_nonarray = 1;
+            } else {
+               const glsl_type *new_array_type =
+                  process_array_type(var->type, i);
+               new_var =
+                  new(mem_ctx) ir_variable(new_array_type,
+                                           var_name,
+                                           (ir_variable_mode) var->data.mode);
+               new_var->data.from_named_ifc_block_array = 1;
+            }
+            new_var->data.location = iface_t->fields.structure[i].location;
+            new_var->data.explicit_location = (new_var->data.location >= 0);
+            new_var->data.interpolation =
+               iface_t->fields.structure[i].interpolation;
+            new_var->data.centroid = iface_t->fields.structure[i].centroid;
+            new_var->data.sample = iface_t->fields.structure[i].sample;
+            new_var->data.patch = iface_t->fields.structure[i].patch;
+            new_var->data.stream = var->data.stream;
+            new_var->data.how_declared = var->data.how_declared;
+
+            new_var->init_interface_type(iface_t);
+            hash_table_insert(interface_namespace, new_var,
+                              iface_field_name);
+            insert_pos->insert_after(new_var);
+            insert_pos = new_var;
+         }
+      }
+      var->remove();
+   }
+
+   /* Second pass: visit all ir_dereference_record instances, and if they
+    * reference an interface block, then flatten the reference out.
+    */
+   visit_list_elements(this, instructions);
+   hash_table_dtor(interface_namespace);
+   interface_namespace = NULL;
+}
+
+ir_visitor_status
+flatten_named_interface_blocks_declarations::visit_leave(ir_assignment *ir)
+{
+   ir_dereference_record *lhs_rec = ir->lhs->as_dereference_record();
+   if (lhs_rec) {
+      ir_rvalue *lhs_rec_tmp = lhs_rec;
+      handle_rvalue(&lhs_rec_tmp);
+      if (lhs_rec_tmp != lhs_rec) {
+         ir->set_lhs(lhs_rec_tmp);
+      }
+   }
+   return rvalue_visit(ir);
+}
+
+void
+flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (*rvalue == NULL)
+      return;
+
+   ir_dereference_record *ir = (*rvalue)->as_dereference_record();
+   if (ir == NULL)
+      return;
+
+   ir_variable *var = ir->variable_referenced();
+   if (var == NULL)
+      return;
+
+   if (!var->is_interface_instance())
+      return;
+
+   /* It should be possible to handle uniforms during this pass,
+    * but this will require changes to the other uniform block
+    * support code.
+    */
+   if (var->data.mode == ir_var_uniform ||
+       var->data.mode == ir_var_shader_storage)
+      return;
+
+   if (var->get_interface_type() != NULL) {
+      char *iface_field_name =
+         ralloc_asprintf(mem_ctx, "%s %s.%s.%s",
+                         var->data.mode == ir_var_shader_in ?
"in" : "out", + var->get_interface_type()->name, + var->name, ir->field); + /* Find the variable in the set of flattened interface blocks */ + ir_variable *found_var = + (ir_variable *) hash_table_find(interface_namespace, + iface_field_name); + assert(found_var); + + ir_dereference_variable *deref_var = + new(mem_ctx) ir_dereference_variable(found_var); + + ir_dereference_array *deref_array = + ir->record->as_dereference_array(); + if (deref_array != NULL) { + *rvalue = process_array_ir(mem_ctx, deref_array, + (ir_rvalue *)deref_var); + } else { + *rvalue = deref_var; + } + } +} + +void +lower_named_interface_blocks(void *mem_ctx, gl_shader *shader) +{ + flatten_named_interface_blocks_declarations v_decl(mem_ctx); + v_decl.run(shader->ir); +} + diff --git a/src/compiler/glsl/lower_noise.cpp b/src/compiler/glsl/lower_noise.cpp new file mode 100644 index 00000000000..85f59b675e0 --- /dev/null +++ b/src/compiler/glsl/lower_noise.cpp @@ -0,0 +1,71 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_noise.cpp + * IR lower pass to remove noise opcodes. + * + * \author Ian Romanick + */ + +#include "ir.h" +#include "ir_rvalue_visitor.h" + +class lower_noise_visitor : public ir_rvalue_visitor { +public: + lower_noise_visitor() : progress(false) + { + /* empty */ + } + + void handle_rvalue(ir_rvalue **rvalue) + { + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr) + return; + + /* In the future, ir_unop_noise may be replaced by a call to a function + * that implements noise. No hardware has a noise instruction. 
+ */ + if (expr->operation == ir_unop_noise) { + *rvalue = ir_constant::zero(ralloc_parent(expr), expr->type); + this->progress = true; + } + } + + bool progress; +}; + + +bool +lower_noise(exec_list *instructions) +{ + lower_noise_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_offset_array.cpp b/src/compiler/glsl/lower_offset_array.cpp new file mode 100644 index 00000000000..96486c3a711 --- /dev/null +++ b/src/compiler/glsl/lower_offset_array.cpp @@ -0,0 +1,91 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_offset_array.cpp + * + * IR lower pass to decompose ir_texture ir_tg4 with an array of offsets + * into four ir_tg4s with a single ivec2 offset, select the .w component of each, + * and return those four values packed into a gvec4. 
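+ *
+ * In GLSL terms the lowering is roughly (illustrative names):
+ *
+ *    vec4 r;
+ *    r.x = textureGatherOffset(s, p, offsets[0]).w;
+ *    r.y = textureGatherOffset(s, p, offsets[1]).w;
+ *    r.z = textureGatherOffset(s, p, offsets[2]).w;
+ *    r.w = textureGatherOffset(s, p, offsets[3]).w;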
+ * + * \author Chris Forbes + */ + +#include "compiler/glsl_types.h" +#include "ir.h" +#include "ir_builder.h" +#include "ir_optimization.h" +#include "ir_rvalue_visitor.h" + +using namespace ir_builder; + +class lower_offset_array_visitor : public ir_rvalue_visitor { +public: + lower_offset_array_visitor() + { + progress = false; + } + + void handle_rvalue(ir_rvalue **rv); + + bool progress; +}; + +void +lower_offset_array_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL || (*rv)->ir_type != ir_type_texture) + return; + + ir_texture *ir = (ir_texture *) *rv; + if (ir->op != ir_tg4 || !ir->offset || !ir->offset->type->is_array()) + return; + + void *mem_ctx = ralloc_parent(ir); + + ir_variable *var = + new (mem_ctx) ir_variable(ir->type, "result", ir_var_temporary); + base_ir->insert_before(var); + + for (int i = 0; i < 4; i++) { + ir_texture *tex = ir->clone(mem_ctx, NULL); + tex->offset = new (mem_ctx) ir_dereference_array(tex->offset, + new (mem_ctx) ir_constant(i)); + + base_ir->insert_before(assign(var, swizzle_w(tex), 1 << i)); + } + + *rv = new (mem_ctx) ir_dereference_variable(var); + + progress = true; +} + +bool +lower_offset_arrays(exec_list *instructions) +{ + lower_offset_array_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_output_reads.cpp b/src/compiler/glsl/lower_output_reads.cpp new file mode 100644 index 00000000000..79488df2932 --- /dev/null +++ b/src/compiler/glsl/lower_output_reads.cpp @@ -0,0 +1,178 @@ +/* + * Copyright © 2012 Vincent Lejeune + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" +#include "program/hash_table.h" + +/** + * \file lower_output_reads.cpp + * + * In GLSL, shader output variables (such as varyings) can be both read and + * written. However, on some hardware, reading an output register causes + * trouble. + * + * This pass creates temporary shadow copies of every (used) shader output, + * and replaces all accesses to use those instead. It also adds code to the + * main() function to copy the final values to the actual shader outputs. + */ + +namespace { + +class output_read_remover : public ir_hierarchical_visitor { +protected: + /** + * A hash table mapping from the original ir_variable shader outputs + * (ir_var_shader_out mode) to the new temporaries to be used instead. 
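+    *
+    * For example (illustrative), a shader that writes and then reads
+    * "out vec4 color" has both accesses redirected to a "color" temporary,
+    * with a final "color = temporary;" copy appended at the end of main().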
+    */
+   hash_table *replacements;
+
+   void *mem_ctx;
+
+   unsigned stage;
+public:
+   output_read_remover(unsigned stage);
+   ~output_read_remover();
+   virtual ir_visitor_status visit(class ir_dereference_variable *);
+   virtual ir_visitor_status visit_leave(class ir_emit_vertex *);
+   virtual ir_visitor_status visit_leave(class ir_return *);
+   virtual ir_visitor_status visit_leave(class ir_function_signature *);
+};
+
+} /* anonymous namespace */
+
+/**
+ * Hash function for the output variables - computes the hash of the name.
+ * NOTE: We're using the name string to ensure that the hash doesn't depend
+ * on any random factors; otherwise output_read_remover could emit the
+ * assignments in a nondeterministic order.
+ *
+ * NOTE: If you want to reuse this function please take into account that
+ * generally the names of the variables are non-unique.
+ */
+static unsigned
+hash_table_var_hash(const void *key)
+{
+   const ir_variable *var = static_cast<const ir_variable *>(key);
+   return hash_table_string_hash(var->name);
+}
+
+output_read_remover::output_read_remover(unsigned stage)
+{
+   this->stage = stage;
+   mem_ctx = ralloc_context(NULL);
+   replacements =
+      hash_table_ctor(0, hash_table_var_hash, hash_table_pointer_compare);
+}
+
+output_read_remover::~output_read_remover()
+{
+   hash_table_dtor(replacements);
+   ralloc_free(mem_ctx);
+}
+
+ir_visitor_status
+output_read_remover::visit(ir_dereference_variable *ir)
+{
+   if (ir->var->data.mode != ir_var_shader_out)
+      return visit_continue;
+   if (stage == MESA_SHADER_TESS_CTRL)
+      return visit_continue;
+
+   ir_variable *temp = (ir_variable *) hash_table_find(replacements, ir->var);
+
+   /* If we don't have an existing temporary, create one. */
+   if (temp == NULL) {
+      void *var_ctx = ralloc_parent(ir->var);
+      temp = new(var_ctx) ir_variable(ir->var->type, ir->var->name,
+                                      ir_var_temporary);
+      hash_table_insert(replacements, temp, ir->var);
+      ir->var->insert_after(temp);
+   }
+
+   /* Update the dereference to use the temporary */
+   ir->var = temp;
+
+   return visit_continue;
+}
+
+/**
+ * Create an assignment to copy a temporary value back to the actual output.
+ */
+static ir_assignment *
+copy(void *ctx, ir_variable *output, ir_variable *temp)
+{
+   ir_dereference_variable *lhs = new(ctx) ir_dereference_variable(output);
+   ir_dereference_variable *rhs = new(ctx) ir_dereference_variable(temp);
+   return new(ctx) ir_assignment(lhs, rhs);
+}
+
+/** Insert a copy-back assignment before a "return" statement or a call to
+ * EmitVertex().
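+ * For geometry shaders the copies must land before every EmitVertex() call,
+ * since that is the point at which the output values are consumed.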
+ */ +static void +emit_return_copy(const void *key, void *data, void *closure) +{ + ir_return *ir = (ir_return *) closure; + ir->insert_before(copy(ir, (ir_variable *) key, (ir_variable *) data)); +} + +/** Insert a copy-back assignment at the end of the main() function */ +static void +emit_main_copy(const void *key, void *data, void *closure) +{ + ir_function_signature *sig = (ir_function_signature *) closure; + sig->body.push_tail(copy(sig, (ir_variable *) key, (ir_variable *) data)); +} + +ir_visitor_status +output_read_remover::visit_leave(ir_return *ir) +{ + hash_table_call_foreach(replacements, emit_return_copy, ir); + return visit_continue; +} + +ir_visitor_status +output_read_remover::visit_leave(ir_emit_vertex *ir) +{ + hash_table_call_foreach(replacements, emit_return_copy, ir); + hash_table_clear(replacements); + return visit_continue; +} + +ir_visitor_status +output_read_remover::visit_leave(ir_function_signature *sig) +{ + if (strcmp(sig->function_name(), "main") != 0) + return visit_continue; + + hash_table_call_foreach(replacements, emit_main_copy, sig); + return visit_continue; +} + +void +lower_output_reads(unsigned stage, exec_list *instructions) +{ + output_read_remover v(stage); + visit_list_elements(&v, instructions); +} diff --git a/src/compiler/glsl/lower_packed_varyings.cpp b/src/compiler/glsl/lower_packed_varyings.cpp new file mode 100644 index 00000000000..8d1eb1725d5 --- /dev/null +++ b/src/compiler/glsl/lower_packed_varyings.cpp @@ -0,0 +1,749 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_varyings_to_packed.cpp + * + * This lowering pass generates GLSL code that manually packs varyings into + * vec4 slots, for the benefit of back-ends that don't support packed varyings + * natively. + * + * For example, the following shader: + * + * out mat3x2 foo; // location=4, location_frac=0 + * out vec3 bar[2]; // location=5, location_frac=2 + * + * main() + * { + * ... + * } + * + * Is rewritten to: + * + * mat3x2 foo; + * vec3 bar[2]; + * out vec4 packed4; // location=4, location_frac=0 + * out vec4 packed5; // location=5, location_frac=0 + * out vec4 packed6; // location=6, location_frac=0 + * + * main() + * { + * ... 
+ * packed4.xy = foo[0]; + * packed4.zw = foo[1]; + * packed5.xy = foo[2]; + * packed5.zw = bar[0].xy; + * packed6.x = bar[0].z; + * packed6.yzw = bar[1]; + * } + * + * This lowering pass properly handles "double parking" of a varying vector + * across two varying slots. For example, in the code above, two of the + * components of bar[0] are stored in packed5, and the remaining component is + * stored in packed6. + * + * Note that in theory, the extra instructions may cause some loss of + * performance. However, hopefully in most cases the performance loss will + * either be absorbed by a later optimization pass, or it will be offset by + * memory bandwidth savings (because fewer varyings are used). + * + * This lowering pass also packs flat floats, ints, and uints together, by + * using ivec4 as the base type of flat "varyings", and using appropriate + * casts to convert floats and uints into ints. + * + * This lowering pass also handles varyings whose type is a struct or an array + * of struct. Structs are packed in order and with no gaps, so there may be a + * performance penalty due to structure elements being double-parked. + * + * Lowering of geometry shader inputs is slightly more complex, since geometry + * inputs are always arrays, so we need to lower arrays to arrays. For + * example, the following input: + * + * in struct Foo { + * float f; + * vec3 v; + * vec2 a[2]; + * } arr[3]; // location=4, location_frac=0 + * + * Would get lowered like this if it occurred in a fragment shader: + * + * struct Foo { + * float f; + * vec3 v; + * vec2 a[2]; + * } arr[3]; + * in vec4 packed4; // location=4, location_frac=0 + * in vec4 packed5; // location=5, location_frac=0 + * in vec4 packed6; // location=6, location_frac=0 + * in vec4 packed7; // location=7, location_frac=0 + * in vec4 packed8; // location=8, location_frac=0 + * in vec4 packed9; // location=9, location_frac=0 + * + * main() + * { + * arr[0].f = packed4.x; + * arr[0].v = packed4.yzw; + * arr[0].a[0] = packed5.xy; + * arr[0].a[1] = packed5.zw; + * arr[1].f = packed6.x; + * arr[1].v = packed6.yzw; + * arr[1].a[0] = packed7.xy; + * arr[1].a[1] = packed7.zw; + * arr[2].f = packed8.x; + * arr[2].v = packed8.yzw; + * arr[2].a[0] = packed9.xy; + * arr[2].a[1] = packed9.zw; + * ... + * } + * + * But it would get lowered like this if it occurred in a geometry shader: + * + * struct Foo { + * float f; + * vec3 v; + * vec2 a[2]; + * } arr[3]; + * in vec4 packed4[3]; // location=4, location_frac=0 + * in vec4 packed5[3]; // location=5, location_frac=0 + * + * main() + * { + * arr[0].f = packed4[0].x; + * arr[0].v = packed4[0].yzw; + * arr[0].a[0] = packed5[0].xy; + * arr[0].a[1] = packed5[0].zw; + * arr[1].f = packed4[1].x; + * arr[1].v = packed4[1].yzw; + * arr[1].a[0] = packed5[1].xy; + * arr[1].a[1] = packed5[1].zw; + * arr[2].f = packed4[2].x; + * arr[2].v = packed4[2].yzw; + * arr[2].a[0] = packed5[2].xy; + * arr[2].a[1] = packed5[2].zw; + * ... + * } + */ + +#include "glsl_symbol_table.h" +#include "ir.h" +#include "ir_builder.h" +#include "ir_optimization.h" +#include "program/prog_instruction.h" + +using namespace ir_builder; + +namespace { + +/** + * Visitor that performs varying packing. For each varying declared in the + * shader, this visitor determines whether it needs to be packed. If so, it + * demotes it to an ordinary global, creates new packed varyings, and + * generates assignments to convert between the original varying and the + * packed varying. 
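+ *
+ * For example (hypothetical), a flat ivec2 varying "v" occupying a generic
+ * slot is demoted to an ordinary global, and assignments are generated
+ * against two components of an ivec4 named "packed:v" that covers the slot.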
+ */ +class lower_packed_varyings_visitor +{ +public: + lower_packed_varyings_visitor(void *mem_ctx, unsigned locations_used, + ir_variable_mode mode, + unsigned gs_input_vertices, + exec_list *out_instructions, + exec_list *out_variables); + + void run(struct gl_shader *shader); + +private: + void bitwise_assign_pack(ir_rvalue *lhs, ir_rvalue *rhs); + void bitwise_assign_unpack(ir_rvalue *lhs, ir_rvalue *rhs); + unsigned lower_rvalue(ir_rvalue *rvalue, unsigned fine_location, + ir_variable *unpacked_var, const char *name, + bool gs_input_toplevel, unsigned vertex_index); + unsigned lower_arraylike(ir_rvalue *rvalue, unsigned array_size, + unsigned fine_location, + ir_variable *unpacked_var, const char *name, + bool gs_input_toplevel, unsigned vertex_index); + ir_dereference *get_packed_varying_deref(unsigned location, + ir_variable *unpacked_var, + const char *name, + unsigned vertex_index); + bool needs_lowering(ir_variable *var); + + /** + * Memory context used to allocate new instructions for the shader. + */ + void * const mem_ctx; + + /** + * Number of generic varying slots which are used by this shader. This is + * used to allocate temporary intermediate data structures. If any varying + * used by this shader has a location greater than or equal to + * VARYING_SLOT_VAR0 + locations_used, an assertion will fire. + */ + const unsigned locations_used; + + /** + * Array of pointers to the packed varyings that have been created for each + * generic varying slot. NULL entries in this array indicate varying slots + * for which a packed varying has not been created yet. + */ + ir_variable **packed_varyings; + + /** + * Type of varying which is being lowered in this pass (either + * ir_var_shader_in or ir_var_shader_out). + */ + const ir_variable_mode mode; + + /** + * If we are currently lowering geometry shader inputs, the number of input + * vertices the geometry shader accepts. Otherwise zero. + */ + const unsigned gs_input_vertices; + + /** + * Exec list into which the visitor should insert the packing instructions. + * Caller provides this list; it should insert the instructions into the + * appropriate place in the shader once the visitor has finished running. + */ + exec_list *out_instructions; + + /** + * Exec list into which the visitor should insert any new variables. + */ + exec_list *out_variables; +}; + +} /* anonymous namespace */ + +lower_packed_varyings_visitor::lower_packed_varyings_visitor( + void *mem_ctx, unsigned locations_used, ir_variable_mode mode, + unsigned gs_input_vertices, exec_list *out_instructions, + exec_list *out_variables) + : mem_ctx(mem_ctx), + locations_used(locations_used), + packed_varyings((ir_variable **) + rzalloc_array_size(mem_ctx, sizeof(*packed_varyings), + locations_used)), + mode(mode), + gs_input_vertices(gs_input_vertices), + out_instructions(out_instructions), + out_variables(out_variables) +{ +} + +void +lower_packed_varyings_visitor::run(struct gl_shader *shader) +{ + foreach_in_list(ir_instruction, node, shader->ir) { + ir_variable *var = node->as_variable(); + if (var == NULL) + continue; + + if (var->data.mode != this->mode || + var->data.location < VARYING_SLOT_VAR0 || + !this->needs_lowering(var)) + continue; + + /* This lowering pass is only capable of packing floats and ints + * together when their interpolation mode is "flat". Therefore, to be + * safe, caller should ensure that integral varyings always use flat + * interpolation, even when this is not required by GLSL. 
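+       * (Interpolating the bit pattern of an integer that was bitcast into
+       * a float slot would produce garbage, hence the restriction.)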
+ */ + assert(var->data.interpolation == INTERP_QUALIFIER_FLAT || + !var->type->contains_integer()); + + /* Clone the variable for program resource list before + * it gets modified and lost. + */ + if (!shader->packed_varyings) + shader->packed_varyings = new (shader) exec_list; + + shader->packed_varyings->push_tail(var->clone(shader, NULL)); + + /* Change the old varying into an ordinary global. */ + assert(var->data.mode != ir_var_temporary); + var->data.mode = ir_var_auto; + + /* Create a reference to the old varying. */ + ir_dereference_variable *deref + = new(this->mem_ctx) ir_dereference_variable(var); + + /* Recursively pack or unpack it. */ + this->lower_rvalue(deref, var->data.location * 4 + var->data.location_frac, var, + var->name, this->gs_input_vertices != 0, 0); + } +} + +#define SWIZZLE_ZWZW MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W) + +/** + * Make an ir_assignment from \c rhs to \c lhs, performing appropriate + * bitcasts if necessary to match up types. + * + * This function is called when packing varyings. + */ +void +lower_packed_varyings_visitor::bitwise_assign_pack(ir_rvalue *lhs, + ir_rvalue *rhs) +{ + if (lhs->type->base_type != rhs->type->base_type) { + /* Since we only mix types in flat varyings, and we always store flat + * varyings as type ivec4, we need only produce conversions from (uint + * or float) to int. + */ + assert(lhs->type->base_type == GLSL_TYPE_INT); + switch (rhs->type->base_type) { + case GLSL_TYPE_UINT: + rhs = new(this->mem_ctx) + ir_expression(ir_unop_u2i, lhs->type, rhs); + break; + case GLSL_TYPE_FLOAT: + rhs = new(this->mem_ctx) + ir_expression(ir_unop_bitcast_f2i, lhs->type, rhs); + break; + case GLSL_TYPE_DOUBLE: + assert(rhs->type->vector_elements <= 2); + if (rhs->type->vector_elements == 2) { + ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "pack", ir_var_temporary); + + assert(lhs->type->vector_elements == 4); + this->out_variables->push_tail(t); + this->out_instructions->push_tail( + assign(t, u2i(expr(ir_unop_unpack_double_2x32, swizzle_x(rhs->clone(mem_ctx, NULL)))), 0x3)); + this->out_instructions->push_tail( + assign(t, u2i(expr(ir_unop_unpack_double_2x32, swizzle_y(rhs))), 0xc)); + rhs = deref(t).val; + } else { + rhs = u2i(expr(ir_unop_unpack_double_2x32, rhs)); + } + break; + default: + assert(!"Unexpected type conversion while lowering varyings"); + break; + } + } + this->out_instructions->push_tail(new (this->mem_ctx) ir_assignment(lhs, rhs)); +} + + +/** + * Make an ir_assignment from \c rhs to \c lhs, performing appropriate + * bitcasts if necessary to match up types. + * + * This function is called when unpacking varyings. + */ +void +lower_packed_varyings_visitor::bitwise_assign_unpack(ir_rvalue *lhs, + ir_rvalue *rhs) +{ + if (lhs->type->base_type != rhs->type->base_type) { + /* Since we only mix types in flat varyings, and we always store flat + * varyings as type ivec4, we need only produce conversions from int to + * (uint or float). 
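+       * In GLSL terms, a float component is recovered as roughly
+       * "f = intBitsToFloat(p.x)" and a uint component as "u = uint(p.x)",
+       * where "p" stands for the packed ivec4 (illustrative names).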
+ */ + assert(rhs->type->base_type == GLSL_TYPE_INT); + switch (lhs->type->base_type) { + case GLSL_TYPE_UINT: + rhs = new(this->mem_ctx) + ir_expression(ir_unop_i2u, lhs->type, rhs); + break; + case GLSL_TYPE_FLOAT: + rhs = new(this->mem_ctx) + ir_expression(ir_unop_bitcast_i2f, lhs->type, rhs); + break; + case GLSL_TYPE_DOUBLE: + assert(lhs->type->vector_elements <= 2); + if (lhs->type->vector_elements == 2) { + ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "unpack", ir_var_temporary); + assert(rhs->type->vector_elements == 4); + this->out_variables->push_tail(t); + this->out_instructions->push_tail( + assign(t, expr(ir_unop_pack_double_2x32, i2u(swizzle_xy(rhs->clone(mem_ctx, NULL)))), 0x1)); + this->out_instructions->push_tail( + assign(t, expr(ir_unop_pack_double_2x32, i2u(swizzle(rhs->clone(mem_ctx, NULL), SWIZZLE_ZWZW, 2))), 0x2)); + rhs = deref(t).val; + } else { + rhs = expr(ir_unop_pack_double_2x32, i2u(rhs)); + } + break; + default: + assert(!"Unexpected type conversion while lowering varyings"); + break; + } + } + this->out_instructions->push_tail(new(this->mem_ctx) ir_assignment(lhs, rhs)); +} + + +/** + * Recursively pack or unpack the given varying (or portion of a varying) by + * traversing all of its constituent vectors. + * + * \param fine_location is the location where the first constituent vector + * should be packed--the word "fine" indicates that this location is expressed + * in multiples of a float, rather than multiples of a vec4 as is used + * elsewhere in Mesa. + * + * \param gs_input_toplevel should be set to true if we are lowering geometry + * shader inputs, and we are currently lowering the whole input variable + * (i.e. we are lowering the array whose index selects the vertex). + * + * \param vertex_index: if we are lowering geometry shader inputs, and the + * level of the array that we are currently lowering is *not* the top level, + * then this indicates which vertex we are currently lowering. Otherwise it + * is ignored. + * + * \return the location where the next constituent vector (after this one) + * should be packed. + */ +unsigned +lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue, + unsigned fine_location, + ir_variable *unpacked_var, + const char *name, + bool gs_input_toplevel, + unsigned vertex_index) +{ + unsigned dmul = rvalue->type->is_double() ? 2 : 1; + /* When gs_input_toplevel is set, we should be looking at a geometry shader + * input array. + */ + assert(!gs_input_toplevel || rvalue->type->is_array()); + + if (rvalue->type->is_record()) { + for (unsigned i = 0; i < rvalue->type->length; i++) { + if (i != 0) + rvalue = rvalue->clone(this->mem_ctx, NULL); + const char *field_name = rvalue->type->fields.structure[i].name; + ir_dereference_record *dereference_record = new(this->mem_ctx) + ir_dereference_record(rvalue, field_name); + char *deref_name + = ralloc_asprintf(this->mem_ctx, "%s.%s", name, field_name); + fine_location = this->lower_rvalue(dereference_record, fine_location, + unpacked_var, deref_name, false, + vertex_index); + } + return fine_location; + } else if (rvalue->type->is_array()) { + /* Arrays are packed/unpacked by considering each array element in + * sequence. + */ + return this->lower_arraylike(rvalue, rvalue->type->array_size(), + fine_location, unpacked_var, name, + gs_input_toplevel, vertex_index); + } else if (rvalue->type->is_matrix()) { + /* Matrices are packed/unpacked by considering each column vector in + * sequence. 
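+       * For example, a hypothetical mat2x3 whose first column starts at
+       * fine location 8 is lowered as two vec3 columns at fine locations 8
+       * and 11; the second column then gets "double parked" across a slot
+       * boundary.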
+ */ + return this->lower_arraylike(rvalue, rvalue->type->matrix_columns, + fine_location, unpacked_var, name, + false, vertex_index); + } else if (rvalue->type->vector_elements * dmul + + fine_location % 4 > 4) { + /* This vector is going to be "double parked" across two varying slots, + * so handle it as two separate assignments. For doubles, a dvec3/dvec4 + * can end up being spread over 3 slots. However the second splitting + * will happen later, here we just always want to split into 2. + */ + unsigned left_components, right_components; + unsigned left_swizzle_values[4] = { 0, 0, 0, 0 }; + unsigned right_swizzle_values[4] = { 0, 0, 0, 0 }; + char left_swizzle_name[4] = { 0, 0, 0, 0 }; + char right_swizzle_name[4] = { 0, 0, 0, 0 }; + + left_components = 4 - fine_location % 4; + if (rvalue->type->is_double()) { + /* We might actually end up with 0 left components! */ + left_components /= 2; + } + right_components = rvalue->type->vector_elements - left_components; + + for (unsigned i = 0; i < left_components; i++) { + left_swizzle_values[i] = i; + left_swizzle_name[i] = "xyzw"[i]; + } + for (unsigned i = 0; i < right_components; i++) { + right_swizzle_values[i] = i + left_components; + right_swizzle_name[i] = "xyzw"[i + left_components]; + } + ir_swizzle *left_swizzle = new(this->mem_ctx) + ir_swizzle(rvalue, left_swizzle_values, left_components); + ir_swizzle *right_swizzle = new(this->mem_ctx) + ir_swizzle(rvalue->clone(this->mem_ctx, NULL), right_swizzle_values, + right_components); + char *left_name + = ralloc_asprintf(this->mem_ctx, "%s.%s", name, left_swizzle_name); + char *right_name + = ralloc_asprintf(this->mem_ctx, "%s.%s", name, right_swizzle_name); + if (left_components) + fine_location = this->lower_rvalue(left_swizzle, fine_location, + unpacked_var, left_name, false, + vertex_index); + else + /* Top up the fine location to the next slot */ + fine_location++; + return this->lower_rvalue(right_swizzle, fine_location, unpacked_var, + right_name, false, vertex_index); + } else { + /* No special handling is necessary; pack the rvalue into the + * varying. + */ + unsigned swizzle_values[4] = { 0, 0, 0, 0 }; + unsigned components = rvalue->type->vector_elements * dmul; + unsigned location = fine_location / 4; + unsigned location_frac = fine_location % 4; + for (unsigned i = 0; i < components; ++i) + swizzle_values[i] = i + location_frac; + ir_dereference *packed_deref = + this->get_packed_varying_deref(location, unpacked_var, name, + vertex_index); + ir_swizzle *swizzle = new(this->mem_ctx) + ir_swizzle(packed_deref, swizzle_values, components); + if (this->mode == ir_var_shader_out) { + this->bitwise_assign_pack(swizzle, rvalue); + } else { + this->bitwise_assign_unpack(rvalue, swizzle); + } + return fine_location + components; + } +} + +/** + * Recursively pack or unpack a varying for which we need to iterate over its + * constituent elements, accessing each one using an ir_dereference_array. + * This takes care of both arrays and matrices, since ir_dereference_array + * treats a matrix like an array of its column vectors. + * + * \param gs_input_toplevel should be set to true if we are lowering geometry + * shader inputs, and we are currently lowering the whole input variable + * (i.e. we are lowering the array whose index selects the vertex). + * + * \param vertex_index: if we are lowering geometry shader inputs, and the + * level of the array that we are currently lowering is *not* the top level, + * then this indicates which vertex we are currently lowering. 
Otherwise it + * is ignored. + */ +unsigned +lower_packed_varyings_visitor::lower_arraylike(ir_rvalue *rvalue, + unsigned array_size, + unsigned fine_location, + ir_variable *unpacked_var, + const char *name, + bool gs_input_toplevel, + unsigned vertex_index) +{ + for (unsigned i = 0; i < array_size; i++) { + if (i != 0) + rvalue = rvalue->clone(this->mem_ctx, NULL); + ir_constant *constant = new(this->mem_ctx) ir_constant(i); + ir_dereference_array *dereference_array = new(this->mem_ctx) + ir_dereference_array(rvalue, constant); + if (gs_input_toplevel) { + /* Geometry shader inputs are a special case. Instead of storing + * each element of the array at a different location, all elements + * are at the same location, but with a different vertex index. + */ + (void) this->lower_rvalue(dereference_array, fine_location, + unpacked_var, name, false, i); + } else { + char *subscripted_name + = ralloc_asprintf(this->mem_ctx, "%s[%d]", name, i); + fine_location = + this->lower_rvalue(dereference_array, fine_location, + unpacked_var, subscripted_name, + false, vertex_index); + } + } + return fine_location; +} + +/** + * Retrieve the packed varying corresponding to the given varying location. + * If no packed varying has been created for the given varying location yet, + * create it and add it to the shader before returning it. + * + * The newly created varying inherits its interpolation parameters from \c + * unpacked_var. Its base type is ivec4 if we are lowering a flat varying, + * vec4 otherwise. + * + * \param vertex_index: if we are lowering geometry shader inputs, then this + * indicates which vertex we are currently lowering. Otherwise it is ignored. + */ +ir_dereference * +lower_packed_varyings_visitor::get_packed_varying_deref( + unsigned location, ir_variable *unpacked_var, const char *name, + unsigned vertex_index) +{ + unsigned slot = location - VARYING_SLOT_VAR0; + assert(slot < locations_used); + if (this->packed_varyings[slot] == NULL) { + char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name); + const glsl_type *packed_type; + if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT) + packed_type = glsl_type::ivec4_type; + else + packed_type = glsl_type::vec4_type; + if (this->gs_input_vertices != 0) { + packed_type = + glsl_type::get_array_instance(packed_type, + this->gs_input_vertices); + } + ir_variable *packed_var = new(this->mem_ctx) + ir_variable(packed_type, packed_name, this->mode); + if (this->gs_input_vertices != 0) { + /* Prevent update_array_sizes() from messing with the size of the + * array. + */ + packed_var->data.max_array_access = this->gs_input_vertices - 1; + } + packed_var->data.centroid = unpacked_var->data.centroid; + packed_var->data.sample = unpacked_var->data.sample; + packed_var->data.patch = unpacked_var->data.patch; + packed_var->data.interpolation = unpacked_var->data.interpolation; + packed_var->data.location = location; + packed_var->data.precision = unpacked_var->data.precision; + packed_var->data.always_active_io = unpacked_var->data.always_active_io; + unpacked_var->insert_before(packed_var); + this->packed_varyings[slot] = packed_var; + } else { + /* For geometry shader inputs, only update the packed variable name the + * first time we visit each component. 
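+       * As names accumulate, a slot shared by varyings "a" and "b" ends up
+       * with a packed variable named "packed:a,b".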
+ */ + if (this->gs_input_vertices == 0 || vertex_index == 0) { + ralloc_asprintf_append((char **) &this->packed_varyings[slot]->name, + ",%s", name); + } + } + + ir_dereference *deref = new(this->mem_ctx) + ir_dereference_variable(this->packed_varyings[slot]); + if (this->gs_input_vertices != 0) { + /* When lowering GS inputs, the packed variable is an array, so we need + * to dereference it using vertex_index. + */ + ir_constant *constant = new(this->mem_ctx) ir_constant(vertex_index); + deref = new(this->mem_ctx) ir_dereference_array(deref, constant); + } + return deref; +} + +bool +lower_packed_varyings_visitor::needs_lowering(ir_variable *var) +{ + /* Things composed of vec4's and varyings with explicitly assigned + * locations don't need lowering. Everything else does. + */ + if (var->data.explicit_location) + return false; + + const glsl_type *type = var->type->without_array(); + if (type->vector_elements == 4 && !type->is_double()) + return false; + return true; +} + + +/** + * Visitor that splices varying packing code before every use of EmitVertex() + * in a geometry shader. + */ +class lower_packed_varyings_gs_splicer : public ir_hierarchical_visitor +{ +public: + explicit lower_packed_varyings_gs_splicer(void *mem_ctx, + const exec_list *instructions); + + virtual ir_visitor_status visit_leave(ir_emit_vertex *ev); + +private: + /** + * Memory context used to allocate new instructions for the shader. + */ + void * const mem_ctx; + + /** + * Instructions that should be spliced into place before each EmitVertex() + * call. + */ + const exec_list *instructions; +}; + + +lower_packed_varyings_gs_splicer::lower_packed_varyings_gs_splicer( + void *mem_ctx, const exec_list *instructions) + : mem_ctx(mem_ctx), instructions(instructions) +{ +} + + +ir_visitor_status +lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev) +{ + foreach_in_list(ir_instruction, ir, this->instructions) { + ev->insert_before(ir->clone(this->mem_ctx, NULL)); + } + return visit_continue; +} + + +void +lower_packed_varyings(void *mem_ctx, unsigned locations_used, + ir_variable_mode mode, unsigned gs_input_vertices, + gl_shader *shader) +{ + exec_list *instructions = shader->ir; + ir_function *main_func = shader->symbols->get_function("main"); + exec_list void_parameters; + ir_function_signature *main_func_sig + = main_func->matching_signature(NULL, &void_parameters, false); + exec_list new_instructions, new_variables; + lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode, + gs_input_vertices, + &new_instructions, + &new_variables); + visitor.run(shader); + if (mode == ir_var_shader_out) { + if (shader->Stage == MESA_SHADER_GEOMETRY) { + /* For geometry shaders, outputs need to be lowered before each call + * to EmitVertex() + */ + lower_packed_varyings_gs_splicer splicer(mem_ctx, &new_instructions); + + /* Add all the variables in first. 
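+          * The declarations must already be in place when the splicer
+          * clones the packing assignments before each EmitVertex(), since
+          * those clones refer to the new variables.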
+          */
+         main_func_sig->body.head->insert_before(&new_variables);
+
+         /* Now update all the EmitVertex instances */
+         splicer.run(instructions);
+      } else {
+         /* For other shader types, outputs need to be lowered at the end of
+          * main()
+          */
+         main_func_sig->body.append_list(&new_variables);
+         main_func_sig->body.append_list(&new_instructions);
+      }
+   } else {
+      /* Shader inputs need to be lowered at the beginning of main() */
+      main_func_sig->body.head->insert_before(&new_instructions);
+      main_func_sig->body.head->insert_before(&new_variables);
+   }
+}
diff --git a/src/compiler/glsl/lower_packing_builtins.cpp b/src/compiler/glsl/lower_packing_builtins.cpp
new file mode 100644
index 00000000000..7f18238bc6e
--- /dev/null
+++ b/src/compiler/glsl/lower_packing_builtins.cpp
@@ -0,0 +1,1412 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "ir.h"
+#include "ir_builder.h"
+#include "ir_optimization.h"
+#include "ir_rvalue_visitor.h"
+
+namespace {
+
+using namespace ir_builder;
+
+/**
+ * A visitor that lowers built-in floating-point pack/unpack expressions
+ * such as packSnorm2x16.
+ */
+class lower_packing_builtins_visitor : public ir_rvalue_visitor {
+public:
+   /**
+    * \param op_mask is a bitmask of `enum lower_packing_builtins_op`
+    */
+   explicit lower_packing_builtins_visitor(int op_mask)
+      : op_mask(op_mask),
+        progress(false)
+   {
+      /* Mutually exclusive options.
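+       * A caller requests either the full lowering of half-float packing
+       * or the split variant, never both.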
*/ + assert(!((op_mask & LOWER_PACK_HALF_2x16) && + (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT))); + + assert(!((op_mask & LOWER_UNPACK_HALF_2x16) && + (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT))); + + factory.instructions = &factory_instructions; + } + + virtual ~lower_packing_builtins_visitor() + { + assert(factory_instructions.is_empty()); + } + + bool get_progress() { return progress; } + + void handle_rvalue(ir_rvalue **rvalue) + { + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr) + return; + + enum lower_packing_builtins_op lowering_op = + choose_lowering_op(expr->operation); + + if (lowering_op == LOWER_PACK_UNPACK_NONE) + return; + + setup_factory(ralloc_parent(expr)); + + ir_rvalue *op0 = expr->operands[0]; + ralloc_steal(factory.mem_ctx, op0); + + switch (lowering_op) { + case LOWER_PACK_SNORM_2x16: + *rvalue = lower_pack_snorm_2x16(op0); + break; + case LOWER_PACK_SNORM_4x8: + *rvalue = lower_pack_snorm_4x8(op0); + break; + case LOWER_PACK_UNORM_2x16: + *rvalue = lower_pack_unorm_2x16(op0); + break; + case LOWER_PACK_UNORM_4x8: + *rvalue = lower_pack_unorm_4x8(op0); + break; + case LOWER_PACK_HALF_2x16: + *rvalue = lower_pack_half_2x16(op0); + break; + case LOWER_PACK_HALF_2x16_TO_SPLIT: + *rvalue = split_pack_half_2x16(op0); + break; + case LOWER_UNPACK_SNORM_2x16: + *rvalue = lower_unpack_snorm_2x16(op0); + break; + case LOWER_UNPACK_SNORM_4x8: + *rvalue = lower_unpack_snorm_4x8(op0); + break; + case LOWER_UNPACK_UNORM_2x16: + *rvalue = lower_unpack_unorm_2x16(op0); + break; + case LOWER_UNPACK_UNORM_4x8: + *rvalue = lower_unpack_unorm_4x8(op0); + break; + case LOWER_UNPACK_HALF_2x16: + *rvalue = lower_unpack_half_2x16(op0); + break; + case LOWER_UNPACK_HALF_2x16_TO_SPLIT: + *rvalue = split_unpack_half_2x16(op0); + break; + case LOWER_PACK_UNPACK_NONE: + case LOWER_PACK_USE_BFI: + case LOWER_PACK_USE_BFE: + assert(!"not reached"); + break; + } + + teardown_factory(); + progress = true; + } + +private: + const int op_mask; + bool progress; + ir_factory factory; + exec_list factory_instructions; + + /** + * Determine the needed lowering operation by filtering \a expr_op + * through \ref op_mask. + */ + enum lower_packing_builtins_op + choose_lowering_op(ir_expression_operation expr_op) + { + /* C++ regards int and enum as fundamentally different types. + * So, we can't simply return from each case; we must cast the return + * value. 
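+    *
+    * Note that each case relies on LOWER_PACK_UNPACK_NONE being zero:
+    * "op_mask & FLAG" evaluates either to 0 (no lowering requested) or to
+    * exactly one flag, because conflicting flags are rejected up front.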
+       */
+      int result;
+
+      switch (expr_op) {
+      case ir_unop_pack_snorm_2x16:
+         result = op_mask & LOWER_PACK_SNORM_2x16;
+         break;
+      case ir_unop_pack_snorm_4x8:
+         result = op_mask & LOWER_PACK_SNORM_4x8;
+         break;
+      case ir_unop_pack_unorm_2x16:
+         result = op_mask & LOWER_PACK_UNORM_2x16;
+         break;
+      case ir_unop_pack_unorm_4x8:
+         result = op_mask & LOWER_PACK_UNORM_4x8;
+         break;
+      case ir_unop_pack_half_2x16:
+         result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT);
+         break;
+      case ir_unop_unpack_snorm_2x16:
+         result = op_mask & LOWER_UNPACK_SNORM_2x16;
+         break;
+      case ir_unop_unpack_snorm_4x8:
+         result = op_mask & LOWER_UNPACK_SNORM_4x8;
+         break;
+      case ir_unop_unpack_unorm_2x16:
+         result = op_mask & LOWER_UNPACK_UNORM_2x16;
+         break;
+      case ir_unop_unpack_unorm_4x8:
+         result = op_mask & LOWER_UNPACK_UNORM_4x8;
+         break;
+      case ir_unop_unpack_half_2x16:
+         result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
+         break;
+      default:
+         result = LOWER_PACK_UNPACK_NONE;
+         break;
+      }
+
+      return static_cast<enum lower_packing_builtins_op>(result);
+   }
+
+   void
+   setup_factory(void *mem_ctx)
+   {
+      assert(factory.mem_ctx == NULL);
+      assert(factory.instructions->is_empty());
+
+      factory.mem_ctx = mem_ctx;
+   }
+
+   void
+   teardown_factory()
+   {
+      base_ir->insert_before(factory.instructions);
+      assert(factory.instructions->is_empty());
+      factory.mem_ctx = NULL;
+   }
+
+   template <typename T>
+   ir_constant*
+   constant(T x)
+   {
+      return factory.constant(x);
+   }
+
+   /**
+    * \brief Pack two uint16's into a single uint32.
+    *
+    * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32
+    * where the least significant bits specify the first element of the pair.
+    * Return the uint32.
+    */
+   ir_rvalue*
+   pack_uvec2_to_uint(ir_rvalue *uvec2_rval)
+   {
+      assert(uvec2_rval->type == glsl_type::uvec2_type);
+
+      /* uvec2 u = UVEC2_RVAL; */
+      ir_variable *u = factory.make_temp(glsl_type::uvec2_type,
+                                         "tmp_pack_uvec2_to_uint");
+      factory.emit(assign(u, uvec2_rval));
+
+      if (op_mask & LOWER_PACK_USE_BFI) {
+         return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)),
+                                swizzle_y(u),
+                                constant(16u),
+                                constant(16u));
+      }
+
+      /* return (u.y << 16) | (u.x & 0xffff); */
+      return bit_or(lshift(swizzle_y(u), constant(16u)),
+                    bit_and(swizzle_x(u), constant(0xffffu)));
+   }
+
+   /**
+    * \brief Pack four uint8's into a single uint32.
+    *
+    * Interpret the given uvec4 as a uint8 4-tuple. Pack the 4-tuple into a
+    * uint32 where the least significant bits specify the first element of the
+    * 4-tuple. Return the uint32.
+    */
+   ir_rvalue*
+   pack_uvec4_to_uint(ir_rvalue *uvec4_rval)
+   {
+      assert(uvec4_rval->type == glsl_type::uvec4_type);
+
+      ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
+                                         "tmp_pack_uvec4_to_uint");
+
+      if (op_mask & LOWER_PACK_USE_BFI) {
+         /* uvec4 u = UVEC4_RVAL; */
+         factory.emit(assign(u, uvec4_rval));
+
+         return bitfield_insert(bitfield_insert(
+                                bitfield_insert(
+                                   bit_and(swizzle_x(u), constant(0xffu)),
+                                   swizzle_y(u), constant(8u), constant(8u)),
+                                swizzle_z(u), constant(16u), constant(8u)),
+                                swizzle_w(u), constant(24u), constant(8u));
+      }
+
+      /* uvec4 u = UVEC4_RVAL & 0xff */
+      factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));
+
+      /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */
+      return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)),
+                           lshift(swizzle_z(u), constant(16u))),
+                    bit_or(lshift(swizzle_y(u), constant(8u)),
+                           swizzle_x(u)));
+   }
+
+   /**
+    * \brief Unpack a uint32 into two uint16's.
+ * + * Interpret the given uint32 as a uint16 pair where the uint32's least + * significant bits specify the pair's first element. Return the uint16 + * pair as a uvec2. + */ + ir_rvalue* + unpack_uint_to_uvec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = UINT_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_uint_to_uvec2_u"); + factory.emit(assign(u, uint_rval)); + + /* uvec2 u2; */ + ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_uint_to_uvec2_u2"); + + /* u2.x = u & 0xffffu; */ + factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X)); + + /* u2.y = u >> 16u; */ + factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y)); + + return deref(u2).val; + } + + /** + * \brief Unpack a uint32 into two int16's. + * + * Specifically each 16-bit value is sign-extended to the full width of an + * int32 on return. + */ + ir_rvalue * + unpack_uint_to_ivec2(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), + constant(16u)), + constant(16u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec2_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec2 i2; */ + ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, + "tmp_unpack_uint_to_ivec2_i2"); + + factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), + WRITEMASK_X)); + factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), + WRITEMASK_Y)); + + return deref(i2).val; + } + + /** + * \brief Unpack a uint32 into four uint8's. + * + * Interpret the given uint32 as a uint8 4-tuple where the uint32's least + * significant bits specify the 4-tuple's first element. Return the uint8 + * 4-tuple as a uvec4. + */ + ir_rvalue* + unpack_uint_to_uvec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = UINT_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_uint_to_uvec4_u"); + factory.emit(assign(u, uint_rval)); + + /* uvec4 u4; */ + ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, + "tmp_unpack_uint_to_uvec4_u4"); + + /* u4.x = u & 0xffu; */ + factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); + + if (op_mask & LOWER_PACK_USE_BFE) { + /* u4.y = bitfield_extract(u, 8, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(8u), constant(8u)), + WRITEMASK_Y)); + + /* u4.z = bitfield_extract(u, 16, 8); */ + factory.emit(assign(u4, bitfield_extract(u, constant(16u), constant(8u)), + WRITEMASK_Z)); + } else { + /* u4.y = (u >> 8u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), + constant(0xffu)), WRITEMASK_Y)); + + /* u4.z = (u >> 16u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), + constant(0xffu)), WRITEMASK_Z)); + } + + /* u4.w = (u >> 24u) */ + factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); + + return deref(u4).val; + } + + /** + * \brief Unpack a uint32 into four int8's. + * + * Specifically each 8-bit value is sign-extended to the full width of an + * int32 on return. 
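+    *
+    * For example, the byte 0xff becomes the int32 -1: shifting 0x000000ff
+    * left by 24 gives 0xff000000, and the arithmetic right shift by 24
+    * yields 0xffffffff.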
+ */ + ir_rvalue * + unpack_uint_to_ivec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + if (!(op_mask & LOWER_PACK_USE_BFE)) { + return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), + constant(24u)), + constant(24u)); + } + + ir_variable *i = factory.make_temp(glsl_type::int_type, + "tmp_unpack_uint_to_ivec4_i"); + factory.emit(assign(i, u2i(uint_rval))); + + /* ivec4 i4; */ + ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, + "tmp_unpack_uint_to_ivec4_i4"); + + factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), + WRITEMASK_X)); + factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), + WRITEMASK_Y)); + factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), + WRITEMASK_Z)); + factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), + WRITEMASK_W)); + + return deref(i4).val; + } + + /** + * \brief Lower a packSnorm2x16 expression. + * + * \param vec2_rval is packSnorm2x16's input + * \return packSnorm2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_snorm_2x16(ir_rvalue *vec2_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packSnorm2x16(vec2 v) + * -------------------------------- + * First, converts each component of the normalized floating-point value + * v into 16-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec2_to_uint( + * uvec2(ivec2( + * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f)))); + * + * It is necessary to first convert the vec2 to ivec2 rather than directly + * converting vec2 to uvec2 because the latter conversion is undefined. + * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to + * convert a negative floating point value to an uint". + */ + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_rvalue *result = pack_uvec2_to_uint( + i2u(f2i(round_even(mul(clamp(vec2_rval, + constant(-1.0f), + constant(1.0f)), + constant(32767.0f)))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower a packSnorm4x8 expression. + * + * \param vec4_rval is packSnorm4x8's input + * \return packSnorm4x8's output as a uint rvalue + */ + ir_rvalue* + lower_pack_snorm_4x8(ir_rvalue *vec4_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp uint packSnorm4x8(vec4 v) + * ------------------------------- + * First, converts each component of the normalized floating-point value + * v into 8-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. 
+ * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec4_to_uint( + * uvec4(ivec4( + * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); + * + * It is necessary to first convert the vec4 to ivec4 rather than directly + * converting vec4 to uvec4 because the latter conversion is undefined. + * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to + * convert a negative floating point value to an uint". + */ + assert(vec4_rval->type == glsl_type::vec4_type); + + ir_rvalue *result = pack_uvec4_to_uint( + i2u(f2i(round_even(mul(clamp(vec4_rval, + constant(-1.0f), + constant(1.0f)), + constant(127.0f)))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower an unpackSnorm2x16 expression. + * + * \param uint_rval is unpackSnorm2x16's input + * \return unpackSnorm2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_snorm_2x16(ir_rvalue *uint_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp vec2 unpackSnorm2x16 (highp uint p) + * ----------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into a pair of + * 16-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm2x16: clamp(f / 32767.0, -1,+1) + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return clamp( + * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f, + * -1.0f, 1.0f); + * + * The above IR may appear unnecessarily complex, but the intermediate + * conversion to ivec2 and the bit shifts are necessary to correctly unpack + * negative floats. + * + * To see why, consider packing and then unpacking vec2(-1.0, 0.0). + * packSnorm2x16 encodes -1.0 as the int16 0xffff. During unpacking, we + * place that int16 into an int32, which results in the *positive* integer + * 0x0000ffff. The int16's sign bit becomes, in the int32, the rather + * unimportant bit 16. We must now extend the int16's sign bit into bits + * 17-32, which is accomplished by left-shifting then right-shifting. + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = + clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), + constant(32767.0f)), + constant(-1.0f), + constant(1.0f)); + + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Lower an unpackSnorm4x8 expression. + * + * \param uint_rval is unpackSnorm4x8's input + * \return unpackSnorm4x8's output as a vec4 rvalue + */ + ir_rvalue* + lower_unpack_snorm_4x8(ir_rvalue *uint_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp vec4 unpackSnorm4x8 (highp uint p) + * ---------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into four + * 8-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * four-component vector. 
+ * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm4x8: clamp(f / 127.0, -1, +1) + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return clamp( + * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, + * -1.0f, 1.0f); + * + * The above IR may appear unnecessarily complex, but the intermediate + * conversion to ivec4 and the bit shifts are necessary to correctly unpack + * negative floats. + * + * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, + * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we + * place that int8 into an int32, which results in the *positive* integer + * 0x000000ff. The int8's sign bit becomes, in the int32, the rather + * unimportant bit 8. We must now extend the int8's sign bit into bits + * 9-32, which is accomplished by left-shifting then right-shifting. + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = + clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), + constant(127.0f)), + constant(-1.0f), + constant(1.0f)); + + assert(result->type == glsl_type::vec4_type); + return result; + } + + /** + * \brief Lower a packUnorm2x16 expression. + * + * \param vec2_rval is packUnorm2x16's input + * \return packUnorm2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_unorm_2x16(ir_rvalue *vec2_rval) + { + /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packUnorm2x16 (vec2 v) + * --------------------------------- + * First, converts each component of the normalized floating-point value + * v into 16-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec2_to_uint(uvec2( + * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f))); + * + * Here it is safe to directly convert the vec2 to uvec2 because the vec2 + * has been clamped to a non-negative range. + */ + + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_rvalue *result = pack_uvec2_to_uint( + f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower a packUnorm4x8 expression. + * + * \param vec4_rval is packUnorm4x8's input + * \return packUnorm4x8's output as a uint rvalue + */ + ir_rvalue* + lower_pack_unorm_4x8(ir_rvalue *vec4_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp uint packUnorm4x8 (vec4 v) + * -------------------------------- + * First, converts each component of the normalized floating-point value + * v into 8-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. 
+ * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec4_to_uint(uvec4( + * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f))); + * + * Here it is safe to directly convert the vec4 to uvec4 because the vec4 + * has been clamped to a non-negative range. + */ + + assert(vec4_rval->type == glsl_type::vec4_type); + + ir_rvalue *result = pack_uvec4_to_uint( + f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Lower an unpackUnorm2x16 expression. + * + * \param uint_rval is unpackUnorm2x16's input + * \return unpackUnorm2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_unorm_2x16(ir_rvalue *uint_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * highp vec2 unpackUnorm2x16 (highp uint p) + * ----------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into a pair of + * 16-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm2x16: f / 65535.0 + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0; + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)), + constant(65535.0f)); + + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Lower an unpackUnorm4x8 expression. + * + * \param uint_rval is unpackUnorm4x8's input + * \return unpackUnorm4x8's output as a vec4 rvalue + */ + ir_rvalue* + lower_unpack_unorm_4x8(ir_rvalue *uint_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp vec4 unpackUnorm4x8 (highp uint p) + * ---------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into four + * 8-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm4x8: f / 255.0 + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0; + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)), + constant(255.0f)); + + assert(result->type == glsl_type::vec4_type); + return result; + } + + /** + * \brief Lower the component-wise calculation of packHalf2x16. 
+ * + * \param f_rval is one component of packHafl2x16's input + * \param e_rval is the unshifted exponent bits of f_rval + * \param m_rval is the unshifted mantissa bits of f_rval + * + * \return a uint rvalue that encodes a float16 in its lower 16 bits + */ + ir_rvalue* + pack_half_1x16_nosign(ir_rvalue *f_rval, + ir_rvalue *e_rval, + ir_rvalue *m_rval) + { + assert(e_rval->type == glsl_type::uint_type); + assert(m_rval->type == glsl_type::uint_type); + + /* uint u16; */ + ir_variable *u16 = factory.make_temp(glsl_type::uint_type, + "tmp_pack_half_1x16_u16"); + + /* float f = FLOAT_RVAL; */ + ir_variable *f = factory.make_temp(glsl_type::float_type, + "tmp_pack_half_1x16_f"); + factory.emit(assign(f, f_rval)); + + /* uint e = E_RVAL; */ + ir_variable *e = factory.make_temp(glsl_type::uint_type, + "tmp_pack_half_1x16_e"); + factory.emit(assign(e, e_rval)); + + /* uint m = M_RVAL; */ + ir_variable *m = factory.make_temp(glsl_type::uint_type, + "tmp_pack_half_1x16_m"); + factory.emit(assign(m, m_rval)); + + /* Preliminaries + * ------------- + * + * For a float16, the bit layout is: + * + * sign: 15 + * exponent: 10:14 + * mantissa: 0:9 + * + * Let f16 be a float16 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) + * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) + * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) + * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) + * if e16 = 31 and m16 != 0, then NaN (5) + * + * where 0 <= m16 < 2^10. + * + * For a float32, the bit layout is: + * + * sign: 31 + * exponent: 23:30 + * mantissa: 0:22 + * + * Let f32 be a float32 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) + * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) + * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) + * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) + * if e32 = 255 and m32 != 0, then NaN (14) + * + * where 0 <= m32 < 2^23. + * + * The minimum and maximum normal float16 values are + * + * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20) + * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21) + * + * The step at max_norm16 is + * + * max_step16 = 2^5 (22) + * + * Observe that the float16 boundary values in equations 20-21 lie in the + * range of normal float32 values. + * + * + * Rounding Behavior + * ----------------- + * Not all float32 values can be exactly represented as a float16. We + * round all such intermediate float32 values to the nearest float16; if + * the float32 is exactly between to float16 values, we round to the one + * with an even mantissa. This rounding behavior has several benefits: + * + * - It has no sign bias. + * + * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's + * GPU ISA. + * + * - By reproducing the behavior of the GPU (at least on Intel hardware), + * compile-time evaluation of constant packHalf2x16 GLSL expressions will + * result in the same value as if the expression were executed on the + * GPU. + * + * Calculation + * ----------- + * Our task is to compute s16, e16, m16 given f32. Since this function + * ignores the sign bit, assume that s32 = s16 = 0. There are several + * cases consider. + */ + + factory.emit( + + /* Case 1) f32 is NaN + * + * The resultant f16 will also be NaN. 
+ */ + + /* if (e32 == 255 && m32 != 0) { */ + if_tree(logic_and(equal(e, constant(0xffu << 23u)), + logic_not(equal(m, constant(0u)))), + + assign(u16, constant(0x7fffu)), + + /* Case 2) f32 lies in the range [0, min_norm16). + * + * The resultant float16 will be either zero, subnormal, or normal. + * + * Solving + * + * f32 = min_norm16 (30) + * + * gives + * + * e32 = 113 and m32 = 0 (31) + * + * Therefore this case occurs if and only if + * + * e32 < 113 (32) + */ + + /* } else if (e32 < 113) { */ + if_tree(less(e, constant(113u << 23u)), + + /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */ + assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f), + constant((float) (1 << 24)))))), + + /* Case 3) f32 lies in the range + * [min_norm16, max_norm16 + max_step16). + * + * The resultant float16 will be either normal or infinite. + * + * Solving + * + * f32 = max_norm16 + max_step16 (40) + * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41) + * = 2^16 (42) + * gives + * + * e32 = 143 and m32 = 0 (43) + * + * We already solved the boundary condition f32 = min_norm16 above + * in equation 31. Therefore this case occurs if and only if + * + * 113 <= e32 and e32 < 143 + */ + + /* } else if (e32 < 143) { */ + if_tree(less(e, constant(143u << 23u)), + + /* The addition below handles the case where the mantissa rounds + * up to 1024 and bumps the exponent. + * + * u16 = ((e - (112u << 23u)) >> 13u) + * + round_to_even((float(m) / (1u << 13u)); + */ + assign(u16, add(rshift(sub(e, constant(112u << 23u)), + constant(13u)), + f2u(round_even( + div(u2f(m), constant((float) (1 << 13))))))), + + /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf]. + * + * The resultant float16 will be infinite. + * + * The cases above caught all float32 values in the range + * [0, max_norm16 + max_step16), so this is the fall-through case. + */ + + /* } else { */ + + assign(u16, constant(31u << 10u)))))); + + /* } */ + + return deref(u16).val; + } + + /** + * \brief Lower a packHalf2x16 expression. + * + * \param vec2_rval is packHalf2x16's input + * \return packHalf2x16's output as a uint rvalue + */ + ir_rvalue* + lower_pack_half_2x16(ir_rvalue *vec2_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * highp uint packHalf2x16 (mediump vec2 v) + * ---------------------------------------- + * Returns an unsigned integer obtained by converting the components of + * a two-component floating-point vector to the 16-bit floating-point + * representation found in the OpenGL ES Specification, and then packing + * these two 16-bit integers into a 32-bit unsigned integer. + * + * The first vector component specifies the 16 least- significant bits + * of the result; the second component specifies the 16 most-significant + * bits. + */ + + assert(vec2_rval->type == glsl_type::vec2_type); + + /* vec2 f = VEC2_RVAL; */ + ir_variable *f = factory.make_temp(glsl_type::vec2_type, + "tmp_pack_half_2x16_f"); + factory.emit(assign(f, vec2_rval)); + + /* uvec2 f32 = bitcast_f2u(f); */ + ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_f32"); + factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f))); + + /* uvec2 f16; */ + ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_f16"); + + /* Get f32's unshifted exponent bits. 
+ * + * uvec2 e = f32 & 0x7f800000u; + */ + ir_variable *e = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_e"); + factory.emit(assign(e, bit_and(f32, constant(0x7f800000u)))); + + /* Get f32's unshifted mantissa bits. + * + * uvec2 m = f32 & 0x007fffffu; + */ + ir_variable *m = factory.make_temp(glsl_type::uvec2_type, + "tmp_pack_half_2x16_m"); + factory.emit(assign(m, bit_and(f32, constant(0x007fffffu)))); + + /* Set f16's exponent and mantissa bits. + * + * f16.x = pack_half_1x16_nosign(e.x, m.x); + * f16.y = pack_half_1y16_nosign(e.y, m.y); + */ + factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f), + swizzle_x(e), + swizzle_x(m)), + WRITEMASK_X)); + factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f), + swizzle_y(e), + swizzle_y(m)), + WRITEMASK_Y)); + + /* Set f16's sign bits. + * + * f16 |= (f32 & (1u << 31u) >> 16u; + */ + factory.emit( + assign(f16, bit_or(f16, + rshift(bit_and(f32, constant(1u << 31u)), + constant(16u))))); + + + /* return (f16.y << 16u) | f16.x; */ + ir_rvalue *result = bit_or(lshift(swizzle_y(f16), + constant(16u)), + swizzle_x(f16)); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** + * \brief Split packHalf2x16's vec2 operand into two floats. + * + * \param vec2_rval is packHalf2x16's input + * \return a uint rvalue + * + * Some code generators, such as the i965 fragment shader, require that all + * vector expressions be lowered to a sequence of scalar expressions. + * However, packHalf2x16 cannot be scalarized by the same mechanism as + * a true vector operation because its input and output have a differing + * number of vector components. + * + * This method scalarizes packHalf2x16 by transforming it from an unary + * operation having vector input to a binary operation having scalar input. + * That is, it transforms + * + * packHalf2x16(VEC2_RVAL); + * + * into + * + * vec2 v = VEC2_RVAL; + * return packHalf2x16_split(v.x, v.y); + */ + ir_rvalue* + split_pack_half_2x16(ir_rvalue *vec2_rval) + { + assert(vec2_rval->type == glsl_type::vec2_type); + + ir_variable *v = factory.make_temp(glsl_type::vec2_type, + "tmp_split_pack_half_2x16_v"); + factory.emit(assign(v, vec2_rval)); + + return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v)); + } + + /** + * \brief Lower the component-wise calculation of unpackHalf2x16. + * + * Given a uint that encodes a float16 in its lower 16 bits, this function + * returns a uint that encodes a float32 with the same value. The sign bit + * of the float16 is ignored. + * + * \param e_rval is the unshifted exponent bits of a float16 + * \param m_rval is the unshifted mantissa bits of a float16 + * \param a uint rvalue that encodes a float32 + */ + ir_rvalue* + unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval) + { + assert(e_rval->type == glsl_type::uint_type); + assert(m_rval->type == glsl_type::uint_type); + + /* uint u32; */ + ir_variable *u32 = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_u32"); + + /* uint e = E_RVAL; */ + ir_variable *e = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_e"); + factory.emit(assign(e, e_rval)); + + /* uint m = M_RVAL; */ + ir_variable *m = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_half_1x16_m"); + factory.emit(assign(m, m_rval)); + + /* Preliminaries + * ------------- + * + * For a float16, the bit layout is: + * + * sign: 15 + * exponent: 10:14 + * mantissa: 0:9 + * + * Let f16 be a float16 value. 
The sign, exponent, and mantissa + * determine its value thus: + * + * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) + * if e16 = 0 and m16!= 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) + * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) + * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) + * if e16 = 31 and m16 != 0, then NaN (5) + * + * where 0 <= m16 < 2^10. + * + * For a float32, the bit layout is: + * + * sign: 31 + * exponent: 23:30 + * mantissa: 0:22 + * + * Let f32 be a float32 value. The sign, exponent, and mantissa + * determine its value thus: + * + * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) + * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) + * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) + * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) + * if e32 = 255 and m32 != 0, then NaN (14) + * + * where 0 <= m32 < 2^23. + * + * Calculation + * ----------- + * Our task is to compute s32, e32, m32 given f16. Since this function + * ignores the sign bit, assume that s32 = s16 = 0. There are several + * cases consider. + */ + + factory.emit( + + /* Case 1) f16 is zero or subnormal. + * + * The simplest method of calcuating f32 in this case is + * + * f32 = f16 (20) + * = 2^(-14) * (m16 / 2^10) (21) + * = m16 / 2^(-24) (22) + */ + + /* if (e16 == 0) { */ + if_tree(equal(e, constant(0u)), + + /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */ + assign(u32, expr(ir_unop_bitcast_f2u, + div(u2f(m), constant((float)(1 << 24))))), + + /* Case 2) f16 is normal. + * + * The equation + * + * f32 = f16 (30) + * 2^(e32 - 127) * (1 + m32 / 2^23) = (31) + * 2^(e16 - 15) * (1 + m16 / 2^10) + * + * can be decomposed into two + * + * 2^(e32 - 127) = 2^(e16 - 15) (32) + * 1 + m32 / 2^23 = 1 + m16 / 2^10 (33) + * + * which solve to + * + * e32 = e16 + 112 (34) + * m32 = m16 * 2^13 (35) + */ + + /* } else if (e16 < 31)) { */ + if_tree(less(e, constant(31u << 10u)), + + /* u32 = ((e + (112 << 10)) | m) << 13; + */ + assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m), + constant(13u))), + + + /* Case 3) f16 is infinite. */ + if_tree(equal(m, constant(0u)), + + assign(u32, constant(255u << 23u)), + + /* Case 4) f16 is NaN. */ + /* } else { */ + + assign(u32, constant(0x7fffffffu)))))); + + /* } */ + + return deref(u32).val; + } + + /** + * \brief Lower an unpackHalf2x16 expression. + * + * \param uint_rval is unpackHalf2x16's input + * \return unpackHalf2x16's output as a vec2 rvalue + */ + ir_rvalue* + lower_unpack_half_2x16(ir_rvalue *uint_rval) + { + /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: + * + * mediump vec2 unpackHalf2x16 (highp uint v) + * ------------------------------------------ + * Returns a two-component floating-point vector with components + * obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit + * values, interpreting those values as 16-bit floating-point numbers + * according to the OpenGL ES Specification, and converting them to + * 32-bit floating-point values. + * + * The first component of the vector is obtained from the + * 16 least-significant bits of v; the second component is obtained + * from the 16 most-significant bits of v. 
+ */ + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = RVALUE; + * uvec2 f16 = uvec2(u.x & 0xffff, u.y >> 16); + */ + ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_f16"); + factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval))); + + /* uvec2 f32; */ + ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_f32"); + + /* Get f16's unshifted exponent bits. + * + * uvec2 e = f16 & 0x7c00u; + */ + ir_variable *e = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_e"); + factory.emit(assign(e, bit_and(f16, constant(0x7c00u)))); + + /* Get f16's unshifted mantissa bits. + * + * uvec2 m = f16 & 0x03ffu; + */ + ir_variable *m = factory.make_temp(glsl_type::uvec2_type, + "tmp_unpack_half_2x16_m"); + factory.emit(assign(m, bit_and(f16, constant(0x03ffu)))); + + /* Set f32's exponent and mantissa bits. + * + * f32.x = unpack_half_1x16_nosign(e.x, m.x); + * f32.y = unpack_half_1x16_nosign(e.y, m.y); + */ + factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e), + swizzle_x(m)), + WRITEMASK_X)); + factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e), + swizzle_y(m)), + WRITEMASK_Y)); + + /* Set f32's sign bit. + * + * f32 |= (f16 & 0x8000u) << 16u; + */ + factory.emit(assign(f32, bit_or(f32, + lshift(bit_and(f16, + constant(0x8000u)), + constant(16u))))); + + /* return bitcast_u2f(f32); */ + ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32); + assert(result->type == glsl_type::vec2_type); + return result; + } + + /** + * \brief Split unpackHalf2x16 into two operations. + * + * \param uint_rval is unpackHalf2x16's input + * \return a vec2 rvalue + * + * Some code generators, such as the i965 fragment shader, require that all + * vector expressions be lowered to a sequence of scalar expressions. + * However, unpackHalf2x16 cannot be scalarized by the same method as + * a true vector operation because the number of components of its input + * and output differ. + * + * This method scalarizes unpackHalf2x16 by transforming it from a single + * operation having vec2 output to a pair of operations each having float + * output. That is, it transforms + * + * unpackHalf2x16(UINT_RVAL) + * + * into + * + * uint u = UINT_RVAL; + * vec2 v; + * + * v.x = unpackHalf2x16_split_x(u); + * v.y = unpackHalf2x16_split_y(u); + * + * return v; + */ + ir_rvalue* + split_unpack_half_2x16(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = uint_rval; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_split_unpack_half_2x16_u"); + factory.emit(assign(u, uint_rval)); + + /* vec2 v; */ + ir_variable *v = factory.make_temp(glsl_type::vec2_type, + "tmp_split_unpack_half_2x16_v"); + + /* v.x = unpack_half_2x16_split_x(u); */ + factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u), + WRITEMASK_X)); + + /* v.y = unpack_half_2x16_split_y(u); */ + factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u), + WRITEMASK_Y)); + + return deref(v).val; + } +}; + +} // namespace anonymous + +/** + * \brief Lower the builtin packing functions. + * + * \param op_mask is a bitmask of `enum lower_packing_builtins_op`. 
+ */ +bool +lower_packing_builtins(exec_list *instructions, int op_mask) +{ + lower_packing_builtins_visitor v(op_mask); + visit_list_elements(&v, instructions, true); + return v.get_progress(); +} diff --git a/src/compiler/glsl/lower_shared_reference.cpp b/src/compiler/glsl/lower_shared_reference.cpp new file mode 100644 index 00000000000..533cd9202f4 --- /dev/null +++ b/src/compiler/glsl/lower_shared_reference.cpp @@ -0,0 +1,496 @@ +/* + * Copyright (c) 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_shared_reference.cpp + * + * IR lower pass to replace dereferences of compute shader shared variables + * with intrinsic function calls. + * + * This relieves drivers of the responsibility of allocating space for the + * shared variables in the shared memory region. 
+ */ + +#include "lower_buffer_access.h" +#include "ir_builder.h" +#include "main/macros.h" +#include "util/list.h" +#include "glsl_parser_extras.h" + +using namespace ir_builder; + +namespace { + +struct var_offset { + struct list_head node; + const ir_variable *var; + unsigned offset; +}; + +class lower_shared_reference_visitor : + public lower_buffer_access::lower_buffer_access { +public: + + lower_shared_reference_visitor(struct gl_shader *shader) + : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u) + { + list_inithead(&var_offsets); + } + + ~lower_shared_reference_visitor() + { + ralloc_free(list_ctx); + } + + enum { + shared_load_access, + shared_store_access, + shared_atomic_access, + } buffer_access_type; + + void insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel); + + void handle_rvalue(ir_rvalue **rvalue); + ir_visitor_status visit_enter(ir_assignment *ir); + void handle_assignment(ir_assignment *ir); + + ir_call *lower_shared_atomic_intrinsic(ir_call *ir); + ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); + ir_visitor_status visit_enter(ir_call *ir); + + unsigned get_shared_offset(const ir_variable *); + + ir_call *shared_load(void *mem_ctx, const struct glsl_type *type, + ir_rvalue *offset); + ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, + unsigned write_mask); + + void *list_ctx; + struct gl_shader *shader; + struct list_head var_offsets; + unsigned shared_size; + bool progress; +}; + +unsigned +lower_shared_reference_visitor::get_shared_offset(const ir_variable *var) +{ + list_for_each_entry(var_offset, var_entry, &var_offsets, node) { + if (var_entry->var == var) + return var_entry->offset; + } + + struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset); + list_add(&new_entry->node, &var_offsets); + new_entry->var = var; + + unsigned var_align = var->type->std430_base_alignment(false); + new_entry->offset = glsl_align(shared_size, var_align); + + unsigned var_size = var->type->std430_size(false); + shared_size = new_entry->offset + var_size; + + return new_entry->offset; +} + +void +lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + if (!deref) + return; + + ir_variable *var = deref->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return; + + buffer_access_type = shared_load_access; + + void *mem_ctx = ralloc_parent(shader->ir); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + /* Now that we've calculated the offset to the start of the + * dereference, walk over the type and emit loads into a temporary. 
+ */ + const glsl_type *type = (*rvalue)->type; + ir_variable *load_var = new(mem_ctx) ir_variable(type, + "shared_load_temp", + ir_var_temporary); + base_ir->insert_before(load_var); + + ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "shared_load_temp_offset", + ir_var_temporary); + base_ir->insert_before(load_offset); + base_ir->insert_before(assign(load_offset, offset)); + + deref = new(mem_ctx) ir_dereference_variable(load_var); + + emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major, + matrix_columns, packing, 0); + + *rvalue = deref; + + progress = true; +} + +void +lower_shared_reference_visitor::handle_assignment(ir_assignment *ir) +{ + if (!ir || !ir->lhs) + return; + + ir_rvalue *rvalue = ir->lhs->as_rvalue(); + if (!rvalue) + return; + + ir_dereference *deref = ir->lhs->as_dereference(); + if (!deref) + return; + + ir_variable *var = ir->lhs->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return; + + buffer_access_type = shared_store_access; + + /* We have a write to a shared variable, so declare a temporary and rewrite + * the assignment so that the temporary is the LHS. + */ + void *mem_ctx = ralloc_parent(shader->ir); + + const glsl_type *type = rvalue->type; + ir_variable *store_var = new(mem_ctx) ir_variable(type, + "shared_store_temp", + ir_var_temporary); + base_ir->insert_before(store_var); + ir->lhs = new(mem_ctx) ir_dereference_variable(store_var); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + deref = new(mem_ctx) ir_dereference_variable(store_var); + + ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "shared_store_temp_offset", + ir_var_temporary); + base_ir->insert_before(store_offset); + base_ir->insert_before(assign(store_offset, offset)); + + /* Now we have to write the value assigned to the temporary back to memory */ + emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major, + matrix_columns, packing, ir->write_mask); + + progress = true; +} + +ir_visitor_status +lower_shared_reference_visitor::visit_enter(ir_assignment *ir) +{ + handle_assignment(ir); + return rvalue_visit(ir); +} + +void +lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx, + ir_dereference *deref, + const glsl_type *type, + ir_rvalue *offset, + unsigned mask, + int channel) +{ + if (buffer_access_type == shared_store_access) { + ir_call *store = shared_store(mem_ctx, deref, offset, mask); + base_ir->insert_after(store); + } else { + ir_call *load = shared_load(mem_ctx, type, offset); + base_ir->insert_before(load); + ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL); + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), + value)); + } +} + +static bool +compute_shader_enabled(const _mesa_glsl_parse_state *state) +{ + return state->stage == MESA_SHADER_COMPUTE; +} + +ir_call * +lower_shared_reference_visitor::shared_store(void *mem_ctx, + ir_rvalue *deref, + ir_rvalue *offset, + unsigned write_mask) +{ + exec_list sig_params; + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_variable *val_ref = new(mem_ctx) + ir_variable(deref->type, 
"value" , ir_var_function_in); + sig_params.push_tail(val_ref); + + ir_variable *writemask_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); + sig_params.push_tail(writemask_ref); + + ir_function_signature *sig = new(mem_ctx) + ir_function_signature(glsl_type::void_type, compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared"); + f->add_signature(sig); + + exec_list call_params; + call_params.push_tail(offset->clone(mem_ctx, NULL)); + call_params.push_tail(deref->clone(mem_ctx, NULL)); + call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); + return new(mem_ctx) ir_call(sig, NULL, &call_params); +} + +ir_call * +lower_shared_reference_visitor::shared_load(void *mem_ctx, + const struct glsl_type *type, + ir_rvalue *offset) +{ + exec_list sig_params; + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(type, compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared"); + f->add_signature(sig); + + ir_variable *result = new(mem_ctx) + ir_variable(type, "shared_load_result", ir_var_temporary); + base_ir->insert_before(result); + ir_dereference_variable *deref_result = new(mem_ctx) + ir_dereference_variable(result); + + exec_list call_params; + call_params.push_tail(offset->clone(mem_ctx, NULL)); + + return new(mem_ctx) ir_call(sig, deref_result, &call_params); +} + +/* Lowers the intrinsic call to a new internal intrinsic that swaps the access + * to the shared variable in the first parameter by an offset. This involves + * creating the new internal intrinsic (i.e. the new function signature). + */ +ir_call * +lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) +{ + /* Shared atomics usually have 2 parameters, the shared variable and an + * integer argument. The exception is CompSwap, that has an additional + * integer parameter. 
+ */ + int param_count = ir->actual_parameters.length(); + assert(param_count == 2 || param_count == 3); + + /* First argument must be a scalar integer shared variable */ + exec_node *param = ir->actual_parameters.get_head(); + ir_instruction *inst = (ir_instruction *) param; + assert(inst->ir_type == ir_type_dereference_variable || + inst->ir_type == ir_type_dereference_array || + inst->ir_type == ir_type_dereference_record || + inst->ir_type == ir_type_swizzle); + + ir_rvalue *deref = (ir_rvalue *) inst; + assert(deref->type->is_scalar() && deref->type->is_integer()); + + ir_variable *var = deref->variable_referenced(); + assert(var); + + /* Compute the offset to the start if the dereference + */ + void *mem_ctx = ralloc_parent(shader->ir); + + ir_rvalue *offset = NULL; + unsigned const_offset = get_shared_offset(var); + bool row_major; + int matrix_columns; + assert(var->get_interface_type() == NULL); + const unsigned packing = GLSL_INTERFACE_PACKING_STD430; + buffer_access_type = shared_atomic_access; + + setup_buffer_access(mem_ctx, var, deref, + &offset, &const_offset, + &row_major, &matrix_columns, packing); + + assert(offset); + assert(!row_major); + assert(matrix_columns == 1); + + ir_rvalue *deref_offset = + add(offset, new(mem_ctx) ir_constant(const_offset)); + + /* Create the new internal function signature that will take an offset + * instead of a shared variable + */ + exec_list sig_params; + ir_variable *sig_param = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); + sig_params.push_tail(sig_param); + + const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? + glsl_type::int_type : glsl_type::uint_type; + sig_param = new(mem_ctx) + ir_variable(type, "data1", ir_var_function_in); + sig_params.push_tail(sig_param); + + if (param_count == 3) { + sig_param = new(mem_ctx) + ir_variable(type, "data2", ir_var_function_in); + sig_params.push_tail(sig_param); + } + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(deref->type, + compute_shader_enabled); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + char func_name[64]; + sprintf(func_name, "%s_shared", ir->callee_name()); + ir_function *f = new(mem_ctx) ir_function(func_name); + f->add_signature(sig); + + /* Now, create the call to the internal intrinsic */ + exec_list call_params; + call_params.push_tail(deref_offset); + param = ir->actual_parameters.get_head()->get_next(); + ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); + call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); + if (param_count == 3) { + param = param->get_next(); + param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); + call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); + } + ir_dereference_variable *return_deref = + ir->return_deref->clone(mem_ctx, NULL); + return new(mem_ctx) ir_call(sig, return_deref, &call_params); +} + +ir_call * +lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir) +{ + exec_list& params = ir->actual_parameters; + + if (params.length() < 2 || params.length() > 3) + return ir; + + ir_rvalue *rvalue = + ((ir_instruction *) params.get_head())->as_rvalue(); + if (!rvalue) + return ir; + + ir_variable *var = rvalue->variable_referenced(); + if (!var || var->data.mode != ir_var_shader_shared) + return ir; + + const char *callee = ir->callee_name(); + if (!strcmp("__intrinsic_atomic_add", callee) || + !strcmp("__intrinsic_atomic_min", callee) || + !strcmp("__intrinsic_atomic_max", 
callee) || + !strcmp("__intrinsic_atomic_and", callee) || + !strcmp("__intrinsic_atomic_or", callee) || + !strcmp("__intrinsic_atomic_xor", callee) || + !strcmp("__intrinsic_atomic_exchange", callee) || + !strcmp("__intrinsic_atomic_comp_swap", callee)) { + return lower_shared_atomic_intrinsic(ir); + } + + return ir; +} + +ir_visitor_status +lower_shared_reference_visitor::visit_enter(ir_call *ir) +{ + ir_call *new_ir = check_for_shared_atomic_intrinsic(ir); + if (new_ir != ir) { + progress = true; + base_ir->replace_with(new_ir); + return visit_continue_with_parent; + } + + return rvalue_visit(ir); +} + +} /* unnamed namespace */ + +void +lower_shared_reference(struct gl_shader *shader, unsigned *shared_size) +{ + if (shader->Stage != MESA_SHADER_COMPUTE) + return; + + lower_shared_reference_visitor v(shader); + + /* Loop over the instructions lowering references, because we take a deref + * of an shared variable array using a shared variable dereference as the + * index will produce a collection of instructions all of which have cloned + * shared variable dereferences for that array index. + */ + do { + v.progress = false; + visit_list_elements(&v, shader->ir); + } while (v.progress); + + *shared_size = v.shared_size; +} diff --git a/src/compiler/glsl/lower_subroutine.cpp b/src/compiler/glsl/lower_subroutine.cpp new file mode 100644 index 00000000000..e80c1be768a --- /dev/null +++ b/src/compiler/glsl/lower_subroutine.cpp @@ -0,0 +1,123 @@ +/* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_subroutine.cpp + * + * lowers subroutines to an if ladder. 
+ */ + +#include "compiler/glsl_types.h" +#include "glsl_parser_extras.h" +#include "ir.h" +#include "ir_builder.h" + +using namespace ir_builder; +namespace { + +class lower_subroutine_visitor : public ir_hierarchical_visitor { +public: + lower_subroutine_visitor(struct _mesa_glsl_parse_state *state) + : state(state) + { + this->progress = false; + } + + ir_visitor_status visit_leave(ir_call *); + ir_call *call_clone(ir_call *call, ir_function_signature *callee); + bool progress; + struct _mesa_glsl_parse_state *state; +}; + +} + +bool +lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state) +{ + lower_subroutine_visitor v(state); + visit_list_elements(&v, instructions); + return v.progress; +} + +ir_call * +lower_subroutine_visitor::call_clone(ir_call *call, ir_function_signature *callee) +{ + void *mem_ctx = ralloc_parent(call); + ir_dereference_variable *new_return_ref = NULL; + if (call->return_deref != NULL) + new_return_ref = call->return_deref->clone(mem_ctx, NULL); + + exec_list new_parameters; + + foreach_in_list(ir_instruction, ir, &call->actual_parameters) { + new_parameters.push_tail(ir->clone(mem_ctx, NULL)); + } + + return new(mem_ctx) ir_call(callee, new_return_ref, &new_parameters); +} + +ir_visitor_status +lower_subroutine_visitor::visit_leave(ir_call *ir) +{ + if (!ir->sub_var) + return visit_continue; + + void *mem_ctx = ralloc_parent(ir); + ir_if *last_branch = NULL; + + for (int s = this->state->num_subroutines - 1; s >= 0; s--) { + ir_rvalue *var; + ir_constant *lc = new(mem_ctx)ir_constant(s); + ir_function *fn = this->state->subroutines[s]; + bool is_compat = false; + + for (int i = 0; i < fn->num_subroutine_types; i++) { + if (ir->sub_var->type->without_array() == fn->subroutine_types[i]) { + is_compat = true; + break; + } + } + if (is_compat == false) + continue; + + if (ir->array_idx != NULL) + var = ir->array_idx->clone(mem_ctx, NULL); + else + var = new(mem_ctx) ir_dereference_variable(ir->sub_var); + + ir_function_signature *sub_sig = + fn->exact_matching_signature(this->state, + &ir->actual_parameters); + + ir_call *new_call = call_clone(ir, sub_sig); + if (!last_branch) + last_branch = if_tree(equal(subr_to_int(var), lc), new_call); + else + last_branch = if_tree(equal(subr_to_int(var), lc), new_call, last_branch); + } + if (last_branch) + ir->insert_before(last_branch); + ir->remove(); + + return visit_continue; +} diff --git a/src/compiler/glsl/lower_tess_level.cpp b/src/compiler/glsl/lower_tess_level.cpp new file mode 100644 index 00000000000..bed2553222f --- /dev/null +++ b/src/compiler/glsl/lower_tess_level.cpp @@ -0,0 +1,459 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_tess_level.cpp + * + * This pass accounts for the difference between the way gl_TessLevelOuter + * and gl_TessLevelInner is declared in standard GLSL (as an array of + * floats), and the way it is frequently implemented in hardware (as a vec4 + * and vec2). + * + * The declaration of gl_TessLevel* is replaced with a declaration + * of gl_TessLevel*MESA, and any references to gl_TessLevel* are + * translated to refer to gl_TessLevel*MESA with the appropriate + * swizzling of array indices. For instance: + * + * gl_TessLevelOuter[i] + * + * is translated into: + * + * gl_TessLevelOuterMESA[i] + * + * Since some hardware may not internally represent gl_TessLevel* as a pair + * of vec4's, this lowering pass is optional. To enable it, set the + * LowerTessLevel flag in gl_shader_compiler_options to true. + */ + +#include "glsl_symbol_table.h" +#include "ir_rvalue_visitor.h" +#include "ir.h" +#include "program/prog_instruction.h" /* For WRITEMASK_* */ + +namespace { + +class lower_tess_level_visitor : public ir_rvalue_visitor { +public: + explicit lower_tess_level_visitor(gl_shader_stage shader_stage) + : progress(false), old_tess_level_outer_var(NULL), + old_tess_level_inner_var(NULL), new_tess_level_outer_var(NULL), + new_tess_level_inner_var(NULL), shader_stage(shader_stage) + { + } + + virtual ir_visitor_status visit(ir_variable *); + bool is_tess_level_array(ir_rvalue *ir); + ir_rvalue *lower_tess_level_array(ir_rvalue *ir); + virtual ir_visitor_status visit_leave(ir_assignment *); + void visit_new_assignment(ir_assignment *ir); + virtual ir_visitor_status visit_leave(ir_call *); + + virtual void handle_rvalue(ir_rvalue **rvalue); + + void fix_lhs(ir_assignment *); + + bool progress; + + /** + * Pointer to the declaration of gl_TessLevel*, if found. + */ + ir_variable *old_tess_level_outer_var; + ir_variable *old_tess_level_inner_var; + + /** + * Pointer to the newly-created gl_TessLevel*MESA variables. + */ + ir_variable *new_tess_level_outer_var; + ir_variable *new_tess_level_inner_var; + + /** + * Type of shader we are compiling (e.g. MESA_SHADER_TESS_CTRL) + */ + const gl_shader_stage shader_stage; +}; + +} /* anonymous namespace */ + +/** + * Replace any declaration of gl_TessLevel* as an array of floats with a + * declaration of gl_TessLevel*MESA as a vec4. 
+ */ +ir_visitor_status +lower_tess_level_visitor::visit(ir_variable *ir) +{ + if ((!ir->name) || + ((strcmp(ir->name, "gl_TessLevelInner") != 0) && + (strcmp(ir->name, "gl_TessLevelOuter") != 0))) + return visit_continue; + + assert (ir->type->is_array()); + + if (strcmp(ir->name, "gl_TessLevelOuter") == 0) { + if (this->old_tess_level_outer_var) + return visit_continue; + + old_tess_level_outer_var = ir; + assert(ir->type->fields.array == glsl_type::float_type); + + /* Clone the old var so that we inherit all of its properties */ + new_tess_level_outer_var = ir->clone(ralloc_parent(ir), NULL); + + /* And change the properties that we need to change */ + new_tess_level_outer_var->name = ralloc_strdup(new_tess_level_outer_var, + "gl_TessLevelOuterMESA"); + new_tess_level_outer_var->type = glsl_type::vec4_type; + new_tess_level_outer_var->data.max_array_access = 0; + + ir->replace_with(new_tess_level_outer_var); + } else if (strcmp(ir->name, "gl_TessLevelInner") == 0) { + if (this->old_tess_level_inner_var) + return visit_continue; + + old_tess_level_inner_var = ir; + assert(ir->type->fields.array == glsl_type::float_type); + + /* Clone the old var so that we inherit all of its properties */ + new_tess_level_inner_var = ir->clone(ralloc_parent(ir), NULL); + + /* And change the properties that we need to change */ + new_tess_level_inner_var->name = ralloc_strdup(new_tess_level_inner_var, + "gl_TessLevelInnerMESA"); + new_tess_level_inner_var->type = glsl_type::vec2_type; + new_tess_level_inner_var->data.max_array_access = 0; + + ir->replace_with(new_tess_level_inner_var); + } else { + assert(0); + } + + this->progress = true; + + return visit_continue; +} + + +/** + * Determine whether the given rvalue describes an array of floats that + * needs to be lowered to a vec4; that is, determine whether it + * matches one of the following patterns: + * + * - gl_TessLevelOuter + * - gl_TessLevelInner + */ +bool +lower_tess_level_visitor::is_tess_level_array(ir_rvalue *ir) +{ + if (!ir->type->is_array()) + return false; + if (ir->type->fields.array != glsl_type::float_type) + return false; + + if (this->old_tess_level_outer_var) { + if (ir->variable_referenced() == this->old_tess_level_outer_var) + return true; + } + if (this->old_tess_level_inner_var) { + if (ir->variable_referenced() == this->old_tess_level_inner_var) + return true; + } + return false; +} + + +/** + * If the given ir satisfies is_tess_level_array(), return new ir + * representing its lowered equivalent. That is, map: + * + * - gl_TessLevelOuter => gl_TessLevelOuterMESA + * - gl_TessLevelInner => gl_TessLevelInnerMESA + * + * Otherwise return NULL. 
+ */ +ir_rvalue * +lower_tess_level_visitor::lower_tess_level_array(ir_rvalue *ir) +{ + if (!ir->type->is_array()) + return NULL; + if (ir->type->fields.array != glsl_type::float_type) + return NULL; + + ir_variable **new_var = NULL; + + if (this->old_tess_level_outer_var) { + if (ir->variable_referenced() == this->old_tess_level_outer_var) + new_var = &this->new_tess_level_outer_var; + } + if (this->old_tess_level_inner_var) { + if (ir->variable_referenced() == this->old_tess_level_inner_var) + new_var = &this->new_tess_level_inner_var; + } + + if (new_var == NULL) + return NULL; + + assert(ir->as_dereference_variable()); + return new(ralloc_parent(ir)) ir_dereference_variable(*new_var); +} + + +void +lower_tess_level_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL) + return; + + ir_dereference_array *const array_deref = (*rv)->as_dereference_array(); + if (array_deref == NULL) + return; + + /* Replace any expression that indexes one of the floats in gl_TessLevel* + * with an expression that indexes into one of the vec4's + * gl_TessLevel*MESA and accesses the appropriate component. + */ + ir_rvalue *lowered_vec4 = + this->lower_tess_level_array(array_deref->array); + if (lowered_vec4 != NULL) { + this->progress = true; + void *mem_ctx = ralloc_parent(array_deref); + + ir_expression *const expr = + new(mem_ctx) ir_expression(ir_binop_vector_extract, + lowered_vec4, + array_deref->array_index); + + *rv = expr; + } +} + +void +lower_tess_level_visitor::fix_lhs(ir_assignment *ir) +{ + if (ir->lhs->ir_type != ir_type_expression) + return; + void *mem_ctx = ralloc_parent(ir); + ir_expression *const expr = (ir_expression *) ir->lhs; + + /* The expression must be of the form: + * + * (vector_extract gl_TessLevel*MESA, j). + */ + assert(expr->operation == ir_binop_vector_extract); + assert(expr->operands[0]->ir_type == ir_type_dereference_variable); + assert((expr->operands[0]->type == glsl_type::vec4_type) || + (expr->operands[0]->type == glsl_type::vec2_type)); + + ir_dereference *const new_lhs = (ir_dereference *) expr->operands[0]; + + ir_constant *old_index_constant = expr->operands[1]->constant_expression_value(); + if (!old_index_constant) { + ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, + expr->operands[0]->type, + new_lhs->clone(mem_ctx, NULL), + ir->rhs, + expr->operands[1]); + } + ir->set_lhs(new_lhs); + + if (old_index_constant) { + /* gl_TessLevel* is being accessed via a constant index. Don't bother + * creating a vector insert op. Just use a write mask. + */ + ir->write_mask = 1 << old_index_constant->get_int_component(0); + } else { + ir->write_mask = (1 << expr->operands[0]->type->vector_elements) - 1; + } +} + +/** + * Replace any assignment having a gl_TessLevel* (undereferenced) as + * its LHS or RHS with a sequence of assignments, one for each component of + * the array. Each of these assignments is lowered to refer to + * gl_TessLevel*MESA as appropriate. + */ +ir_visitor_status +lower_tess_level_visitor::visit_leave(ir_assignment *ir) +{ + /* First invoke the base class visitor. This causes handle_rvalue() to be + * called on ir->rhs and ir->condition. + */ + ir_rvalue_visitor::visit_leave(ir); + + if (this->is_tess_level_array(ir->lhs) || + this->is_tess_level_array(ir->rhs)) { + /* LHS or RHS of the assignment is the entire gl_TessLevel* array. 
+ * Since we are
+ * reshaping gl_TessLevel* from an array of floats to a
+ * vec4, this isn't going to work as a bulk assignment anymore, so
+ * unroll it to element-by-element assignments and lower each of them.
+ *
+ * Note: to unroll into element-by-element assignments, we need to make
+ * clones of the LHS and RHS. This is safe because expressions and
+ * l-values are side-effect free.
+ */
+ void *ctx = ralloc_parent(ir);
+ int array_size = ir->lhs->type->array_size();
+ for (int i = 0; i < array_size; ++i) {
+ ir_dereference_array *new_lhs = new(ctx) ir_dereference_array(
+ ir->lhs->clone(ctx, NULL), new(ctx) ir_constant(i));
+ ir_dereference_array *new_rhs = new(ctx) ir_dereference_array(
+ ir->rhs->clone(ctx, NULL), new(ctx) ir_constant(i));
+ this->handle_rvalue((ir_rvalue **) &new_rhs);
+
+ /* Handle the LHS after creating the new assignment. This must
+ * happen in this order because handle_rvalue may replace the old LHS
+ * with an ir_expression of ir_binop_vector_extract. Since this is
+ * not a valid l-value, this will cause an assertion in the
+ * ir_assignment constructor to fail.
+ *
+ * If this occurs, replace the mangled LHS with a dereference of the
+ * vector, and replace the RHS with an ir_triop_vector_insert.
+ */
+ ir_assignment *const assign = new(ctx) ir_assignment(new_lhs, new_rhs);
+ this->handle_rvalue((ir_rvalue **) &assign->lhs);
+ this->fix_lhs(assign);
+
+ this->base_ir->insert_before(assign);
+ }
+ ir->remove();
+
+ return visit_continue;
+ }
+
+ /* Handle the LHS as if it were an r-value. Normally
+ * rvalue_visit(ir_assignment *) only visits the RHS, but we need to lower
+ * expressions in the LHS as well.
+ *
+ * This may cause the LHS to get replaced with an ir_expression of
+ * ir_binop_vector_extract. If this occurs, replace it with a dereference
+ * of the vector, and replace the RHS with an ir_triop_vector_insert.
+ */
+ handle_rvalue((ir_rvalue **)&ir->lhs);
+ this->fix_lhs(ir);
+
+ return rvalue_visit(ir);
+}
+
+
+/**
+ * Set up base_ir properly and call visit_leave() on a newly created
+ * ir_assignment node. This is used in cases where we have to insert an
+ * ir_assignment in a place where we know the hierarchical visitor won't see
+ * it.
+ */
+void
+lower_tess_level_visitor::visit_new_assignment(ir_assignment *ir)
+{
+ ir_instruction *old_base_ir = this->base_ir;
+ this->base_ir = ir;
+ ir->accept(this);
+ this->base_ir = old_base_ir;
+}
+
+
+/**
+ * If a gl_TessLevel* variable appears as an argument in an ir_call
+ * expression, replace it with a temporary variable, and make sure the ir_call
+ * is preceded and/or followed by assignments that copy the contents of the
+ * temporary variable to and/or from gl_TessLevel*. Each of these
+ * assignments is then lowered to refer to gl_TessLevel*MESA.
+ */
+ir_visitor_status
+lower_tess_level_visitor::visit_leave(ir_call *ir)
+{
+ void *ctx = ralloc_parent(ir);
+
+ const exec_node *formal_param_node = ir->callee->parameters.head;
+ const exec_node *actual_param_node = ir->actual_parameters.head;
+ while (!actual_param_node->is_tail_sentinel()) {
+ ir_variable *formal_param = (ir_variable *) formal_param_node;
+ ir_rvalue *actual_param = (ir_rvalue *) actual_param_node;
+
+ /* Advance formal_param_node and actual_param_node now so that we can
+ * safely replace actual_param with another node, if necessary, below.
+ */ + formal_param_node = formal_param_node->next; + actual_param_node = actual_param_node->next; + + if (!this->is_tess_level_array(actual_param)) + continue; + + /* User is trying to pass a whole gl_TessLevel* array to a function + * call. Since we are reshaping gl_TessLevel* from an array of floats + * to a vec4, this isn't going to work anymore, so use a temporary + * array instead. + */ + ir_variable *temp = new(ctx) ir_variable( + actual_param->type, "temp_tess_level", ir_var_temporary); + this->base_ir->insert_before(temp); + actual_param->replace_with( + new(ctx) ir_dereference_variable(temp)); + if (formal_param->data.mode == ir_var_function_in + || formal_param->data.mode == ir_var_function_inout) { + /* Copy from gl_TessLevel* to the temporary before the call. + * Since we are going to insert this copy before the current + * instruction, we need to visit it afterwards to make sure it + * gets lowered. + */ + ir_assignment *new_assignment = new(ctx) ir_assignment( + new(ctx) ir_dereference_variable(temp), + actual_param->clone(ctx, NULL)); + this->base_ir->insert_before(new_assignment); + this->visit_new_assignment(new_assignment); + } + if (formal_param->data.mode == ir_var_function_out + || formal_param->data.mode == ir_var_function_inout) { + /* Copy from the temporary to gl_TessLevel* after the call. + * Since visit_list_elements() has already decided which + * instruction it's going to visit next, we need to visit + * afterwards to make sure it gets lowered. + */ + ir_assignment *new_assignment = new(ctx) ir_assignment( + actual_param->clone(ctx, NULL), + new(ctx) ir_dereference_variable(temp)); + this->base_ir->insert_after(new_assignment); + this->visit_new_assignment(new_assignment); + } + } + + return rvalue_visit(ir); +} + + +bool +lower_tess_level(gl_shader *shader) +{ + if ((shader->Stage != MESA_SHADER_TESS_CTRL) && + (shader->Stage != MESA_SHADER_TESS_EVAL)) + return false; + + lower_tess_level_visitor v(shader->Stage); + + visit_list_elements(&v, shader->ir); + + if (v.new_tess_level_outer_var) + shader->symbols->add_variable(v.new_tess_level_outer_var); + if (v.new_tess_level_inner_var) + shader->symbols->add_variable(v.new_tess_level_inner_var); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_texture_projection.cpp b/src/compiler/glsl/lower_texture_projection.cpp new file mode 100644 index 00000000000..95df106d93f --- /dev/null +++ b/src/compiler/glsl/lower_texture_projection.cpp @@ -0,0 +1,103 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_texture_projection.cpp + * + * IR lower pass to perform the division of texture coordinates by the texture + * projector if present. + * + * Many GPUs have a texture sampling opcode that takes the projector + * and does the divide internally, thus the presence of the projector + * in the IR. For GPUs that don't, this saves the driver needing the + * logic for handling the divide. + * + * \author Eric Anholt + */ + +#include "ir.h" + +namespace { + +class lower_texture_projection_visitor : public ir_hierarchical_visitor { +public: + lower_texture_projection_visitor() + { + progress = false; + } + + ir_visitor_status visit_leave(ir_texture *ir); + + bool progress; +}; + +} /* anonymous namespace */ + +ir_visitor_status +lower_texture_projection_visitor::visit_leave(ir_texture *ir) +{ + if (!ir->projector) + return visit_continue; + + void *mem_ctx = ralloc_parent(ir); + + ir_variable *var = new(mem_ctx) ir_variable(ir->projector->type, + "projector", ir_var_temporary); + base_ir->insert_before(var); + ir_dereference *deref = new(mem_ctx) ir_dereference_variable(var); + ir_expression *expr = new(mem_ctx) ir_expression(ir_unop_rcp, + ir->projector->type, + ir->projector, + NULL); + ir_assignment *assign = new(mem_ctx) ir_assignment(deref, expr, NULL); + base_ir->insert_before(assign); + + deref = new(mem_ctx) ir_dereference_variable(var); + ir->coordinate = new(mem_ctx) ir_expression(ir_binop_mul, + ir->coordinate->type, + ir->coordinate, + deref); + + if (ir->shadow_comparitor) { + deref = new(mem_ctx) ir_dereference_variable(var); + ir->shadow_comparitor = new(mem_ctx) ir_expression(ir_binop_mul, + ir->shadow_comparitor->type, + ir->shadow_comparitor, + deref); + } + + ir->projector = NULL; + + progress = true; + return visit_continue; +} + +bool +do_lower_texture_projection(exec_list *instructions) +{ + lower_texture_projection_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_ubo_reference.cpp b/src/compiler/glsl/lower_ubo_reference.cpp new file mode 100644 index 00000000000..a172054bac8 --- /dev/null +++ b/src/compiler/glsl/lower_ubo_reference.cpp @@ -0,0 +1,1042 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_ubo_reference.cpp + * + * IR lower pass to replace dereferences of variables in a uniform + * buffer object with usage of ir_binop_ubo_load expressions, each of + * which can read data up to the size of a vec4. + * + * This relieves drivers of the responsibility to deal with tricky UBO + * layout issues like std140 structures and row_major matrices on + * their own. + */ + +#include "lower_buffer_access.h" +#include "ir_builder.h" +#include "main/macros.h" +#include "glsl_parser_extras.h" + +using namespace ir_builder; + +namespace { +class lower_ubo_reference_visitor : + public lower_buffer_access::lower_buffer_access { +public: + lower_ubo_reference_visitor(struct gl_shader *shader) + : shader(shader) + { + } + + void handle_rvalue(ir_rvalue **rvalue); + ir_visitor_status visit_enter(ir_assignment *ir); + + void setup_for_load_or_store(void *mem_ctx, + ir_variable *var, + ir_rvalue *deref, + ir_rvalue **offset, + unsigned *const_offset, + bool *row_major, + int *matrix_columns, + unsigned packing); + ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, + ir_rvalue *offset); + ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, + ir_rvalue *offset); + + bool check_for_buffer_array_copy(ir_assignment *ir); + bool check_for_buffer_struct_copy(ir_assignment *ir); + void check_for_ssbo_store(ir_assignment *ir); + void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, + ir_variable *write_var, unsigned write_mask); + ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, + unsigned write_mask); + + enum { + ubo_load_access, + ssbo_load_access, + ssbo_store_access, + ssbo_unsized_array_length_access, + ssbo_atomic_access, + } buffer_access_type; + + void insert_buffer_access(void *mem_ctx, ir_dereference *deref, + const glsl_type *type, ir_rvalue *offset, + unsigned mask, int channel); + + ir_visitor_status visit_enter(class ir_expression *); + ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr); + void check_ssbo_unsized_array_length_expression(class ir_expression *); + void check_ssbo_unsized_array_length_assignment(ir_assignment *ir); + + ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, + ir_dereference *, + ir_variable *); + ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); + + unsigned calculate_unsized_array_stride(ir_dereference *deref, + unsigned packing); + + ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir); + ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); + ir_visitor_status visit_enter(ir_call *ir); + + struct gl_shader *shader; + struct gl_uniform_buffer_variable *ubo_var; + ir_rvalue *uniform_block; + bool progress; +}; + +/** + * Determine the name of the interface block field + * + * This is the name of the specific member as it would appear in the + * \c gl_uniform_buffer_variable::Name field in the shader's + * \c UniformBlocks array. 
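+ *
+ * For example, for a block declared (illustratively) as
+ * "uniform Block { float f; } b[2];", an access to b[1].f yields the
+ * name "Block[1]"; a non-constant block index is instead returned to
+ * the caller through \c nonconst_block_index.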
+ */ +static const char * +interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, + ir_rvalue **nonconst_block_index) +{ + *nonconst_block_index = NULL; + char *name_copy = NULL; + size_t base_length = 0; + + /* Loop back through the IR until we find the uniform block */ + ir_rvalue *ir = d; + while (ir != NULL) { + switch (ir->ir_type) { + case ir_type_dereference_variable: { + /* Exit loop */ + ir = NULL; + break; + } + + case ir_type_dereference_record: { + ir_dereference_record *r = (ir_dereference_record *) ir; + ir = r->record->as_dereference(); + + /* If we got here it means any previous array subscripts belong to + * block members and not the block itself so skip over them in the + * next pass. + */ + d = ir; + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *a = (ir_dereference_array *) ir; + ir = a->array->as_dereference(); + break; + } + + case ir_type_swizzle: { + ir_swizzle *s = (ir_swizzle *) ir; + ir = s->val->as_dereference(); + /* Skip swizzle in the next pass */ + d = ir; + break; + } + + default: + assert(!"Should not get here."); + break; + } + } + + while (d != NULL) { + switch (d->ir_type) { + case ir_type_dereference_variable: { + ir_dereference_variable *v = (ir_dereference_variable *) d; + if (name_copy != NULL && + v->var->is_interface_instance() && + v->var->type->is_array()) { + return name_copy; + } else { + *nonconst_block_index = NULL; + return base_name; + } + + break; + } + + case ir_type_dereference_array: { + ir_dereference_array *a = (ir_dereference_array *) d; + size_t new_length; + + if (name_copy == NULL) { + name_copy = ralloc_strdup(mem_ctx, base_name); + base_length = strlen(name_copy); + } + + /* For arrays of arrays we start at the innermost array and work our + * way out so we need to insert the subscript at the base of the + * name string rather than just attaching it to the end. 
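+ *
+ * For example (roughly), for an access like b[i][j] with non-constant
+ * indices, the inner subscript is emitted first as "b[0]" and the outer
+ * one is then inserted at the base to give "b[0][0]", while the actual
+ * block index is accumulated in *nonconst_block_index as
+ * i * inner_array_size + j.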
+ */
+ new_length = base_length;
+ ir_constant *const_index = a->array_index->as_constant();
+ char *end = ralloc_strdup(NULL, &name_copy[new_length]);
+ if (!const_index) {
+ ir_rvalue *array_index = a->array_index;
+ if (array_index->type != glsl_type::uint_type)
+ array_index = i2u(array_index);
+
+ if (a->array->type->is_array() &&
+ a->array->type->fields.array->is_array()) {
+ ir_constant *base_size = new(mem_ctx)
+ ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
+ array_index = mul(array_index, base_size);
+ }
+
+ if (*nonconst_block_index) {
+ *nonconst_block_index = add(*nonconst_block_index, array_index);
+ } else {
+ *nonconst_block_index = array_index;
+ }
+
+ ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
+ end);
+ } else {
+ ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
+ const_index->get_uint_component(0),
+ end);
+ }
+ ralloc_free(end);
+
+ d = a->array->as_dereference();
+
+ break;
+ }
+
+ default:
+ assert(!"Should not get here.");
+ break;
+ }
+ }
+
+ assert(!"Should not get here.");
+ return NULL;
+}
+
+void
+lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
+ ir_variable *var,
+ ir_rvalue *deref,
+ ir_rvalue **offset,
+ unsigned *const_offset,
+ bool *row_major,
+ int *matrix_columns,
+ unsigned packing)
+{
+ /* Determine the name of the interface block */
+ ir_rvalue *nonconst_block_index;
+ const char *const field_name =
+ interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
+ deref, &nonconst_block_index);
+
+ /* Locate the block by interface name */
+ unsigned num_blocks;
+ struct gl_uniform_block **blocks;
+ if (this->buffer_access_type != ubo_load_access) {
+ num_blocks = shader->NumShaderStorageBlocks;
+ blocks = shader->ShaderStorageBlocks;
+ } else {
+ num_blocks = shader->NumUniformBlocks;
+ blocks = shader->UniformBlocks;
+ }
+ this->uniform_block = NULL;
+ for (unsigned i = 0; i < num_blocks; i++) {
+ if (strcmp(field_name, blocks[i]->Name) == 0) {
+
+ ir_constant *index = new(mem_ctx) ir_constant(i);
+
+ if (nonconst_block_index) {
+ this->uniform_block = add(nonconst_block_index, index);
+ } else {
+ this->uniform_block = index;
+ }
+
+ this->ubo_var = var->is_interface_instance()
+ ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location];
+
+ break;
+ }
+ }
+
+ assert(this->uniform_block);
+
+ *const_offset = ubo_var->Offset;
+
+ setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major,
+ matrix_columns, packing);
+}
+
+void
+lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+ if (!*rvalue)
+ return;
+
+ ir_dereference *deref = (*rvalue)->as_dereference();
+ if (!deref)
+ return;
+
+ ir_variable *var = deref->variable_referenced();
+ if (!var || !var->is_in_buffer_block())
+ return;
+
+ void *mem_ctx = ralloc_parent(shader->ir);
+
+ ir_rvalue *offset = NULL;
+ unsigned const_offset;
+ bool row_major;
+ int matrix_columns;
+ unsigned packing = var->get_interface_type()->interface_packing;
+
+ this->buffer_access_type =
+ var->is_in_shader_storage_block() ?
+ ssbo_load_access : ubo_load_access;
+
+ /* Compute the offset to the start of the dereference as well as other
+ * information we need to configure the load
+ */
+ setup_for_load_or_store(mem_ctx, var, deref,
+ &offset, &const_offset,
+ &row_major, &matrix_columns,
+ packing);
+ assert(offset);
+
+ /* Now that we've calculated the offset to the start of the
+ * dereference, walk over the type and emit loads into a temporary.
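+ *
+ * For instance, loading a std140 mat3 from a UBO ends up, roughly, as
+ * one vec4-sized ubo_load per column whose results are copied into the
+ * ubo_load_temp variable declared below; emit_access() does that walk.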
+ */ + const glsl_type *type = (*rvalue)->type; + ir_variable *load_var = new(mem_ctx) ir_variable(type, + "ubo_load_temp", + ir_var_temporary); + base_ir->insert_before(load_var); + + ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, + "ubo_load_temp_offset", + ir_var_temporary); + base_ir->insert_before(load_offset); + base_ir->insert_before(assign(load_offset, offset)); + + deref = new(mem_ctx) ir_dereference_variable(load_var); + emit_access(mem_ctx, false, deref, load_offset, const_offset, + row_major, matrix_columns, packing, 0); + *rvalue = deref; + + progress = true; +} + +ir_expression * +lower_ubo_reference_visitor::ubo_load(void *mem_ctx, + const glsl_type *type, + ir_rvalue *offset) +{ + ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); + return new(mem_ctx) + ir_expression(ir_binop_ubo_load, + type, + block_ref, + offset); + +} + +static bool +shader_storage_buffer_object(const _mesa_glsl_parse_state *state) +{ + return state->ARB_shader_storage_buffer_object_enable; +} + +ir_call * +lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, + ir_rvalue *deref, + ir_rvalue *offset, + unsigned write_mask) +{ + exec_list sig_params; + + ir_variable *block_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); + sig_params.push_tail(block_ref); + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_variable *val_ref = new(mem_ctx) + ir_variable(deref->type, "value" , ir_var_function_in); + sig_params.push_tail(val_ref); + + ir_variable *writemask_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); + sig_params.push_tail(writemask_ref); + + ir_function_signature *sig = new(mem_ctx) + ir_function_signature(glsl_type::void_type, shader_storage_buffer_object); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo"); + f->add_signature(sig); + + exec_list call_params; + call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); + call_params.push_tail(offset->clone(mem_ctx, NULL)); + call_params.push_tail(deref->clone(mem_ctx, NULL)); + call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); + return new(mem_ctx) ir_call(sig, NULL, &call_params); +} + +ir_call * +lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, + const struct glsl_type *type, + ir_rvalue *offset) +{ + exec_list sig_params; + + ir_variable *block_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); + sig_params.push_tail(block_ref); + + ir_variable *offset_ref = new(mem_ctx) + ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); + sig_params.push_tail(offset_ref); + + ir_function_signature *sig = + new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object); + assert(sig); + sig->replace_parameters(&sig_params); + sig->is_intrinsic = true; + + ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo"); + f->add_signature(sig); + + ir_variable *result = new(mem_ctx) + ir_variable(type, "ssbo_load_result", ir_var_temporary); + base_ir->insert_before(result); + ir_dereference_variable *deref_result = new(mem_ctx) + ir_dereference_variable(result); + + exec_list call_params; + call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); + call_params.push_tail(offset->clone(mem_ctx, NULL)); + + return new(mem_ctx) ir_call(sig, 
deref_result, &call_params);
+}
+
+void
+lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
+ ir_dereference *deref,
+ const glsl_type *type,
+ ir_rvalue *offset,
+ unsigned mask,
+ int channel)
+{
+ switch (this->buffer_access_type) {
+ case ubo_load_access:
+ base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
+ ubo_load(mem_ctx, type, offset),
+ mask));
+ break;
+ case ssbo_load_access: {
+ ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
+ base_ir->insert_before(load_ssbo);
+ ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
+ ir_assignment *assignment =
+ assign(deref->clone(mem_ctx, NULL), value, mask);
+ base_ir->insert_before(assignment);
+ break;
+ }
+ case ssbo_store_access:
+ if (channel >= 0) {
+ base_ir->insert_after(ssbo_store(mem_ctx,
+ swizzle(deref, channel, 1),
+ offset, 1));
+ } else {
+ base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
+ }
+ break;
+ default:
+ unreachable("invalid buffer_access_type in insert_buffer_access");
+ }
+}
+
+void
+lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
+ ir_dereference *deref,
+ ir_variable *var,
+ ir_variable *write_var,
+ unsigned write_mask)
+{
+ ir_rvalue *offset = NULL;
+ unsigned const_offset;
+ bool row_major;
+ int matrix_columns;
+ unsigned packing = var->get_interface_type()->interface_packing;
+
+ this->buffer_access_type = ssbo_store_access;
+
+ /* Compute the offset to the start of the dereference as well as other
+ * information we need to configure the write
+ */
+ setup_for_load_or_store(mem_ctx, var, deref,
+ &offset, &const_offset,
+ &row_major, &matrix_columns,
+ packing);
+ assert(offset);
+
+ /* Now emit writes from the temporary to memory */
+ ir_variable *write_offset =
+ new(mem_ctx) ir_variable(glsl_type::uint_type,
+ "ssbo_store_temp_offset",
+ ir_var_temporary);
+
+ base_ir->insert_before(write_offset);
+ base_ir->insert_before(assign(write_offset, offset));
+
+ deref = new(mem_ctx) ir_dereference_variable(write_var);
+ emit_access(mem_ctx, true, deref, write_offset, const_offset,
+ row_major, matrix_columns, packing, write_mask);
+}
+
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
+{
+ check_ssbo_unsized_array_length_expression(ir);
+ return rvalue_visit(ir);
+}
+
+ir_expression *
+lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
+{
+ if (expr->operation !=
+ ir_expression_operation(ir_unop_ssbo_unsized_array_length))
+ return NULL;
+
+ ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
+ if (!rvalue ||
+ !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
+ return NULL;
+
+ ir_dereference *deref = expr->operands[0]->as_dereference();
+ if (!deref)
+ return NULL;
+
+ ir_variable *var = expr->operands[0]->variable_referenced();
+ if (!var || !var->is_in_shader_storage_block())
+ return NULL;
+ return process_ssbo_unsized_array_length(&rvalue, deref, var);
+}
+
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
+{
+ if (ir->operation ==
+ ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
+ /* Don't replace this unop if it is found alone. It is going to be
+ * removed by the optimization passes or replaced if it is part of
+ * an ir_assignment or another ir_expression.
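+ *
+ * In other words, a bare arr.length() whose result is never used can
+ * simply be discarded by later passes, while something like
+ * "n = arr.length();" is rewritten when the enclosing assignment or
+ * expression is visited (roughly speaking).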
+ */ + return; + } + + for (unsigned i = 0; i < ir->get_num_operands(); i++) { + if (ir->operands[i]->ir_type != ir_type_expression) + continue; + ir_expression *expr = (ir_expression *) ir->operands[i]; + ir_expression *temp = calculate_ssbo_unsized_array_length(expr); + if (!temp) + continue; + + delete expr; + ir->operands[i] = temp; + } +} + +void +lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir) +{ + if (!ir->rhs || ir->rhs->ir_type != ir_type_expression) + return; + + ir_expression *expr = (ir_expression *) ir->rhs; + ir_expression *temp = calculate_ssbo_unsized_array_length(expr); + if (!temp) + return; + + delete expr; + ir->rhs = temp; + return; +} + +ir_expression * +lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx) +{ + ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); + return new(mem_ctx) ir_expression(ir_unop_get_buffer_size, + glsl_type::int_type, + block_ref); +} + +unsigned +lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref, + unsigned packing) +{ + unsigned array_stride = 0; + + switch (deref->ir_type) { + case ir_type_dereference_variable: + { + ir_dereference_variable *deref_var = (ir_dereference_variable *)deref; + const struct glsl_type *unsized_array_type = NULL; + /* An unsized array can be sized by other lowering passes, so pick + * the first field of the array which has the data type of the unsized + * array. + */ + unsized_array_type = deref_var->var->type->fields.array; + + /* Whether or not the field is row-major (because it might be a + * bvec2 or something) does not affect the array itself. We need + * to know whether an array element in its entirety is row-major. + */ + const bool array_row_major = + is_dereferenced_thing_row_major(deref_var); + + if (packing == GLSL_INTERFACE_PACKING_STD430) { + array_stride = unsized_array_type->std430_array_stride(array_row_major); + } else { + array_stride = unsized_array_type->std140_size(array_row_major); + array_stride = glsl_align(array_stride, 16); + } + break; + } + case ir_type_dereference_record: + { + ir_dereference_record *deref_record = (ir_dereference_record *) deref; + ir_dereference *interface_deref = + deref_record->record->as_dereference(); + assert(interface_deref != NULL); + const struct glsl_type *interface_type = interface_deref->type; + unsigned record_length = interface_type->length; + /* Unsized array is always the last element of the interface */ + const struct glsl_type *unsized_array_type = + interface_type->fields.structure[record_length - 1].type->fields.array; + + const bool array_row_major = + is_dereferenced_thing_row_major(deref_record); + + if (packing == GLSL_INTERFACE_PACKING_STD430) { + array_stride = unsized_array_type->std430_array_stride(array_row_major); + } else { + array_stride = unsized_array_type->std140_size(array_row_major); + array_stride = glsl_align(array_stride, 16); + } + break; + } + default: + unreachable("Unsupported dereference type"); + } + return array_stride; +} + +ir_expression * +lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue, + ir_dereference *deref, + ir_variable *var) +{ + void *mem_ctx = ralloc_parent(*rvalue); + + ir_rvalue *base_offset = NULL; + unsigned const_offset; + bool row_major; + int matrix_columns; + unsigned packing = var->get_interface_type()->interface_packing; + int unsized_array_stride = calculate_unsized_array_stride(deref, packing); + + this->buffer_access_type = ssbo_unsized_array_length_access; + 
+ /* Compute the offset to the start of the dereference as well as other
+ * information we need to calculate the length.
+ */
+ setup_for_load_or_store(mem_ctx, var, deref,
+ &base_offset, &const_offset,
+ &row_major, &matrix_columns,
+ packing);
+ /* array.length() =
+ * max((buffer_object_size - offset_of_array) / stride_of_array, 0)
+ */
+ ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);
+
+ ir_expression *offset_of_array = new(mem_ctx)
+ ir_expression(ir_binop_add, base_offset,
+ new(mem_ctx) ir_constant(const_offset));
+ ir_expression *offset_of_array_int = new(mem_ctx)
+ ir_expression(ir_unop_u2i, offset_of_array);
+
+ ir_expression *sub = new(mem_ctx)
+ ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
+ ir_expression *div = new(mem_ctx)
+ ir_expression(ir_binop_div, sub,
+ new(mem_ctx) ir_constant(unsized_array_stride));
+ ir_expression *max = new(mem_ctx)
+ ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));
+
+ return max;
+}
+
+void
+lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
+{
+ if (!ir || !ir->lhs)
+ return;
+
+ ir_rvalue *rvalue = ir->lhs->as_rvalue();
+ if (!rvalue)
+ return;
+
+ ir_dereference *deref = ir->lhs->as_dereference();
+ if (!deref)
+ return;
+
+ ir_variable *var = ir->lhs->variable_referenced();
+ if (!var || !var->is_in_shader_storage_block())
+ return;
+
+ /* We have a write to a buffer variable, so declare a temporary and rewrite
+ * the assignment so that the temporary is the LHS.
+ */
+ void *mem_ctx = ralloc_parent(shader->ir);
+
+ const glsl_type *type = rvalue->type;
+ ir_variable *write_var = new(mem_ctx) ir_variable(type,
+ "ssbo_store_temp",
+ ir_var_temporary);
+ base_ir->insert_before(write_var);
+ ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);
+
+ /* Now we have to write the value assigned to the temporary back to memory */
+ write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
+ progress = true;
+}
+
+static bool
+is_buffer_backed_variable(ir_variable *var)
+{
+ return var->is_in_buffer_block() ||
+ var->data.mode == ir_var_shader_shared;
+}
+
+bool
+lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
+{
+ if (!ir || !ir->lhs || !ir->rhs)
+ return false;
+
+ /* LHS and RHS must be arrays
+ * FIXME: arrays of arrays?
+ */
+ if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
+ return false;
+
+ /* RHS must be a buffer-backed variable. This is what can cause the problem
+ * since it would lead to a series of loads that need to live until we
+ * see the writes to the LHS.
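+ *
+ * For example (roughly), "local_array = ssbo.array;" would otherwise
+ * expand into all of the loads followed by all of the stores; the
+ * per-element split below lets each load be consumed right away.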
+ */ + ir_variable *rhs_var = ir->rhs->variable_referenced(); + if (!rhs_var || !is_buffer_backed_variable(rhs_var)) + return false; + + /* Split the array copy into individual element copies to reduce + * register pressure + */ + ir_dereference *rhs_deref = ir->rhs->as_dereference(); + if (!rhs_deref) + return false; + + ir_dereference *lhs_deref = ir->lhs->as_dereference(); + if (!lhs_deref) + return false; + + assert(lhs_deref->type->length == rhs_deref->type->length); + void *mem_ctx = ralloc_parent(shader->ir); + + for (unsigned i = 0; i < lhs_deref->type->length; i++) { + ir_dereference *lhs_i = + new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + + ir_dereference *rhs_i = + new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL), + new(mem_ctx) ir_constant(i)); + ir->insert_after(assign(lhs_i, rhs_i)); + } + + ir->remove(); + progress = true; + return true; +} + +bool +lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir) +{ + if (!ir || !ir->lhs || !ir->rhs) + return false; + + /* LHS and RHS must be records */ + if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record()) + return false; + + /* RHS must be a buffer-backed variable. This is what can cause the problem + * since it would lead to a series of loads that need to live until we + * see the writes to the LHS. + */ + ir_variable *rhs_var = ir->rhs->variable_referenced(); + if (!rhs_var || !is_buffer_backed_variable(rhs_var)) + return false; + + /* Split the struct copy into individual element copies to reduce + * register pressure + */ + ir_dereference *rhs_deref = ir->rhs->as_dereference(); + if (!rhs_deref) + return false; + + ir_dereference *lhs_deref = ir->lhs->as_dereference(); + if (!lhs_deref) + return false; + + assert(lhs_deref->type->record_compare(rhs_deref->type)); + void *mem_ctx = ralloc_parent(shader->ir); + + for (unsigned i = 0; i < lhs_deref->type->length; i++) { + const char *field_name = lhs_deref->type->fields.structure[i].name; + ir_dereference *lhs_field = + new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL), + field_name); + ir_dereference *rhs_field = + new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL), + field_name); + ir->insert_after(assign(lhs_field, rhs_field)); + } + + ir->remove(); + progress = true; + return true; +} + +ir_visitor_status +lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) +{ + /* Array and struct copies could involve large amounts of load/store + * operations. To improve register pressure we want to special-case + * these and split them into individual element copies. + * This way we avoid emitting all the loads for the RHS first and + * all the writes for the LHS second and register usage is more + * efficient. + */ + if (check_for_buffer_array_copy(ir)) + return visit_continue_with_parent; + + if (check_for_buffer_struct_copy(ir)) + return visit_continue_with_parent; + + check_ssbo_unsized_array_length_assignment(ir); + check_for_ssbo_store(ir); + return rvalue_visit(ir); +} + +/* Lowers the intrinsic call to a new internal intrinsic that swaps the + * access to the buffer variable in the first parameter by an offset + * and block index. This involves creating the new internal intrinsic + * (i.e. the new function signature). + */ +ir_call * +lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) +{ + /* SSBO atomics usually have 2 parameters, the buffer variable and an + * integer argument. 
The exception is CompSwap, which has an additional
+ * integer parameter.
+ */
+ int param_count = ir->actual_parameters.length();
+ assert(param_count == 2 || param_count == 3);
+
+ /* First argument must be a scalar integer buffer variable */
+ exec_node *param = ir->actual_parameters.get_head();
+ ir_instruction *inst = (ir_instruction *) param;
+ assert(inst->ir_type == ir_type_dereference_variable ||
+ inst->ir_type == ir_type_dereference_array ||
+ inst->ir_type == ir_type_dereference_record ||
+ inst->ir_type == ir_type_swizzle);
+
+ ir_rvalue *deref = (ir_rvalue *) inst;
+ assert(deref->type->is_scalar() && deref->type->is_integer());
+
+ ir_variable *var = deref->variable_referenced();
+ assert(var);
+
+ /* Compute the offset to the start of the dereference and the
+ * block index
+ */
+ void *mem_ctx = ralloc_parent(shader->ir);
+
+ ir_rvalue *offset = NULL;
+ unsigned const_offset;
+ bool row_major;
+ int matrix_columns;
+ unsigned packing = var->get_interface_type()->interface_packing;
+
+ this->buffer_access_type = ssbo_atomic_access;
+
+ setup_for_load_or_store(mem_ctx, var, deref,
+ &offset, &const_offset,
+ &row_major, &matrix_columns,
+ packing);
+ assert(offset);
+ assert(!row_major);
+ assert(matrix_columns == 1);
+
+ ir_rvalue *deref_offset =
+ add(offset, new(mem_ctx) ir_constant(const_offset));
+ ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
+
+ /* Create the new internal function signature that will take a block
+ * index and offset instead of a buffer variable
+ */
+ exec_list sig_params;
+ ir_variable *sig_param = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
+ sig_params.push_tail(sig_param);
+
+ sig_param = new(mem_ctx)
+ ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
+ sig_params.push_tail(sig_param);
+
+ const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
+ glsl_type::int_type : glsl_type::uint_type;
+ sig_param = new(mem_ctx)
+ ir_variable(type, "data1", ir_var_function_in);
+ sig_params.push_tail(sig_param);
+
+ if (param_count == 3) {
+ sig_param = new(mem_ctx)
+ ir_variable(type, "data2", ir_var_function_in);
+ sig_params.push_tail(sig_param);
+ }
+
+ ir_function_signature *sig =
+ new(mem_ctx) ir_function_signature(deref->type,
+ shader_storage_buffer_object);
+ assert(sig);
+ sig->replace_parameters(&sig_params);
+ sig->is_intrinsic = true;
+
+ char func_name[64];
+ sprintf(func_name, "%s_ssbo", ir->callee_name());
+ ir_function *f = new(mem_ctx) ir_function(func_name);
+ f->add_signature(sig);
+
+ /* Now, create the call to the internal intrinsic */
+ exec_list call_params;
+ call_params.push_tail(block_index);
+ call_params.push_tail(deref_offset);
+ param = ir->actual_parameters.get_head()->get_next();
+ ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
+ call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
+ if (param_count == 3) {
+ param = param->get_next();
+ param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
+ call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
+ }
+ ir_dereference_variable *return_deref =
+ ir->return_deref->clone(mem_ctx, NULL);
+ return new(mem_ctx) ir_call(sig, return_deref, &call_params);
+}
+
+ir_call *
+lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
+{
+ exec_list& params = ir->actual_parameters;
+
+ if (params.length() < 2 || params.length() > 3)
+ return ir;
+
+ ir_rvalue *rvalue =
+ ((ir_instruction *) params.get_head())->as_rvalue();
+ if (!rvalue)
+ return ir;
+
+ ir_variable *var = rvalue->variable_referenced();
+ if (!var || !var->is_in_shader_storage_block())
+ return ir;
+
+ const char *callee = ir->callee_name();
+ if (!strcmp("__intrinsic_atomic_add", callee) ||
+ !strcmp("__intrinsic_atomic_min", callee) ||
+ !strcmp("__intrinsic_atomic_max", callee) ||
+ !strcmp("__intrinsic_atomic_and", callee) ||
+ !strcmp("__intrinsic_atomic_or", callee) ||
+ !strcmp("__intrinsic_atomic_xor", callee) ||
+ !strcmp("__intrinsic_atomic_exchange", callee) ||
+ !strcmp("__intrinsic_atomic_comp_swap", callee)) {
+ return lower_ssbo_atomic_intrinsic(ir);
+ }
+
+ return ir;
+}
+
+
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_call *ir)
+{
+ ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
+ if (new_ir != ir) {
+ progress = true;
+ base_ir->replace_with(new_ir);
+ return visit_continue_with_parent;
+ }
+
+ return rvalue_visit(ir);
+}
+
+
+} /* unnamed namespace */
+
+void
+lower_ubo_reference(struct gl_shader *shader)
+{
+ lower_ubo_reference_visitor v(shader);
+
+ /* Loop over the instructions lowering references, because taking
+ * a deref of a UBO array using a UBO dereference as the index will
+ * produce a collection of instructions all of which have cloned
+ * UBO dereferences for that array index.
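+ *
+ * For example (roughly), for something like "b.data[b.index]" the first
+ * iteration lowers the b.index dereference, and a later iteration picks
+ * up the cloned UBO dereferences emitted for the b.data[...] access.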
+ */
+ do {
+ v.progress = false;
+ visit_list_elements(&v, shader->ir);
+ } while (v.progress);
+}
diff --git a/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
new file mode 100644
index 00000000000..278d5450bfb
--- /dev/null
+++ b/src/compiler/glsl/lower_variable_index_to_cond_assign.cpp
@@ -0,0 +1,585 @@
+/*
+ * Copyright © 2010 Luca Barbieri
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file lower_variable_index_to_cond_assign.cpp
+ *
+ * Turns non-constant indexing into array types to a series of
+ * conditional moves of each element into a temporary.
+ *
+ * Pre-DX10 GPUs often don't have a native way to do this operation,
+ * and this works around that.
+ *
+ * The lowering process proceeds as follows. Each non-constant index
+ * found in an r-value is converted to a canonical form \c array[i]. Each
+ * element of the array is conditionally assigned to a temporary by comparing
+ * \c i to a constant index. This is done by cloning the canonical form and
+ * replacing all occurrences of \c i with a constant. Each remaining occurrence
+ * of the canonical form in the IR is replaced with a dereference of the
+ * temporary variable.
+ *
+ * L-values with non-constant indices are handled similarly. In this case,
+ * the RHS of the assignment is assigned to a temporary. The non-constant
+ * index is replaced with the canonical form (just like for r-values). The
+ * temporary is conditionally assigned to each element of the canonical form
+ * by comparing \c i with each index. The same clone-and-replace scheme is
+ * used.
+ */
+
+#include "ir.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+#include "compiler/glsl_types.h"
+#include "main/macros.h"
+
+/**
+ * Generate a comparison value for a block of indices
+ *
+ * Lowering passes for non-constant indexing of arrays, matrices, or vectors
+ * can use this to generate blocks of index comparison values.
+ *
+ * \param instructions List where new instructions will be appended
+ * \param index \c ir_variable containing the desired index
+ * \param base Base value for this block of comparisons
+ * \param components Number of unique index values to compare. This must
+ * be in the range [1, 4].
+ * \param mem_ctx ralloc memory context to be used for all allocations.
+ *
+ * \returns
+ * An \c ir_rvalue that \b must be cloned for each use in conditional
+ * assignments, etc.
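+ *
+ * For example, a call with base == 4 and components == 4 emits,
+ * roughly, "bvec4 cond = equal(ivec4(index), ivec4(4, 5, 6, 7))" and
+ * returns a dereference of that condition variable.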
+ */ +ir_rvalue * +compare_index_block(exec_list *instructions, ir_variable *index, + unsigned base, unsigned components, void *mem_ctx) +{ + ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index); + + assert(index->type->is_scalar()); + assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT); + assert(components >= 1 && components <= 4); + + if (components > 1) { + const ir_swizzle_mask m = { 0, 0, 0, 0, components, false }; + broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m); + } + + /* Compare the desired index value with the next block of four indices. + */ + ir_constant_data test_indices_data; + memset(&test_indices_data, 0, sizeof(test_indices_data)); + test_indices_data.i[0] = base; + test_indices_data.i[1] = base + 1; + test_indices_data.i[2] = base + 2; + test_indices_data.i[3] = base + 3; + + ir_constant *const test_indices = + new(mem_ctx) ir_constant(broadcast_index->type, + &test_indices_data); + + ir_rvalue *const condition_val = + new(mem_ctx) ir_expression(ir_binop_equal, + glsl_type::bvec(components), + broadcast_index, + test_indices); + + ir_variable *const condition = + new(mem_ctx) ir_variable(condition_val->type, + "dereference_condition", + ir_var_temporary); + instructions->push_tail(condition); + + ir_rvalue *const cond_deref = + new(mem_ctx) ir_dereference_variable(condition); + instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0)); + + return cond_deref; +} + +static inline bool +is_array_or_matrix(const ir_rvalue *ir) +{ + return (ir->type->is_array() || ir->type->is_matrix()); +} + +namespace { +/** + * Replace a dereference of a variable with a specified r-value + * + * Each time a dereference of the specified value is replaced, the r-value + * tree is cloned. + */ +class deref_replacer : public ir_rvalue_visitor { +public: + deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value) + : variable_to_replace(variable_to_replace), value(value), + progress(false) + { + assert(this->variable_to_replace != NULL); + assert(this->value != NULL); + } + + virtual void handle_rvalue(ir_rvalue **rvalue) + { + ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); + + if ((dv != NULL) && (dv->var == this->variable_to_replace)) { + this->progress = true; + *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); + } + } + + const ir_variable *variable_to_replace; + ir_rvalue *value; + bool progress; +}; + +/** + * Find a variable index dereference of an array in an rvalue tree + */ +class find_variable_index : public ir_hierarchical_visitor { +public: + find_variable_index() + : deref(NULL) + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *ir) + { + if (is_array_or_matrix(ir->array) + && (ir->array_index->as_constant() == NULL)) { + this->deref = ir; + return visit_stop; + } + + return visit_continue; + } + + /** + * First array dereference found in the tree that has a non-constant index. + */ + ir_dereference_array *deref; +}; + +struct assignment_generator +{ + ir_instruction* base_ir; + ir_dereference *rvalue; + ir_variable *old_index; + bool is_write; + unsigned int write_mask; + ir_variable* var; + + assignment_generator() + : base_ir(NULL), + rvalue(NULL), + old_index(NULL), + is_write(false), + write_mask(0), + var(NULL) + { + } + + void generate(unsigned i, ir_rvalue* condition, exec_list *list) const + { + /* Just clone the rest of the deref chain when trying to get at the + * underlying variable. 
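+ *
+ * For example (roughly), when generating the copy for i == 2, the
+ * cloned a[index] dereference becomes a[2], and the caller-supplied
+ * condition restricts the assignment to the case index == 2.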
+ */
+ void *mem_ctx = ralloc_parent(base_ir);
+
+ /* Clone the old r-value in its entirety. Then replace any occurrences of
+ * the old variable index with the new constant index.
+ */
+ ir_dereference *element = this->rvalue->clone(mem_ctx, NULL);
+ ir_constant *const index = new(mem_ctx) ir_constant(i);
+ deref_replacer r(this->old_index, index);
+ element->accept(&r);
+ assert(r.progress);
+
+ /* Generate a conditional assignment to (or from) the constant indexed
+ * array dereference.
+ */
+ ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
+ ir_assignment *const assignment = (is_write)
+ ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask)
+ : new(mem_ctx) ir_assignment(variable, element, condition);
+
+ list->push_tail(assignment);
+ }
+};
+
+struct switch_generator
+{
+ /* make TFunction a template parameter if you need to use other generators */
+ typedef assignment_generator TFunction;
+ const TFunction& generator;
+
+ ir_variable* index;
+ unsigned linear_sequence_max_length;
+ unsigned condition_components;
+
+ void *mem_ctx;
+
+ switch_generator(const TFunction& generator, ir_variable *index,
+ unsigned linear_sequence_max_length,
+ unsigned condition_components)
+ : generator(generator), index(index),
+ linear_sequence_max_length(linear_sequence_max_length),
+ condition_components(condition_components)
+ {
+ this->mem_ctx = ralloc_parent(index);
+ }
+
+ void linear_sequence(unsigned begin, unsigned end, exec_list *list)
+ {
+ if (begin == end)
+ return;
+
+ /* If the array access is a read, read the first element of this subregion
+ * unconditionally. The remaining tests will possibly overwrite this
+ * value with one of the other array elements.
+ *
+ * This optimization cannot be done for writes because it will cause the
+ * first element of the subregion to be written possibly *in addition* to
+ * one of the other elements.
+ */
+ unsigned first;
+ if (!this->generator.is_write) {
+ this->generator.generate(begin, 0, list);
+ first = begin + 1;
+ } else {
+ first = begin;
+ }
+
+ for (unsigned i = first; i < end; i += 4) {
+ const unsigned comps = MIN2(condition_components, end - i);
+
+ ir_rvalue *const cond_deref =
+ compare_index_block(list, index, i, comps, this->mem_ctx);
+
+ if (comps == 1) {
+ this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL),
+ list);
+ } else {
+ for (unsigned j = 0; j < comps; j++) {
+ ir_rvalue *const cond_swiz =
+ new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL),
+ j, 0, 0, 0, 1);
+
+ this->generator.generate(i + j, cond_swiz, list);
+ }
+ }
+ }
+ }
+
+ void bisect(unsigned begin, unsigned end, exec_list *list)
+ {
+ unsigned middle = (begin + end) >> 1;
+
+ assert(index->type->is_integer());
+
+ ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT)
+ ?
new(this->mem_ctx) ir_constant((unsigned)middle)
+ : new(this->mem_ctx) ir_constant((int)middle);
+
+
+ ir_dereference_variable *deref =
+ new(this->mem_ctx) ir_dereference_variable(this->index);
+
+ ir_expression *less =
+ new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type,
+ deref, middle_c);
+
+ ir_if *if_less = new(this->mem_ctx) ir_if(less);
+
+ generate(begin, middle, &if_less->then_instructions);
+ generate(middle, end, &if_less->else_instructions);
+
+ list->push_tail(if_less);
+ }
+
+ void generate(unsigned begin, unsigned end, exec_list *list)
+ {
+ unsigned length = end - begin;
+ if (length <= this->linear_sequence_max_length)
+ return linear_sequence(begin, end, list);
+ else
+ return bisect(begin, end, list);
+ }
+};
+
+/**
+ * Visitor class for lowering non-constant array indexing to a sequence of
+ * conditional assignments guarded by index comparisons.
+ */
+
+class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
+public:
+ variable_index_to_cond_assign_visitor(gl_shader_stage stage,
+ bool lower_input,
+ bool lower_output,
+ bool lower_temp,
+ bool lower_uniform)
+ {
+ this->progress = false;
+ this->stage = stage;
+ this->lower_inputs = lower_input;
+ this->lower_outputs = lower_output;
+ this->lower_temps = lower_temp;
+ this->lower_uniforms = lower_uniform;
+ }
+
+ bool progress;
+
+ gl_shader_stage stage;
+ bool lower_inputs;
+ bool lower_outputs;
+ bool lower_temps;
+ bool lower_uniforms;
+
+ bool storage_type_needs_lowering(ir_dereference_array *deref) const
+ {
+ /* If a variable isn't eventually the target of this dereference, then
+ * it must be a constant or some sort of anonymous temporary storage.
+ *
+ * FINISHME: Is this correct? Most drivers treat arrays of constants as
+ * FINISHME: uniforms. It seems like this should do the same.
+ */
+ const ir_variable *const var = deref->array->variable_referenced();
+ if (var == NULL)
+ return this->lower_temps;
+
+ switch (var->data.mode) {
+ case ir_var_auto:
+ case ir_var_temporary:
+ return this->lower_temps;
+
+ case ir_var_uniform:
+ case ir_var_shader_storage:
+ return this->lower_uniforms;
+
+ case ir_var_shader_shared:
+ return false;
+
+ case ir_var_function_in:
+ case ir_var_const_in:
+ return this->lower_temps;
+
+ case ir_var_shader_in:
+ /* The input array size is unknown at compile time for non-patch
+ * inputs in TCS and TES. The arrays are sized to
+ * the implementation-dependent limit "gl_MaxPatchVertices", but
+ * the real size is stored in the "gl_PatchVerticesIn" built-in
+ * uniform.
+ *
+ * The TCS input array size is specified by
+ * glPatchParameteri(GL_PATCH_VERTICES).
+ *
+ * The TES input array size is specified by the "vertices" output
+ * layout qualifier in TCS.
+ */
+ if ((stage == MESA_SHADER_TESS_CTRL ||
+ stage == MESA_SHADER_TESS_EVAL) && !var->data.patch)
+ return false;
+ return this->lower_inputs;
+
+ case ir_var_function_out:
+ /* TCS non-patch outputs can only be indexed with "gl_InvocationID".
+ * Other expressions are not allowed.
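+ *
+ * For example, gl_out[gl_InvocationID].gl_Position is valid in a TCS,
+ * while gl_out[i] for an arbitrary i is rejected by the front-end, so
+ * there is nothing for this pass to do here.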
+ */
+ if (stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
+ return false;
+ return this->lower_temps;
+
+ case ir_var_shader_out:
+ return this->lower_outputs;
+
+ case ir_var_function_inout:
+ return this->lower_temps;
+ }
+
+ assert(!"Should not get here.");
+ return false;
+ }
+
+ bool needs_lowering(ir_dereference_array *deref) const
+ {
+ if (deref == NULL || deref->array_index->as_constant()
+ || !is_array_or_matrix(deref->array))
+ return false;
+
+ return this->storage_type_needs_lowering(deref);
+ }
+
+ ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
+ ir_assignment* orig_assign,
+ ir_dereference *orig_base)
+ {
+ assert(is_array_or_matrix(orig_deref->array));
+
+ const unsigned length = (orig_deref->array->type->is_array())
+ ? orig_deref->array->type->length
+ : orig_deref->array->type->matrix_columns;
+
+ void *const mem_ctx = ralloc_parent(base_ir);
+
+ /* Temporary storage for either the result of the dereference of
+ * the array, or the RHS that's being assigned into the
+ * dereference of the array.
+ */
+ ir_variable *var;
+
+ if (orig_assign) {
+ var = new(mem_ctx) ir_variable(orig_assign->rhs->type,
+ "dereference_array_value",
+ ir_var_temporary);
+ base_ir->insert_before(var);
+
+ ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var);
+ ir_assignment *assign = new(mem_ctx) ir_assignment(lhs,
+ orig_assign->rhs,
+ NULL);
+
+ base_ir->insert_before(assign);
+ } else {
+ var = new(mem_ctx) ir_variable(orig_deref->type,
+ "dereference_array_value",
+ ir_var_temporary);
+ base_ir->insert_before(var);
+ }
+
+ /* Store the index to a temporary to avoid reusing its tree. */
+ ir_variable *index =
+ new(mem_ctx) ir_variable(orig_deref->array_index->type,
+ "dereference_array_index", ir_var_temporary);
+ base_ir->insert_before(index);
+
+ ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index);
+ ir_assignment *assign =
+ new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL);
+ base_ir->insert_before(assign);
+
+ orig_deref->array_index = lhs->clone(mem_ctx, NULL);
+
+ assignment_generator ag;
+ ag.rvalue = orig_base;
+ ag.base_ir = base_ir;
+ ag.old_index = index;
+ ag.var = var;
+ if (orig_assign) {
+ ag.is_write = true;
+ ag.write_mask = orig_assign->write_mask;
+ } else {
+ ag.is_write = false;
+ }
+
+ switch_generator sg(ag, index, 4, 4);
+
+ /* If the original assignment has a condition, respect that original
+ * condition! This is accomplished by wrapping the new conditional
+ * assignments in an if-statement that uses the original condition.
+ */
+ if ((orig_assign != NULL) && (orig_assign->condition != NULL)) {
+ /* No need to clone the condition because the IR that it hangs on is
+ * going to be removed from the instruction sequence.
+ */ + ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition); + + sg.generate(0, length, &if_stmt->then_instructions); + base_ir->insert_before(if_stmt); + } else { + exec_list list; + + sg.generate(0, length, &list); + base_ir->insert_before(&list); + } + + return var; + } + + virtual void handle_rvalue(ir_rvalue **pir) + { + if (this->in_assignee) + return; + + if (!*pir) + return; + + ir_dereference_array* orig_deref = (*pir)->as_dereference_array(); + if (needs_lowering(orig_deref)) { + ir_variable *var = + convert_dereference_array(orig_deref, NULL, orig_deref); + assert(var); + *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var); + this->progress = true; + } + } + + ir_visitor_status + visit_leave(ir_assignment *ir) + { + ir_rvalue_visitor::visit_leave(ir); + + find_variable_index f; + ir->lhs->accept(&f); + + if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) { + convert_dereference_array(f.deref, ir, ir->lhs); + ir->remove(); + this->progress = true; + } + + return visit_continue; + } +}; + +} /* anonymous namespace */ + +bool +lower_variable_index_to_cond_assign(gl_shader_stage stage, + exec_list *instructions, + bool lower_input, + bool lower_output, + bool lower_temp, + bool lower_uniform) +{ + variable_index_to_cond_assign_visitor v(stage, + lower_input, + lower_output, + lower_temp, + lower_uniform); + + /* Continue lowering until no progress is made. If there are multiple + * levels of indirection (e.g., non-constant indexing of array elements and + * matrix columns of an array of matrix), each pass will only lower one + * level of indirection. + */ + bool progress_ever = false; + do { + v.progress = false; + visit_list_elements(&v, instructions); + progress_ever = v.progress || progress_ever; + } while (v.progress); + + return progress_ever; +} diff --git a/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp new file mode 100644 index 00000000000..784db085924 --- /dev/null +++ b/src/compiler/glsl/lower_vec_index_to_cond_assign.cpp @@ -0,0 +1,239 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_vec_index_to_cond_assign.cpp + * + * Turns indexing into vector types to a series of conditional moves + * of each channel's swizzle into a temporary. + * + * Most GPUs don't have a native way to do this operation, and this + * works around that. 
For drivers using both this pass and
+ * ir_vec_index_to_swizzle, there's a risk that this pass will happen
+ * before sufficient constant folding to find that the array index is
+ * constant.  However, we hope that other optimization passes,
+ * particularly constant folding of assignment conditions and copy
+ * propagation, will result in the same code in the end.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_optimization.h"
+#include "compiler/glsl_types.h"
+
+namespace {
+
+/**
+ * Visitor class for lowering variable vector indexing into conditional
+ * assignments.
+ */
+
+class ir_vec_index_to_cond_assign_visitor : public ir_hierarchical_visitor {
+public:
+   ir_vec_index_to_cond_assign_visitor()
+   {
+      progress = false;
+   }
+
+   ir_rvalue *convert_vec_index_to_cond_assign(void *mem_ctx,
+                                               ir_rvalue *orig_vector,
+                                               ir_rvalue *orig_index,
+                                               const glsl_type *type);
+
+   ir_rvalue *convert_vector_extract_to_cond_assign(ir_rvalue *ir);
+
+   virtual ir_visitor_status visit_enter(ir_expression *);
+   virtual ir_visitor_status visit_enter(ir_swizzle *);
+   virtual ir_visitor_status visit_leave(ir_assignment *);
+   virtual ir_visitor_status visit_enter(ir_return *);
+   virtual ir_visitor_status visit_enter(ir_call *);
+   virtual ir_visitor_status visit_enter(ir_if *);
+
+   bool progress;
+};
+
+} /* anonymous namespace */
+
+ir_rvalue *
+ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ctx,
+                                                                      ir_rvalue *orig_vector,
+                                                                      ir_rvalue *orig_index,
+                                                                      const glsl_type *type)
+{
+   ir_assignment *assign, *value_assign;
+   ir_variable *index, *var, *value;
+   ir_dereference *deref, *deref_value;
+   unsigned i;
+
+
+   exec_list list;
+
+   /* Store the index to a temporary to avoid reusing its tree. */
+   assert(orig_index->type == glsl_type::int_type ||
+          orig_index->type == glsl_type::uint_type);
+   index = new(base_ir) ir_variable(orig_index->type,
+                                    "vec_index_tmp_i",
+                                    ir_var_temporary);
+   list.push_tail(index);
+   deref = new(base_ir) ir_dereference_variable(index);
+   assign = new(base_ir) ir_assignment(deref, orig_index, NULL);
+   list.push_tail(assign);
+
+   /* Store the value inside a temp, thus avoiding duplicating the matrix */
+   value = new(base_ir) ir_variable(orig_vector->type, "vec_value_tmp",
+                                    ir_var_temporary);
+   list.push_tail(value);
+   deref_value = new(base_ir) ir_dereference_variable(value);
+   value_assign = new(base_ir) ir_assignment(deref_value, orig_vector);
+   list.push_tail(value_assign);
+
+   /* Temporary where we store whichever value we swizzle out. */
+   var = new(base_ir) ir_variable(type, "vec_index_tmp_v",
+                                  ir_var_temporary);
+   list.push_tail(var);
+
+   /* Generate a single comparison condition "mask" for all of the components
+    * in the vector.
+    */
+   ir_rvalue *const cond_deref =
+      compare_index_block(&list, index, 0,
+                          orig_vector->type->vector_elements,
+                          mem_ctx);
+
+   /* Generate a conditional move of each vector element to the temp. */
+   for (i = 0; i < orig_vector->type->vector_elements; i++) {
+      ir_rvalue *condition_swizzle =
+         new(base_ir) ir_swizzle(cond_deref->clone(mem_ctx, NULL),
+                                 i, 0, 0, 0, 1);
+
+      /* Just clone the rest of the deref chain when trying to get at the
+       * underlying variable.
+       */
+      ir_rvalue *swizzle =
+         new(base_ir) ir_swizzle(deref_value->clone(mem_ctx, NULL),
+                                 i, 0, 0, 0, 1);
+
+      deref = new(base_ir) ir_dereference_variable(var);
+      assign = new(base_ir) ir_assignment(deref, swizzle, condition_swizzle);
+      list.push_tail(assign);
+   }
+
+   /* Put all of the new instructions in the IR stream before the old
+    * instruction.
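What the generated instructions compute, modeled in plain C++ (vec_index_lowered is a hypothetical stand-in; the real pass builds the comparison "mask" with compare_index_block and then emits one conditionally-masked move per channel):

    #include <cstdio>

    static float vec_index_lowered(const float v[4], int i)
    {
       /* the comparison mask: one boolean per channel, (i == c) */
       bool cond[4];
       for (int c = 0; c < 4; c++)
          cond[c] = (i == c);

       float tmp = 0.0f;     /* plays the role of "vec_index_tmp_v" */
       for (int c = 0; c < 4; c++)
          if (cond[c])       /* conditional move of channel c */
             tmp = v[c];
       return tmp;
    }

    int main()
    {
       const float v[4] = { 1.0f, 2.0f, 3.0f, 4.0f };
       printf("%g %g\n", vec_index_lowered(v, 1), vec_index_lowered(v, 3));
       return 0;
    }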
+ */ + base_ir->insert_before(&list); + + this->progress = true; + return new(base_ir) ir_dereference_variable(var); +} + +ir_rvalue * +ir_vec_index_to_cond_assign_visitor::convert_vector_extract_to_cond_assign(ir_rvalue *ir) +{ + ir_expression *const expr = ir->as_expression(); + + if (expr == NULL || expr->operation != ir_binop_vector_extract) + return ir; + + return convert_vec_index_to_cond_assign(ralloc_parent(ir), + expr->operands[0], + expr->operands[1], + ir->type); +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_enter(ir_expression *ir) +{ + unsigned int i; + + for (i = 0; i < ir->get_num_operands(); i++) { + ir->operands[i] = convert_vector_extract_to_cond_assign(ir->operands[i]); + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_enter(ir_swizzle *ir) +{ + /* Can't be hit from normal GLSL, since you can't swizzle a scalar (which + * the result of indexing a vector is. But maybe at some point we'll end up + * using swizzling of scalars for vector construction. + */ + ir->val = convert_vector_extract_to_cond_assign(ir->val); + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir) +{ + ir->rhs = convert_vector_extract_to_cond_assign(ir->rhs); + + if (ir->condition) { + ir->condition = convert_vector_extract_to_cond_assign(ir->condition); + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_enter(ir_call *ir) +{ + foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { + ir_rvalue *new_param = convert_vector_extract_to_cond_assign(param); + + if (new_param != param) { + param->replace_with(new_param); + } + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_enter(ir_return *ir) +{ + if (ir->value) { + ir->value = convert_vector_extract_to_cond_assign(ir->value); + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_cond_assign_visitor::visit_enter(ir_if *ir) +{ + ir->condition = convert_vector_extract_to_cond_assign(ir->condition); + + return visit_continue; +} + +bool +do_vec_index_to_cond_assign(exec_list *instructions) +{ + ir_vec_index_to_cond_assign_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_vec_index_to_swizzle.cpp b/src/compiler/glsl/lower_vec_index_to_swizzle.cpp new file mode 100644 index 00000000000..8b18e95509c --- /dev/null +++ b/src/compiler/glsl/lower_vec_index_to_swizzle.cpp @@ -0,0 +1,171 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_vec_index_to_swizzle.cpp + * + * Turns constant indexing into vector types to swizzles. This will + * let other swizzle-aware optimization passes catch these constructs, + * and codegen backends not have to worry about this case. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" +#include "main/macros.h" + +/** + * Visitor class for replacing expressions with ir_constant values. + */ + +namespace { + +class ir_vec_index_to_swizzle_visitor : public ir_hierarchical_visitor { +public: + ir_vec_index_to_swizzle_visitor() + { + progress = false; + } + + ir_rvalue *convert_vector_extract_to_swizzle(ir_rvalue *val); + + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_swizzle *); + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_return *); + virtual ir_visitor_status visit_enter(ir_call *); + virtual ir_visitor_status visit_enter(ir_if *); + + bool progress; +}; + +} /* anonymous namespace */ + +ir_rvalue * +ir_vec_index_to_swizzle_visitor::convert_vector_extract_to_swizzle(ir_rvalue *ir) +{ + ir_expression *const expr = ir->as_expression(); + if (expr == NULL || expr->operation != ir_binop_vector_extract) + return ir; + + ir_constant *const idx = expr->operands[1]->constant_expression_value(); + if (idx == NULL) + return ir; + + void *ctx = ralloc_parent(ir); + this->progress = true; + + /* Page 40 of the GLSL 1.20 spec says: + * + * "When indexing with non-constant expressions, behavior is undefined + * if the index is negative, or greater than or equal to the size of + * the vector." + * + * The quoted spec text mentions non-constant expressions, but this code + * operates on constants. These constants are the result of non-constant + * expressions that have been optimized to constants. The common case here + * is a loop counter from an unrolled loop that is used to index a vector. + * + * The ir_swizzle constructor gets angry if the index is negative or too + * large. For simplicity sake, just clamp the index to [0, size-1]. + */ + const int i = CLAMP(idx->value.i[0], 0, + (int) expr->operands[0]->type->vector_elements - 1); + + return new(ctx) ir_swizzle(expr->operands[0], i, 0, 0, 0, 1); +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_expression *ir) +{ + unsigned int i; + + for (i = 0; i < ir->get_num_operands(); i++) { + ir->operands[i] = convert_vector_extract_to_swizzle(ir->operands[i]); + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_swizzle *ir) +{ + /* Can't be hit from normal GLSL, since you can't swizzle a scalar (which + * the result of indexing a vector is. But maybe at some point we'll end up + * using swizzling of scalars for vector construction. 
+ */ + ir->val = convert_vector_extract_to_swizzle(ir->val); + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_assignment *ir) +{ + ir->rhs = convert_vector_extract_to_swizzle(ir->rhs); + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_call *ir) +{ + foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { + ir_rvalue *new_param = convert_vector_extract_to_swizzle(param); + + if (new_param != param) { + param->replace_with(new_param); + } + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_return *ir) +{ + if (ir->value) { + ir->value = convert_vector_extract_to_swizzle(ir->value); + } + + return visit_continue; +} + +ir_visitor_status +ir_vec_index_to_swizzle_visitor::visit_enter(ir_if *ir) +{ + ir->condition = convert_vector_extract_to_swizzle(ir->condition); + + return visit_continue; +} + +bool +do_vec_index_to_swizzle(exec_list *instructions) +{ + ir_vec_index_to_swizzle_visitor v; + + v.run(instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_vector.cpp b/src/compiler/glsl/lower_vector.cpp new file mode 100644 index 00000000000..a658410ae6f --- /dev/null +++ b/src/compiler/glsl/lower_vector.cpp @@ -0,0 +1,228 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_vector.cpp + * IR lowering pass to remove some types of ir_quadop_vector + * + * \author Ian Romanick + */ + +#include "ir.h" +#include "ir_rvalue_visitor.h" + +namespace { + +class lower_vector_visitor : public ir_rvalue_visitor { +public: + lower_vector_visitor() : dont_lower_swz(false), progress(false) + { + /* empty */ + } + + void handle_rvalue(ir_rvalue **rvalue); + + /** + * Should SWZ-like expressions be lowered? + */ + bool dont_lower_swz; + + bool progress; +}; + +} /* anonymous namespace */ + +/** + * Determine if an IR expression tree looks like an extended swizzle + * + * Extended swizzles consist of access of a single vector source (with possible + * per component negation) and the constants -1, 0, or 1. + */ +bool +is_extended_swizzle(ir_expression *ir) +{ + /* Track any variables that are accessed by this expression. 
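The clamping rule above is easy to state on its own; a small sketch using std::clamp (clamp_component is illustrative, not a Mesa function):

    #include <algorithm>
    #include <cstdio>

    /* GLSL leaves out-of-range indexing undefined, so any in-range
     * component is an acceptable result; this pass picks the clamped one. */
    static int clamp_component(int idx, int vector_elements)
    {
       return std::clamp(idx, 0, vector_elements - 1);
    }

    int main()
    {
       printf("%d %d %d\n",
              clamp_component(-1, 4),   /* -> 0 */
              clamp_component(2, 4),    /* -> 2 */
              clamp_component(7, 4));   /* -> 3 */
       return 0;
    }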
+ */
+   ir_variable *var = NULL;
+
+   assert(ir->operation == ir_quadop_vector);
+
+   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
+      ir_rvalue *op = ir->operands[i];
+
+      while (op != NULL) {
+         switch (op->ir_type) {
+         case ir_type_constant: {
+            const ir_constant *const c = op->as_constant();
+
+            if (!c->is_one() && !c->is_zero() && !c->is_negative_one())
+               return false;
+
+            op = NULL;
+            break;
+         }
+
+         case ir_type_dereference_variable: {
+            ir_dereference_variable *const d = (ir_dereference_variable *) op;
+
+            if ((var != NULL) && (var != d->var))
+               return false;
+
+            var = d->var;
+            op = NULL;
+            break;
+         }
+
+         case ir_type_expression: {
+            ir_expression *const ex = (ir_expression *) op;
+
+            if (ex->operation != ir_unop_neg)
+               return false;
+
+            op = ex->operands[0];
+            break;
+         }
+
+         case ir_type_swizzle:
+            op = ((ir_swizzle *) op)->val;
+            break;
+
+         default:
+            return false;
+         }
+      }
+   }
+
+   return true;
+}
+
+void
+lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+   if (!*rvalue)
+      return;
+
+   ir_expression *expr = (*rvalue)->as_expression();
+   if ((expr == NULL) || (expr->operation != ir_quadop_vector))
+      return;
+
+   if (this->dont_lower_swz && is_extended_swizzle(expr))
+      return;
+
+   /* FINISHME: Is this the right thing to use for the ralloc context?
+    */
+   void *const mem_ctx = expr;
+
+   assert(expr->type->vector_elements == expr->get_num_operands());
+
+   /* Generate a temporary with the same type as the ir_quadop_vector
+    * operation.
+    */
+   ir_variable *const temp =
+      new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary);
+
+   this->base_ir->insert_before(temp);
+
+   /* Counter of the number of components collected so far.
+    */
+   unsigned assigned;
+
+   /* Write-mask in the destination that receives the components counted by
+    * 'assigned'.
+    */
+   unsigned write_mask;
+
+
+   /* Generate up to four assignments to that variable.  Try to group
+    * component assignments together:
+    *
+    * - All constant components can be assigned at once.
+    * - All assignments of components from a single variable with the same
+    *   unary operator can be assigned at once.
+    */
+   ir_constant_data d = { { 0 } };
+
+   assigned = 0;
+   write_mask = 0;
+   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
+      const ir_constant *const c = expr->operands[i]->as_constant();
+
+      if (c == NULL)
+         continue;
+
+      switch (expr->type->base_type) {
+      case GLSL_TYPE_UINT:  d.u[assigned] = c->value.u[0]; break;
+      case GLSL_TYPE_INT:   d.i[assigned] = c->value.i[0]; break;
+      case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break;
+      case GLSL_TYPE_BOOL:  d.b[assigned] = c->value.b[0]; break;
+      default:              assert(!"Should not get here."); break;
+      }
+
+      write_mask |= (1U << i);
+      assigned++;
+   }
+
+   assert((write_mask == 0) == (assigned == 0));
+
+   /* If there were constant values, generate an assignment.
+    */
+   if (assigned > 0) {
+      ir_constant *const c =
+         new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type,
+                                                          assigned, 1),
+                                  &d);
+      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
+      ir_assignment *const assign =
+         new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask);
+
+      this->base_ir->insert_before(assign);
+   }
+
+   /* FINISHME: This should try to coalesce assignments.
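The constant-gathering step above can be modeled in a few lines of plain C++ (the toy arrays stand in for the operands of a (vector a b 0.0 1.0) expression, with the first two components non-constant):

    #include <cstdio>

    int main()
    {
       const bool  is_const[4]  = { false, false, true, true };
       const float const_val[4] = { 0.0f, 0.0f, 0.0f, 1.0f };

       float packed[4] = { 0 };
       unsigned write_mask = 0, assigned = 0;

       for (unsigned i = 0; i < 4; i++) {
          if (!is_const[i])
             continue;
          packed[assigned++] = const_val[i]; /* densely packed constants   */
          write_mask |= 1u << i;             /* channels they will land in */
       }

       /* One masked assignment then covers all constants: tmp.zw = vec2(0, 1) */
       printf("assigned=%u write_mask=0x%x packed={%g, %g}\n",
              assigned, write_mask, packed[0], packed[1]);
       return 0;
    }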
+ */ + for (unsigned i = 0; i < expr->type->vector_elements; i++) { + if (expr->operands[i]->ir_type == ir_type_constant) + continue; + + ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp); + ir_assignment *const assign = + new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i)); + + this->base_ir->insert_before(assign); + assigned++; + } + + assert(assigned == expr->type->vector_elements); + + *rvalue = new(mem_ctx) ir_dereference_variable(temp); + this->progress = true; +} + +bool +lower_quadop_vector(exec_list *instructions, bool dont_lower_swz) +{ + lower_vector_visitor v; + + v.dont_lower_swz = dont_lower_swz; + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_vector_derefs.cpp b/src/compiler/glsl/lower_vector_derefs.cpp new file mode 100644 index 00000000000..4a5d6f0da4c --- /dev/null +++ b/src/compiler/glsl/lower_vector_derefs.cpp @@ -0,0 +1,104 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
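The lower_vector_derefs pass that follows turns v[i] reads into ir_binop_vector_extract and v[i] = x writes into ir_triop_vector_insert; the semantics of those two operations, sketched as plain C++ (vec4, vector_extract and vector_insert here are toy stand-ins, not Mesa types):

    #include <cstdio>

    struct vec4 { float c[4]; };

    /* vector_extract: dynamic read of one channel */
    static float vector_extract(const vec4 &v, int i)
    {
       return v.c[i];
    }

    /* vector_insert: a whole-vector value with one channel replaced */
    static vec4 vector_insert(vec4 v, float x, int i)
    {
       v.c[i] = x;
       return v;
    }

    int main()
    {
       vec4 v = { { 1.0f, 2.0f, 3.0f, 4.0f } };
       v = vector_insert(v, 9.0f, 2);
       printf("%g %g\n", vector_extract(v, 2), vector_extract(v, 0));
       return 0;
    }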
+ */ +#include "ir.h" +#include "ir_builder.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" + +using namespace ir_builder; + +namespace { + +class vector_deref_visitor : public ir_rvalue_enter_visitor { +public: + vector_deref_visitor() + : progress(false) + { + } + + virtual ~vector_deref_visitor() + { + } + + virtual void handle_rvalue(ir_rvalue **rv); + virtual ir_visitor_status visit_enter(ir_assignment *ir); + + bool progress; +}; + +} /* anonymous namespace */ + +ir_visitor_status +vector_deref_visitor::visit_enter(ir_assignment *ir) +{ + if (!ir->lhs || ir->lhs->ir_type != ir_type_dereference_array) + return ir_rvalue_enter_visitor::visit_enter(ir); + + ir_dereference_array *const deref = (ir_dereference_array *) ir->lhs; + if (!deref->array->type->is_vector()) + return ir_rvalue_enter_visitor::visit_enter(ir); + + ir_dereference *const new_lhs = (ir_dereference *) deref->array; + ir->set_lhs(new_lhs); + + ir_constant *old_index_constant = deref->array_index->constant_expression_value(); + void *mem_ctx = ralloc_parent(ir); + if (!old_index_constant) { + ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, + new_lhs->type, + new_lhs->clone(mem_ctx, NULL), + ir->rhs, + deref->array_index); + ir->write_mask = (1 << new_lhs->type->vector_elements) - 1; + } else { + ir->write_mask = 1 << old_index_constant->get_int_component(0); + } + + return ir_rvalue_enter_visitor::visit_enter(ir); +} + +void +vector_deref_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL || (*rv)->ir_type != ir_type_dereference_array) + return; + + ir_dereference_array *const deref = (ir_dereference_array *) *rv; + if (!deref->array->type->is_vector()) + return; + + void *mem_ctx = ralloc_parent(deref); + *rv = new(mem_ctx) ir_expression(ir_binop_vector_extract, + deref->array, + deref->array_index); +} + +bool +lower_vector_derefs(gl_shader *shader) +{ + vector_deref_visitor v; + + visit_list_elements(&v, shader->ir); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_vector_insert.cpp b/src/compiler/glsl/lower_vector_insert.cpp new file mode 100644 index 00000000000..26d31b03c12 --- /dev/null +++ b/src/compiler/glsl/lower_vector_insert.cpp @@ -0,0 +1,146 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
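lower_vector_insert, whose body follows, expands a non-constant-index ir_triop_vector_insert into a copy plus one comparison-guarded write per channel; the effect, as a toy C++ function (insert_lowered is illustrative):

    #include <cstdio>

    struct vec4 { float c[4]; };

    static vec4 insert_lowered(vec4 vec, float scalar, unsigned index)
    {
       vec4 t = vec;                /* t = vec                           */
       for (unsigned i = 0; i < 4; i++)
          if (index == i)           /* if (index == i) t.<chan> = scalar */
             t.c[i] = scalar;
       return t;
    }

    int main()
    {
       vec4 v = { { 0.0f, 0.0f, 0.0f, 0.0f } };
       v = insert_lowered(v, 5.0f, 3);
       printf("%g %g\n", v.c[3], v.c[0]);
       return 0;
    }

(With a constant index the pass instead emits just two assignments, the second with a single-channel write mask.)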
+ */ +#include "ir.h" +#include "ir_builder.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" + +using namespace ir_builder; + +namespace { + +class vector_insert_visitor : public ir_rvalue_visitor { +public: + vector_insert_visitor(bool lower_nonconstant_index) + : progress(false), lower_nonconstant_index(lower_nonconstant_index) + { + factory.instructions = &factory_instructions; + } + + virtual ~vector_insert_visitor() + { + assert(factory_instructions.is_empty()); + } + + virtual void handle_rvalue(ir_rvalue **rv); + + ir_factory factory; + exec_list factory_instructions; + bool progress; + bool lower_nonconstant_index; +}; + +} /* anonymous namespace */ + +void +vector_insert_visitor::handle_rvalue(ir_rvalue **rv) +{ + if (*rv == NULL || (*rv)->ir_type != ir_type_expression) + return; + + ir_expression *const expr = (ir_expression *) *rv; + + if (likely(expr->operation != ir_triop_vector_insert)) + return; + + factory.mem_ctx = ralloc_parent(expr); + + ir_constant *const idx = expr->operands[2]->constant_expression_value(); + if (idx != NULL) { + /* Replace (vector_insert (vec) (scalar) (index)) with a dereference of + * a new temporary. The new temporary gets assigned as + * + * t = vec + * t.mask = scalar + * + * where mask is the component selected by index. + */ + ir_variable *const temp = + factory.make_temp(expr->operands[0]->type, "vec_tmp"); + + const int mask = 1 << idx->value.i[0]; + + factory.emit(assign(temp, expr->operands[0])); + factory.emit(assign(temp, expr->operands[1], mask)); + + this->progress = true; + *rv = new(factory.mem_ctx) ir_dereference_variable(temp); + } else if (this->lower_nonconstant_index) { + /* Replace (vector_insert (vec) (scalar) (index)) with a dereference of + * a new temporary. The new temporary gets assigned as + * + * t = vec + * if (index == 0) + * t.x = scalar + * if (index == 1) + * t.y = scalar + * if (index == 2) + * t.z = scalar + * if (index == 3) + * t.w = scalar + */ + ir_variable *const temp = + factory.make_temp(expr->operands[0]->type, "vec_tmp"); + + ir_variable *const src_temp = + factory.make_temp(expr->operands[1]->type, "src_temp"); + + factory.emit(assign(temp, expr->operands[0])); + factory.emit(assign(src_temp, expr->operands[1])); + + assert(expr->operands[2]->type == glsl_type::int_type || + expr->operands[2]->type == glsl_type::uint_type); + + for (unsigned i = 0; i < expr->type->vector_elements; i++) { + ir_constant *const cmp_index = + ir_constant::zero(factory.mem_ctx, expr->operands[2]->type); + cmp_index->value.u[0] = i; + + ir_variable *const cmp_result = + factory.make_temp(glsl_type::bool_type, "index_condition"); + + factory.emit(assign(cmp_result, + equal(expr->operands[2]->clone(factory.mem_ctx, + NULL), + cmp_index))); + + factory.emit(if_tree(cmp_result, + assign(temp, src_temp, WRITEMASK_X << i))); + } + + this->progress = true; + *rv = new(factory.mem_ctx) ir_dereference_variable(temp); + } + + base_ir->insert_before(factory.instructions); +} + +bool +lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index) +{ + vector_insert_visitor v(lower_nonconstant_index); + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/lower_vertex_id.cpp b/src/compiler/glsl/lower_vertex_id.cpp new file mode 100644 index 00000000000..3da7a2f1b3b --- /dev/null +++ b/src/compiler/glsl/lower_vertex_id.cpp @@ -0,0 +1,144 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * 
copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file lower_vertex_id.cpp + * + * There exists hardware, such as i965, that does not implement the OpenGL + * semantic for gl_VertexID. Instead, that hardware does not include the + * value of basevertex in the gl_VertexID value. To implement the OpenGL + * semantic, we'll have to convert gl_Vertex_ID to + * gl_VertexIDMESA+gl_BaseVertexMESA. + */ + +#include "glsl_symbol_table.h" +#include "ir_hierarchical_visitor.h" +#include "ir.h" +#include "ir_builder.h" +#include "linker.h" +#include "program/prog_statevars.h" + +namespace { + +class lower_vertex_id_visitor : public ir_hierarchical_visitor { +public: + explicit lower_vertex_id_visitor(ir_function_signature *main_sig, + exec_list *ir_list) + : progress(false), VertexID(NULL), gl_VertexID(NULL), + gl_BaseVertex(NULL), main_sig(main_sig), ir_list(ir_list) + { + foreach_in_list(ir_instruction, ir, ir_list) { + ir_variable *const var = ir->as_variable(); + + if (var != NULL && var->data.mode == ir_var_system_value && + var->data.location == SYSTEM_VALUE_BASE_VERTEX) { + gl_BaseVertex = var; + break; + } + } + } + + virtual ir_visitor_status visit(ir_dereference_variable *); + + bool progress; + +private: + ir_variable *VertexID; + ir_variable *gl_VertexID; + ir_variable *gl_BaseVertex; + + ir_function_signature *main_sig; + exec_list *ir_list; +}; + +} /* anonymous namespace */ + +ir_visitor_status +lower_vertex_id_visitor::visit(ir_dereference_variable *ir) +{ + if (ir->var->data.mode != ir_var_system_value || + ir->var->data.location != SYSTEM_VALUE_VERTEX_ID) + return visit_continue; + + if (VertexID == NULL) { + const glsl_type *const int_t = glsl_type::int_type; + void *const mem_ctx = ralloc_parent(ir); + + VertexID = new(mem_ctx) ir_variable(int_t, "__VertexID", + ir_var_temporary); + ir_list->push_head(VertexID); + + gl_VertexID = new(mem_ctx) ir_variable(int_t, "gl_VertexIDMESA", + ir_var_system_value); + gl_VertexID->data.how_declared = ir_var_declared_implicitly; + gl_VertexID->data.read_only = true; + gl_VertexID->data.location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + gl_VertexID->data.explicit_location = true; + gl_VertexID->data.explicit_index = 0; + ir_list->push_head(gl_VertexID); + + if (gl_BaseVertex == NULL) { + gl_BaseVertex = new(mem_ctx) ir_variable(int_t, "gl_BaseVertex", + ir_var_system_value); + gl_BaseVertex->data.how_declared = ir_var_declared_implicitly; + gl_BaseVertex->data.read_only = true; + gl_BaseVertex->data.location = SYSTEM_VALUE_BASE_VERTEX; + 
gl_BaseVertex->data.explicit_location = true;
+         gl_BaseVertex->data.explicit_index = 0;
+         ir_list->push_head(gl_BaseVertex);
+      }
+
+      ir_instruction *const inst =
+         ir_builder::assign(VertexID,
+                            ir_builder::add(gl_VertexID, gl_BaseVertex));
+
+      main_sig->body.push_head(inst);
+   }
+
+   ir->var = VertexID;
+   progress = true;
+
+   return visit_continue;
+}
+
+bool
+lower_vertex_id(gl_shader *shader)
+{
+   /* gl_VertexID only exists in the vertex shader.
+    */
+   if (shader->Stage != MESA_SHADER_VERTEX)
+      return false;
+
+   ir_function_signature *const main_sig =
+      _mesa_get_main_function_signature(shader);
+   if (main_sig == NULL) {
+      assert(main_sig != NULL);
+      return false;
+   }
+
+   lower_vertex_id_visitor v(main_sig, shader->ir);
+
+   v.run(shader->ir);
+
+   return v.progress;
+}
diff --git a/src/compiler/glsl/main.cpp b/src/compiler/glsl/main.cpp
new file mode 100644
index 00000000000..df93a013ede
--- /dev/null
+++ b/src/compiler/glsl/main.cpp
@@ -0,0 +1,431 @@
+/*
+ * Copyright © 2008, 2009 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <getopt.h>
+
+/** @file main.cpp
+ *
+ * This file is the main() routine and scaffolding for producing
+ * builtin_compiler (which doesn't include builtins itself and is used
+ * to generate the profile information for builtin_function.cpp), and
+ * for glsl_compiler (which does include builtins and can be used to
+ * offline compile GLSL code and examine the resulting GLSL IR).
+ */
+
+#include "ast.h"
+#include "glsl_parser_extras.h"
+#include "ir_optimization.h"
+#include "program.h"
+#include "program/hash_table.h"
+#include "loop_analysis.h"
+#include "standalone_scaffolding.h"
+
+static int glsl_version = 330;
+
+static void
+initialize_context(struct gl_context *ctx, gl_api api)
+{
+   initialize_context_to_defaults(ctx, api);
+
+   /* The standalone compiler needs to claim support for almost
+    * everything in order to compile the built-in functions.
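The option handling further down uses the POSIX getopt_long interface; a minimal self-contained sketch of that pattern (flag-setting long options plus one option taking an argument; assumes a POSIX system):

    #include <getopt.h>
    #include <cstdio>
    #include <cstdlib>

    static int dump_ast = 0;

    int main(int argc, char **argv)
    {
       const struct option opts[] = {
          { "dump-ast", no_argument,       &dump_ast, 1   }, /* sets the flag */
          { "version",  required_argument, NULL,      'v' }, /* returns 'v'   */
          { NULL, 0, NULL, 0 }
       };

       int c;
       while ((c = getopt_long(argc, argv, "", opts, NULL)) != -1) {
          if (c == 'v')
             printf("requested GLSL version %ld\n", strtol(optarg, NULL, 10));
       }
       printf("dump_ast = %d\n", dump_ast);
       return 0;
    }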
+ */ + ctx->Const.GLSLVersion = glsl_version; + ctx->Extensions.ARB_ES3_compatibility = true; + ctx->Const.MaxComputeWorkGroupCount[0] = 65535; + ctx->Const.MaxComputeWorkGroupCount[1] = 65535; + ctx->Const.MaxComputeWorkGroupCount[2] = 65535; + ctx->Const.MaxComputeWorkGroupSize[0] = 1024; + ctx->Const.MaxComputeWorkGroupSize[1] = 1024; + ctx->Const.MaxComputeWorkGroupSize[2] = 64; + ctx->Const.MaxComputeWorkGroupInvocations = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */ + + switch (ctx->Const.GLSLVersion) { + case 100: + ctx->Const.MaxClipPlanes = 0; + ctx->Const.MaxCombinedTextureImageUnits = 8; + ctx->Const.MaxDrawBuffers = 2; + ctx->Const.MinProgramTexelOffset = 0; + ctx->Const.MaxProgramTexelOffset = 0; + ctx->Const.MaxLights = 0; + ctx->Const.MaxTextureCoordUnits = 0; + ctx->Const.MaxTextureUnits = 8; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 8; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 128 * 4; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = + ctx->Const.MaxCombinedTextureImageUnits; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 16 * 4; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ + + ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4; + break; + case 110: + case 120: + ctx->Const.MaxClipPlanes = 6; + ctx->Const.MaxCombinedTextureImageUnits = 2; + ctx->Const.MaxDrawBuffers = 1; + ctx->Const.MinProgramTexelOffset = 0; + ctx->Const.MaxProgramTexelOffset = 0; + ctx->Const.MaxLights = 8; + ctx->Const.MaxTextureCoordUnits = 2; + ctx->Const.MaxTextureUnits = 2; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = + ctx->Const.MaxCombinedTextureImageUnits; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ + + ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4; + break; + case 130: + case 140: + ctx->Const.MaxClipPlanes = 8; + ctx->Const.MaxCombinedTextureImageUnits = 16; + ctx->Const.MaxDrawBuffers = 8; + ctx->Const.MinProgramTexelOffset = -8; + ctx->Const.MaxProgramTexelOffset = 7; + ctx->Const.MaxLights = 8; + ctx->Const.MaxTextureCoordUnits = 8; + ctx->Const.MaxTextureUnits = 2; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; + 
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ + + ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4; + break; + case 150: + case 330: + ctx->Const.MaxClipPlanes = 8; + ctx->Const.MaxDrawBuffers = 8; + ctx->Const.MinProgramTexelOffset = -8; + ctx->Const.MaxProgramTexelOffset = 7; + ctx->Const.MaxLights = 8; + ctx->Const.MaxTextureCoordUnits = 8; + ctx->Const.MaxTextureUnits = 2; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64; + + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ + + ctx->Const.MaxCombinedTextureImageUnits = + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits + + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; + + ctx->Const.MaxGeometryOutputVertices = 256; + ctx->Const.MaxGeometryTotalOutputComponents = 1024; + + ctx->Const.MaxVarying = 60 / 4; + break; + case 300: + ctx->Const.MaxClipPlanes = 8; + ctx->Const.MaxCombinedTextureImageUnits = 32; + ctx->Const.MaxDrawBuffers = 4; + ctx->Const.MinProgramTexelOffset = -8; + ctx->Const.MaxProgramTexelOffset = 7; + ctx->Const.MaxLights = 0; + ctx->Const.MaxTextureCoordUnits = 0; + ctx->Const.MaxTextureUnits = 0; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 16 * 4; + + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 224; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 15 * 4; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ + + ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents / 4; + break; + } + + ctx->Const.GenerateTemporaryNames = true; + ctx->Const.MaxPatchVertices = 32; + + ctx->Driver.NewShader = 
_mesa_new_shader; +} + +/* Returned string will have 'ctx' as its ralloc owner. */ +static char * +load_text_file(void *ctx, const char *file_name) +{ + char *text = NULL; + size_t size; + size_t total_read = 0; + FILE *fp = fopen(file_name, "rb"); + + if (!fp) { + return NULL; + } + + fseek(fp, 0L, SEEK_END); + size = ftell(fp); + fseek(fp, 0L, SEEK_SET); + + text = (char *) ralloc_size(ctx, size + 1); + if (text != NULL) { + do { + size_t bytes = fread(text + total_read, + 1, size - total_read, fp); + if (bytes < size - total_read) { + free(text); + text = NULL; + goto error; + } + + if (bytes == 0) { + break; + } + + total_read += bytes; + } while (total_read < size); + + text[total_read] = '\0'; +error:; + } + + fclose(fp); + + return text; +} + +int dump_ast = 0; +int dump_hir = 0; +int dump_lir = 0; +int do_link = 0; + +const struct option compiler_opts[] = { + { "dump-ast", no_argument, &dump_ast, 1 }, + { "dump-hir", no_argument, &dump_hir, 1 }, + { "dump-lir", no_argument, &dump_lir, 1 }, + { "link", no_argument, &do_link, 1 }, + { "version", required_argument, NULL, 'v' }, + { NULL, 0, NULL, 0 } +}; + +/** + * \brief Print proper usage and exit with failure. + */ +void +usage_fail(const char *name) +{ + + const char *header = + "usage: %s [options] \n" + "\n" + "Possible options are:\n"; + printf(header, name); + for (const struct option *o = compiler_opts; o->name != 0; ++o) { + printf(" --%s\n", o->name); + } + exit(EXIT_FAILURE); +} + + +void +compile_shader(struct gl_context *ctx, struct gl_shader *shader) +{ + struct _mesa_glsl_parse_state *state = + new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader); + + _mesa_glsl_compile_shader(ctx, shader, dump_ast, dump_hir); + + /* Print out the resulting IR */ + if (!state->error && dump_lir) { + _mesa_print_ir(stdout, shader->ir, state); + } + + return; +} + +int +main(int argc, char **argv) +{ + int status = EXIT_SUCCESS; + struct gl_context local_ctx; + struct gl_context *ctx = &local_ctx; + bool glsl_es = false; + + int c; + int idx = 0; + while ((c = getopt_long(argc, argv, "", compiler_opts, &idx)) != -1) { + switch (c) { + case 'v': + glsl_version = strtol(optarg, NULL, 10); + switch (glsl_version) { + case 100: + case 300: + glsl_es = true; + break; + case 110: + case 120: + case 130: + case 140: + case 150: + case 330: + glsl_es = false; + break; + default: + fprintf(stderr, "Unrecognized GLSL version `%s'\n", optarg); + usage_fail(argv[0]); + break; + } + break; + default: + break; + } + } + + + if (argc <= optind) + usage_fail(argv[0]); + + initialize_context(ctx, (glsl_es) ? 
API_OPENGLES2 : API_OPENGL_COMPAT); + + struct gl_shader_program *whole_program; + + whole_program = rzalloc (NULL, struct gl_shader_program); + assert(whole_program != NULL); + whole_program->InfoLog = ralloc_strdup(whole_program, ""); + + /* Created just to avoid segmentation faults */ + whole_program->AttributeBindings = new string_to_uint_map; + whole_program->FragDataBindings = new string_to_uint_map; + whole_program->FragDataIndexBindings = new string_to_uint_map; + + for (/* empty */; argc > optind; optind++) { + whole_program->Shaders = + reralloc(whole_program, whole_program->Shaders, + struct gl_shader *, whole_program->NumShaders + 1); + assert(whole_program->Shaders != NULL); + + struct gl_shader *shader = rzalloc(whole_program, gl_shader); + + whole_program->Shaders[whole_program->NumShaders] = shader; + whole_program->NumShaders++; + + const unsigned len = strlen(argv[optind]); + if (len < 6) + usage_fail(argv[0]); + + const char *const ext = & argv[optind][len - 5]; + if (strncmp(".vert", ext, 5) == 0 || strncmp(".glsl", ext, 5) == 0) + shader->Type = GL_VERTEX_SHADER; + else if (strncmp(".tesc", ext, 5) == 0) + shader->Type = GL_TESS_CONTROL_SHADER; + else if (strncmp(".tese", ext, 5) == 0) + shader->Type = GL_TESS_EVALUATION_SHADER; + else if (strncmp(".geom", ext, 5) == 0) + shader->Type = GL_GEOMETRY_SHADER; + else if (strncmp(".frag", ext, 5) == 0) + shader->Type = GL_FRAGMENT_SHADER; + else if (strncmp(".comp", ext, 5) == 0) + shader->Type = GL_COMPUTE_SHADER; + else + usage_fail(argv[0]); + shader->Stage = _mesa_shader_enum_to_shader_stage(shader->Type); + + shader->Source = load_text_file(whole_program, argv[optind]); + if (shader->Source == NULL) { + printf("File \"%s\" does not exist.\n", argv[optind]); + exit(EXIT_FAILURE); + } + + compile_shader(ctx, shader); + + if (strlen(shader->InfoLog) > 0) + printf("Info log for %s:\n%s\n", argv[optind], shader->InfoLog); + + if (!shader->CompileStatus) { + status = EXIT_FAILURE; + break; + } + } + + if ((status == EXIT_SUCCESS) && do_link) { + _mesa_clear_shader_program_data(whole_program); + + link_shaders(ctx, whole_program); + status = (whole_program->LinkStatus) ? 
EXIT_SUCCESS : EXIT_FAILURE;
+
+      if (strlen(whole_program->InfoLog) > 0)
+         printf("Info log for linking:\n%s\n", whole_program->InfoLog);
+   }
+
+   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++)
+      ralloc_free(whole_program->_LinkedShaders[i]);
+
+   delete whole_program->AttributeBindings;
+   delete whole_program->FragDataBindings;
+   delete whole_program->FragDataIndexBindings;
+
+   ralloc_free(whole_program);
+   _mesa_glsl_release_types();
+   _mesa_glsl_release_builtin_functions();
+
+   return status;
+}
diff --git a/src/compiler/glsl/opt_algebraic.cpp b/src/compiler/glsl/opt_algebraic.cpp
new file mode 100644
index 00000000000..1e58062cb0d
--- /dev/null
+++ b/src/compiler/glsl/opt_algebraic.cpp
@@ -0,0 +1,984 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file opt_algebraic.cpp
+ *
+ * Takes advantage of associativity, commutativity, and other algebraic
+ * properties to simplify expressions.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+#include "ir_builder.h"
+#include "compiler/glsl_types.h"
+
+using namespace ir_builder;
+
+namespace {
+
+/**
+ * Visitor class for applying algebraic simplifications to expressions.
+ */
+
+class ir_algebraic_visitor : public ir_rvalue_visitor {
+public:
+   ir_algebraic_visitor(bool native_integers,
+                        const struct gl_shader_compiler_options *options)
+      : options(options)
+   {
+      this->progress = false;
+      this->mem_ctx = NULL;
+      this->native_integers = native_integers;
+   }
+
+   virtual ~ir_algebraic_visitor()
+   {
+   }
+
+   ir_rvalue *handle_expression(ir_expression *ir);
+   void handle_rvalue(ir_rvalue **rvalue);
+   bool reassociate_constant(ir_expression *ir1,
+                             int const_index,
+                             ir_constant *constant,
+                             ir_expression *ir2);
+   void reassociate_operands(ir_expression *ir1,
+                             int op1,
+                             ir_expression *ir2,
+                             int op2);
+   ir_rvalue *swizzle_if_required(ir_expression *expr,
+                                  ir_rvalue *operand);
+
+   const struct gl_shader_compiler_options *options;
+   void *mem_ctx;
+
+   bool native_integers;
+   bool progress;
+};
+
+} /* unnamed namespace */
+
+static inline bool
+is_vec_zero(ir_constant *ir)
+{
+   return (ir == NULL) ? false : ir->is_zero();
+}
+
+static inline bool
+is_vec_one(ir_constant *ir)
+{
+   return (ir == NULL) ? false : ir->is_one();
+}
+
+static inline bool
+is_vec_two(ir_constant *ir)
+{
+   return (ir == NULL) ?
false : ir->is_value(2.0, 2);
+}
+
+static inline bool
+is_vec_four(ir_constant *ir)
+{
+   return (ir == NULL) ? false : ir->is_value(4.0, 4);
+}
+
+static inline bool
+is_vec_negative_one(ir_constant *ir)
+{
+   return (ir == NULL) ? false : ir->is_negative_one();
+}
+
+static inline bool
+is_valid_vec_const(ir_constant *ir)
+{
+   if (ir == NULL)
+      return false;
+
+   if (!ir->type->is_scalar() && !ir->type->is_vector())
+      return false;
+
+   return true;
+}
+
+static inline bool
+is_less_than_one(ir_constant *ir)
+{
+   assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+
+   if (!is_valid_vec_const(ir))
+      return false;
+
+   unsigned component = 0;
+   for (int c = 0; c < ir->type->vector_elements; c++) {
+      if (ir->get_float_component(c) < 1.0f)
+         component++;
+   }
+
+   return (component == ir->type->vector_elements);
+}
+
+static inline bool
+is_greater_than_zero(ir_constant *ir)
+{
+   assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+
+   if (!is_valid_vec_const(ir))
+      return false;
+
+   unsigned component = 0;
+   for (int c = 0; c < ir->type->vector_elements; c++) {
+      if (ir->get_float_component(c) > 0.0f)
+         component++;
+   }
+
+   return (component == ir->type->vector_elements);
+}
+
+static void
+update_type(ir_expression *ir)
+{
+   if (ir->operands[0]->type->is_vector())
+      ir->type = ir->operands[0]->type;
+   else
+      ir->type = ir->operands[1]->type;
+}
+
+/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
+static ir_expression *
+try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
+{
+   if (expr0 && expr0->operation == ir_binop_add &&
+       expr0->type->is_float() &&
+       expr1 && expr1->operation == ir_binop_add &&
+       expr1->type->is_float()) {
+      ir_swizzle *x = expr0->operands[0]->as_swizzle();
+      ir_swizzle *y = expr0->operands[1]->as_swizzle();
+      ir_swizzle *z = expr1->operands[0]->as_swizzle();
+      ir_swizzle *w = expr1->operands[1]->as_swizzle();
+
+      if (!x || x->mask.num_components != 1 ||
+          !y || y->mask.num_components != 1 ||
+          !z || z->mask.num_components != 1 ||
+          !w || w->mask.num_components != 1) {
+         return NULL;
+      }
+
+      bool swiz_seen[4] = {false, false, false, false};
+      swiz_seen[x->mask.x] = true;
+      swiz_seen[y->mask.x] = true;
+      swiz_seen[z->mask.x] = true;
+      swiz_seen[w->mask.x] = true;
+
+      if (!swiz_seen[0] || !swiz_seen[1] ||
+          !swiz_seen[2] || !swiz_seen[3]) {
+         return NULL;
+      }
+
+      if (x->val->equals(y->val) &&
+          x->val->equals(z->val) &&
+          x->val->equals(w->val)) {
+         return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4));
+      }
+   }
+   return NULL;
+}
+
+void
+ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
+                                           int op1,
+                                           ir_expression *ir2,
+                                           int op2)
+{
+   ir_rvalue *temp = ir2->operands[op2];
+   ir2->operands[op2] = ir1->operands[op1];
+   ir1->operands[op1] = temp;
+
+   /* Update the type of ir2.  The type of ir1 won't have changed --
+    * base types matched, and at least one of the operands of the 2
+    * binops is still a vector if any of them were.
+    */
+   update_type(ir2);
+
+   this->progress = true;
+}
+
+/**
+ * Reassociates a constant down a tree of adds or multiplies.
+ *
+ * Consider (2 * (a * (b * 0.5))).  We want to end up with a * b.
+ */
+bool
+ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
+                                           ir_constant *constant,
+                                           ir_expression *ir2)
+{
+   if (!ir2 || ir1->operation != ir2->operation)
+      return false;
+
+   /* Don't want to even think about matrices.
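A numeric check of the example in the comment above (exact for these constants, though floating-point reassociation can change rounding in general):

    #include <cstdio>

    int main()
    {
       const float a = 3.0f, b = 7.0f;

       /* (2 * (a * (b * 0.5))): constants are trapped at different levels. */
       const float original = 2.0f * (a * (b * 0.5f));

       /* After reassociating the constants together, 2 * 0.5 folds to 1.0
        * and the whole expression reduces to a * b. */
       const float reassociated = (a * b) * (2.0f * 0.5f);

       printf("%g %g %g\n", original, reassociated, a * b);  /* 21 21 21 */
       return 0;
    }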
*/ + if (ir1->operands[0]->type->is_matrix() || + ir1->operands[1]->type->is_matrix() || + ir2->operands[0]->type->is_matrix() || + ir2->operands[1]->type->is_matrix()) + return false; + + ir_constant *ir2_const[2]; + ir2_const[0] = ir2->operands[0]->constant_expression_value(); + ir2_const[1] = ir2->operands[1]->constant_expression_value(); + + if (ir2_const[0] && ir2_const[1]) + return false; + + if (ir2_const[0]) { + reassociate_operands(ir1, const_index, ir2, 1); + return true; + } else if (ir2_const[1]) { + reassociate_operands(ir1, const_index, ir2, 0); + return true; + } + + if (reassociate_constant(ir1, const_index, constant, + ir2->operands[0]->as_expression())) { + update_type(ir2); + return true; + } + + if (reassociate_constant(ir1, const_index, constant, + ir2->operands[1]->as_expression())) { + update_type(ir2); + return true; + } + + return false; +} + +/* When eliminating an expression and just returning one of its operands, + * we may need to swizzle that operand out to a vector if the expression was + * vector type. + */ +ir_rvalue * +ir_algebraic_visitor::swizzle_if_required(ir_expression *expr, + ir_rvalue *operand) +{ + if (expr->type->is_vector() && operand->type->is_scalar()) { + return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0, + expr->type->vector_elements); + } else + return operand; +} + +ir_rvalue * +ir_algebraic_visitor::handle_expression(ir_expression *ir) +{ + ir_constant *op_const[4] = {NULL, NULL, NULL, NULL}; + ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL}; + unsigned int i; + + if (ir->operation == ir_binop_mul && + ir->operands[0]->type->is_matrix() && + ir->operands[1]->type->is_vector()) { + ir_expression *matrix_mul = ir->operands[0]->as_expression(); + + if (matrix_mul && matrix_mul->operation == ir_binop_mul && + matrix_mul->operands[0]->type->is_matrix() && + matrix_mul->operands[1]->type->is_matrix()) { + + return mul(matrix_mul->operands[0], + mul(matrix_mul->operands[1], ir->operands[1])); + } + } + + assert(ir->get_num_operands() <= 4); + for (i = 0; i < ir->get_num_operands(); i++) { + if (ir->operands[i]->type->is_matrix()) + return ir; + + op_const[i] = ir->operands[i]->constant_expression_value(); + op_expr[i] = ir->operands[i]->as_expression(); + } + + if (this->mem_ctx == NULL) + this->mem_ctx = ralloc_parent(ir); + + switch (ir->operation) { + case ir_unop_bit_not: + if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not) + return op_expr[0]->operands[0]; + break; + + case ir_unop_abs: + if (op_expr[0] == NULL) + break; + + switch (op_expr[0]->operation) { + case ir_unop_abs: + case ir_unop_neg: + return abs(op_expr[0]->operands[0]); + default: + break; + } + break; + + case ir_unop_neg: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_neg) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_exp: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_log) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_log: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_exp) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_exp2: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_log2) { + return op_expr[0]->operands[0]; + } + + if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) { + for (int log2_pos = 0; log2_pos < 2; log2_pos++) { + ir_expression *log2_expr = + op_expr[0]->operands[log2_pos]->as_expression(); + + if (log2_expr && log2_expr->operation == ir_unop_log2) { + return 
new(mem_ctx) ir_expression(ir_binop_pow, + ir->type, + log2_expr->operands[0], + op_expr[0]->operands[1 - log2_pos]); + } + } + } + break; + + case ir_unop_log2: + if (op_expr[0] == NULL) + break; + + if (op_expr[0]->operation == ir_unop_exp2) { + return op_expr[0]->operands[0]; + } + break; + + case ir_unop_f2i: + case ir_unop_f2u: + if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) { + return new(mem_ctx) ir_expression(ir->operation, + ir->type, + op_expr[0]->operands[0]); + } + break; + + case ir_unop_logic_not: { + enum ir_expression_operation new_op = ir_unop_logic_not; + + if (op_expr[0] == NULL) + break; + + switch (op_expr[0]->operation) { + case ir_binop_less: new_op = ir_binop_gequal; break; + case ir_binop_greater: new_op = ir_binop_lequal; break; + case ir_binop_lequal: new_op = ir_binop_greater; break; + case ir_binop_gequal: new_op = ir_binop_less; break; + case ir_binop_equal: new_op = ir_binop_nequal; break; + case ir_binop_nequal: new_op = ir_binop_equal; break; + case ir_binop_all_equal: new_op = ir_binop_any_nequal; break; + case ir_binop_any_nequal: new_op = ir_binop_all_equal; break; + + default: + /* The default case handler is here to silence a warning from GCC. + */ + break; + } + + if (new_op != ir_unop_logic_not) { + return new(mem_ctx) ir_expression(new_op, + ir->type, + op_expr[0]->operands[0], + op_expr[0]->operands[1]); + } + + break; + } + + case ir_unop_saturate: + if (op_expr[0] && op_expr[0]->operation == ir_binop_add) { + ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression(); + ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression(); + + if (b2f_0 && b2f_0->operation == ir_unop_b2f && + b2f_1 && b2f_1->operation == ir_unop_b2f) { + return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0])); + } + } + break; + + case ir_binop_add: + if (is_vec_zero(op_const[0])) + return ir->operands[1]; + if (is_vec_zero(op_const[1])) + return ir->operands[0]; + + /* Reassociate addition of constants so that we can do constant + * folding. + */ + if (op_const[0] && !op_const[1]) + reassociate_constant(ir, 0, op_const[0], op_expr[1]); + if (op_const[1] && !op_const[0]) + reassociate_constant(ir, 1, op_const[1], op_expr[0]); + + /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ + if (options->OptimizeForAOS) { + ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1], + mem_ctx); + if (expr) + return expr; + } + + /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a). + * + * (-x + y) * a + x + * (x * -a) + (y * a) + x + * x + (x * -a) + (y * a) + * x * (1 - a) + y * a + * lrp(x, y, a) + */ + for (int mul_pos = 0; mul_pos < 2; mul_pos++) { + ir_expression *mul = op_expr[mul_pos]; + + if (!mul || mul->operation != ir_binop_mul) + continue; + + /* Multiply found on one of the operands. Now check for an + * inner addition operation. + */ + for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) { + ir_expression *inner_add = + mul->operands[inner_add_pos]->as_expression(); + + if (!inner_add || inner_add->operation != ir_binop_add) + continue; + + /* Inner addition found on one of the operands. Now check for + * one of the operands of the inner addition to be the negative + * of x_operand. 
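A quick check of the identity this rewrite relies on, with lrp written out explicitly (sample values are arbitrary):

    #include <cstdio>

    static float lrp(float x, float y, float a)
    {
       return x * (1.0f - a) + y * a;   /* linear interpolation */
    }

    int main()
    {
       const float x = 2.0f, y = 10.0f, a = 0.25f;
       const float pattern = (-x + y) * a + x;   /* the matched expression */
       printf("pattern=%g lrp=%g\n", pattern, lrp(x, y, a));  /* 4 4 */
       return 0;
    }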
+ */ + for (int neg_pos = 0; neg_pos < 2; neg_pos++) { + ir_expression *neg = + inner_add->operands[neg_pos]->as_expression(); + + if (!neg || neg->operation != ir_unop_neg) + continue; + + ir_rvalue *x_operand = ir->operands[1 - mul_pos]; + + if (!neg->operands[0]->equals(x_operand)) + continue; + + ir_rvalue *y_operand = inner_add->operands[1 - neg_pos]; + ir_rvalue *a_operand = mul->operands[1 - inner_add_pos]; + + if (x_operand->type != y_operand->type || + x_operand->type != a_operand->type) + continue; + + return lrp(x_operand, y_operand, a_operand); + } + } + } + + break; + + case ir_binop_sub: + if (is_vec_zero(op_const[0])) + return neg(ir->operands[1]); + if (is_vec_zero(op_const[1])) + return ir->operands[0]; + break; + + case ir_binop_mul: + if (is_vec_one(op_const[0])) + return ir->operands[1]; + if (is_vec_one(op_const[1])) + return ir->operands[0]; + + if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) + return ir_constant::zero(ir, ir->type); + + if (is_vec_negative_one(op_const[0])) + return neg(ir->operands[1]); + if (is_vec_negative_one(op_const[1])) + return neg(ir->operands[0]); + + if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f && + op_expr[1] && op_expr[1]->operation == ir_unop_b2f) { + return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0])); + } + + /* Reassociate multiplication of constants so that we can do + * constant folding. + */ + if (op_const[0] && !op_const[1]) + reassociate_constant(ir, 0, op_const[0], op_expr[1]); + if (op_const[1] && !op_const[0]) + reassociate_constant(ir, 1, op_const[1], op_expr[0]); + + /* Optimizes + * + * (mul (floor (add (abs x) 0.5)) (sign x)) + * + * into + * + * (trunc (add x (mul (sign x) 0.5))) + */ + for (int i = 0; i < 2; i++) { + ir_expression *sign_expr = ir->operands[i]->as_expression(); + ir_expression *floor_expr = ir->operands[1 - i]->as_expression(); + + if (!sign_expr || sign_expr->operation != ir_unop_sign || + !floor_expr || floor_expr->operation != ir_unop_floor) + continue; + + ir_expression *add_expr = floor_expr->operands[0]->as_expression(); + if (!add_expr || add_expr->operation != ir_binop_add) + continue; + + for (int j = 0; j < 2; j++) { + ir_expression *abs_expr = add_expr->operands[j]->as_expression(); + if (!abs_expr || abs_expr->operation != ir_unop_abs) + continue; + + ir_constant *point_five = add_expr->operands[1 - j]->as_constant(); + if (!point_five || !point_five->is_value(0.5, 0)) + continue; + + if (abs_expr->operands[0]->equals(sign_expr->operands[0])) { + return trunc(add(abs_expr->operands[0], + mul(sign_expr, point_five))); + } + } + } + break; + + case ir_binop_div: + if (is_vec_one(op_const[0]) && ( + ir->type->base_type == GLSL_TYPE_FLOAT || + ir->type->base_type == GLSL_TYPE_DOUBLE)) { + return new(mem_ctx) ir_expression(ir_unop_rcp, + ir->operands[1]->type, + ir->operands[1], + NULL); + } + if (is_vec_one(op_const[1])) + return ir->operands[0]; + break; + + case ir_binop_dot: + if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) + return ir_constant::zero(mem_ctx, ir->type); + + for (int i = 0; i < 2; i++) { + if (!op_const[i]) + continue; + + unsigned components[4] = { 0 }, count = 0; + + for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) { + if (op_const[i]->value.f[c] == 0.0) + continue; + + components[count] = c; + count++; + } + + /* No channels had zero values; bail. */ + if (count >= op_const[i]->type->vector_elements) + break; + + ir_expression_operation op = count == 1 ?
+ ir_binop_mul : ir_binop_dot; + + /* Swizzle both operands to remove the channels that were zero. */ + return new(mem_ctx) + ir_expression(op, ir->type, + new(mem_ctx) ir_swizzle(ir->operands[0], + components, count), + new(mem_ctx) ir_swizzle(ir->operands[1], + components, count)); + } + break; + + case ir_binop_less: + case ir_binop_lequal: + case ir_binop_greater: + case ir_binop_gequal: + case ir_binop_equal: + case ir_binop_nequal: + for (int add_pos = 0; add_pos < 2; add_pos++) { + ir_expression *add = op_expr[add_pos]; + + if (!add || add->operation != ir_binop_add) + continue; + + ir_constant *zero = op_const[1 - add_pos]; + if (!is_vec_zero(zero)) + continue; + + /* Depending of the zero position we want to optimize + * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y) + */ + if (add_pos == 1) { + return new(mem_ctx) ir_expression(ir->operation, + neg(add->operands[0]), + add->operands[1]); + } else { + return new(mem_ctx) ir_expression(ir->operation, + add->operands[0], + neg(add->operands[1])); + } + } + break; + + case ir_binop_all_equal: + case ir_binop_any_nequal: + if (ir->operands[0]->type->is_scalar() && + ir->operands[1]->type->is_scalar()) + return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal + ? ir_binop_equal : ir_binop_nequal, + ir->operands[0], + ir->operands[1]); + break; + + case ir_binop_rshift: + case ir_binop_lshift: + /* 0 >> x == 0 */ + if (is_vec_zero(op_const[0])) + return ir->operands[0]; + /* x >> 0 == x */ + if (is_vec_zero(op_const[1])) + return ir->operands[0]; + break; + + case ir_binop_logic_and: + if (is_vec_one(op_const[0])) { + return ir->operands[1]; + } else if (is_vec_one(op_const[1])) { + return ir->operands[0]; + } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { + return ir_constant::zero(mem_ctx, ir->type); + } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && + op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { + /* De Morgan's Law: + * (not A) and (not B) === not (A or B) + */ + return logic_not(logic_or(op_expr[0]->operands[0], + op_expr[1]->operands[0])); + } else if (ir->operands[0]->equals(ir->operands[1])) { + /* (a && a) == a */ + return ir->operands[0]; + } + break; + + case ir_binop_logic_xor: + if (is_vec_zero(op_const[0])) { + return ir->operands[1]; + } else if (is_vec_zero(op_const[1])) { + return ir->operands[0]; + } else if (is_vec_one(op_const[0])) { + return logic_not(ir->operands[1]); + } else if (is_vec_one(op_const[1])) { + return logic_not(ir->operands[0]); + } else if (ir->operands[0]->equals(ir->operands[1])) { + /* (a ^^ a) == false */ + return ir_constant::zero(mem_ctx, ir->type); + } + break; + + case ir_binop_logic_or: + if (is_vec_zero(op_const[0])) { + return ir->operands[1]; + } else if (is_vec_zero(op_const[1])) { + return ir->operands[0]; + } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) { + ir_constant_data data; + + for (unsigned i = 0; i < 16; i++) + data.b[i] = true; + + return new(mem_ctx) ir_constant(ir->type, &data); + } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && + op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { + /* De Morgan's Law: + * (not A) or (not B) === not (A and B) + */ + return logic_not(logic_and(op_expr[0]->operands[0], + op_expr[1]->operands[0])); + } else if (ir->operands[0]->equals(ir->operands[1])) { + /* (a || a) == a */ + return ir->operands[0]; + } + break; + + case ir_binop_pow: + /* 1^x == 1 */ + if (is_vec_one(op_const[0])) + return op_const[0]; + + /* x^1 
== x */ + if (is_vec_one(op_const[1])) + return ir->operands[0]; + + /* pow(2,x) == exp2(x) */ + if (is_vec_two(op_const[0])) + return expr(ir_unop_exp2, ir->operands[1]); + + if (is_vec_two(op_const[1])) { + ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", + ir_var_temporary); + base_ir->insert_before(x); + base_ir->insert_before(assign(x, ir->operands[0])); + return mul(x, x); + } + + if (is_vec_four(op_const[1])) { + ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", + ir_var_temporary); + base_ir->insert_before(x); + base_ir->insert_before(assign(x, ir->operands[0])); + + ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type, + "squared", + ir_var_temporary); + base_ir->insert_before(squared); + base_ir->insert_before(assign(squared, mul(x, x))); + return mul(squared, squared); + } + + break; + + case ir_binop_min: + case ir_binop_max: + if (ir->type->base_type != GLSL_TYPE_FLOAT || options->EmitNoSat) + break; + + /* Replace min(max) operations and its commutative combinations with + * a saturate operation + */ + for (int op = 0; op < 2; op++) { + ir_expression *inner_expr = op_expr[op]; + ir_constant *outer_const = op_const[1 - op]; + ir_expression_operation op_cond = (ir->operation == ir_binop_max) ? + ir_binop_min : ir_binop_max; + + if (!inner_expr || !outer_const || (inner_expr->operation != op_cond)) + continue; + + /* One of these has to be a constant */ + if (!inner_expr->operands[0]->as_constant() && + !inner_expr->operands[1]->as_constant()) + break; + + /* Found a min(max) combination. Now try to see if its operands + * meet our conditions that we can do just a single saturate operation + */ + for (int minmax_op = 0; minmax_op < 2; minmax_op++) { + ir_rvalue *x = inner_expr->operands[minmax_op]; + ir_rvalue *y = inner_expr->operands[1 - minmax_op]; + + ir_constant *inner_const = y->as_constant(); + if (!inner_const) + continue; + + /* min(max(x, 0.0), 1.0) is sat(x) */ + if (ir->operation == ir_binop_min && + inner_const->is_zero() && + outer_const->is_one()) + return saturate(x); + + /* max(min(x, 1.0), 0.0) is sat(x) */ + if (ir->operation == ir_binop_max && + inner_const->is_one() && + outer_const->is_zero()) + return saturate(x); + + /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */ + if (ir->operation == ir_binop_min && + inner_const->is_zero() && + is_less_than_one(outer_const)) + return saturate(expr(ir_binop_min, x, outer_const)); + + /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */ + if (ir->operation == ir_binop_max && + is_less_than_one(inner_const) && + outer_const->is_zero()) + return saturate(expr(ir_binop_min, x, inner_const)); + + /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */ + if (ir->operation == ir_binop_max && + inner_const->is_one() && + is_greater_than_zero(outer_const)) + return saturate(expr(ir_binop_max, x, outer_const)); + + /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */ + if (ir->operation == ir_binop_min && + is_greater_than_zero(inner_const) && + outer_const->is_one()) + return saturate(expr(ir_binop_max, x, inner_const)); + } + } + + break; + + case ir_unop_rcp: + if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp) + return op_expr[0]->operands[0]; + + if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 || + op_expr[0]->operation == ir_unop_exp)) { + return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type, + neg(op_expr[0]->operands[0])); + } + + /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at + * its IR level, so we can 
always apply this transformation. + */ + if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq) + return sqrt(op_expr[0]->operands[0]); + + /* As far as we know, all backends are OK with rsq. */ + if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) { + return rsq(op_expr[0]->operands[0]); + } + + break; + + case ir_triop_fma: + /* Operands are op0 * op1 + op2. */ + if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { + return ir->operands[2]; + } else if (is_vec_zero(op_const[2])) { + return mul(ir->operands[0], ir->operands[1]); + } else if (is_vec_one(op_const[0])) { + return add(ir->operands[1], ir->operands[2]); + } else if (is_vec_one(op_const[1])) { + return add(ir->operands[0], ir->operands[2]); + } + break; + + case ir_triop_lrp: + /* Operands are (x, y, a). */ + if (is_vec_zero(op_const[2])) { + return ir->operands[0]; + } else if (is_vec_one(op_const[2])) { + return ir->operands[1]; + } else if (ir->operands[0]->equals(ir->operands[1])) { + return ir->operands[0]; + } else if (is_vec_zero(op_const[0])) { + return mul(ir->operands[1], ir->operands[2]); + } else if (is_vec_zero(op_const[1])) { + unsigned op2_components = ir->operands[2]->type->vector_elements; + ir_constant *one; + + switch (ir->type->base_type) { + case GLSL_TYPE_FLOAT: + one = new(mem_ctx) ir_constant(1.0f, op2_components); + break; + case GLSL_TYPE_DOUBLE: + one = new(mem_ctx) ir_constant(1.0, op2_components); + break; + default: + one = NULL; + unreachable("unexpected type"); + } + + return mul(ir->operands[0], add(one, neg(ir->operands[2]))); + } + break; + + case ir_triop_csel: + if (is_vec_one(op_const[0])) + return ir->operands[1]; + if (is_vec_zero(op_const[0])) + return ir->operands[2]; + break; + + default: + break; + } + + return ir; +} + +void +ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr || expr->operation == ir_quadop_vector) + return; + + ir_rvalue *new_rvalue = handle_expression(expr); + if (new_rvalue == *rvalue) + return; + + /* If the expr used to be some vec OP scalar returning a vector, and the + * optimization gave us back a scalar, we still need to turn it into a + * vector. + */ + *rvalue = swizzle_if_required(expr, new_rvalue); + + this->progress = true; +} + +bool +do_algebraic(exec_list *instructions, bool native_integers, + const struct gl_shader_compiler_options *options) +{ + ir_algebraic_visitor v(native_integers, options); + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_array_splitting.cpp b/src/compiler/glsl/opt_array_splitting.cpp new file mode 100644 index 00000000000..cceec6b6431 --- /dev/null +++ b/src/compiler/glsl/opt_array_splitting.cpp @@ -0,0 +1,408 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_array_splitting.cpp + * + * If an array is always dereferenced with a constant index, then + * split it apart into its elements, making it more amenable to other + * optimization passes. + * + * This skips uniform/varying arrays, which would need careful + * handling due to their ir->location fields tying them to the GL API + * and other shader stages. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "compiler/glsl_types.h" + +static bool debug = false; + +namespace { + +namespace opt_array_splitting { + +class variable_entry : public exec_node +{ +public: + variable_entry(ir_variable *var) + { + this->var = var; + this->split = true; + this->declaration = false; + this->components = NULL; + this->mem_ctx = NULL; + if (var->type->is_array()) + this->size = var->type->length; + else + this->size = var->type->matrix_columns; + } + + ir_variable *var; /* The key: the variable's pointer. */ + unsigned size; /* array length or matrix columns */ + + /** Whether this array should be split or not. */ + bool split; + + /* If the variable had a decl we can work with in the instruction + * stream. We can't do splitting on function arguments, which + * don't get this variable set. + */ + bool declaration; + + ir_variable **components; + + /** ralloc_parent(this->var) -- the shader's talloc context. */ + void *mem_ctx; +}; + +} /* namespace */ + +using namespace opt_array_splitting; + +/** + * This class does a walk over the tree, coming up with the set of + * variables that could be split by looking to see if they are arrays + * that are only ever constant-index dereferenced. + */ +class ir_array_reference_visitor : public ir_hierarchical_visitor { +public: + ir_array_reference_visitor(void) + { + this->mem_ctx = ralloc_context(NULL); + this->variable_list.make_empty(); + } + + ~ir_array_reference_visitor(void) + { + ralloc_free(mem_ctx); + } + + bool get_split_list(exec_list *instructions, bool linked); + + virtual ir_visitor_status visit(ir_variable *); + virtual ir_visitor_status visit(ir_dereference_variable *); + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit_enter(ir_function_signature *); + + variable_entry *get_variable_entry(ir_variable *var); + + /* List of variable_entry */ + exec_list variable_list; + + void *mem_ctx; +}; + +} /* namespace */ + +variable_entry * +ir_array_reference_visitor::get_variable_entry(ir_variable *var) +{ + assert(var); + + if (var->data.mode != ir_var_auto && + var->data.mode != ir_var_temporary) + return NULL; + + if (!(var->type->is_array() || var->type->is_matrix())) + return NULL; + + /* If the array hasn't been sized yet, we can't split it. After + * linking, this should be resolved. 
+ */ + if (var->type->is_unsized_array()) + return NULL; + + foreach_in_list(variable_entry, entry, &this->variable_list) { + if (entry->var == var) + return entry; + } + + variable_entry *entry = new(mem_ctx) variable_entry(var); + this->variable_list.push_tail(entry); + return entry; +} + + +ir_visitor_status +ir_array_reference_visitor::visit(ir_variable *ir) +{ + variable_entry *entry = this->get_variable_entry(ir); + + if (entry) + entry->declaration = true; + + return visit_continue; +} + +ir_visitor_status +ir_array_reference_visitor::visit(ir_dereference_variable *ir) +{ + variable_entry *entry = this->get_variable_entry(ir->var); + + /* If we made it to here without seeing an ir_dereference_array, + * then the dereference of this array didn't have a constant index + * (see the visit_continue_with_parent below), so we can't split + * the variable. + */ + if (entry) + entry->split = false; + + return visit_continue; +} + +ir_visitor_status +ir_array_reference_visitor::visit_enter(ir_dereference_array *ir) +{ + ir_dereference_variable *deref = ir->array->as_dereference_variable(); + if (!deref) + return visit_continue; + + variable_entry *entry = this->get_variable_entry(deref->var); + + /* If the access to the array has a variable index, we wouldn't + * know which split variable this dereference should go to. + */ + if (entry && !ir->array_index->as_constant()) + entry->split = false; + + /* If the index is also array dereference, visit index. */ + if (ir->array_index->as_dereference_array()) + visit_enter(ir->array_index->as_dereference_array()); + + return visit_continue_with_parent; +} + +ir_visitor_status +ir_array_reference_visitor::visit_enter(ir_function_signature *ir) +{ + /* We don't have logic for array-splitting function arguments, + * so just look at the body instructions and not the parameter + * declarations. + */ + visit_list_elements(this, &ir->body); + return visit_continue_with_parent; +} + +bool +ir_array_reference_visitor::get_split_list(exec_list *instructions, + bool linked) +{ + visit_list_elements(this, instructions); + + /* If the shaders aren't linked yet, we can't mess with global + * declarations, which need to be matched by name across shaders. + */ + if (!linked) { + foreach_in_list(ir_instruction, node, instructions) { + ir_variable *var = node->as_variable(); + if (var) { + variable_entry *entry = get_variable_entry(var); + if (entry) + entry->remove(); + } + } + } + + /* Trim out variables we found that we can't split. */ + foreach_in_list_safe(variable_entry, entry, &variable_list) { + if (debug) { + printf("array %s@%p: decl %d, split %d\n", + entry->var->name, (void *) entry->var, entry->declaration, + entry->split); + } + + if (!(entry->declaration && entry->split)) { + entry->remove(); + } + } + + return !variable_list.is_empty(); +} + +/** + * This class rewrites the dereferences of arrays that have been split + * to use the newly created ir_variables for each component. 
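+ * + * As a rough GLSL-level sketch (the variable name is hypothetical; the "%s_%d" naming matches optimize_split_arrays() below): splitting "float a[2]" yields scalars a_0 and a_1, and a constant-index access such as a[1] is rewritten into a dereference of a_1.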
+ */ +class ir_array_splitting_visitor : public ir_rvalue_visitor { +public: + ir_array_splitting_visitor(exec_list *vars) + { + this->variable_list = vars; + } + + virtual ~ir_array_splitting_visitor() + { + } + + virtual ir_visitor_status visit_leave(ir_assignment *); + + void split_deref(ir_dereference **deref); + void handle_rvalue(ir_rvalue **rvalue); + variable_entry *get_splitting_entry(ir_variable *var); + + exec_list *variable_list; +}; + +variable_entry * +ir_array_splitting_visitor::get_splitting_entry(ir_variable *var) +{ + assert(var); + + foreach_in_list(variable_entry, entry, this->variable_list) { + if (entry->var == var) { + return entry; + } + } + + return NULL; +} + +void +ir_array_splitting_visitor::split_deref(ir_dereference **deref) +{ + ir_dereference_array *deref_array = (*deref)->as_dereference_array(); + if (!deref_array) + return; + + ir_dereference_variable *deref_var = deref_array->array->as_dereference_variable(); + if (!deref_var) + return; + ir_variable *var = deref_var->var; + + variable_entry *entry = get_splitting_entry(var); + if (!entry) + return; + + ir_constant *constant = deref_array->array_index->as_constant(); + assert(constant); + + if (constant->value.i[0] >= 0 && constant->value.i[0] < (int)entry->size) { + *deref = new(entry->mem_ctx) + ir_dereference_variable(entry->components[constant->value.i[0]]); + } else { + /* There was a constant array access beyond the end of the + * array. This might have happened due to constant folding + * after the initial parse. This produces an undefined value, + * but shouldn't crash. Just give them an uninitialized + * variable. + */ + ir_variable *temp = new(entry->mem_ctx) ir_variable(deref_array->type, + "undef", + ir_var_temporary); + entry->components[0]->insert_before(temp); + *deref = new(entry->mem_ctx) ir_dereference_variable(temp); + } +} + +void +ir_array_splitting_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + + if (!deref) + return; + + split_deref(&deref); + *rvalue = deref; +} + +ir_visitor_status +ir_array_splitting_visitor::visit_leave(ir_assignment *ir) +{ + /* The normal rvalue visitor skips the LHS of assignments, but we + * need to process those just the same. + */ + ir_rvalue *lhs = ir->lhs; + + handle_rvalue(&lhs); + ir->lhs = lhs->as_dereference(); + + ir->lhs->accept(this); + + handle_rvalue(&ir->rhs); + ir->rhs->accept(this); + + if (ir->condition) { + handle_rvalue(&ir->condition); + ir->condition->accept(this); + } + + return visit_continue; +} + +bool +optimize_split_arrays(exec_list *instructions, bool linked) +{ + ir_array_reference_visitor refs; + if (!refs.get_split_list(instructions, linked)) + return false; + + void *mem_ctx = ralloc_context(NULL); + + /* Replace the decls of the arrays to be split with their split + * components. 
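+ * For a matrix, the split components are its columns (via column_type() below), so a hypothetical mat2 m becomes two vec2 temporaries m_0 and m_1; for an array, each element gets its own variable of the element type.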
+ */ + foreach_in_list(variable_entry, entry, &refs.variable_list) { + const struct glsl_type *type = entry->var->type; + const struct glsl_type *subtype; + + if (type->is_matrix()) + subtype = type->column_type(); + else + subtype = type->fields.array; + + entry->mem_ctx = ralloc_parent(entry->var); + + entry->components = ralloc_array(mem_ctx, + ir_variable *, + entry->size); + + for (unsigned int i = 0; i < entry->size; i++) { + const char *name = ralloc_asprintf(mem_ctx, "%s_%d", + entry->var->name, i); + + entry->components[i] = + new(entry->mem_ctx) ir_variable(subtype, name, ir_var_temporary); + entry->var->insert_before(entry->components[i]); + } + + entry->var->remove(); + } + + ir_array_splitting_visitor split(&refs.variable_list); + visit_list_elements(&split, instructions); + + if (debug) + _mesa_print_ir(stdout, instructions, NULL); + + ralloc_free(mem_ctx); + + return true; + +} diff --git a/src/compiler/glsl/opt_conditional_discard.cpp b/src/compiler/glsl/opt_conditional_discard.cpp new file mode 100644 index 00000000000..1ca8803f643 --- /dev/null +++ b/src/compiler/glsl/opt_conditional_discard.cpp @@ -0,0 +1,81 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_conditional_discard.cpp + * + * Replace + * + * if (cond) discard; + * + * with + * + * (discard <condition>) + */ + +#include "compiler/glsl_types.h" +#include "ir.h" + +namespace { + +class opt_conditional_discard_visitor : public ir_hierarchical_visitor { +public: + opt_conditional_discard_visitor() + { + progress = false; + } + + ir_visitor_status visit_leave(ir_if *); + + bool progress; +}; + +} /* anonymous namespace */ + +bool +opt_conditional_discard(exec_list *instructions) +{ + opt_conditional_discard_visitor v; + v.run(instructions); + return v.progress; +} + +ir_visitor_status +opt_conditional_discard_visitor::visit_leave(ir_if *ir) +{ + /* Look for "if (...) discard" with no else clause or extra statements. */ + if (ir->then_instructions.is_empty() || + !ir->then_instructions.head->next->is_tail_sentinel() || + !((ir_instruction *) ir->then_instructions.head)->as_discard() || + !ir->else_instructions.is_empty()) + return visit_continue; + + /* Move the condition and replace the ir_if with the ir_discard.
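+ * Roughly, (if (var_ref cond) ((discard)) ()) becomes (discard (var_ref cond)), as described in the file comment above.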
*/ + ir_discard *discard = (ir_discard *) ir->then_instructions.head; + discard->condition = ir->condition; + ir->replace_with(discard); + + progress = true; + + return visit_continue; +} diff --git a/src/compiler/glsl/opt_constant_folding.cpp b/src/compiler/glsl/opt_constant_folding.cpp new file mode 100644 index 00000000000..150a17b2af6 --- /dev/null +++ b/src/compiler/glsl/opt_constant_folding.cpp @@ -0,0 +1,190 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_constant_folding.cpp + * Replace constant-valued expressions with references to constant values. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +namespace { + +/** + * Visitor class for replacing expressions with ir_constant values. + */ + +class ir_constant_folding_visitor : public ir_rvalue_visitor { +public: + ir_constant_folding_visitor() + { + this->progress = false; + } + + virtual ~ir_constant_folding_visitor() + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_discard *ir); + virtual ir_visitor_status visit_enter(ir_assignment *ir); + virtual ir_visitor_status visit_enter(ir_call *ir); + + virtual void handle_rvalue(ir_rvalue **rvalue); + + bool progress; +}; + +} /* unnamed namespace */ + +void +ir_constant_folding_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (*rvalue == NULL || (*rvalue)->ir_type == ir_type_constant) + return; + + /* Note that we do rvalue visiting on leaving. So if an + * expression has a non-constant operand, no need to go looking + * down it to find if it's constant. This cuts the time of this + * pass down drastically. + */ + ir_expression *expr = (*rvalue)->as_expression(); + if (expr) { + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + if (!expr->operands[i]->as_constant()) + return; + } + } + + /* Ditto for swizzles.
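+ * (A swizzle of a constant, e.g. a hypothetical constant vec4 read through .xy, has no non-constant children, so constant_expression_value() below folds it the same way.)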
*/ + ir_swizzle *swiz = (*rvalue)->as_swizzle(); + if (swiz && !swiz->val->as_constant()) + return; + + ir_constant *constant = (*rvalue)->constant_expression_value(); + if (constant) { + *rvalue = constant; + this->progress = true; + } else { + (*rvalue)->accept(this); + } +} + +ir_visitor_status +ir_constant_folding_visitor::visit_enter(ir_discard *ir) +{ + if (ir->condition) { + ir->condition->accept(this); + handle_rvalue(&ir->condition); + + ir_constant *const_val = ir->condition->as_constant(); + /* If the condition is constant, either remove the condition or + * remove the never-executed discard. + */ + if (const_val) { + if (const_val->value.b[0]) + ir->condition = NULL; + else + ir->remove(); + this->progress = true; + } + } + + return visit_continue_with_parent; +} + +ir_visitor_status +ir_constant_folding_visitor::visit_enter(ir_assignment *ir) +{ + ir->rhs->accept(this); + handle_rvalue(&ir->rhs); + + if (ir->condition) { + ir->condition->accept(this); + handle_rvalue(&ir->condition); + + ir_constant *const_val = ir->condition->as_constant(); + /* If the condition is constant, either remove the condition or + * remove the never-executed assignment. + */ + if (const_val) { + if (const_val->value.b[0]) + ir->condition = NULL; + else + ir->remove(); + this->progress = true; + } + } + + /* Don't descend into the LHS because we want it to stay as a + * variable dereference. FINISHME: We probably should, though, to + * get at array indices. + */ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_constant_folding_visitor::visit_enter(ir_call *ir) +{ + /* Attempt to constant fold parameters */ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_rvalue *param_rval = (ir_rvalue *) actual_node; + ir_variable *sig_param = (ir_variable *) formal_node; + + if (sig_param->data.mode == ir_var_function_in + || sig_param->data.mode == ir_var_const_in) { + ir_rvalue *new_param = param_rval; + + handle_rvalue(&new_param); + if (new_param != param_rval) { + param_rval->replace_with(new_param); + } + } + } + + /* Next, see if the call can be replaced with an assignment of a constant */ + ir_constant *const_val = ir->constant_expression_value(); + + if (const_val != NULL) { + ir_assignment *assignment = + new(ralloc_parent(ir)) ir_assignment(ir->return_deref, const_val); + ir->replace_with(assignment); + } + + return visit_continue_with_parent; +} + +bool +do_constant_folding(exec_list *instructions) +{ + ir_constant_folding_visitor constant_folding; + + visit_list_elements(&constant_folding, instructions); + + return constant_folding.progress; +} diff --git a/src/compiler/glsl/opt_constant_propagation.cpp b/src/compiler/glsl/opt_constant_propagation.cpp new file mode 100644 index 00000000000..416ba16a3c5 --- /dev/null +++ b/src/compiler/glsl/opt_constant_propagation.cpp @@ -0,0 +1,524 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial
portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_constant_propagation.cpp + * + * Tracks assignments of constants to channels of variables, and + * replaces usage of those constant channels with direct usage of the + * constants. + * + * This can lead to constant folding and algebraic optimizations in + * those later expressions, while causing no increase in instruction + * count (due to constants being generally free to load from a + * constant push buffer or as instruction immediate values) and + * possibly reducing register pressure. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_basic_block.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" +#include "util/hash_table.h" + +namespace { + +class acp_entry : public exec_node +{ +public: + acp_entry(ir_variable *var, unsigned write_mask, ir_constant *constant) + { + assert(var); + assert(constant); + this->var = var; + this->write_mask = write_mask; + this->constant = constant; + this->initial_values = write_mask; + } + + acp_entry(const acp_entry *src) + { + this->var = src->var; + this->write_mask = src->write_mask; + this->constant = src->constant; + this->initial_values = src->initial_values; + } + + ir_variable *var; + ir_constant *constant; + unsigned write_mask; + + /** Mask of values initially available in the constant. */ + unsigned initial_values; +}; + + +class kill_entry : public exec_node +{ +public: + kill_entry(ir_variable *var, unsigned write_mask) + { + assert(var); + this->var = var; + this->write_mask = write_mask; + } + + ir_variable *var; + unsigned write_mask; +}; + +class ir_constant_propagation_visitor : public ir_rvalue_visitor { +public: + ir_constant_propagation_visitor() + { + progress = false; + killed_all = false; + mem_ctx = ralloc_context(0); + this->acp = new(mem_ctx) exec_list; + this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + } + ~ir_constant_propagation_visitor() + { + ralloc_free(mem_ctx); + } + + virtual ir_visitor_status visit_enter(class ir_loop *); + virtual ir_visitor_status visit_enter(class ir_function_signature *); + virtual ir_visitor_status visit_enter(class ir_function *); + virtual ir_visitor_status visit_leave(class ir_assignment *); + virtual ir_visitor_status visit_enter(class ir_call *); + virtual ir_visitor_status visit_enter(class ir_if *); + + void add_constant(ir_assignment *ir); + void constant_folding(ir_rvalue **rvalue); + void constant_propagation(ir_rvalue **rvalue); + void kill(ir_variable *ir, unsigned write_mask); + void handle_if_block(exec_list *instructions); + void handle_rvalue(ir_rvalue **rvalue); + + /** List of acp_entry: The available constants to propagate */ + exec_list *acp; + + /** + * List of kill_entry: The masks of variables whose values were + * killed in this block.
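+ * (write_mask holds one bit per component, x = bit 0 through w = bit 3, so a hypothetical assignment to v.xz would be recorded here as mask 0x5.)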
+ */ + hash_table *kills; + + bool progress; + + bool killed_all; + + void *mem_ctx; +}; + + +void +ir_constant_propagation_visitor::constant_folding(ir_rvalue **rvalue) { + + if (*rvalue == NULL || (*rvalue)->ir_type == ir_type_constant) + return; + + /* Note that we visit rvalues on leaving. So if an expression has a + * non-constant operand, no need to go looking down it to find if it's + * constant. This cuts the time of this pass down drastically. + */ + ir_expression *expr = (*rvalue)->as_expression(); + if (expr) { + for (unsigned int i = 0; i < expr->get_num_operands(); i++) { + if (!expr->operands[i]->as_constant()) + return; + } + } + + /* Ditto for swizzles. */ + ir_swizzle *swiz = (*rvalue)->as_swizzle(); + if (swiz && !swiz->val->as_constant()) + return; + + ir_constant *constant = (*rvalue)->constant_expression_value(); + if (constant) { + *rvalue = constant; + this->progress = true; + } +} + +void +ir_constant_propagation_visitor::constant_propagation(ir_rvalue **rvalue) { + + if (this->in_assignee || !*rvalue) + return; + + const glsl_type *type = (*rvalue)->type; + if (!type->is_scalar() && !type->is_vector()) + return; + + ir_swizzle *swiz = NULL; + ir_dereference_variable *deref = (*rvalue)->as_dereference_variable(); + if (!deref) { + swiz = (*rvalue)->as_swizzle(); + if (!swiz) + return; + + deref = swiz->val->as_dereference_variable(); + if (!deref) + return; + } + + ir_constant_data data; + memset(&data, 0, sizeof(data)); + + for (unsigned int i = 0; i < type->components(); i++) { + int channel; + acp_entry *found = NULL; + + if (swiz) { + switch (i) { + case 0: channel = swiz->mask.x; break; + case 1: channel = swiz->mask.y; break; + case 2: channel = swiz->mask.z; break; + case 3: channel = swiz->mask.w; break; + default: assert(!"shouldn't be reached"); channel = 0; break; + } + } else { + channel = i; + } + + foreach_in_list(acp_entry, entry, this->acp) { + if (entry->var == deref->var && entry->write_mask & (1 << channel)) { + found = entry; + break; + } + } + + if (!found) + return; + + int rhs_channel = 0; + for (int j = 0; j < 4; j++) { + if (j == channel) + break; + if (found->initial_values & (1 << j)) + rhs_channel++; + } + + switch (type->base_type) { + case GLSL_TYPE_FLOAT: + data.f[i] = found->constant->value.f[rhs_channel]; + break; + case GLSL_TYPE_DOUBLE: + data.d[i] = found->constant->value.d[rhs_channel]; + break; + case GLSL_TYPE_INT: + data.i[i] = found->constant->value.i[rhs_channel]; + break; + case GLSL_TYPE_UINT: + data.u[i] = found->constant->value.u[rhs_channel]; + break; + case GLSL_TYPE_BOOL: + data.b[i] = found->constant->value.b[rhs_channel]; + break; + default: + assert(!"not reached"); + break; + } + } + + *rvalue = new(ralloc_parent(deref)) ir_constant(type, &data); + this->progress = true; +} + +void +ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + constant_propagation(rvalue); + constant_folding(rvalue); +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir) +{ + /* Treat entry into a function signature as a completely separate + * block. Any instructions at global scope will be shuffled into + * main() at link time, so they're irrelevant to us.
+ */ + exec_list *orig_acp = this->acp; + hash_table *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + this->killed_all = false; + + visit_list_elements(this, &ir->body); + + this->kills = orig_kills; + this->acp = orig_acp; + this->killed_all = orig_killed_all; + + return visit_continue_with_parent; +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_leave(ir_assignment *ir) +{ + constant_folding(&ir->rhs); + + if (this->in_assignee) + return visit_continue; + + unsigned kill_mask = ir->write_mask; + if (ir->lhs->as_dereference_array()) { + /* The LHS of the assignment uses an array indexing operator (e.g. v[i] + * = ...;). Since we only try to constant propagate vectors and + * scalars, this means that either (a) array indexing is being used to + * select a vector component, or (b) the variable in question is neither + * a scalar nor a vector, so we don't care about it. In the former case, + * we want to kill the whole vector, since in general we can't predict + * which vector component will be selected by array indexing. In the + * latter case, it doesn't matter what we do, so go ahead and kill the + * whole variable anyway. + * + * Note that if the array index is constant (e.g. v[2] = ...;), we could + * in principle be smarter, but we don't need to, because a future + * optimization pass will convert it to a simple assignment with the + * correct mask. + */ + kill_mask = ~0; + } + kill(ir->lhs->variable_referenced(), kill_mask); + + add_constant(ir); + + return visit_continue; +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_enter(ir_function *ir) +{ + (void) ir; + return visit_continue; +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_enter(ir_call *ir) +{ + /* Do constant propagation on call parameters, but skip any out params */ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + ir_rvalue *param = (ir_rvalue *) actual_node; + if (sig_param->data.mode != ir_var_function_out + && sig_param->data.mode != ir_var_function_inout) { + ir_rvalue *new_param = param; + handle_rvalue(&new_param); + if (new_param != param) + param->replace_with(new_param); + else + param->accept(this); + } + } + + /* Since we're unlinked, we don't (necessarily) know the side effects of + * this call. So kill all copies.
+ */ + acp->make_empty(); + this->killed_all = true; + + return visit_continue_with_parent; +} + +void +ir_constant_propagation_visitor::handle_if_block(exec_list *instructions) +{ + exec_list *orig_acp = this->acp; + hash_table *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + this->killed_all = false; + + /* Populate the initial acp with a copy of the original */ + foreach_in_list(acp_entry, a, orig_acp) { + this->acp->push_tail(new(this->mem_ctx) acp_entry(a)); + } + + visit_list_elements(this, instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + hash_table *new_kills = this->kills; + this->kills = orig_kills; + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + hash_entry *htk; + hash_table_foreach(new_kills, htk) { + kill_entry *k = (kill_entry *) htk->data; + kill(k->var, k->write_mask); + } +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_enter(ir_if *ir) +{ + ir->condition->accept(this); + handle_rvalue(&ir->condition); + + handle_if_block(&ir->then_instructions); + handle_if_block(&ir->else_instructions); + + /* handle_if_block() already descended into the children. */ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_constant_propagation_visitor::visit_enter(ir_loop *ir) +{ + exec_list *orig_acp = this->acp; + hash_table *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + /* FINISHME: For now, the initial acp for loops is totally empty. + * We could go through once, then go through again with the acp + * cloned minus the killed entries after the first run through. + */ + this->acp = new(mem_ctx) exec_list; + this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer, + _mesa_key_pointer_equal); + this->killed_all = false; + + visit_list_elements(this, &ir->body_instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + hash_table *new_kills = this->kills; + this->kills = orig_kills; + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + hash_entry *htk; + hash_table_foreach(new_kills, htk) { + kill_entry *k = (kill_entry *) htk->data; + kill(k->var, k->write_mask); + } + + /* already descended into the children. */ + return visit_continue_with_parent; +} + +void +ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask) +{ + assert(var != NULL); + + /* We don't track non-vectors. */ + if (!var->type->is_vector() && !var->type->is_scalar()) + return; + + /* Remove any entries currently in the ACP for this kill. */ + foreach_in_list_safe(acp_entry, entry, this->acp) { + if (entry->var == var) { + entry->write_mask &= ~write_mask; + if (entry->write_mask == 0) + entry->remove(); + } + } + + /* Add this writemask of the variable to the list of killed + * variables in this block. + */ + hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var); + if (kill_hash_entry) { + kill_entry *entry = (kill_entry *) kill_hash_entry->data; + entry->write_mask |= write_mask; + return; + } + /* Not already in the list. Make new entry. */ + _mesa_hash_table_insert(this->kills, var, + new(this->mem_ctx) kill_entry(var, write_mask)); +} + +/** + * Adds an entry to the available constant list if it's a plain assignment + * of a constant to a variable.
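+ * + * For example (hypothetical shader code): after "v.xy = vec2(1.0, 2.0);" the entry records var v, write_mask 0x3 and the constant, so later reads of v.x and v.y can be replaced with 1.0 and 2.0.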
+ */ +void +ir_constant_propagation_visitor::add_constant(ir_assignment *ir) +{ + acp_entry *entry; + + if (ir->condition) + return; + + if (!ir->write_mask) + return; + + ir_dereference_variable *deref = ir->lhs->as_dereference_variable(); + ir_constant *constant = ir->rhs->as_constant(); + + if (!deref || !constant) + return; + + /* Only do constant propagation on vectors. Constant matrices, + * arrays, or structures would require more work elsewhere. + */ + if (!deref->var->type->is_vector() && !deref->var->type->is_scalar()) + return; + + /* We can't do constant propagation on buffer variables, since the + * underlying memory storage is shared across multiple threads and we + * can't be sure that the variable value isn't modified between this + * assignment and the next instruction where its value is read. + */ + if (deref->var->data.mode == ir_var_shader_storage || + deref->var->data.mode == ir_var_shader_shared) + return; + + entry = new(this->mem_ctx) acp_entry(deref->var, ir->write_mask, constant); + this->acp->push_tail(entry); +} + +} /* unnamed namespace */ + +/** + * Does a constant propagation pass on the code present in the instruction stream. + */ +bool +do_constant_propagation(exec_list *instructions) +{ + ir_constant_propagation_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_constant_variable.cpp b/src/compiler/glsl/opt_constant_variable.cpp new file mode 100644 index 00000000000..3ddb12904c7 --- /dev/null +++ b/src/compiler/glsl/opt_constant_variable.cpp @@ -0,0 +1,218 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_constant_variable.cpp + * + * Marks variables assigned a single constant value over the course + * of the program as constant. + * + * The goal here is to trigger further constant folding and then dead + * code elimination. This is common with vector/matrix constructors + * and calls to builtin functions.
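+ * + * A small hypothetical case: in "float x = 0.5; gl_FragColor.w = x;" the variable x is assigned exactly once, from a constant, so it can be marked constant and later passes can fold the read of x away.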
+ */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" +#include "util/hash_table.h" + +namespace { + +struct assignment_entry { + int assignment_count; + ir_variable *var; + ir_constant *constval; + bool our_scope; +}; + +class ir_constant_variable_visitor : public ir_hierarchical_visitor { +public: + virtual ir_visitor_status visit_enter(ir_dereference_variable *); + virtual ir_visitor_status visit(ir_variable *); + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_call *); + + struct hash_table *ht; +}; + +} /* unnamed namespace */ + +static struct assignment_entry * +get_assignment_entry(ir_variable *var, struct hash_table *ht) +{ + struct hash_entry *hte = _mesa_hash_table_search(ht, var); + struct assignment_entry *entry; + + if (hte) { + entry = (struct assignment_entry *) hte->data; + } else { + entry = (struct assignment_entry *) calloc(1, sizeof(*entry)); + entry->var = var; + _mesa_hash_table_insert(ht, var, entry); + } + + return entry; +} + +ir_visitor_status +ir_constant_variable_visitor::visit(ir_variable *ir) +{ + struct assignment_entry *entry = get_assignment_entry(ir, this->ht); + entry->our_scope = true; + return visit_continue; +} + +/* Skip derefs of variables so that we can detect declarations. */ +ir_visitor_status +ir_constant_variable_visitor::visit_enter(ir_dereference_variable *ir) +{ + (void)ir; + return visit_continue_with_parent; +} + +ir_visitor_status +ir_constant_variable_visitor::visit_enter(ir_assignment *ir) +{ + ir_constant *constval; + struct assignment_entry *entry; + + entry = get_assignment_entry(ir->lhs->variable_referenced(), this->ht); + assert(entry); + entry->assignment_count++; + + /* If it's already constant, don't do the work. */ + if (entry->var->constant_value) + return visit_continue; + + /* OK, now find if we actually have all the right conditions for + * this to be a constant value assigned to the var. + */ + if (ir->condition) + return visit_continue; + + ir_variable *var = ir->whole_variable_written(); + if (!var) + return visit_continue; + + /* Ignore buffer variables, since the underlying storage is shared + * and we can't be sure that this variable won't be written by another + * thread. + */ + if (var->data.mode == ir_var_shader_storage || + var->data.mode == ir_var_shader_shared) + return visit_continue; + + constval = ir->rhs->constant_expression_value(); + if (!constval) + return visit_continue; + + /* Mark this entry as having a constant assignment (if the + * assignment count doesn't go >1). do_constant_variable will fix + * up the variable with the constant value later. 
+ */ + entry->constval = constval; + + return visit_continue; +} + +ir_visitor_status +ir_constant_variable_visitor::visit_enter(ir_call *ir) +{ + /* Mark any out parameters as assigned to */ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_rvalue *param_rval = (ir_rvalue *) actual_node; + ir_variable *param = (ir_variable *) formal_node; + + if (param->data.mode == ir_var_function_out || + param->data.mode == ir_var_function_inout) { + ir_variable *var = param_rval->variable_referenced(); + struct assignment_entry *entry; + + assert(var); + entry = get_assignment_entry(var, this->ht); + entry->assignment_count++; + } + } + + /* Mark the return storage as having been assigned to */ + if (ir->return_deref != NULL) { + ir_variable *var = ir->return_deref->variable_referenced(); + struct assignment_entry *entry; + + assert(var); + entry = get_assignment_entry(var, this->ht); + entry->assignment_count++; + } + + return visit_continue; +} + +/** + * Does a constant variable pass on the code present in the instruction stream. + */ +bool +do_constant_variable(exec_list *instructions) +{ + bool progress = false; + ir_constant_variable_visitor v; + + v.ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + v.run(instructions); + + struct hash_entry *hte; + hash_table_foreach(v.ht, hte) { + struct assignment_entry *entry = (struct assignment_entry *) hte->data; + + if (entry->assignment_count == 1 && entry->constval && entry->our_scope) { + entry->var->constant_value = entry->constval; + progress = true; + } + hte->data = NULL; + free(entry); + } + _mesa_hash_table_destroy(v.ht, NULL); + + return progress; +} + +bool +do_constant_variable_unlinked(exec_list *instructions) +{ + bool progress = false; + + foreach_in_list(ir_instruction, ir, instructions) { + ir_function *f = ir->as_function(); + if (f) { + foreach_in_list(ir_function_signature, sig, &f->signatures) { + if (do_constant_variable(&sig->body)) + progress = true; + } + } + } + + return progress; +} diff --git a/src/compiler/glsl/opt_copy_propagation.cpp b/src/compiler/glsl/opt_copy_propagation.cpp new file mode 100644 index 00000000000..310708db868 --- /dev/null +++ b/src/compiler/glsl/opt_copy_propagation.cpp @@ -0,0 +1,352 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE.
+ */ + +/** + * \file opt_copy_propagation.cpp + * + * Moves usage of recently-copied variables to the previous copy of + * the variable. + * + * This should reduce the number of MOV instructions in the generated + * programs unless copy propagation is also done on the LIR, and may + * help anyway by triggering other optimizations that live in the HIR. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_basic_block.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +namespace { + +class acp_entry : public exec_node +{ +public: + acp_entry(ir_variable *lhs, ir_variable *rhs) + { + assert(lhs); + assert(rhs); + this->lhs = lhs; + this->rhs = rhs; + } + + ir_variable *lhs; + ir_variable *rhs; +}; + + +class kill_entry : public exec_node +{ +public: + kill_entry(ir_variable *var) + { + assert(var); + this->var = var; + } + + ir_variable *var; +}; + +class ir_copy_propagation_visitor : public ir_hierarchical_visitor { +public: + ir_copy_propagation_visitor() + { + progress = false; + mem_ctx = ralloc_context(0); + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + } + ~ir_copy_propagation_visitor() + { + ralloc_free(mem_ctx); + } + + virtual ir_visitor_status visit(class ir_dereference_variable *); + virtual ir_visitor_status visit_enter(class ir_loop *); + virtual ir_visitor_status visit_enter(class ir_function_signature *); + virtual ir_visitor_status visit_enter(class ir_function *); + virtual ir_visitor_status visit_leave(class ir_assignment *); + virtual ir_visitor_status visit_enter(class ir_call *); + virtual ir_visitor_status visit_enter(class ir_if *); + + void add_copy(ir_assignment *ir); + void kill(ir_variable *ir); + void handle_if_block(exec_list *instructions); + + /** List of acp_entry: The available copies to propagate */ + exec_list *acp; + /** + * List of kill_entry: The variables whose values were killed in this + * block. + */ + exec_list *kills; + + bool progress; + + bool killed_all; + + void *mem_ctx; +}; + +} /* unnamed namespace */ + +ir_visitor_status +ir_copy_propagation_visitor::visit_enter(ir_function_signature *ir) +{ + /* Treat entry into a function signature as a completely separate + * block. Any instructions at global scope will be shuffled into + * main() at link time, so they're irrelevant to us. + */ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + visit_list_elements(this, &ir->body); + + ralloc_free(this->acp); + ralloc_free(this->kills); + + this->kills = orig_kills; + this->acp = orig_acp; + this->killed_all = orig_killed_all; + + return visit_continue_with_parent; +} + +ir_visitor_status +ir_copy_propagation_visitor::visit_leave(ir_assignment *ir) +{ + kill(ir->lhs->variable_referenced()); + + add_copy(ir); + + return visit_continue; +} + +ir_visitor_status +ir_copy_propagation_visitor::visit_enter(ir_function *ir) +{ + (void) ir; + return visit_continue; +} + +/** + * Replaces dereferences of ACP RHS variables with ACP LHS variables. + * + * This is where the actual copy propagation occurs. Note that the + * rewriting of ir_dereference means that the ir_dereference instance + * must not be shared by multiple IR operations! 
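+ * + * For example (hypothetical): after "b = a;" the ACP holds (lhs b, rhs a), so a later "c = b + 1.0;" has its dereference of b rewritten to read a directly.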
+ */ +ir_visitor_status +ir_copy_propagation_visitor::visit(ir_dereference_variable *ir) +{ + if (this->in_assignee) + return visit_continue; + + ir_variable *var = ir->var; + + foreach_in_list(acp_entry, entry, this->acp) { + if (var == entry->lhs) { + ir->var = entry->rhs; + this->progress = true; + break; + } + } + + return visit_continue; +} + + +ir_visitor_status +ir_copy_propagation_visitor::visit_enter(ir_call *ir) +{ + /* Do copy propagation on call parameters, but skip any out params */ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + ir_rvalue *ir = (ir_rvalue *) actual_node; + if (sig_param->data.mode != ir_var_function_out + && sig_param->data.mode != ir_var_function_inout) { + ir->accept(this); + } + } + + /* Since we're unlinked, we don't (necessarily) know the side effects of + * this call. So kill all copies. + */ + acp->make_empty(); + this->killed_all = true; + + return visit_continue_with_parent; +} + +void +ir_copy_propagation_visitor::handle_if_block(exec_list *instructions) +{ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + /* Populate the initial acp with a copy of the original */ + foreach_in_list(acp_entry, a, orig_acp) { + this->acp->push_tail(new(this->acp) acp_entry(a->lhs, a->rhs)); + } + + visit_list_elements(this, instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + exec_list *new_kills = this->kills; + this->kills = orig_kills; + ralloc_free(this->acp); + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + foreach_in_list(kill_entry, k, new_kills) { + kill(k->var); + } + + ralloc_free(new_kills); +} + +ir_visitor_status +ir_copy_propagation_visitor::visit_enter(ir_if *ir) +{ + ir->condition->accept(this); + + handle_if_block(&ir->then_instructions); + handle_if_block(&ir->else_instructions); + + /* handle_if_block() already descended into the children. */ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_copy_propagation_visitor::visit_enter(ir_loop *ir) +{ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + /* FINISHME: For now, the initial acp for loops is totally empty. + * We could go through once, then go through again with the acp + * cloned minus the killed entries after the first run through. + */ + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + visit_list_elements(this, &ir->body_instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + exec_list *new_kills = this->kills; + this->kills = orig_kills; + ralloc_free(this->acp); + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + foreach_in_list(kill_entry, k, new_kills) { + kill(k->var); + } + + ralloc_free(new_kills); + + /* already descended into the children. */ + return visit_continue_with_parent; +} + +void +ir_copy_propagation_visitor::kill(ir_variable *var) +{ + assert(var != NULL); + + /* Remove any entries currently in the ACP for this kill. */ + foreach_in_list_safe(acp_entry, entry, acp) { + if (entry->lhs == var || entry->rhs == var) { + entry->remove(); + } + } + + /* Add the LHS variable to the list of killed variables in this block. 
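+ * + * The enclosing block walks this list to invalidate its own ACP entries, + * so that, for example, a copy made inside if (c) { a = b; } is not + * propagated into the code that follows the branch, where the copy may + * never have happened.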
+ */ + this->kills->push_tail(new(this->kills) kill_entry(var)); +} + +/** + * Adds an entry to the available copy list if it's a plain assignment + * of a variable to a variable. + */ +void +ir_copy_propagation_visitor::add_copy(ir_assignment *ir) +{ + acp_entry *entry; + + if (ir->condition) + return; + + ir_variable *lhs_var = ir->whole_variable_written(); + ir_variable *rhs_var = ir->rhs->whole_variable_referenced(); + + if ((lhs_var != NULL) && (rhs_var != NULL)) { + if (lhs_var == rhs_var) { + /* This is a dumb assignment, but we've conveniently noticed + * it here. Removing it now would mess up the loop iteration + * calling us. Just flag it to not execute, and someone else + * will clean up the mess. + */ + ir->condition = new(ralloc_parent(ir)) ir_constant(false); + this->progress = true; + } else if (lhs_var->data.mode != ir_var_shader_storage && + lhs_var->data.mode != ir_var_shader_shared) { + entry = new(this->acp) acp_entry(lhs_var, rhs_var); + this->acp->push_tail(entry); + } + } +} + +/** + * Does a copy propagation pass on the code present in the instruction stream. + */ +bool +do_copy_propagation(exec_list *instructions) +{ + ir_copy_propagation_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_copy_propagation_elements.cpp b/src/compiler/glsl/opt_copy_propagation_elements.cpp new file mode 100644 index 00000000000..a6791801943 --- /dev/null +++ b/src/compiler/glsl/opt_copy_propagation_elements.cpp @@ -0,0 +1,509 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_copy_propagation_elements.cpp + * + * Replaces usage of recently-copied components of variables with the + * previous copy of the variable. + * + * This pass can be compared with opt_copy_propagation, which operates + * on arbitrary whole-variable copies. However, in order to handle + * the copy propagation of swizzled variables or writemasked writes, + * we want to track things on a channel-wise basis. I found that + * trying to mix the swizzled/writemasked support here with the + * whole-variable stuff in opt_copy_propagation.cpp just made a mess, + * so this is separate despite the ACP handling being somewhat + * similar.
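+ * + * An illustrative example (not part of the original patch): after + * a.xy = b.yx; a later read of a.x can be rewritten to b.y, even though + * no whole-variable copy of a or b ever exists for the whole-variable + * pass to find.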
+ * + * This should reduce the number of MOV instructions in the generated + * programs unless copy propagation is also done on the LIR, and may + * help anyway by triggering other optimizations that live in the HIR. + */ + +#include "ir.h" +#include "ir_rvalue_visitor.h" +#include "ir_basic_block.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +static bool debug = false; + +namespace { + +class acp_entry : public exec_node +{ +public: + acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4]) + { + this->lhs = lhs; + this->rhs = rhs; + this->write_mask = write_mask; + memcpy(this->swizzle, swizzle, sizeof(this->swizzle)); + } + + acp_entry(acp_entry *a) + { + this->lhs = a->lhs; + this->rhs = a->rhs; + this->write_mask = a->write_mask; + memcpy(this->swizzle, a->swizzle, sizeof(this->swizzle)); + } + + ir_variable *lhs; + ir_variable *rhs; + unsigned int write_mask; + int swizzle[4]; +}; + + +class kill_entry : public exec_node +{ +public: + kill_entry(ir_variable *var, int write_mask) + { + this->var = var; + this->write_mask = write_mask; + } + + ir_variable *var; + unsigned int write_mask; +}; + +class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor { +public: + ir_copy_propagation_elements_visitor() + { + this->progress = false; + this->killed_all = false; + this->mem_ctx = ralloc_context(NULL); + this->shader_mem_ctx = NULL; + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + } + ~ir_copy_propagation_elements_visitor() + { + ralloc_free(mem_ctx); + } + + virtual ir_visitor_status visit_enter(class ir_loop *); + virtual ir_visitor_status visit_enter(class ir_function_signature *); + virtual ir_visitor_status visit_leave(class ir_assignment *); + virtual ir_visitor_status visit_enter(class ir_call *); + virtual ir_visitor_status visit_enter(class ir_if *); + virtual ir_visitor_status visit_leave(class ir_swizzle *); + + void handle_rvalue(ir_rvalue **rvalue); + + void add_copy(ir_assignment *ir); + void kill(kill_entry *k); + void handle_if_block(exec_list *instructions); + + /** List of acp_entry: The available copies to propagate */ + exec_list *acp; + /** + * List of kill_entry: The variables whose values were killed in this + * block. + */ + exec_list *kills; + + bool progress; + + bool killed_all; + + /* Context for our local data structures. */ + void *mem_ctx; + /* Context for allocating new shader nodes. */ + void *shader_mem_ctx; +}; + +} /* unnamed namespace */ + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir) +{ + /* Treat entry into a function signature as a completely separate + * block. Any instructions at global scope will be shuffled into + * main() at link time, so they're irrelevant to us. 
+ */ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + visit_list_elements(this, &ir->body); + + ralloc_free(this->acp); + ralloc_free(this->kills); + + this->kills = orig_kills; + this->acp = orig_acp; + this->killed_all = orig_killed_all; + + return visit_continue_with_parent; +} + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir) +{ + ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); + ir_variable *var = ir->lhs->variable_referenced(); + + if (var->type->is_scalar() || var->type->is_vector()) { + kill_entry *k; + + if (lhs) + k = new(this->kills) kill_entry(var, ir->write_mask); + else + k = new(this->kills) kill_entry(var, ~0); + + kill(k); + } + + add_copy(ir); + + return visit_continue; +} + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *) +{ + /* Don't visit the values of swizzles since they are handled while + * visiting the swizzle itself. + */ + return visit_continue; +} + +/** + * Replaces dereferences of ACP LHS variables with ACP RHS variables. + * + * This is where the actual copy propagation occurs. Note that the + * rewriting of ir_dereference means that the ir_dereference instance + * must not be shared by multiple IR operations! + */ +void +ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir) +{ + int swizzle_chan[4]; + ir_dereference_variable *deref_var; + ir_variable *source[4] = {NULL, NULL, NULL, NULL}; + int source_chan[4] = {0, 0, 0, 0}; + int chans; + bool noop_swizzle = true; + + if (!*ir) + return; + + ir_swizzle *swizzle = (*ir)->as_swizzle(); + if (swizzle) { + deref_var = swizzle->val->as_dereference_variable(); + if (!deref_var) + return; + + swizzle_chan[0] = swizzle->mask.x; + swizzle_chan[1] = swizzle->mask.y; + swizzle_chan[2] = swizzle->mask.z; + swizzle_chan[3] = swizzle->mask.w; + chans = swizzle->type->vector_elements; + } else { + deref_var = (*ir)->as_dereference_variable(); + if (!deref_var) + return; + + swizzle_chan[0] = 0; + swizzle_chan[1] = 1; + swizzle_chan[2] = 2; + swizzle_chan[3] = 3; + chans = deref_var->type->vector_elements; + } + + if (this->in_assignee) + return; + + ir_variable *var = deref_var->var; + + /* Try to find ACP entries covering swizzle_chan[], hoping they're + * the same source variable. + */ + foreach_in_list(acp_entry, entry, this->acp) { + if (var == entry->lhs) { + for (int c = 0; c < chans; c++) { + if (entry->write_mask & (1 << swizzle_chan[c])) { + source[c] = entry->rhs; + source_chan[c] = entry->swizzle[swizzle_chan[c]]; + + if (source_chan[c] != swizzle_chan[c]) + noop_swizzle = false; + } + } + } + } + + /* Make sure all channels are copying from the same source variable. */ + if (!source[0]) + return; + for (int c = 1; c < chans; c++) { + if (source[c] != source[0]) + return; + } + + if (!shader_mem_ctx) + shader_mem_ctx = ralloc_parent(deref_var); + + /* Don't pointlessly replace the rvalue with itself (or a noop swizzle + * of itself, which would just be deleted by opt_noop_swizzle).
+ */ + if (source[0] == var && noop_swizzle) + return; + + if (debug) { + printf("Copy propagation from:\n"); + (*ir)->print(); + } + + deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]); + *ir = new(shader_mem_ctx) ir_swizzle(deref_var, + source_chan[0], + source_chan[1], + source_chan[2], + source_chan[3], + chans); + progress = true; + + if (debug) { + printf("to:\n"); + (*ir)->print(); + printf("\n"); + } +} + + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir) +{ + /* Do copy propagation on call parameters, but skip any out params */ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + ir_rvalue *ir = (ir_rvalue *) actual_node; + if (sig_param->data.mode != ir_var_function_out + && sig_param->data.mode != ir_var_function_inout) { + ir->accept(this); + } + } + + /* Since we're unlinked, we don't (necessarily) know the side effects of + * this call. So kill all copies. + */ + acp->make_empty(); + this->killed_all = true; + + return visit_continue_with_parent; +} + +void +ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions) +{ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + /* Populate the initial acp with a copy of the original */ + foreach_in_list(acp_entry, a, orig_acp) { + this->acp->push_tail(new(this->acp) acp_entry(a)); + } + + visit_list_elements(this, instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + exec_list *new_kills = this->kills; + this->kills = orig_kills; + ralloc_free(this->acp); + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + /* Move the new kills into the parent block's list, removing them + * from the parent's ACP list in the process. + */ + foreach_in_list_safe(kill_entry, k, new_kills) { + kill(k); + } + + ralloc_free(new_kills); +} + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir) +{ + ir->condition->accept(this); + + handle_if_block(&ir->then_instructions); + handle_if_block(&ir->else_instructions); + + /* handle_if_block() already descended into the children. */ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir) +{ + exec_list *orig_acp = this->acp; + exec_list *orig_kills = this->kills; + bool orig_killed_all = this->killed_all; + + /* FINISHME: For now, the initial acp for loops is totally empty. + * We could go through once, then go through again with the acp + * cloned minus the killed entries after the first run through. + */ + this->acp = new(mem_ctx) exec_list; + this->kills = new(mem_ctx) exec_list; + this->killed_all = false; + + visit_list_elements(this, &ir->body_instructions); + + if (this->killed_all) { + orig_acp->make_empty(); + } + + exec_list *new_kills = this->kills; + this->kills = orig_kills; + ralloc_free(this->acp); + this->acp = orig_acp; + this->killed_all = this->killed_all || orig_killed_all; + + foreach_in_list_safe(kill_entry, k, new_kills) { + kill(k); + } + + ralloc_free(new_kills); + + /* already descended into the children. */ + return visit_continue_with_parent; +} + +/* Remove any entries currently in the ACP for this kill. 
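+ * + * The removal is channel-wise: for example, an entry recording + * a.xy = b.xy that is killed with write_mask x keeps only its y channel, + * while any entry whose RHS is the killed variable is dropped outright.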
*/ +void +ir_copy_propagation_elements_visitor::kill(kill_entry *k) +{ + foreach_in_list_safe(acp_entry, entry, acp) { + if (entry->lhs == k->var) { + entry->write_mask = entry->write_mask & ~k->write_mask; + if (entry->write_mask == 0) { + entry->remove(); + continue; + } + } + if (entry->rhs == k->var) { + entry->remove(); + } + } + + /* If we were on a list, remove ourselves before inserting */ + if (k->next) + k->remove(); + + ralloc_steal(this->kills, k); + this->kills->push_tail(k); +} + +/** + * Adds directly-copied channels between vector variables to the available + * copy propagation list. + */ +void +ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir) +{ + acp_entry *entry; + int orig_swizzle[4] = {0, 1, 2, 3}; + int swizzle[4]; + + if (ir->condition) + return; + + ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); + if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector())) + return; + + ir_dereference_variable *rhs = ir->rhs->as_dereference_variable(); + if (!rhs) { + ir_swizzle *swiz = ir->rhs->as_swizzle(); + if (!swiz) + return; + + rhs = swiz->val->as_dereference_variable(); + if (!rhs) + return; + + orig_swizzle[0] = swiz->mask.x; + orig_swizzle[1] = swiz->mask.y; + orig_swizzle[2] = swiz->mask.z; + orig_swizzle[3] = swiz->mask.w; + } + + /* Move the swizzle channels out to the positions they match in the + * destination. We don't want to have to rewrite the swizzle[] + * array every time we clear a bit of the write_mask. + */ + int j = 0; + for (int i = 0; i < 4; i++) { + if (ir->write_mask & (1 << i)) + swizzle[i] = orig_swizzle[j++]; + } + + int write_mask = ir->write_mask; + if (lhs->var == rhs->var) { + /* If this is a copy from the variable to itself, then we need + * to be sure not to include the updated channels from this + * instruction in the set of new source channels to be + * copy-propagated from. + */ + for (int i = 0; i < 4; i++) { + if (ir->write_mask & (1 << orig_swizzle[i])) + write_mask &= ~(1 << i); + } + } + + entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask, + swizzle); + this->acp->push_tail(entry); +} + +bool +do_copy_propagation_elements(exec_list *instructions) +{ + ir_copy_propagation_elements_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_dead_builtin_variables.cpp b/src/compiler/glsl/opt_dead_builtin_variables.cpp new file mode 100644 index 00000000000..03e578982b9 --- /dev/null +++ b/src/compiler/glsl/opt_dead_builtin_variables.cpp @@ -0,0 +1,103 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_optimization.h" + +/** + * Pre-linking, optimize unused built-in variables + * + * Uniforms, constants, system values, inputs (vertex shader only), and + * outputs (fragment shader only) that are not used can be removed. + */ +void +optimize_dead_builtin_variables(exec_list *instructions, + enum ir_variable_mode other) +{ + foreach_in_list_safe(ir_variable, var, instructions) { + if (var->ir_type != ir_type_variable || var->data.used) + continue; + + if (var->data.mode != ir_var_uniform + && var->data.mode != ir_var_auto + && var->data.mode != ir_var_system_value + && var->data.mode != other) + continue; + + /* So that linker rules can later be enforced, we cannot eliminate + * variables that were redeclared in the shader code. + */ + if ((var->data.mode == other || var->data.mode == ir_var_system_value) + && var->data.how_declared != ir_var_declared_implicitly) + continue; + + if (!is_gl_identifier(var->name)) + continue; + + /* gl_ModelViewProjectionMatrix and gl_Vertex are special because they + * are used by ftransform. No other built-in variable is used by a + * built-in function. The forward declarations of these variables in + * the built-in function shader do not have the "state slot" + * information, so removing these variables from the user shader will + * cause problems later. + * + * For compute shaders, gl_GlobalInvocationID has some dependencies, so + * we avoid removing these dependencies. + * + * We also avoid removing gl_GlobalInvocationID at this stage because it + * might be used by a linked shader. In this case it still needs to be + * initialized by the main function. + * + * gl_GlobalInvocationID = + * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID + * + * Similarly, we initialize gl_LocalInvocationIndex in the main function: + * + * gl_LocalInvocationIndex = + * gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + + * gl_LocalInvocationID.y * gl_WorkGroupSize.x + + * gl_LocalInvocationID.x; + * + * Matrix uniforms with "Transpose" are not eliminated because there's + * an optimization pass that can turn references to the regular matrix + * into references to the transpose matrix. Eliminating the transpose + * matrix would cause that pass to generate references to undeclared + * variables (thank you, ir_validate). + * + * It doesn't seem worth the effort to track when the transpose could be + * eliminated (i.e., when the non-transpose was eliminated).
+ */ + if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0 + || strcmp(var->name, "gl_Vertex") == 0 + || strcmp(var->name, "gl_WorkGroupID") == 0 + || strcmp(var->name, "gl_WorkGroupSize") == 0 + || strcmp(var->name, "gl_LocalInvocationID") == 0 + || strcmp(var->name, "gl_GlobalInvocationID") == 0 + || strcmp(var->name, "gl_LocalInvocationIndex") == 0 + || strstr(var->name, "Transpose") != NULL) + continue; + + var->remove(); + } +} diff --git a/src/compiler/glsl/opt_dead_builtin_varyings.cpp b/src/compiler/glsl/opt_dead_builtin_varyings.cpp new file mode 100644 index 00000000000..37bcbccf0c5 --- /dev/null +++ b/src/compiler/glsl/opt_dead_builtin_varyings.cpp @@ -0,0 +1,606 @@ +/* + * Copyright © 2013 Marek Olšák + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_dead_builtin_varyings.cpp + * + * This eliminates the built-in shader outputs which are either not written + * at all or not used by the next stage. It also eliminates unused elements + * of gl_TexCoord inputs, which reduces the overall varying usage. + * The varyings handled here are the primary and secondary color, the fog, + * and the texture coordinates (gl_TexCoord). + * + * This pass is necessary, because the Mesa GLSL linker cannot eliminate + * built-in varyings like it eliminates user-defined varyings, because + * the built-in varyings have pre-assigned locations. Also, the elimination + * of unused gl_TexCoord elements requires its own lowering pass anyway. + * + * It's implemented by replacing all occurrences of dead varyings with + * temporary variables, which creates dead code. It is recommended to run + * a dead-code elimination pass after this. + * + * If any texture coordinate slots can be eliminated, the gl_TexCoord array is + * broken down into separate vec4 variables with locations equal to + * VARYING_SLOT_TEX0 + i. + * + * The same is done for the gl_FragData fragment shader output. + */ + +#include "main/core.h" /* for snprintf and ARRAY_SIZE */ +#include "ir.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" +#include "ir_print_visitor.h" +#include "compiler/glsl_types.h" +#include "link_varyings.h" + +namespace { + +/** + * This obtains detailed information about built-in varyings from shader code. 
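+ * + * For example (illustrative): a shader that references only gl_TexCoord[0] + * and gl_TexCoord[2] yields texcoord_usage == 0x5, allowing the untouched + * elements to be replaced with temporaries later.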
+ */ +class varying_info_visitor : public ir_hierarchical_visitor { +public: + /* "mode" can be either ir_var_shader_in or ir_var_shader_out */ + varying_info_visitor(ir_variable_mode mode, bool find_frag_outputs = false) + : lower_texcoord_array(true), + texcoord_array(NULL), + texcoord_usage(0), + find_frag_outputs(find_frag_outputs), + lower_fragdata_array(true), + fragdata_array(NULL), + fragdata_usage(0), + color_usage(0), + tfeedback_color_usage(0), + fog(NULL), + has_fog(false), + tfeedback_has_fog(false), + mode(mode) + { + memset(color, 0, sizeof(color)); + memset(backcolor, 0, sizeof(backcolor)); + } + + virtual ir_visitor_status visit_enter(ir_dereference_array *ir) + { + ir_variable *var = ir->variable_referenced(); + + if (!var || var->data.mode != this->mode || !var->type->is_array()) + return visit_continue; + + if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) { + this->fragdata_array = var; + + ir_constant *index = ir->array_index->as_constant(); + if (index == NULL) { + /* This is variable indexing. */ + this->fragdata_usage |= (1 << var->type->array_size()) - 1; + this->lower_fragdata_array = false; + } + else { + this->fragdata_usage |= 1 << index->get_uint_component(0); + /* Don't lower the fragdata array if the output variable + * is not a float variable (or float vector), since that + * would generate wrong register assignments due to the + * differing data types. + */ + if (var->type->gl_type != GL_FLOAT && + var->type->gl_type != GL_FLOAT_VEC2 && + var->type->gl_type != GL_FLOAT_VEC3 && + var->type->gl_type != GL_FLOAT_VEC4) + this->lower_fragdata_array = false; + } + + /* Don't visit the leaves of ir_dereference_array. */ + return visit_continue_with_parent; + } + + if (!this->find_frag_outputs && var->data.location == VARYING_SLOT_TEX0) { + this->texcoord_array = var; + + ir_constant *index = ir->array_index->as_constant(); + if (index == NULL) { + /* There is variable indexing; we can't lower the texcoord array. + */ + this->texcoord_usage |= (1 << var->type->array_size()) - 1; + this->lower_texcoord_array = false; + } + else { + this->texcoord_usage |= 1 << index->get_uint_component(0); + } + + /* Don't visit the leaves of ir_dereference_array. */ + return visit_continue_with_parent; + } + + return visit_continue; + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + ir_variable *var = ir->variable_referenced(); + + if (var->data.mode != this->mode || !var->type->is_array()) + return visit_continue; + + if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) { + /* This is a whole array dereference. */ + this->fragdata_usage |= (1 << var->type->array_size()) - 1; + this->lower_fragdata_array = false; + return visit_continue; + } + + if (!this->find_frag_outputs && var->data.location == VARYING_SLOT_TEX0) { + /* This is a whole array dereference like "gl_TexCoord = x;", + * there's probably no point in lowering that. + */ + this->texcoord_usage |= (1 << var->type->array_size()) - 1; + this->lower_texcoord_array = false; + } + return visit_continue; + } + + virtual ir_visitor_status visit(ir_variable *var) + { + if (var->data.mode != this->mode) + return visit_continue; + + /* Nothing to do here for fragment outputs. */ + if (this->find_frag_outputs) + return visit_continue; + + /* Handle colors and fog.
*/ + switch (var->data.location) { + case VARYING_SLOT_COL0: + this->color[0] = var; + this->color_usage |= 1; + break; + case VARYING_SLOT_COL1: + this->color[1] = var; + this->color_usage |= 2; + break; + case VARYING_SLOT_BFC0: + this->backcolor[0] = var; + this->color_usage |= 1; + break; + case VARYING_SLOT_BFC1: + this->backcolor[1] = var; + this->color_usage |= 2; + break; + case VARYING_SLOT_FOGC: + this->fog = var; + this->has_fog = true; + break; + } + + return visit_continue; + } + + void get(exec_list *ir, + unsigned num_tfeedback_decls, + tfeedback_decl *tfeedback_decls) + { + /* Handle the transform feedback varyings. */ + for (unsigned i = 0; i < num_tfeedback_decls; i++) { + if (!tfeedback_decls[i].is_varying()) + continue; + + unsigned location = tfeedback_decls[i].get_location(); + + switch (location) { + case VARYING_SLOT_COL0: + case VARYING_SLOT_BFC0: + this->tfeedback_color_usage |= 1; + break; + case VARYING_SLOT_COL1: + case VARYING_SLOT_BFC1: + this->tfeedback_color_usage |= 2; + break; + case VARYING_SLOT_FOGC: + this->tfeedback_has_fog = true; + break; + default: + if (location >= VARYING_SLOT_TEX0 && + location <= VARYING_SLOT_TEX7) { + this->lower_texcoord_array = false; + } + } + } + + /* Process the shader. */ + visit_list_elements(this, ir); + + if (!this->texcoord_array) { + this->lower_texcoord_array = false; + } + if (!this->fragdata_array) { + this->lower_fragdata_array = false; + } + } + + bool lower_texcoord_array; + ir_variable *texcoord_array; + unsigned texcoord_usage; /* bitmask */ + + bool find_frag_outputs; /* false if it's looking for varyings */ + bool lower_fragdata_array; + ir_variable *fragdata_array; + unsigned fragdata_usage; /* bitmask */ + + ir_variable *color[2]; + ir_variable *backcolor[2]; + unsigned color_usage; /* bitmask */ + unsigned tfeedback_color_usage; /* bitmask */ + + ir_variable *fog; + bool has_fog; + bool tfeedback_has_fog; + + ir_variable_mode mode; +}; + + +/** + * This replaces unused varyings with temporary variables. + * + * If "ir" is the producer, the "external" usage should come from + * the consumer. It also works the other way around. If either one is + * missing, set the "external" usage to a full mask. + */ +class replace_varyings_visitor : public ir_rvalue_visitor { +public: + replace_varyings_visitor(struct gl_shader *sha, + const varying_info_visitor *info, + unsigned external_texcoord_usage, + unsigned external_color_usage, + bool external_has_fog) + : shader(sha), info(info), new_fog(NULL) + { + void *const ctx = shader->ir; + + memset(this->new_fragdata, 0, sizeof(this->new_fragdata)); + memset(this->new_texcoord, 0, sizeof(this->new_texcoord)); + memset(this->new_color, 0, sizeof(this->new_color)); + memset(this->new_backcolor, 0, sizeof(this->new_backcolor)); + + const char *mode_str = + info->mode == ir_var_shader_in ? "in" : "out"; + + /* Handle texcoord outputs. + * + * We're going to break down the gl_TexCoord array into separate + * variables. First, add declarations of the new variables all + * occurrences of gl_TexCoord will be replaced with. + */ + if (info->lower_texcoord_array) { + prepare_array(shader->ir, this->new_texcoord, + ARRAY_SIZE(this->new_texcoord), + VARYING_SLOT_TEX0, "TexCoord", mode_str, + info->texcoord_usage, external_texcoord_usage); + } + + /* Handle gl_FragData in the same way as gl_TexCoord.
*/ + if (info->lower_fragdata_array) { + prepare_array(shader->ir, this->new_fragdata, + ARRAY_SIZE(this->new_fragdata), + FRAG_RESULT_DATA0, "FragData", mode_str, + info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) - 1); + } + + /* Create dummy variables which will replace set-but-unused color and + * fog outputs. + */ + external_color_usage |= info->tfeedback_color_usage; + + for (int i = 0; i < 2; i++) { + char name[32]; + + if (!(external_color_usage & (1 << i))) { + if (info->color[i]) { + snprintf(name, 32, "gl_%s_FrontColor%i_dummy", mode_str, i); + this->new_color[i] = + new (ctx) ir_variable(glsl_type::vec4_type, name, + ir_var_temporary); + } + + if (info->backcolor[i]) { + snprintf(name, 32, "gl_%s_BackColor%i_dummy", mode_str, i); + this->new_backcolor[i] = + new (ctx) ir_variable(glsl_type::vec4_type, name, + ir_var_temporary); + } + } + } + + if (!external_has_fog && !info->tfeedback_has_fog && + info->fog) { + char name[32]; + + snprintf(name, 32, "gl_%s_FogFragCoord_dummy", mode_str); + this->new_fog = new (ctx) ir_variable(glsl_type::float_type, name, + ir_var_temporary); + } + + /* Now do the replacing. */ + visit_list_elements(this, shader->ir); + } + + void prepare_array(exec_list *ir, + ir_variable **new_var, + int max_elements, unsigned start_location, + const char *var_name, const char *mode_str, + unsigned usage, unsigned external_usage) + { + void *const ctx = ir; + + for (int i = max_elements-1; i >= 0; i--) { + if (usage & (1 << i)) { + char name[32]; + + if (!(external_usage & (1 << i))) { + /* This varying is unused in the next stage. Declare + * a temporary instead of an output. */ + snprintf(name, 32, "gl_%s_%s%i_dummy", mode_str, var_name, i); + new_var[i] = + new (ctx) ir_variable(glsl_type::vec4_type, name, + ir_var_temporary); + } + else { + snprintf(name, 32, "gl_%s_%s%i", mode_str, var_name, i); + new_var[i] = + new(ctx) ir_variable(glsl_type::vec4_type, name, + this->info->mode); + new_var[i]->data.location = start_location + i; + new_var[i]->data.explicit_location = true; + new_var[i]->data.explicit_index = 0; + } + + ir->head->insert_before(new_var[i]); + } + } + } + + virtual ir_visitor_status visit(ir_variable *var) + { + /* Remove the gl_TexCoord array. */ + if (this->info->lower_texcoord_array && + var == this->info->texcoord_array) { + var->remove(); + } + + /* Remove the gl_FragData array. */ + if (this->info->lower_fragdata_array && + var == this->info->fragdata_array) { + + /* Clone variable for program resource list before it is removed. */ + if (!shader->fragdata_arrays) + shader->fragdata_arrays = new (shader) exec_list; + + shader->fragdata_arrays->push_tail(var->clone(shader, NULL)); + + var->remove(); + } + + /* Replace set-but-unused color and fog outputs with dummy variables. */ + for (int i = 0; i < 2; i++) { + if (var == this->info->color[i] && this->new_color[i]) { + var->replace_with(this->new_color[i]); + } + if (var == this->info->backcolor[i] && + this->new_backcolor[i]) { + var->replace_with(this->new_backcolor[i]); + } + } + + if (var == this->info->fog && this->new_fog) { + var->replace_with(this->new_fog); + } + + return visit_continue; + } + + virtual void handle_rvalue(ir_rvalue **rvalue) + { + if (!*rvalue) + return; + + void *ctx = ralloc_parent(*rvalue); + + /* Replace an array dereference gl_TexCoord[i] with a single + * variable dereference representing gl_TexCoord[i]. 
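+ * + * E.g., using the names built by prepare_array() (illustrative): in a + * vertex shader, gl_TexCoord[2] becomes a dereference of gl_out_TexCoord2, + * or of the temporary gl_out_TexCoord2_dummy when the next stage never + * reads element 2.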
+ */ + if (this->info->lower_texcoord_array) { + /* gl_TexCoord[i] occurrence */ + ir_dereference_array *const da = (*rvalue)->as_dereference_array(); + + if (da && da->variable_referenced() == + this->info->texcoord_array) { + unsigned i = da->array_index->as_constant()->get_uint_component(0); + + *rvalue = new(ctx) ir_dereference_variable(this->new_texcoord[i]); + return; + } + } + + /* Same for gl_FragData. */ + if (this->info->lower_fragdata_array) { + /* gl_FragData[i] occurrence */ + ir_dereference_array *const da = (*rvalue)->as_dereference_array(); + + if (da && da->variable_referenced() == this->info->fragdata_array) { + unsigned i = da->array_index->as_constant()->get_uint_component(0); + + *rvalue = new(ctx) ir_dereference_variable(this->new_fragdata[i]); + return; + } + } + + /* Replace set-but-unused color and fog outputs with dummy variables. */ + ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); + if (!dv) + return; + + ir_variable *var = dv->variable_referenced(); + + for (int i = 0; i < 2; i++) { + if (var == this->info->color[i] && this->new_color[i]) { + *rvalue = new(ctx) ir_dereference_variable(this->new_color[i]); + return; + } + if (var == this->info->backcolor[i] && + this->new_backcolor[i]) { + *rvalue = new(ctx) ir_dereference_variable(this->new_backcolor[i]); + return; + } + } + + if (var == this->info->fog && this->new_fog) { + *rvalue = new(ctx) ir_dereference_variable(this->new_fog); + } + } + + virtual ir_visitor_status visit_leave(ir_assignment *ir) + { + handle_rvalue(&ir->rhs); + handle_rvalue(&ir->condition); + + /* We have to use set_lhs when changing the LHS of an assignment. */ + ir_rvalue *lhs = ir->lhs; + + handle_rvalue(&lhs); + if (lhs != ir->lhs) { + ir->set_lhs(lhs); + } + + return visit_continue; + } + +private: + struct gl_shader *shader; + const varying_info_visitor *info; + ir_variable *new_fragdata[MAX_DRAW_BUFFERS]; + ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS]; + ir_variable *new_color[2]; + ir_variable *new_backcolor[2]; + ir_variable *new_fog; +}; + +} /* anonymous namespace */ + +static void +lower_texcoord_array(struct gl_shader *shader, const varying_info_visitor *info) +{ + replace_varyings_visitor(shader, info, + (1 << MAX_TEXTURE_COORD_UNITS) - 1, + 1 | 2, true); +} + +static void +lower_fragdata_array(struct gl_shader *shader) +{ + varying_info_visitor info(ir_var_shader_out, true); + info.get(shader->ir, 0, NULL); + + replace_varyings_visitor(shader, &info, 0, 0, 0); +} + + +void +do_dead_builtin_varyings(struct gl_context *ctx, + gl_shader *producer, gl_shader *consumer, + unsigned num_tfeedback_decls, + tfeedback_decl *tfeedback_decls) +{ + /* Lower the gl_FragData array to separate variables. */ + if (consumer && consumer->Stage == MESA_SHADER_FRAGMENT) { + lower_fragdata_array(consumer); + } + + /* Lowering of built-in varyings has no effect with the core context and + * GLES2, because they are not available there. + */ + if (ctx->API == API_OPENGL_CORE || + ctx->API == API_OPENGLES2) { + return; + } + + /* Information about built-in varyings. */ + varying_info_visitor producer_info(ir_var_shader_out); + varying_info_visitor consumer_info(ir_var_shader_in); + + if (producer) { + producer_info.get(producer->ir, num_tfeedback_decls, tfeedback_decls); + + if (!consumer) { + /* At least eliminate unused gl_TexCoord elements. 
*/ + if (producer_info.lower_texcoord_array) { + lower_texcoord_array(producer, &producer_info); + } + return; + } + } + + if (consumer) { + consumer_info.get(consumer->ir, 0, NULL); + + if (!producer) { + /* At least eliminate unused gl_TexCoord elements. */ + if (consumer_info.lower_texcoord_array) { + lower_texcoord_array(consumer, &consumer_info); + } + return; + } + } + + /* Eliminate the outputs unused by the consumer. */ + if (producer_info.lower_texcoord_array || + producer_info.color_usage || + producer_info.has_fog) { + replace_varyings_visitor(producer, + &producer_info, + consumer_info.texcoord_usage, + consumer_info.color_usage, + consumer_info.has_fog); + } + + /* The gl_TexCoord fragment shader inputs can be initialized + * by GL_COORD_REPLACE, so we can't eliminate them. + * + * This doesn't prevent elimination of the gl_TexCoord elements which + * are not read by the fragment shader. We want to eliminate those anyway. + */ + if (consumer->Stage == MESA_SHADER_FRAGMENT) { + producer_info.texcoord_usage = (1 << MAX_TEXTURE_COORD_UNITS) - 1; + } + + /* Eliminate the inputs uninitialized by the producer. */ + if (consumer_info.lower_texcoord_array || + consumer_info.color_usage || + consumer_info.has_fog) { + replace_varyings_visitor(consumer, + &consumer_info, + producer_info.texcoord_usage, + producer_info.color_usage, + producer_info.has_fog); + } +} diff --git a/src/compiler/glsl/opt_dead_code.cpp b/src/compiler/glsl/opt_dead_code.cpp new file mode 100644 index 00000000000..dbdb7de8bb8 --- /dev/null +++ b/src/compiler/glsl/opt_dead_code.cpp @@ -0,0 +1,197 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_dead_code.cpp + * + * Eliminates dead assignments and variable declarations from the code. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_variable_refcount.h" +#include "compiler/glsl_types.h" +#include "util/hash_table.h" + +static bool debug = false; + +/** + * Do a dead code pass over instructions and everything those instructions + * reference. + * + * Note that this will remove assignments to globals, so it is not suitable + * for usage on an unlinked instruction stream.
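+ * + * Illustrative example: a global vec4 t; that is written but never read + * loses both its assignments and its declaration here, which is only safe + * after linking, when every reference to t is visible.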
+ */ +bool +do_dead_code(exec_list *instructions, bool uniform_locations_assigned) +{ + ir_variable_refcount_visitor v; + bool progress = false; + + v.run(instructions); + + struct hash_entry *e; + hash_table_foreach(v.ht, e) { + ir_variable_refcount_entry *entry = (ir_variable_refcount_entry *)e->data; + + /* Since each assignment is a reference, the referenced count must be + * greater than or equal to the assignment count. If they are equal, + * then all of the references are assignments, and the variable is + * dead. + * + * Note that if the variable is neither assigned nor referenced, both + * counts will be zero and will be caught by the equality test. + */ + assert(entry->referenced_count >= entry->assigned_count); + + if (debug) { + printf("%s@%p: %d refs, %d assigns, %sdeclared in our scope\n", + entry->var->name, (void *) entry->var, + entry->referenced_count, entry->assigned_count, + entry->declaration ? "" : "not "); + } + + if ((entry->referenced_count > entry->assigned_count) + || !entry->declaration) + continue; + + /* Section 7.4.1 (Shader Interface Matching) of the OpenGL 4.5 + * (Core Profile) spec says: + * + * "With separable program objects, interfaces between shader + * stages may involve the outputs from one program object and the + * inputs from a second program object. For such interfaces, it is + * not possible to detect mismatches at link time, because the + * programs are linked separately. When each such program is + * linked, all inputs or outputs interfacing with another program + * stage are treated as active." + */ + if (entry->var->data.always_active_io) + continue; + + if (!entry->assign_list.is_empty()) { + /* Remove all the dead assignments to the variable we found. + * Don't do so if it's a shader or function output, though. + */ + if (entry->var->data.mode != ir_var_function_out && + entry->var->data.mode != ir_var_function_inout && + entry->var->data.mode != ir_var_shader_out && + entry->var->data.mode != ir_var_shader_storage) { + + while (!entry->assign_list.is_empty()) { + struct assignment_entry *assignment_entry = + exec_node_data(struct assignment_entry, + entry->assign_list.head, link); + + assignment_entry->assign->remove(); + + if (debug) { + printf("Removed assignment to %s@%p\n", + entry->var->name, (void *) entry->var); + } + + assignment_entry->link.remove(); + free(assignment_entry); + } + progress = true; + } + } + + if (entry->assign_list.is_empty()) { + /* If there are no assignments or references to the variable left, + * then we can remove its declaration. + */ + + /* uniform initializers are precious, and could get used by another + * stage. Also, once uniform locations have been assigned, the + * declaration cannot be deleted. + */ + if (entry->var->data.mode == ir_var_uniform || + entry->var->data.mode == ir_var_shader_storage) { + if (uniform_locations_assigned || entry->var->constant_initializer) + continue; + + /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec + * says: + * + * "All members of a named uniform block declared with a + * shared or std140 layout qualifier are considered active, + * even if they are not referenced in any shader in the + * program. The uniform block itself is also considered + * active, even if no member of the block is referenced." + * + * If the variable is in a uniform block with one of those + * layouts, do not eliminate it.
+ */ + if (entry->var->is_in_buffer_block()) { + if (entry->var->get_interface_type()->interface_packing != + GLSL_INTERFACE_PACKING_PACKED) + continue; + } + + if (entry->var->type->is_subroutine()) + continue; + } + + entry->var->remove(); + progress = true; + + if (debug) { + printf("Removed declaration of %s@%p\n", + entry->var->name, (void *) entry->var); + } + } + } + + return progress; +} + +/** + * Does a dead code pass on the functions present in the instruction stream. + * + * This is suitable for use while the program is not linked, as it will + * ignore variable declarations (and the assignments to them) for variables + * with global scope. + */ +bool +do_dead_code_unlinked(exec_list *instructions) +{ + bool progress = false; + + foreach_in_list(ir_instruction, ir, instructions) { + ir_function *f = ir->as_function(); + if (f) { + foreach_in_list(ir_function_signature, sig, &f->signatures) { + /* The setting of the uniform_locations_assigned flag here is + * irrelevant. If there is a uniform declaration encountered + * inside the body of the function, something has already gone + * terribly, terribly wrong. + */ + if (do_dead_code(&sig->body, false)) + progress = true; + } + } + } + + return progress; +} diff --git a/src/compiler/glsl/opt_dead_code_local.cpp b/src/compiler/glsl/opt_dead_code_local.cpp new file mode 100644 index 00000000000..d38fd2bf638 --- /dev/null +++ b/src/compiler/glsl/opt_dead_code_local.cpp @@ -0,0 +1,336 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_dead_code_local.cpp + * + * Eliminates local dead assignments from the code. + * + * This operates on basic blocks, tracking assignments and finding if + * they're used before the variable is completely reassigned. + * + * Compare this to opt_dead_code.cpp, which operates globally looking + * for assignments to variables that are never read. + */ + +#include "ir.h" +#include "ir_basic_block.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +static bool debug = false; + +namespace { + +class assignment_entry : public exec_node +{ +public: + assignment_entry(ir_variable *lhs, ir_assignment *ir) + { + assert(lhs); + assert(ir); + this->lhs = lhs; + this->ir = ir; + this->unused = ir->write_mask; + } + + ir_variable *lhs; + ir_assignment *ir; + + /* bitmask of xyzw channels written that haven't been used so far.
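+ * + * E.g. after v.xy = ...; unused == 0x3. A read of v.x clears bit 0 (that + * channel is now needed); if v.y is overwritten while bit 1 is still set, + * the original write to y was dead and can be stripped.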
*/ + int unused; +}; + +class kill_for_derefs_visitor : public ir_hierarchical_visitor { +public: + kill_for_derefs_visitor(exec_list *assignments) + { + this->assignments = assignments; + } + + void use_channels(ir_variable *const var, int used) + { + foreach_in_list_safe(assignment_entry, entry, this->assignments) { + if (entry->lhs == var) { + if (var->type->is_scalar() || var->type->is_vector()) { + if (debug) + printf("used %s (0x%01x - 0x%01x)\n", entry->lhs->name, + entry->unused, used & 0xf); + entry->unused &= ~used; + if (!entry->unused) + entry->remove(); + } else { + if (debug) + printf("used %s\n", entry->lhs->name); + entry->remove(); + } + } + } + } + + virtual ir_visitor_status visit(ir_dereference_variable *ir) + { + use_channels(ir->var, ~0); + + return visit_continue; + } + + virtual ir_visitor_status visit(ir_swizzle *ir) + { + ir_dereference_variable *deref = ir->val->as_dereference_variable(); + if (!deref) + return visit_continue; + + int used = 0; + used |= 1 << ir->mask.x; + used |= 1 << ir->mask.y; + used |= 1 << ir->mask.z; + used |= 1 << ir->mask.w; + + use_channels(deref->var, used); + + return visit_continue_with_parent; + } + + virtual ir_visitor_status visit_leave(ir_emit_vertex *) + { + /* For the purpose of dead code elimination, emitting a vertex counts as + * "reading" all of the currently assigned output variables. + */ + foreach_in_list_safe(assignment_entry, entry, this->assignments) { + if (entry->lhs->data.mode == ir_var_shader_out) { + if (debug) + printf("kill %s\n", entry->lhs->name); + entry->remove(); + } + } + + return visit_continue; + } + +private: + exec_list *assignments; +}; + +class array_index_visit : public ir_hierarchical_visitor { +public: + array_index_visit(ir_hierarchical_visitor *v) + { + this->visitor = v; + } + + virtual ir_visitor_status visit_enter(class ir_dereference_array *ir) + { + ir->array_index->accept(visitor); + return visit_continue; + } + + static void run(ir_instruction *ir, ir_hierarchical_visitor *v) + { + array_index_visit top_visit(v); + ir->accept(& top_visit); + } + + ir_hierarchical_visitor *visitor; +}; + +} /* unnamed namespace */ + +/** + * Processes an assignment: kills entries for values the assignment reads, + * trims or removes earlier assignments made wholly or partly dead by it, + * and records the assignment itself for later elimination. + */ +static bool +process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments) +{ + ir_variable *var = NULL; + bool progress = false; + kill_for_derefs_visitor v(assignments); + + /* Kill assignment entries for things used to produce this assignment. */ + ir->rhs->accept(&v); + if (ir->condition) { + ir->condition->accept(&v); + } + + /* Kill assignment entries used as array indices. + */ + array_index_visit::run(ir->lhs, &v); + var = ir->lhs->variable_referenced(); + assert(var); + + /* Now, check if we did a whole-variable assignment. */ + if (!ir->condition) { + ir_dereference_variable *deref_var = ir->lhs->as_dereference_variable(); + + /* If it's a vector type, we can do per-channel elimination of + * use of the RHS. + */ + if (deref_var && (deref_var->var->type->is_scalar() || + deref_var->var->type->is_vector())) { + + if (debug) + printf("looking for %s.0x%01x to remove\n", var->name, + ir->write_mask); + + foreach_in_list_safe(assignment_entry, entry, assignments) { + if (entry->lhs != var) + continue; + + /* Skip if the assignment we're trying to eliminate isn't a plain + * variable deref.
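+ * For plain derefs the per-channel trimming below applies: e.g. given + * v.xyz = a; v.xy = b; with no intervening reads, the first write's x + * and y channels are dead, so its write mask is trimmed to z and its + * RHS reswizzled to match.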
*/ + if (entry->ir->lhs->ir_type != ir_type_dereference_variable) + continue; + + int remove = entry->unused & ir->write_mask; + if (debug) { + printf("%s 0x%01x - 0x%01x = 0x%01x\n", + var->name, + entry->ir->write_mask, + remove, entry->ir->write_mask & ~remove); + } + if (remove) { + progress = true; + + if (debug) { + printf("rewriting:\n "); + entry->ir->print(); + printf("\n"); + } + + entry->ir->write_mask &= ~remove; + entry->unused &= ~remove; + if (entry->ir->write_mask == 0) { + /* Delete the dead assignment. */ + entry->ir->remove(); + entry->remove(); + } else { + void *mem_ctx = ralloc_parent(entry->ir); + /* Reswizzle the RHS arguments according to the new + * write_mask. + */ + unsigned components[4]; + unsigned channels = 0; + unsigned next = 0; + + for (int i = 0; i < 4; i++) { + if ((entry->ir->write_mask | remove) & (1 << i)) { + if (!(remove & (1 << i))) + components[channels++] = next; + next++; + } + } + + entry->ir->rhs = new(mem_ctx) ir_swizzle(entry->ir->rhs, + components, + channels); + if (debug) { + printf("to:\n "); + entry->ir->print(); + printf("\n"); + } + } + } + } + } else if (ir->whole_variable_written() != NULL) { + /* We did a whole-variable assignment. So, any instruction in + * the assignment list with the same LHS is dead. + */ + if (debug) + printf("looking for %s to remove\n", var->name); + foreach_in_list_safe(assignment_entry, entry, assignments) { + if (entry->lhs == var) { + if (debug) + printf("removing %s\n", var->name); + entry->ir->remove(); + entry->remove(); + progress = true; + } + } + } + } + + /* Add this instruction to the assignment list, so that later + * instructions can prove it dead and remove it. */ + assignment_entry *entry = new(ctx) assignment_entry(var, ir); + assignments->push_tail(entry); + + if (debug) { + printf("add %s\n", var->name); + + printf("current entries\n"); + foreach_in_list(assignment_entry, entry, assignments) { + printf(" %s (0x%01x)\n", entry->lhs->name, entry->unused); + } + } + + return progress; +} + +static void +dead_code_local_basic_block(ir_instruction *first, + ir_instruction *last, + void *data) +{ + ir_instruction *ir, *ir_next; + /* List of assignment_entry */ + exec_list assignments; + bool *out_progress = (bool *)data; + bool progress = false; + + void *ctx = ralloc_context(NULL); + /* Safe looping, since process_assignment() may remove instructions + * from the block. */ + for (ir = first, ir_next = (ir_instruction *)first->next;; + ir = ir_next, ir_next = (ir_instruction *)ir->next) { + ir_assignment *ir_assign = ir->as_assignment(); + + if (debug) { + ir->print(); + printf("\n"); + } + + if (ir_assign) { + progress = process_assignment(ctx, ir_assign, &assignments) || progress; + } else { + kill_for_derefs_visitor kill(&assignments); + ir->accept(&kill); + } + + if (ir == last) + break; + } + *out_progress = progress; + ralloc_free(ctx); +} + +/** + * Does a local dead code elimination pass on the code present in the + * instruction stream.
+ */ +bool +do_dead_code_local(exec_list *instructions) +{ + bool progress = false; + + call_for_basic_blocks(instructions, dead_code_local_basic_block, &progress); + + return progress; +} diff --git a/src/compiler/glsl/opt_dead_functions.cpp b/src/compiler/glsl/opt_dead_functions.cpp new file mode 100644 index 00000000000..2e90b650fa8 --- /dev/null +++ b/src/compiler/glsl/opt_dead_functions.cpp @@ -0,0 +1,152 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_dead_functions.cpp + * + * Eliminates unused functions from the linked program. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_expression_flattening.h" +#include "compiler/glsl_types.h" + +namespace { + +class signature_entry : public exec_node +{ +public: + signature_entry(ir_function_signature *sig) + { + this->signature = sig; + this->used = false; + } + + ir_function_signature *signature; + bool used; +}; + +class ir_dead_functions_visitor : public ir_hierarchical_visitor { +public: + ir_dead_functions_visitor() + { + this->mem_ctx = ralloc_context(NULL); + } + + ~ir_dead_functions_visitor() + { + ralloc_free(this->mem_ctx); + } + + virtual ir_visitor_status visit_enter(ir_function_signature *); + virtual ir_visitor_status visit_enter(ir_call *); + + signature_entry *get_signature_entry(ir_function_signature *var); + + /* List of signature_entry */ + exec_list signature_list; + void *mem_ctx; +}; + +} /* unnamed namespace */ + +signature_entry * +ir_dead_functions_visitor::get_signature_entry(ir_function_signature *sig) +{ + foreach_in_list(signature_entry, entry, &this->signature_list) { + if (entry->signature == sig) + return entry; + } + + signature_entry *entry = new(mem_ctx) signature_entry(sig); + this->signature_list.push_tail(entry); + return entry; +} + + +ir_visitor_status +ir_dead_functions_visitor::visit_enter(ir_function_signature *ir) +{ + signature_entry *entry = this->get_signature_entry(ir); + + if (strcmp(ir->function_name(), "main") == 0) { + entry->used = true; + } + + + + return visit_continue; +} + + +ir_visitor_status +ir_dead_functions_visitor::visit_enter(ir_call *ir) +{ + signature_entry *entry = this->get_signature_entry(ir->callee); + + entry->used = true; + + return visit_continue; +} + +bool +do_dead_functions(exec_list *instructions) +{ + ir_dead_functions_visitor v; + bool progress = false; + + visit_list_elements(&v, instructions); + + /* Now that we've 
figured out which function signatures are used, remove + * the unused ones, and remove function definitions that have no more + * signatures. + */ + foreach_in_list_safe(signature_entry, entry, &v.signature_list) { + if (!entry->used) { + entry->signature->remove(); + delete entry->signature; + progress = true; + } + delete(entry); + } + + /* We don't just do this above when we nuked a signature because of + * const pointers. + */ + foreach_in_list_safe(ir_instruction, ir, instructions) { + ir_function *func = ir->as_function(); + + if (func && func->signatures.is_empty()) { + /* At this point (post-linking), the symbol table is no + * longer in use, so not removing the function from the + * symbol table should be OK. + */ + func->remove(); + delete func; + progress = true; + } + } + + return progress; +} diff --git a/src/compiler/glsl/opt_flatten_nested_if_blocks.cpp b/src/compiler/glsl/opt_flatten_nested_if_blocks.cpp new file mode 100644 index 00000000000..c702102045f --- /dev/null +++ b/src/compiler/glsl/opt_flatten_nested_if_blocks.cpp @@ -0,0 +1,103 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_flatten_nested_if_blocks.cpp + * + * Flattens nested if blocks such as: + * + * if (x) { + * if (y) { + * ... + * } + * } + * + * into a single if block with a combined condition: + * + * if (x && y) { + * ... + * } + */ + +#include "ir.h" +#include "ir_builder.h" + +using namespace ir_builder; + +namespace { + +class nested_if_flattener : public ir_hierarchical_visitor { +public: + nested_if_flattener() + { + progress = false; + } + + ir_visitor_status visit_leave(ir_if *); + ir_visitor_status visit_enter(ir_assignment *); + + bool progress; +}; + +} /* unnamed namespace */ + +/* We only care about the top level "if" instructions, so don't + * descend into expressions. + */ +ir_visitor_status +nested_if_flattener::visit_enter(ir_assignment *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + +bool +opt_flatten_nested_if_blocks(exec_list *instructions) +{ + nested_if_flattener v; + + v.run(instructions); + return v.progress; +} + + +ir_visitor_status +nested_if_flattener::visit_leave(ir_if *ir) +{ + /* Only handle a single ir_if within the then clause of an ir_if. No extra + * instructions, no else clauses, nothing. 
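+ *
+ * E.g., a sketch: "if (x) { if (y) { f(); } }" qualifies, while
+ * "if (x) { g(); if (y) { f(); } }" or any form with an else clause
+ * does not.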
+ */ + if (ir->then_instructions.is_empty() || !ir->else_instructions.is_empty()) + return visit_continue; + + ir_if *inner = ((ir_instruction *) ir->then_instructions.head)->as_if(); + if (!inner || !inner->next->is_tail_sentinel() || + !inner->else_instructions.is_empty()) + return visit_continue; + + ir->condition = logic_and(ir->condition, inner->condition); + inner->then_instructions.move_nodes_to(&ir->then_instructions); + + progress = true; + return visit_continue; +} diff --git a/src/compiler/glsl/opt_flip_matrices.cpp b/src/compiler/glsl/opt_flip_matrices.cpp new file mode 100644 index 00000000000..04c6170b845 --- /dev/null +++ b/src/compiler/glsl/opt_flip_matrices.cpp @@ -0,0 +1,123 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_flip_matrices.cpp + * + * Convert (matrix * vector) operations to (vector * matrixTranspose), + * which can be done using dot products rather than multiplies and adds. + * On some hardware, this is more efficient. + * + * This currently only does the conversion for built-in matrices which + * already have transposed equivalents. Namely, gl_ModelViewProjectionMatrix + * and gl_TextureMatrix. 
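+ *
+ * E.g., a sketch of the rewrite:
+ *
+ *    gl_ModelViewProjectionMatrix * gl_Vertex
+ *
+ * becomes
+ *
+ *    gl_Vertex * gl_ModelViewProjectionMatrixTranspose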
+ */ +#include "ir.h" +#include "ir_optimization.h" +#include "main/macros.h" + +namespace { +class matrix_flipper : public ir_hierarchical_visitor { +public: + matrix_flipper(exec_list *instructions) + { + progress = false; + mvp_transpose = NULL; + texmat_transpose = NULL; + + foreach_in_list(ir_instruction, ir, instructions) { + ir_variable *var = ir->as_variable(); + if (!var) + continue; + if (strcmp(var->name, "gl_ModelViewProjectionMatrixTranspose") == 0) + mvp_transpose = var; + if (strcmp(var->name, "gl_TextureMatrixTranspose") == 0) + texmat_transpose = var; + } + } + + ir_visitor_status visit_enter(ir_expression *ir); + + bool progress; + +private: + ir_variable *mvp_transpose; + ir_variable *texmat_transpose; +}; +} + +ir_visitor_status +matrix_flipper::visit_enter(ir_expression *ir) +{ + if (ir->operation != ir_binop_mul || + !ir->operands[0]->type->is_matrix() || + !ir->operands[1]->type->is_vector()) + return visit_continue; + + ir_variable *mat_var = ir->operands[0]->variable_referenced(); + if (!mat_var) + return visit_continue; + + if (mvp_transpose && + strcmp(mat_var->name, "gl_ModelViewProjectionMatrix") == 0) { +#ifndef NDEBUG + ir_dereference_variable *deref = ir->operands[0]->as_dereference_variable(); + assert(deref && deref->var == mat_var); +#endif + + void *mem_ctx = ralloc_parent(ir); + + ir->operands[0] = ir->operands[1]; + ir->operands[1] = new(mem_ctx) ir_dereference_variable(mvp_transpose); + + progress = true; + } else if (texmat_transpose && + strcmp(mat_var->name, "gl_TextureMatrix") == 0) { + ir_dereference_array *array_ref = ir->operands[0]->as_dereference_array(); + assert(array_ref != NULL); + ir_dereference_variable *var_ref = array_ref->array->as_dereference_variable(); + assert(var_ref && var_ref->var == mat_var); + + ir->operands[0] = ir->operands[1]; + ir->operands[1] = array_ref; + + var_ref->var = texmat_transpose; + + texmat_transpose->data.max_array_access = + MAX2(texmat_transpose->data.max_array_access, mat_var->data.max_array_access); + + progress = true; + } + + return visit_continue; +} + +bool +opt_flip_matrices(struct exec_list *instructions) +{ + matrix_flipper v(instructions); + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_function_inlining.cpp b/src/compiler/glsl/opt_function_inlining.cpp new file mode 100644 index 00000000000..19f5fae0a17 --- /dev/null +++ b/src/compiler/glsl/opt_function_inlining.cpp @@ -0,0 +1,360 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_function_inlining.cpp + * + * Replaces calls to functions with the body of the function. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_function_inlining.h" +#include "ir_expression_flattening.h" +#include "compiler/glsl_types.h" +#include "program/hash_table.h" + +static void +do_variable_replacement(exec_list *instructions, + ir_variable *orig, + ir_dereference *repl); + +namespace { + +class ir_function_inlining_visitor : public ir_hierarchical_visitor { +public: + ir_function_inlining_visitor() + { + progress = false; + } + + virtual ~ir_function_inlining_visitor() + { + /* empty */ + } + + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_call *); + virtual ir_visitor_status visit_enter(ir_return *); + virtual ir_visitor_status visit_enter(ir_texture *); + virtual ir_visitor_status visit_enter(ir_swizzle *); + + bool progress; +}; + +} /* unnamed namespace */ + +bool +do_function_inlining(exec_list *instructions) +{ + ir_function_inlining_visitor v; + + v.run(instructions); + + return v.progress; +} + +static void +replace_return_with_assignment(ir_instruction *ir, void *data) +{ + void *ctx = ralloc_parent(ir); + ir_dereference *orig_deref = (ir_dereference *) data; + ir_return *ret = ir->as_return(); + + if (ret) { + if (ret->value) { + ir_rvalue *lhs = orig_deref->clone(ctx, NULL); + ret->replace_with(new(ctx) ir_assignment(lhs, ret->value, NULL)); + } else { + /* un-valued return has to be the last return, or we shouldn't + * have reached here. (see can_inline()). + */ + assert(ret->next->is_tail_sentinel()); + ret->remove(); + } + } +} + +void +ir_call::generate_inline(ir_instruction *next_ir) +{ + void *ctx = ralloc_parent(this); + ir_variable **parameters; + unsigned num_parameters; + int i; + struct hash_table *ht; + + ht = hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare); + + num_parameters = this->callee->parameters.length(); + parameters = new ir_variable *[num_parameters]; + + /* Generate the declarations for the parameters to our inlined code, + * and set up the mapping of real function body variables to ours. + */ + i = 0; + foreach_two_lists(formal_node, &this->callee->parameters, + actual_node, &this->actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + ir_rvalue *param = (ir_rvalue *) actual_node; + + /* Generate a new variable for the parameter. */ + if (sig_param->type->contains_opaque()) { + /* For opaque types, we want the inlined variable references + * referencing the passed in variable, since that will have + * the location information, which an assignment of an opaque + * variable wouldn't. Fix it up below. + */ + parameters[i] = NULL; + } else { + parameters[i] = sig_param->clone(ctx, ht); + parameters[i]->data.mode = ir_var_auto; + + /* Remove the read-only decoration because we're going to write + * directly to this variable. If the cloned variable is left + * read-only and the inlined function is inside a loop, the loop + * analysis code will get confused. + */ + parameters[i]->data.read_only = false; + next_ir->insert_before(parameters[i]); + } + + /* Move the actual param into our param variable if it's an 'in' type. 
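+ *
+ * E.g., a hypothetical sketch: for a signature "void f(in float a)"
+ * called as "f(x + y)", this emits the equivalent of "a = x + y;" just
+ * ahead of the inlined body, where "a" is the clone generated above.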
*/ + if (parameters[i] && (sig_param->data.mode == ir_var_function_in || + sig_param->data.mode == ir_var_const_in || + sig_param->data.mode == ir_var_function_inout)) { + ir_assignment *assign; + + assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]), + param, NULL); + next_ir->insert_before(assign); + } + + ++i; + } + + exec_list new_instructions; + + /* Generate the inlined body of the function to a new list */ + foreach_in_list(ir_instruction, ir, &callee->body) { + ir_instruction *new_ir = ir->clone(ctx, ht); + + new_instructions.push_tail(new_ir); + visit_tree(new_ir, replace_return_with_assignment, this->return_deref); + } + + /* If any opaque types were passed in, replace any deref of the + * opaque variable with a deref of the argument. + */ + foreach_two_lists(formal_node, &this->callee->parameters, + actual_node, &this->actual_parameters) { + ir_rvalue *const param = (ir_rvalue *) actual_node; + ir_variable *sig_param = (ir_variable *) formal_node; + + if (sig_param->type->contains_opaque()) { + ir_dereference *deref = param->as_dereference(); + + assert(deref); + do_variable_replacement(&new_instructions, sig_param, deref); + } + } + + /* Now push those new instructions in. */ + next_ir->insert_before(&new_instructions); + + /* Copy back the value of any 'out' parameters from the function body + * variables to our own. + */ + i = 0; + foreach_two_lists(formal_node, &this->callee->parameters, + actual_node, &this->actual_parameters) { + ir_rvalue *const param = (ir_rvalue *) actual_node; + const ir_variable *const sig_param = (ir_variable *) formal_node; + + /* Move our param variable into the actual param if it's an 'out' type. */ + if (parameters[i] && (sig_param->data.mode == ir_var_function_out || + sig_param->data.mode == ir_var_function_inout)) { + ir_assignment *assign; + + assign = new(ctx) ir_assignment(param->clone(ctx, NULL)->as_rvalue(), + new(ctx) ir_dereference_variable(parameters[i]), + NULL); + next_ir->insert_before(assign); + } + + ++i; + } + + delete [] parameters; + + hash_table_dtor(ht); +} + + +ir_visitor_status +ir_function_inlining_visitor::visit_enter(ir_expression *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + + +ir_visitor_status +ir_function_inlining_visitor::visit_enter(ir_return *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + + +ir_visitor_status +ir_function_inlining_visitor::visit_enter(ir_texture *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + + +ir_visitor_status +ir_function_inlining_visitor::visit_enter(ir_swizzle *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + + +ir_visitor_status +ir_function_inlining_visitor::visit_enter(ir_call *ir) +{ + if (can_inline(ir)) { + ir->generate_inline(ir); + ir->remove(); + this->progress = true; + } + + return visit_continue; +} + + +/** + * Replaces references to the "orig" variable with a clone of "repl." + * + * From the spec, opaque types can appear in the tree as function + * (non-out) parameters and as the result of array indexing and + * structure field selection. In our builtin implementation, they + * also appear in the sampler field of an ir_tex instruction. 
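+ *
+ * E.g., a sketch: when inlining "vec4 f(sampler2D s) { ... }" called as
+ * "f(tex)", each dereference of the formal parameter "s" in the inlined
+ * body is replaced with a dereference of the actual argument "tex".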
+ */ + +class ir_variable_replacement_visitor : public ir_hierarchical_visitor { +public: + ir_variable_replacement_visitor(ir_variable *orig, ir_dereference *repl) + { + this->orig = orig; + this->repl = repl; + } + + virtual ~ir_variable_replacement_visitor() + { + } + + virtual ir_visitor_status visit_leave(ir_call *); + virtual ir_visitor_status visit_leave(ir_dereference_array *); + virtual ir_visitor_status visit_leave(ir_dereference_record *); + virtual ir_visitor_status visit_leave(ir_texture *); + + void replace_deref(ir_dereference **deref); + void replace_rvalue(ir_rvalue **rvalue); + + ir_variable *orig; + ir_dereference *repl; +}; + +void +ir_variable_replacement_visitor::replace_deref(ir_dereference **deref) +{ + ir_dereference_variable *deref_var = (*deref)->as_dereference_variable(); + if (deref_var && deref_var->var == this->orig) { + *deref = this->repl->clone(ralloc_parent(*deref), NULL); + } +} + +void +ir_variable_replacement_visitor::replace_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + + if (!deref) + return; + + replace_deref(&deref); + *rvalue = deref; +} + +ir_visitor_status +ir_variable_replacement_visitor::visit_leave(ir_texture *ir) +{ + replace_deref(&ir->sampler); + + return visit_continue; +} + +ir_visitor_status +ir_variable_replacement_visitor::visit_leave(ir_dereference_array *ir) +{ + replace_rvalue(&ir->array); + return visit_continue; +} + +ir_visitor_status +ir_variable_replacement_visitor::visit_leave(ir_dereference_record *ir) +{ + replace_rvalue(&ir->record); + return visit_continue; +} + +ir_visitor_status +ir_variable_replacement_visitor::visit_leave(ir_call *ir) +{ + foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { + ir_rvalue *new_param = param; + replace_rvalue(&new_param); + + if (new_param != param) { + param->replace_with(new_param); + } + } + return visit_continue; +} + +static void +do_variable_replacement(exec_list *instructions, + ir_variable *orig, + ir_dereference *repl) +{ + ir_variable_replacement_visitor v(orig, repl); + + visit_list_elements(&v, instructions); +} diff --git a/src/compiler/glsl/opt_if_simplification.cpp b/src/compiler/glsl/opt_if_simplification.cpp new file mode 100644 index 00000000000..e05f03190aa --- /dev/null +++ b/src/compiler/glsl/opt_if_simplification.cpp @@ -0,0 +1,126 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ + +/** + * \file opt_if_simplification.cpp + * + * Moves constant branches of if statements out to the surrounding + * instruction stream, and inverts if conditionals to avoid empty + * "then" blocks. + */ + +#include "ir.h" + +namespace { + +class ir_if_simplification_visitor : public ir_hierarchical_visitor { +public: + ir_if_simplification_visitor() + { + this->made_progress = false; + } + + ir_visitor_status visit_leave(ir_if *); + ir_visitor_status visit_enter(ir_assignment *); + + bool made_progress; +}; + +} /* unnamed namespace */ + +/* We only care about the top level "if" instructions, so don't + * descend into expressions. + */ +ir_visitor_status +ir_if_simplification_visitor::visit_enter(ir_assignment *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + +bool +do_if_simplification(exec_list *instructions) +{ + ir_if_simplification_visitor v; + + v.run(instructions); + return v.made_progress; +} + + +ir_visitor_status +ir_if_simplification_visitor::visit_leave(ir_if *ir) +{ + /* If the if statement has nothing on either side, remove it. */ + if (ir->then_instructions.is_empty() && + ir->else_instructions.is_empty()) { + ir->remove(); + this->made_progress = true; + return visit_continue; + } + + /* FINISHME: Ideally there would be a way to note that the condition results + * FINISHME: in a constant before processing both of the other subtrees. + * FINISHME: This can probably be done with some flags, but it would take + * FINISHME: some work to get right. + */ + ir_constant *condition_constant = ir->condition->constant_expression_value(); + if (condition_constant) { + /* Move the contents of the one branch of the conditional + * that matters out. + */ + if (condition_constant->value.b[0]) { + ir->insert_before(&ir->then_instructions); + } else { + ir->insert_before(&ir->else_instructions); + } + ir->remove(); + this->made_progress = true; + return visit_continue; + } + + /* Turn: + * + * if (cond) { + * } else { + * do_work(); + * } + * + * into : + * + * if (!cond) + * do_work(); + * + * which avoids control flow for "else" (which is usually more + * expensive than normal operations), and the "not" can usually be + * folded into the generation of "cond" anyway. + */ + if (ir->then_instructions.is_empty()) { + ir->condition = new(ralloc_parent(ir->condition)) + ir_expression(ir_unop_logic_not, ir->condition); + ir->else_instructions.move_nodes_to(&ir->then_instructions); + this->made_progress = true; + } + + return visit_continue; +} diff --git a/src/compiler/glsl/opt_minmax.cpp b/src/compiler/glsl/opt_minmax.cpp new file mode 100644 index 00000000000..29482ee69de --- /dev/null +++ b/src/compiler/glsl/opt_minmax.cpp @@ -0,0 +1,488 @@ +/* + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_minmax.cpp + * + * Drop operands from an expression tree of only min/max operations if they + * can be proven to not contribute to the final result. + * + * The algorithm is similar to alpha-beta pruning on a minmax search. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "ir_optimization.h" +#include "ir_builder.h" +#include "program/prog_instruction.h" +#include "compiler/glsl_types.h" +#include "main/macros.h" + +using namespace ir_builder; + +namespace { + +enum compare_components_result { + LESS, + LESS_OR_EQUAL, + EQUAL, + GREATER_OR_EQUAL, + GREATER, + MIXED +}; + +class minmax_range { +public: + minmax_range(ir_constant *low = NULL, ir_constant *high = NULL) + { + this->low = low; + this->high = high; + } + + /* low is the lower limit of the range, high is the higher limit. NULL on + * low means negative infinity (unlimited) and on high positive infinity + * (unlimited). Because of the two interpretations of the value NULL, + * arbitrary comparison between ir_constants is impossible. + */ + ir_constant *low; + ir_constant *high; +}; + +class ir_minmax_visitor : public ir_rvalue_enter_visitor { +public: + ir_minmax_visitor() + : progress(false) + { + } + + ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange); + + void handle_rvalue(ir_rvalue **rvalue); + + bool progress; +}; + +/* + * Returns LESS if all vector components of `a' are strictly lower than of `b', + * GREATER if all vector components of `a' are strictly greater than of `b', + * MIXED if some vector components of `a' are strictly lower than of `b' while + * others are strictly greater, or EQUAL otherwise. + */ +static enum compare_components_result +compare_components(ir_constant *a, ir_constant *b) +{ + assert(a != NULL); + assert(b != NULL); + + assert(a->type->base_type == b->type->base_type); + + unsigned a_inc = a->type->is_scalar() ? 0 : 1; + unsigned b_inc = b->type->is_scalar() ? 
0 : 1; + unsigned components = MAX2(a->type->components(), b->type->components()); + + bool foundless = false; + bool foundgreater = false; + bool foundequal = false; + + for (unsigned i = 0, c0 = 0, c1 = 0; + i < components; + c0 += a_inc, c1 += b_inc, ++i) { + switch (a->type->base_type) { + case GLSL_TYPE_UINT: + if (a->value.u[c0] < b->value.u[c1]) + foundless = true; + else if (a->value.u[c0] > b->value.u[c1]) + foundgreater = true; + else + foundequal = true; + break; + case GLSL_TYPE_INT: + if (a->value.i[c0] < b->value.i[c1]) + foundless = true; + else if (a->value.i[c0] > b->value.i[c1]) + foundgreater = true; + else + foundequal = true; + break; + case GLSL_TYPE_FLOAT: + if (a->value.f[c0] < b->value.f[c1]) + foundless = true; + else if (a->value.f[c0] > b->value.f[c1]) + foundgreater = true; + else + foundequal = true; + break; + case GLSL_TYPE_DOUBLE: + if (a->value.d[c0] < b->value.d[c1]) + foundless = true; + else if (a->value.d[c0] > b->value.d[c1]) + foundgreater = true; + else + foundequal = true; + break; + default: + unreachable("not reached"); + } + } + + if (foundless && foundgreater) { + /* Some components are strictly lower, others are strictly greater */ + return MIXED; + } + + if (foundequal) { + /* It is not mixed, but it is not strictly lower or greater */ + if (foundless) + return LESS_OR_EQUAL; + if (foundgreater) + return GREATER_OR_EQUAL; + return EQUAL; + } + + /* All components are strictly lower or strictly greater */ + return foundless ? LESS : GREATER; +} + +static ir_constant * +combine_constant(bool ismin, ir_constant *a, ir_constant *b) +{ + void *mem_ctx = ralloc_parent(a); + ir_constant *c = a->clone(mem_ctx, NULL); + for (unsigned i = 0; i < c->type->components(); i++) { + switch (c->type->base_type) { + case GLSL_TYPE_UINT: + if ((ismin && b->value.u[i] < c->value.u[i]) || + (!ismin && b->value.u[i] > c->value.u[i])) + c->value.u[i] = b->value.u[i]; + break; + case GLSL_TYPE_INT: + if ((ismin && b->value.i[i] < c->value.i[i]) || + (!ismin && b->value.i[i] > c->value.i[i])) + c->value.i[i] = b->value.i[i]; + break; + case GLSL_TYPE_FLOAT: + if ((ismin && b->value.f[i] < c->value.f[i]) || + (!ismin && b->value.f[i] > c->value.f[i])) + c->value.f[i] = b->value.f[i]; + break; + case GLSL_TYPE_DOUBLE: + if ((ismin && b->value.d[i] < c->value.d[i]) || + (!ismin && b->value.d[i] > c->value.d[i])) + c->value.d[i] = b->value.d[i]; + break; + default: + assert(!"not reached"); + } + } + return c; +} + +static ir_constant * +smaller_constant(ir_constant *a, ir_constant *b) +{ + assert(a != NULL); + assert(b != NULL); + + enum compare_components_result ret = compare_components(a, b); + if (ret == MIXED) + return combine_constant(true, a, b); + else if (ret < EQUAL) + return a; + else + return b; +} + +static ir_constant * +larger_constant(ir_constant *a, ir_constant *b) +{ + assert(a != NULL); + assert(b != NULL); + + enum compare_components_result ret = compare_components(a, b); + if (ret == MIXED) + return combine_constant(false, a, b); + else if (ret < EQUAL) + return b; + else + return a; +} + +/* Combines two ranges by doing an element-wise min() / max() depending on the + * operation. + */ +static minmax_range +combine_range(minmax_range r0, minmax_range r1, bool ismin) +{ + minmax_range ret; + + if (!r0.low) { + ret.low = ismin ? r0.low : r1.low; + } else if (!r1.low) { + ret.low = ismin ? r1.low : r0.low; + } else { + ret.low = ismin ? smaller_constant(r0.low, r1.low) : + larger_constant(r0.low, r1.low); + } + + if (!r0.high) { + ret.high = ismin ? 
r1.high : r0.high; + } else if (!r1.high) { + ret.high = ismin ? r0.high : r1.high; + } else { + ret.high = ismin ? smaller_constant(r0.high, r1.high) : + larger_constant(r0.high, r1.high); + } + + return ret; +} + +/* Returns a range so that lower limit is the larger of the two lower limits, + * and higher limit is the smaller of the two higher limits. + */ +static minmax_range +range_intersection(minmax_range r0, minmax_range r1) +{ + minmax_range ret; + + if (!r0.low) + ret.low = r1.low; + else if (!r1.low) + ret.low = r0.low; + else + ret.low = larger_constant(r0.low, r1.low); + + if (!r0.high) + ret.high = r1.high; + else if (!r1.high) + ret.high = r0.high; + else + ret.high = smaller_constant(r0.high, r1.high); + + return ret; +} + +static minmax_range +get_range(ir_rvalue *rval) +{ + ir_expression *expr = rval->as_expression(); + if (expr && (expr->operation == ir_binop_min || + expr->operation == ir_binop_max)) { + minmax_range r0 = get_range(expr->operands[0]); + minmax_range r1 = get_range(expr->operands[1]); + return combine_range(r0, r1, expr->operation == ir_binop_min); + } + + ir_constant *c = rval->as_constant(); + if (c) { + return minmax_range(c, c); + } + + return minmax_range(); +} + +/** + * Prunes a min/max expression considering the base range of the parent + * min/max expression. + * + * @param baserange the range that the parents of this min/max expression + * in the min/max tree will clamp its value to. + */ +ir_rvalue * +ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange) +{ + assert(expr->operation == ir_binop_min || + expr->operation == ir_binop_max); + + bool ismin = expr->operation == ir_binop_min; + minmax_range limits[2]; + + /* Recurse to get the ranges for each of the subtrees of this + * expression. We need to do this as a separate step because we need to + * know the ranges of each of the subtrees before we prune either one. + * Consider something like this: + * + * max + * / \ + * max max + * / \ / \ + * 3 a b 2 + * + * We would like to prune away the max on the bottom-right, but to do so + * we need to know the range of the expression on the left beforehand, + * and there's no guarantee that we will visit either subtree in a + * particular order. + */ + for (unsigned i = 0; i < 2; ++i) + limits[i] = get_range(expr->operands[i]); + + for (unsigned i = 0; i < 2; ++i) { + bool is_redundant = false; + + enum compare_components_result cr = LESS; + if (ismin) { + /* If this operand will always be greater than the other one, it's + * redundant. + */ + if (limits[i].low && limits[1 - i].high) { + cr = compare_components(limits[i].low, limits[1 - i].high); + if (cr >= EQUAL && cr != MIXED) + is_redundant = true; + } + /* If this operand is always greater than baserange, then even if + * it's smaller than the other one it'll get clamped, so it's + * redundant. + */ + if (!is_redundant && limits[i].low && baserange.high) { + cr = compare_components(limits[i].low, baserange.high); + if (cr >= EQUAL && cr != MIXED) + is_redundant = true; + } + } else { + /* If this operand will always be lower than the other one, it's + * redundant. + */ + if (limits[i].high && limits[1 - i].low) { + cr = compare_components(limits[i].high, limits[1 - i].low); + if (cr <= EQUAL) + is_redundant = true; + } + /* If this operand is always lower than baserange, then even if + * it's greater than the other one it'll get clamped, so it's + * redundant. 
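+ *
+ * E.g., a sketch of the kind of pruning this enables: in
+ * min(max(a, 4), 3) the max() subtree is always at least 4, so the
+ * min() always yields 3 and the whole tree reduces to the constant 3.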
+ */ + if (!is_redundant && limits[i].high && baserange.low) { + cr = compare_components(limits[i].high, baserange.low); + if (cr <= EQUAL) + is_redundant = true; + } + } + + if (is_redundant) { + progress = true; + + /* Recurse if necessary. */ + ir_expression *op_expr = expr->operands[1 - i]->as_expression(); + if (op_expr && (op_expr->operation == ir_binop_min || + op_expr->operation == ir_binop_max)) { + return prune_expression(op_expr, baserange); + } + + return expr->operands[1 - i]; + } else if (cr == MIXED) { + /* If we have mixed vector operands, we can try to resolve the minmax + * expression by doing a component-wise minmax: + * + * min min + * / \ / \ + * min a ===> [1,1] a + * / \ + * [1,3] [3,1] + * + */ + ir_constant *a = expr->operands[0]->as_constant(); + ir_constant *b = expr->operands[1]->as_constant(); + if (a && b) + return combine_constant(ismin, a, b); + } + } + + /* Now recurse to operands giving them the proper baserange. The baserange + * to pass is the intersection of our baserange and the other operand's + * limit with one of the ranges unlimited. If we can't compute a valid + * intersection, we use the current baserange. + */ + for (unsigned i = 0; i < 2; ++i) { + ir_expression *op_expr = expr->operands[i]->as_expression(); + if (op_expr && (op_expr->operation == ir_binop_min || + op_expr->operation == ir_binop_max)) { + /* We can only compute a new baserange for this operand if we managed + * to compute a valid range for the other operand. + */ + if (ismin) + limits[1 - i].low = NULL; + else + limits[1 - i].high = NULL; + minmax_range base = range_intersection(limits[1 - i], baserange); + expr->operands[i] = prune_expression(op_expr, base); + } + } + + /* If we got here we could not discard any of the operands of the minmax + * expression, but we can still try to resolve the expression if both + * operands are constant. We do this after the loop above, to make sure + * that if our operands are minmax expressions we have tried to prune them + * first (hopefully reducing them to constants). + */ + ir_constant *a = expr->operands[0]->as_constant(); + ir_constant *b = expr->operands[1]->as_constant(); + if (a && b) + return combine_constant(ismin, a, b); + + return expr; +} + +static ir_rvalue * +swizzle_if_required(ir_expression *expr, ir_rvalue *rval) +{ + if (expr->type->is_vector() && rval->type->is_scalar()) { + return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements); + } else { + return rval; + } +} + +void +ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr || (expr->operation != ir_binop_min && + expr->operation != ir_binop_max)) + return; + + ir_rvalue *new_rvalue = prune_expression(expr, minmax_range()); + if (new_rvalue == *rvalue) + return; + + /* If the expression type is a vector and the optimization leaves a scalar + * as the result, we need to turn it into a vector. 
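+ *
+ * E.g., a sketch: if a vec4-typed min(3.0, max(v, 5.0)) prunes down to
+ * the scalar constant 3.0, the result is swizzled to .xxxx so that it
+ * stays a vec4.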
+ */ + *rvalue = swizzle_if_required(expr, new_rvalue); + + progress = true; +} + +} + +bool +do_minmax_prune(exec_list *instructions) +{ + ir_minmax_visitor v; + + visit_list_elements(&v, instructions); + + return v.progress; +} diff --git a/src/compiler/glsl/opt_noop_swizzle.cpp b/src/compiler/glsl/opt_noop_swizzle.cpp new file mode 100644 index 00000000000..41890ab2b15 --- /dev/null +++ b/src/compiler/glsl/opt_noop_swizzle.cpp @@ -0,0 +1,83 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_noop_swizzle.cpp + * + * If a swizzle doesn't change the order or count of components, then + * remove the swizzle so that other optimization passes see the value + * behind it. 
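+ *
+ * E.g., a sketch: "v.xyzw" on a vec4, or "v.xy" on a vec2, is replaced
+ * by plain "v".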
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include "compiler/glsl_types.h"
+
+namespace {
+
+class ir_noop_swizzle_visitor : public ir_rvalue_visitor {
+public:
+ ir_noop_swizzle_visitor()
+ {
+ this->progress = false;
+ }
+
+ void handle_rvalue(ir_rvalue **rvalue);
+ bool progress;
+};
+
+} /* unnamed namespace */
+
+void
+ir_noop_swizzle_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+ if (!*rvalue)
+ return;
+
+ ir_swizzle *swiz = (*rvalue)->as_swizzle();
+ if (!swiz || swiz->type != swiz->val->type)
+ return;
+
+ int elems = swiz->val->type->vector_elements;
+ if (swiz->mask.x != 0)
+ return;
+ if (elems >= 2 && swiz->mask.y != 1)
+ return;
+ if (elems >= 3 && swiz->mask.z != 2)
+ return;
+ if (elems >= 4 && swiz->mask.w != 3)
+ return;
+
+ this->progress = true;
+ *rvalue = swiz->val;
+}
+
+bool
+do_noop_swizzle(exec_list *instructions)
+{
+ ir_noop_swizzle_visitor v;
+ visit_list_elements(&v, instructions);
+
+ return v.progress;
+}
diff --git a/src/compiler/glsl/opt_rebalance_tree.cpp b/src/compiler/glsl/opt_rebalance_tree.cpp
new file mode 100644
index 00000000000..095f2d7d2f0
--- /dev/null
+++ b/src/compiler/glsl/opt_rebalance_tree.cpp
@@ -0,0 +1,321 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file opt_rebalance_tree.cpp
+ *
+ * Rebalances a reduction expression tree.
+ *
+ * For reduction operations (e.g., x + y + z + w) we generate an expression
+ * tree like
+ *
+ *        +
+ *       / \
+ *      +   w
+ *     / \
+ *    +   z
+ *   / \
+ *  x   y
+ *
+ * which we can rebalance into
+ *
+ *        +
+ *       / \
+ *      /   \
+ *     +     +
+ *    / \   / \
+ *   x   y z   w
+ *
+ * to get a better instruction scheduling.
+ *
+ * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
+ * and Bette L. Warren.
+ *
+ * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
+ * explanation of the tree_to_vine() (rightward rotation) and
+ * vine_to_tree() (leftward rotation) algorithms.
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_rvalue_visitor.h"
+#include "ir_optimization.h"
+#include "main/macros.h" /* for MAX2 */
+
+/* The DSW algorithm generates a degenerate tree (really, a linked list) in
+ * tree_to_vine(). We'd rather not leave a binary expression with only one
+ * operand, so trivial modifications (the ternary operators below) are needed
+ * to ensure that we only rotate around the ir_expression nodes of the tree.
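+ *
+ * E.g., a sketch: the reduction ((x + y) + z) + w from the file comment
+ * is first flattened into the right-leaning vine x + (y + (z + w)) and
+ * then rotated back into the balanced (x + y) + (z + w).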
+ */ +static unsigned +tree_to_vine(ir_expression *root) +{ + unsigned size = 0; + ir_rvalue *vine_tail = root; + ir_rvalue *remainder = root->operands[1]; + + while (remainder != NULL) { + ir_expression *remainder_temp = remainder->as_expression(); + ir_expression *remainder_left = remainder_temp ? + remainder_temp->operands[0]->as_expression() : NULL; + + if (remainder_left == NULL) { + /* move vine_tail down one */ + vine_tail = remainder; + remainder = remainder->as_expression() ? + ((ir_expression *)remainder)->operands[1] : NULL; + size++; + } else { + /* rotate */ + ir_expression *tempptr = remainder_left; + ((ir_expression *)remainder)->operands[0] = tempptr->operands[1]; + tempptr->operands[1] = remainder; + remainder = tempptr; + ((ir_expression *)vine_tail)->operands[1] = tempptr; + } + } + + return size; +} + +static void +compression(ir_expression *root, unsigned count) +{ + ir_expression *scanner = root; + + for (unsigned i = 0; i < count; i++) { + ir_expression *child = (ir_expression *)scanner->operands[1]; + scanner->operands[1] = child->operands[1]; + scanner = (ir_expression *)scanner->operands[1]; + child->operands[1] = scanner->operands[0]; + scanner->operands[0] = child; + } +} + +static void +vine_to_tree(ir_expression *root, unsigned size) +{ + int n = size - 1; + for (int m = n / 2; m > 0; m = n / 2) { + compression(root, m); + n -= m + 1; + } +} + +namespace { + +class ir_rebalance_visitor : public ir_rvalue_enter_visitor { +public: + ir_rebalance_visitor() + { + progress = false; + } + + void handle_rvalue(ir_rvalue **rvalue); + + bool progress; +}; + +struct is_reduction_data { + ir_expression_operation operation; + const glsl_type *type; + unsigned num_expr; + bool is_reduction; + bool contains_constant; +}; + +} /* anonymous namespace */ + +static bool +is_reduction_operation(ir_expression_operation operation) +{ + switch (operation) { + case ir_binop_add: + case ir_binop_mul: + case ir_binop_bit_and: + case ir_binop_bit_xor: + case ir_binop_bit_or: + case ir_binop_logic_and: + case ir_binop_logic_xor: + case ir_binop_logic_or: + case ir_binop_min: + case ir_binop_max: + return true; + default: + return false; + } +} + +/* Note that this function does not attempt to recognize that reduction trees + * are already balanced. + * + * We return false from this function for a number of reasons other than an + * expression tree not being a mathematical reduction. Namely, + * + * - if the tree contains multiple constants that we may be able to combine. + * - if the tree contains matrices: + * - they might contain vec4's with many constant components that we can + * simplify after splitting. + * - applying the matrix chain ordering optimization is more than just + * balancing an expression tree. + * - if the tree contains operations on multiple types. + * - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c + * would trick the visiting pass. + */ +static void +is_reduction(ir_instruction *ir, void *data) +{ + struct is_reduction_data *ird = (struct is_reduction_data *)data; + if (!ird->is_reduction) + return; + + /* We don't want to balance a tree that contains multiple constants, since + * we'll be able to constant fold them if they're not in separate subtrees. + */ + if (ir->as_constant()) { + if (ird->contains_constant) { + ird->is_reduction = false; + } + ird->contains_constant = true; + return; + } + + /* Array/record dereferences have subtrees that are not part of the expr + * tree we're balancing. Skip trees containing them. 
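+ *
+ * E.g., a sketch of a tree that does qualify: a + b + c + d, where every
+ * node is a float ir_binop_add and at most one leaf is a constant.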
+ */ + if (ir->ir_type == ir_type_dereference_array || + ir->ir_type == ir_type_dereference_record) { + ird->is_reduction = false; + return; + } + + ir_expression *expr = ir->as_expression(); + if (!expr) + return; + + /* Non-constant matrices might still contain constant vec4 that we can + * constant fold once split up. Handling matrices will need some more + * work. + */ + if (expr->type->is_matrix() || + expr->operands[0]->type->is_matrix() || + (expr->operands[1] && expr->operands[1]->type->is_matrix())) { + ird->is_reduction = false; + return; + } + + if (ird->type != NULL && ird->type != expr->type) { + ird->is_reduction = false; + return; + } + ird->type = expr->type; + + ird->num_expr++; + if (is_reduction_operation(expr->operation)) { + if (ird->operation != 0 && ird->operation != expr->operation) + ird->is_reduction = false; + ird->operation = expr->operation; + } else { + ird->is_reduction = false; + } +} + +static ir_rvalue * +handle_expression(ir_expression *expr) +{ + struct is_reduction_data ird; + ird.operation = (ir_expression_operation)0; + ird.type = NULL; + ird.num_expr = 0; + ird.is_reduction = true; + ird.contains_constant = false; + + visit_tree(expr, is_reduction, (void *)&ird); + + if (ird.is_reduction && ird.num_expr > 2) { + ir_constant z = ir_constant(0.0f); + ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr); + + unsigned size = tree_to_vine(&pseudo_root); + vine_to_tree(&pseudo_root, size); + + expr = (ir_expression *)pseudo_root.operands[1]; + } + return expr; +} + +static void +update_types(ir_instruction *ir, void *) +{ + ir_expression *expr = ir->as_expression(); + if (!expr) + return; + + const glsl_type *const new_type = + glsl_type::get_instance(expr->type->base_type, + MAX2(expr->operands[0]->type->vector_elements, + expr->operands[1]->type->vector_elements), + 1); + assert(new_type != glsl_type::error_type); + expr->type = new_type; +} + +void +ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_expression *expr = (*rvalue)->as_expression(); + if (!expr || !is_reduction_operation(expr->operation)) + return; + + ir_rvalue *new_rvalue = handle_expression(expr); + + /* If we failed to rebalance the tree (e.g., because it wasn't a reduction, + * or some other set of cases) new_rvalue will point to the same root as + * before. + * + * Similarly, if the tree rooted at *rvalue was a reduction and was already + * balanced, the algorithm will rearrange the tree but will ultimately + * return an identical tree, so this check will handle that as well and + * will not set progress = true. 
+ */
+ if (new_rvalue == *rvalue)
+ return;
+
+ visit_tree(new_rvalue, NULL, NULL, update_types);
+
+ *rvalue = new_rvalue;
+ this->progress = true;
+}
+
+bool
+do_rebalance_tree(exec_list *instructions)
+{
+ ir_rebalance_visitor v;
+
+ v.run(instructions);
+
+ return v.progress;
+}
diff --git a/src/compiler/glsl/opt_redundant_jumps.cpp b/src/compiler/glsl/opt_redundant_jumps.cpp
new file mode 100644
index 00000000000..ee384d0f23c
--- /dev/null
+++ b/src/compiler/glsl/opt_redundant_jumps.cpp
@@ -0,0 +1,124 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file opt_redundant_jumps.cpp
+ * Remove certain types of redundant jumps
+ */
+
+#include "ir.h"
+
+namespace {
+
+class redundant_jumps_visitor : public ir_hierarchical_visitor {
+public:
+ redundant_jumps_visitor()
+ {
+ this->progress = false;
+ }
+
+ virtual ir_visitor_status visit_leave(ir_if *);
+ virtual ir_visitor_status visit_leave(ir_loop *);
+ virtual ir_visitor_status visit_enter(ir_assignment *);
+
+ bool progress;
+};
+
+} /* unnamed namespace */
+
+/* We only care about the top level instructions, so don't descend
+ * into expressions.
+ */
+ir_visitor_status
+redundant_jumps_visitor::visit_enter(ir_assignment *)
+{
+ return visit_continue_with_parent;
+}
+
+ir_visitor_status
+redundant_jumps_visitor::visit_leave(ir_if *ir)
+{
+ /* If the last instruction in both branches is a 'break' or a 'continue',
+ * pull it out of the branches and insert it after the if-statement. Note
+ * that both must be the same type (either 'break' or 'continue').
+ */
+ ir_instruction *const last_then =
+ (ir_instruction *) ir->then_instructions.get_tail();
+ ir_instruction *const last_else =
+ (ir_instruction *) ir->else_instructions.get_tail();
+
+ if ((last_then == NULL) || (last_else == NULL))
+ return visit_continue;
+
+ if ((last_then->ir_type != ir_type_loop_jump)
+ || (last_else->ir_type != ir_type_loop_jump))
+ return visit_continue;
+
+ ir_loop_jump *const then_jump = (ir_loop_jump *) last_then;
+ ir_loop_jump *const else_jump = (ir_loop_jump *) last_else;
+
+ if (then_jump->mode != else_jump->mode)
+ return visit_continue;
+
+ then_jump->remove();
+ else_jump->remove();
+ this->progress = true;
+
+ ir->insert_after(then_jump);
+
+ /* If both branches of the if-statement are now empty, remove the
+ * if-statement.
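+ *
+ * E.g., a sketch: "if (c) break; else break;" loses both jumps to the
+ * hoisting above, leaving an empty if-statement that is deleted here,
+ * with a single "break" now following it.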
+ */ + if (ir->then_instructions.is_empty() && ir->else_instructions.is_empty()) + ir->remove(); + + return visit_continue; +} + + +ir_visitor_status +redundant_jumps_visitor::visit_leave(ir_loop *ir) +{ + /* If the last instruction of a loop body is a 'continue', remove it. + */ + ir_instruction *const last = + (ir_instruction *) ir->body_instructions.get_tail(); + + if (last && (last->ir_type == ir_type_loop_jump) + && (((ir_loop_jump *) last)->mode == ir_loop_jump::jump_continue)) { + last->remove(); + this->progress = true; + } + + return visit_continue; +} + + +bool +optimize_redundant_jumps(exec_list *instructions) +{ + redundant_jumps_visitor v; + + v.run(instructions); + return v.progress; +} diff --git a/src/compiler/glsl/opt_structure_splitting.cpp b/src/compiler/glsl/opt_structure_splitting.cpp new file mode 100644 index 00000000000..0d18a2f7584 --- /dev/null +++ b/src/compiler/glsl/opt_structure_splitting.cpp @@ -0,0 +1,367 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_structure_splitting.cpp + * + * If a structure is only ever referenced by its components, then + * split those components out to individual variables so they can be + * handled normally by other optimization passes. + * + * This skips structures like uniforms, which need to be accessible as + * structures for their access by the GL. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_rvalue_visitor.h" +#include "compiler/glsl_types.h" + +namespace { + +static bool debug = false; + +class variable_entry : public exec_node +{ +public: + variable_entry(ir_variable *var) + { + this->var = var; + this->whole_structure_access = 0; + this->declaration = false; + this->components = NULL; + this->mem_ctx = NULL; + } + + ir_variable *var; /* The key: the variable's pointer. */ + + /** Number of times the variable is referenced, including assignments. */ + unsigned whole_structure_access; + + /* If the variable had a decl we can work with in the instruction + * stream. We can't do splitting on function arguments, which + * don't get this variable set. + */ + bool declaration; + + ir_variable **components; + + /** ralloc_parent(this->var) -- the shader's ralloc context. 
*/ + void *mem_ctx; +}; + + +class ir_structure_reference_visitor : public ir_hierarchical_visitor { +public: + ir_structure_reference_visitor(void) + { + this->mem_ctx = ralloc_context(NULL); + this->variable_list.make_empty(); + } + + ~ir_structure_reference_visitor(void) + { + ralloc_free(mem_ctx); + } + + virtual ir_visitor_status visit(ir_variable *); + virtual ir_visitor_status visit(ir_dereference_variable *); + virtual ir_visitor_status visit_enter(ir_dereference_record *); + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_function_signature *); + + variable_entry *get_variable_entry(ir_variable *var); + + /* List of variable_entry */ + exec_list variable_list; + + void *mem_ctx; +}; + +variable_entry * +ir_structure_reference_visitor::get_variable_entry(ir_variable *var) +{ + assert(var); + + if (!var->type->is_record() || + var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage || + var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) + return NULL; + + foreach_in_list(variable_entry, entry, &this->variable_list) { + if (entry->var == var) + return entry; + } + + variable_entry *entry = new(mem_ctx) variable_entry(var); + this->variable_list.push_tail(entry); + return entry; +} + + +ir_visitor_status +ir_structure_reference_visitor::visit(ir_variable *ir) +{ + variable_entry *entry = this->get_variable_entry(ir); + + if (entry) + entry->declaration = true; + + return visit_continue; +} + +ir_visitor_status +ir_structure_reference_visitor::visit(ir_dereference_variable *ir) +{ + ir_variable *const var = ir->variable_referenced(); + variable_entry *entry = this->get_variable_entry(var); + + if (entry) + entry->whole_structure_access++; + + return visit_continue; +} + +ir_visitor_status +ir_structure_reference_visitor::visit_enter(ir_dereference_record *ir) +{ + (void) ir; + /* Don't descend into the ir_dereference_variable below. */ + return visit_continue_with_parent; +} + +ir_visitor_status +ir_structure_reference_visitor::visit_enter(ir_assignment *ir) +{ + /* If there are no structure references yet, no need to bother with + * processing the expression tree. + */ + if (this->variable_list.is_empty()) + return visit_continue_with_parent; + + if (ir->lhs->as_dereference_variable() && + ir->rhs->as_dereference_variable() && + !ir->condition) { + /* We'll split copies of a structure to copies of components, so don't + * descend to the ir_dereference_variables. + */ + return visit_continue_with_parent; + } + return visit_continue; +} + +ir_visitor_status +ir_structure_reference_visitor::visit_enter(ir_function_signature *ir) +{ + /* We don't have logic for structure-splitting function arguments, + * so just look at the body instructions and not the parameter + * declarations. 
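+ *
+ * E.g., a hypothetical sketch: a body-local "struct { float a; vec3 b; } s"
+ * accessed only as s.a and s.b is later split into temporaries named
+ * s_a and s_b, while a struct-typed parameter is left alone.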
+ */ + visit_list_elements(this, &ir->body); + return visit_continue_with_parent; +} + +class ir_structure_splitting_visitor : public ir_rvalue_visitor { +public: + ir_structure_splitting_visitor(exec_list *vars) + { + this->variable_list = vars; + } + + virtual ~ir_structure_splitting_visitor() + { + } + + virtual ir_visitor_status visit_leave(ir_assignment *); + + void split_deref(ir_dereference **deref); + void handle_rvalue(ir_rvalue **rvalue); + variable_entry *get_splitting_entry(ir_variable *var); + + exec_list *variable_list; +}; + +variable_entry * +ir_structure_splitting_visitor::get_splitting_entry(ir_variable *var) +{ + assert(var); + + if (!var->type->is_record()) + return NULL; + + foreach_in_list(variable_entry, entry, this->variable_list) { + if (entry->var == var) { + return entry; + } + } + + return NULL; +} + +void +ir_structure_splitting_visitor::split_deref(ir_dereference **deref) +{ + if ((*deref)->ir_type != ir_type_dereference_record) + return; + + ir_dereference_record *deref_record = (ir_dereference_record *)*deref; + ir_dereference_variable *deref_var = deref_record->record->as_dereference_variable(); + if (!deref_var) + return; + + variable_entry *entry = get_splitting_entry(deref_var->var); + if (!entry) + return; + + unsigned int i; + for (i = 0; i < entry->var->type->length; i++) { + if (strcmp(deref_record->field, + entry->var->type->fields.structure[i].name) == 0) + break; + } + assert(i != entry->var->type->length); + + *deref = new(entry->mem_ctx) ir_dereference_variable(entry->components[i]); +} + +void +ir_structure_splitting_visitor::handle_rvalue(ir_rvalue **rvalue) +{ + if (!*rvalue) + return; + + ir_dereference *deref = (*rvalue)->as_dereference(); + + if (!deref) + return; + + split_deref(&deref); + *rvalue = deref; +} + +ir_visitor_status +ir_structure_splitting_visitor::visit_leave(ir_assignment *ir) +{ + ir_dereference_variable *lhs_deref = ir->lhs->as_dereference_variable(); + ir_dereference_variable *rhs_deref = ir->rhs->as_dereference_variable(); + variable_entry *lhs_entry = lhs_deref ? get_splitting_entry(lhs_deref->var) : NULL; + variable_entry *rhs_entry = rhs_deref ? get_splitting_entry(rhs_deref->var) : NULL; + const glsl_type *type = ir->rhs->type; + + if ((lhs_entry || rhs_entry) && !ir->condition) { + for (unsigned int i = 0; i < type->length; i++) { + ir_dereference *new_lhs, *new_rhs; + void *mem_ctx = lhs_entry ? lhs_entry->mem_ctx : rhs_entry->mem_ctx; + + if (lhs_entry) { + new_lhs = new(mem_ctx) ir_dereference_variable(lhs_entry->components[i]); + } else { + new_lhs = new(mem_ctx) + ir_dereference_record(ir->lhs->clone(mem_ctx, NULL), + type->fields.structure[i].name); + } + + if (rhs_entry) { + new_rhs = new(mem_ctx) ir_dereference_variable(rhs_entry->components[i]); + } else { + new_rhs = new(mem_ctx) + ir_dereference_record(ir->rhs->clone(mem_ctx, NULL), + type->fields.structure[i].name); + } + + ir->insert_before(new(mem_ctx) ir_assignment(new_lhs, + new_rhs, + NULL)); + } + ir->remove(); + } else { + handle_rvalue(&ir->rhs); + split_deref(&ir->lhs); + } + + handle_rvalue(&ir->condition); + + return visit_continue; +} + +} /* unnamed namespace */ + +bool +do_structure_splitting(exec_list *instructions) +{ + ir_structure_reference_visitor refs; + + visit_list_elements(&refs, instructions); + + /* Trim out variables we can't split. 
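+ *
+ * I.e., a sketch: anything without a declaration in the instruction
+ * stream, or that was ever referenced as a whole structure (e.g. copied
+ * or passed to a function), stays intact.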
*/ + foreach_in_list_safe(variable_entry, entry, &refs.variable_list) { + if (debug) { + printf("structure %s@%p: decl %d, whole_access %d\n", + entry->var->name, (void *) entry->var, entry->declaration, + entry->whole_structure_access); + } + + if (!entry->declaration || entry->whole_structure_access) { + entry->remove(); + } + } + + if (refs.variable_list.is_empty()) + return false; + + void *mem_ctx = ralloc_context(NULL); + + /* Replace the decls of the structures to be split with their split + * components. + */ + foreach_in_list_safe(variable_entry, entry, &refs.variable_list) { + const struct glsl_type *type = entry->var->type; + + entry->mem_ctx = ralloc_parent(entry->var); + + entry->components = ralloc_array(mem_ctx, + ir_variable *, + type->length); + + for (unsigned int i = 0; i < entry->var->type->length; i++) { + const char *name = ralloc_asprintf(mem_ctx, "%s_%s", + entry->var->name, + type->fields.structure[i].name); + + entry->components[i] = + new(entry->mem_ctx) ir_variable(type->fields.structure[i].type, + name, + ir_var_temporary); + entry->var->insert_before(entry->components[i]); + } + + entry->var->remove(); + } + + ir_structure_splitting_visitor split(&refs.variable_list); + visit_list_elements(&split, instructions); + + ralloc_free(mem_ctx); + + return true; +} diff --git a/src/compiler/glsl/opt_swizzle_swizzle.cpp b/src/compiler/glsl/opt_swizzle_swizzle.cpp new file mode 100644 index 00000000000..7285474b089 --- /dev/null +++ b/src/compiler/glsl/opt_swizzle_swizzle.cpp @@ -0,0 +1,97 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_swizzle_swizzle.cpp + * + * Eliminates the second swizzle in a swizzle chain. 
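+ *
+ * For example, in the IR notation used elsewhere in these passes, the
+ * chained swizzle
+ *
+ *    (swiz y (swiz wzyx (var_ref v0)))
+ *
+ * selects component y of (w, z, y, x), i.e. v0.z, so it is rewritten into
+ * the single equivalent swizzle
+ *
+ *    (swiz z (var_ref v0))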
+ */
+
+#include "ir.h"
+#include "ir_visitor.h"
+#include "ir_optimization.h"
+#include "compiler/glsl_types.h"
+
+namespace {
+
+class ir_swizzle_swizzle_visitor : public ir_hierarchical_visitor {
+public:
+   ir_swizzle_swizzle_visitor()
+   {
+      progress = false;
+   }
+
+   virtual ir_visitor_status visit_enter(ir_swizzle *);
+
+   bool progress;
+};
+
+} /* unnamed namespace */
+
+ir_visitor_status
+ir_swizzle_swizzle_visitor::visit_enter(ir_swizzle *ir)
+{
+   int mask2[4];
+
+   ir_swizzle *swiz2 = ir->val->as_swizzle();
+   if (!swiz2)
+      return visit_continue;
+
+   memset(&mask2, 0, sizeof(mask2));
+   if (swiz2->mask.num_components >= 1)
+      mask2[0] = swiz2->mask.x;
+   if (swiz2->mask.num_components >= 2)
+      mask2[1] = swiz2->mask.y;
+   if (swiz2->mask.num_components >= 3)
+      mask2[2] = swiz2->mask.z;
+   if (swiz2->mask.num_components >= 4)
+      mask2[3] = swiz2->mask.w;
+
+   if (ir->mask.num_components >= 1)
+      ir->mask.x = mask2[ir->mask.x];
+   if (ir->mask.num_components >= 2)
+      ir->mask.y = mask2[ir->mask.y];
+   if (ir->mask.num_components >= 3)
+      ir->mask.z = mask2[ir->mask.z];
+   if (ir->mask.num_components >= 4)
+      ir->mask.w = mask2[ir->mask.w];
+
+   ir->val = swiz2->val;
+
+   this->progress = true;
+
+   return visit_continue;
+}
+
+/**
+ * Composes each swizzle-of-a-swizzle in the instruction stream into a
+ * single equivalent swizzle.
+ */
+bool
+do_swizzle_swizzle(exec_list *instructions)
+{
+   ir_swizzle_swizzle_visitor v;
+
+   v.run(instructions);
+
+   return v.progress;
+}
diff --git a/src/compiler/glsl/opt_tree_grafting.cpp b/src/compiler/glsl/opt_tree_grafting.cpp
new file mode 100644
index 00000000000..83effb7424c
--- /dev/null
+++ b/src/compiler/glsl/opt_tree_grafting.cpp
@@ -0,0 +1,404 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file opt_tree_grafting.cpp
+ *
+ * Takes assignments to variables that are dereferenced only once and
+ * pastes the RHS expression into the place where the variable is
+ * dereferenced.
+ *
+ * In the process of various operations like function inlining and
+ * ternary op handling, we'll end up with our expression trees having
+ * been chopped up into a series of assignments of short expressions
+ * to temps.  Other passes like opt_algebraic.cpp would prefer to see
+ * the deepest expression trees they can to try to optimize them.
+ *
+ * This is a lot like copy propagation.  In comparison, copy
+ * propagation only acts on plain copies, not arbitrary expressions on
+ * the RHS.
Generally, we wouldn't want to go pasting some + * complicated expression everywhere it got used, though, so we don't + * handle expressions in that pass. + * + * The hard part is making sure we don't move an expression across + * some other assignments that would change the value of the + * expression. So we split this into two passes: First, find the + * variables in our scope which are written to once and read once, and + * then go through basic blocks seeing if we find an opportunity to + * move those expressions safely. + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_variable_refcount.h" +#include "ir_basic_block.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" + +namespace { + +static bool debug = false; + +class ir_tree_grafting_visitor : public ir_hierarchical_visitor { +public: + ir_tree_grafting_visitor(ir_assignment *graft_assign, + ir_variable *graft_var) + { + this->progress = false; + this->graft_assign = graft_assign; + this->graft_var = graft_var; + } + + virtual ir_visitor_status visit_leave(class ir_assignment *); + virtual ir_visitor_status visit_enter(class ir_call *); + virtual ir_visitor_status visit_enter(class ir_expression *); + virtual ir_visitor_status visit_enter(class ir_function *); + virtual ir_visitor_status visit_enter(class ir_function_signature *); + virtual ir_visitor_status visit_enter(class ir_if *); + virtual ir_visitor_status visit_enter(class ir_loop *); + virtual ir_visitor_status visit_enter(class ir_swizzle *); + virtual ir_visitor_status visit_enter(class ir_texture *); + + ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var); + + bool do_graft(ir_rvalue **rvalue); + + bool progress; + ir_variable *graft_var; + ir_assignment *graft_assign; +}; + +struct find_deref_info { + ir_variable *var; + bool found; +}; + +void +dereferences_variable_callback(ir_instruction *ir, void *data) +{ + struct find_deref_info *info = (struct find_deref_info *)data; + ir_dereference_variable *deref = ir->as_dereference_variable(); + + if (deref && deref->var == info->var) + info->found = true; +} + +static bool +dereferences_variable(ir_instruction *ir, ir_variable *var) +{ + struct find_deref_info info; + + info.var = var; + info.found = false; + + visit_tree(ir, dereferences_variable_callback, &info); + + return info.found; +} + +bool +ir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue) +{ + if (!*rvalue) + return false; + + ir_dereference_variable *deref = (*rvalue)->as_dereference_variable(); + + if (!deref || deref->var != this->graft_var) + return false; + + if (debug) { + fprintf(stderr, "GRAFTING:\n"); + this->graft_assign->fprint(stderr); + fprintf(stderr, "\n"); + fprintf(stderr, "TO:\n"); + (*rvalue)->fprint(stderr); + fprintf(stderr, "\n"); + } + + this->graft_assign->remove(); + *rvalue = this->graft_assign->rhs; + + this->progress = true; + return true; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_loop *ir) +{ + (void)ir; + /* Do not traverse into the body of the loop since that is a + * different basic block. + */ + return visit_stop; +} + +/** + * Check if we can continue grafting after writing to a variable. If the + * expression we're trying to graft references the variable, we must stop. + * + * \param ir An instruction that writes to a variable. + * \param var The variable being updated. 
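+ *
+ * For example, while trying to graft "t = a + b" forward, an intervening
+ * write such as "a = c" kills the graft: moving "a + b" past that write
+ * would read the new value of a.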
+ */ +ir_visitor_status +ir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var) +{ + if (dereferences_variable(this->graft_assign->rhs, var)) { + if (debug) { + fprintf(stderr, "graft killed by: "); + ir->fprint(stderr); + fprintf(stderr, "\n"); + } + return visit_stop; + } + + return visit_continue; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_leave(ir_assignment *ir) +{ + if (do_graft(&ir->rhs) || + do_graft(&ir->condition)) + return visit_stop; + + /* If this assignment updates a variable used in the assignment + * we're trying to graft, then we're done. + */ + return check_graft(ir, ir->lhs->variable_referenced()); +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_function *ir) +{ + (void) ir; + return visit_continue_with_parent; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_function_signature *ir) +{ + (void)ir; + return visit_continue_with_parent; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_call *ir) +{ + foreach_two_lists(formal_node, &ir->callee->parameters, + actual_node, &ir->actual_parameters) { + ir_variable *sig_param = (ir_variable *) formal_node; + ir_rvalue *ir = (ir_rvalue *) actual_node; + ir_rvalue *new_ir = ir; + + if (sig_param->data.mode != ir_var_function_in + && sig_param->data.mode != ir_var_const_in) { + if (check_graft(ir, sig_param) == visit_stop) + return visit_stop; + continue; + } + + if (do_graft(&new_ir)) { + ir->replace_with(new_ir); + return visit_stop; + } + } + + if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop) + return visit_stop; + + return visit_continue; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_expression *ir) +{ + for (unsigned int i = 0; i < ir->get_num_operands(); i++) { + if (do_graft(&ir->operands[i])) + return visit_stop; + } + + return visit_continue; +} + +ir_visitor_status +ir_tree_grafting_visitor::visit_enter(ir_if *ir) +{ + if (do_graft(&ir->condition)) + return visit_stop; + + /* Do not traverse into the body of the if-statement since that is a + * different basic block. 
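+    * The interference checks made during this walk are only valid within
+    * a single basic block, so we must not graft across control flow.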
+    */
+   return visit_continue_with_parent;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_enter(ir_swizzle *ir)
+{
+   if (do_graft(&ir->val))
+      return visit_stop;
+
+   return visit_continue;
+}
+
+ir_visitor_status
+ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
+{
+   if (do_graft(&ir->coordinate) ||
+       do_graft(&ir->projector) ||
+       do_graft(&ir->offset) ||
+       do_graft(&ir->shadow_comparitor))
+      return visit_stop;
+
+   switch (ir->op) {
+   case ir_tex:
+   case ir_lod:
+   case ir_query_levels:
+   case ir_texture_samples:
+   case ir_samples_identical:
+      break;
+   case ir_txb:
+      if (do_graft(&ir->lod_info.bias))
+         return visit_stop;
+      break;
+   case ir_txf:
+   case ir_txl:
+   case ir_txs:
+      if (do_graft(&ir->lod_info.lod))
+         return visit_stop;
+      break;
+   case ir_txf_ms:
+      if (do_graft(&ir->lod_info.sample_index))
+         return visit_stop;
+      break;
+   case ir_txd:
+      if (do_graft(&ir->lod_info.grad.dPdx) ||
+          do_graft(&ir->lod_info.grad.dPdy))
+         return visit_stop;
+      break;
+   case ir_tg4:
+      if (do_graft(&ir->lod_info.component))
+         return visit_stop;
+      break;
+   }
+
+   return visit_continue;
+}
+
+struct tree_grafting_info {
+   ir_variable_refcount_visitor *refs;
+   bool progress;
+};
+
+static bool
+try_tree_grafting(ir_assignment *start,
+                  ir_variable *lhs_var,
+                  ir_instruction *bb_last)
+{
+   ir_tree_grafting_visitor v(start, lhs_var);
+
+   if (debug) {
+      fprintf(stderr, "trying to graft: ");
+      lhs_var->fprint(stderr);
+      fprintf(stderr, "\n");
+   }
+
+   for (ir_instruction *ir = (ir_instruction *)start->next;
+        ir != bb_last->next;
+        ir = (ir_instruction *)ir->next) {
+
+      if (debug) {
+         fprintf(stderr, "- ");
+         ir->fprint(stderr);
+         fprintf(stderr, "\n");
+      }
+
+      ir_visitor_status s = ir->accept(&v);
+      if (s == visit_stop)
+         return v.progress;
+   }
+
+   return false;
+}
+
+static void
+tree_grafting_basic_block(ir_instruction *bb_first,
+                          ir_instruction *bb_last,
+                          void *data)
+{
+   struct tree_grafting_info *info = (struct tree_grafting_info *)data;
+   ir_instruction *ir, *next;
+
+   for (ir = bb_first, next = (ir_instruction *)ir->next;
+        ir != bb_last->next;
+        ir = next, next = (ir_instruction *)ir->next) {
+      ir_assignment *assign = ir->as_assignment();
+
+      if (!assign)
+         continue;
+
+      ir_variable *lhs_var = assign->whole_variable_written();
+      if (!lhs_var)
+         continue;
+
+      if (lhs_var->data.mode == ir_var_function_out ||
+          lhs_var->data.mode == ir_var_function_inout ||
+          lhs_var->data.mode == ir_var_shader_out ||
+          lhs_var->data.mode == ir_var_shader_storage)
+         continue;
+
+      ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
+
+      if (!entry->declaration ||
+          entry->assigned_count != 1 ||
+          entry->referenced_count != 2)
+         continue;
+
+      /* Found a possibly graftable assignment.  Now, walk through the rest
+       * of the BB to see whether the dereference appears there, and whether
+       * anything in between interferes with pasting the expression's value
+       * into it.
+       */
+      info->progress |= try_tree_grafting(assign, lhs_var, bb_last);
+   }
+}
+
+} /* unnamed namespace */
+
+/**
+ * Grafts the right-hand sides of single-use temporary assignments into the
+ * places where those temporaries are dereferenced.
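+ *
+ * For example, given
+ *
+ *    t = a * b;
+ *    r = t + c;
+ *
+ * where t is assigned once and dereferenced once within the basic block,
+ * the pass rewrites the second statement into r = (a * b) + c and removes
+ * the first.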
+ */ +bool +do_tree_grafting(exec_list *instructions) +{ + ir_variable_refcount_visitor refs; + struct tree_grafting_info info; + + info.progress = false; + info.refs = &refs; + + visit_list_elements(info.refs, instructions); + + call_for_basic_blocks(instructions, tree_grafting_basic_block, &info); + + return info.progress; +} diff --git a/src/compiler/glsl/opt_vectorize.cpp b/src/compiler/glsl/opt_vectorize.cpp new file mode 100644 index 00000000000..88318cd8a6e --- /dev/null +++ b/src/compiler/glsl/opt_vectorize.cpp @@ -0,0 +1,407 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/** + * \file opt_vectorize.cpp + * + * Combines scalar assignments of the same expression (modulo swizzle) to + * multiple channels of the same variable into a single vectorized expression + * and assignment. + * + * Many generated shaders contain scalarized code. That is, they contain + * + * r1.x = log2(v0.x); + * r1.y = log2(v0.y); + * r1.z = log2(v0.z); + * + * rather than + * + * r1.xyz = log2(v0.xyz); + * + * We look for consecutive assignments of the same expression (modulo swizzle) + * to each channel of the same variable. 
+ * + * For instance, we want to convert these three scalar operations + * + * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0)))) + * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0)))) + * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0)))) + * + * into a single vector operation + * + * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0)))) + */ + +#include "ir.h" +#include "ir_visitor.h" +#include "ir_optimization.h" +#include "compiler/glsl_types.h" +#include "program/prog_instruction.h" + +namespace { + +class ir_vectorize_visitor : public ir_hierarchical_visitor { +public: + void clear() + { + assignment[0] = NULL; + assignment[1] = NULL; + assignment[2] = NULL; + assignment[3] = NULL; + current_assignment = NULL; + last_assignment = NULL; + channels = 0; + has_swizzle = false; + } + + ir_vectorize_visitor() + { + clear(); + progress = false; + } + + virtual ir_visitor_status visit_enter(ir_assignment *); + virtual ir_visitor_status visit_enter(ir_swizzle *); + virtual ir_visitor_status visit_enter(ir_dereference_array *); + virtual ir_visitor_status visit_enter(ir_expression *); + virtual ir_visitor_status visit_enter(ir_if *); + virtual ir_visitor_status visit_enter(ir_loop *); + virtual ir_visitor_status visit_enter(ir_texture *); + + virtual ir_visitor_status visit_leave(ir_assignment *); + + void try_vectorize(); + + ir_assignment *assignment[4]; + ir_assignment *current_assignment, *last_assignment; + unsigned channels; + bool has_swizzle; + + bool progress; +}; + +} /* unnamed namespace */ + +/** + * Rewrites the swizzles and types of a right-hand side of an assignment. + * + * From the example above, this function would be called (by visit_tree()) on + * the nodes of the tree (expression float log2 (swiz z (var_ref v0))), + * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))). + * + * The function operates on ir_expressions (and its operands) and ir_swizzles. + * For expressions it sets a new type and swizzles any non-expression and non- + * swizzle scalar operands into appropriately sized vector arguments. For + * example, if combining + * + * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1)))) + * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1)))) + * + * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on + * (var_ref v1) such that the final result was + * + * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0)) + * (swiz xx (var_ref v1)))) + * + * For swizzles, it sets a new type, and if the variable being swizzled is a + * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the + * data parameter. If the swizzled variable is scalar, then the swizzle was + * added by an earlier call to rewrite_swizzle() on an expression, so the + * mask should not be modified. 
+ */ +static void +rewrite_swizzle(ir_instruction *ir, void *data) +{ + ir_swizzle_mask *mask = (ir_swizzle_mask *)data; + + switch (ir->ir_type) { + case ir_type_swizzle: { + ir_swizzle *swz = (ir_swizzle *)ir; + if (swz->val->type->is_vector()) { + swz->mask = *mask; + } + swz->type = glsl_type::get_instance(swz->type->base_type, + mask->num_components, 1); + break; + } + case ir_type_expression: { + ir_expression *expr = (ir_expression *)ir; + expr->type = glsl_type::get_instance(expr->type->base_type, + mask->num_components, 1); + for (unsigned i = 0; i < 4; i++) { + if (expr->operands[i]) { + ir_rvalue *rval = expr->operands[i]->as_rvalue(); + if (rval && rval->type->is_scalar() && + !rval->as_expression() && !rval->as_swizzle()) { + expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0, + mask->num_components); + } + } + } + break; + } + default: + break; + } +} + +/** + * Attempt to vectorize the previously saved assignments, and clear them from + * consideration. + * + * If the assignments are able to be combined, it modifies in-place the last + * assignment seen to be an equivalent vector form of the scalar assignments. + * It then removes the other now obsolete scalar assignments. + */ +void +ir_vectorize_visitor::try_vectorize() +{ + if (this->last_assignment && this->channels > 1) { + ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0}; + + this->last_assignment->write_mask = 0; + + for (unsigned i = 0, j = 0; i < 4; i++) { + if (this->assignment[i]) { + this->last_assignment->write_mask |= 1 << i; + + if (this->assignment[i] != this->last_assignment) { + this->assignment[i]->remove(); + } + + switch (j) { + case 0: mask.x = i; break; + case 1: mask.y = i; break; + case 2: mask.z = i; break; + case 3: mask.w = i; break; + } + + j++; + } + } + + visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); + + this->progress = true; + } + clear(); +} + +/** + * Returns whether the write mask is a single channel. + */ +static bool +single_channel_write_mask(unsigned write_mask) +{ + return write_mask != 0 && (write_mask & (write_mask - 1)) == 0; +} + +/** + * Translates single-channeled write mask to single-channeled swizzle. + */ +static unsigned +write_mask_to_swizzle(unsigned write_mask) +{ + switch (write_mask) { + case WRITEMASK_X: return SWIZZLE_X; + case WRITEMASK_Y: return SWIZZLE_Y; + case WRITEMASK_Z: return SWIZZLE_Z; + case WRITEMASK_W: return SWIZZLE_W; + } + unreachable("not reached"); +} + +/** + * Returns whether a single-channeled write mask matches a swizzle. + */ +static bool +write_mask_matches_swizzle(unsigned write_mask, + const ir_swizzle *swz) +{ + return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) || + (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) || + (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) || + (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W)); +} + +/** + * Upon entering an ir_assignment, attempt to vectorize the currently tracked + * assignments if the current assignment is not suitable. Keep a pointer to + * the current assignment. + */ +ir_visitor_status +ir_vectorize_visitor::visit_enter(ir_assignment *ir) +{ + ir_dereference *lhs = this->last_assignment != NULL ? + this->last_assignment->lhs : NULL; + ir_rvalue *rhs = this->last_assignment != NULL ? 
+      this->last_assignment->rhs : NULL;
+
+   if (ir->condition ||
+       this->channels >= 4 ||
+       !single_channel_write_mask(ir->write_mask) ||
+       this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL ||
+       (lhs && !ir->lhs->equals(lhs)) ||
+       (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
+      try_vectorize();
+   }
+
+   this->current_assignment = ir;
+
+   return visit_continue;
+}
+
+/**
+ * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
+ * ir_assignment (i.e., ::current_assignment is set) and the swizzle mask
+ * matches the current assignment's write mask.
+ *
+ * If the write mask doesn't match the swizzle mask, remove the current
+ * assignment from further consideration.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
+{
+   if (this->current_assignment) {
+      if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
+         this->has_swizzle = true;
+      } else {
+         this->current_assignment = NULL;
+      }
+   }
+   return visit_continue;
+}
+
+/* Upon entering an ir_array_dereference, remove the current assignment from
+ * further consideration. Since the index of an array dereference must be
+ * scalar, we are not able to vectorize it.
+ *
+ * FINISHME: If all of the scalar indices are identical we could vectorize.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_dereference_array *)
+{
+   this->current_assignment = NULL;
+   return visit_continue_with_parent;
+}
+
+/**
+ * Upon entering an ir_expression, remove the current assignment from further
+ * consideration if the expression operates horizontally on vectors.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_expression *ir)
+{
+   if (ir->is_horizontal()) {
+      this->current_assignment = NULL;
+      return visit_continue_with_parent;
+   }
+   return visit_continue;
+}
+
+/* Since there is no statement to visit between the "then" and "else"
+ * instructions, try to vectorize before, in between, and after them to avoid
+ * combining statements from different basic blocks.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_if *ir)
+{
+   try_vectorize();
+
+   visit_list_elements(this, &ir->then_instructions);
+   try_vectorize();
+
+   visit_list_elements(this, &ir->else_instructions);
+   try_vectorize();
+
+   return visit_continue_with_parent;
+}
+
+/* Since there is no statement to visit between the instructions in the body
+ * of the loop and the instructions after it, try to vectorize before and
+ * after the body to avoid combining statements from different basic blocks.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_loop *ir)
+{
+   try_vectorize();
+
+   visit_list_elements(this, &ir->body_instructions);
+   try_vectorize();
+
+   return visit_continue_with_parent;
+}
+
+/**
+ * Upon entering an ir_texture, remove the current assignment from
+ * further consideration. Vectorizing multiple texture lookups into one
+ * is wrong.
+ */
+ir_visitor_status
+ir_vectorize_visitor::visit_enter(ir_texture *)
+{
+   this->current_assignment = NULL;
+   return visit_continue_with_parent;
+}
+
+/**
+ * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
+ * the swizzle mask(s) found were appropriate. Also save a pointer in
+ * ::last_assignment so that we can compare future assignments with it.
+ *
+ * Finally, clear ::current_assignment and ::has_swizzle.
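+ *
+ * For the log2 example in the file header, after leaving the ".y"
+ * assignment, ::assignment[0] and ::assignment[1] point at the two scalar
+ * statements, ::channels is 2, and ::last_assignment is the ".y" statement
+ * that the upcoming ".z" assignment will be compared against.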
+ */ +ir_visitor_status +ir_vectorize_visitor::visit_leave(ir_assignment *ir) +{ + if (this->has_swizzle && this->current_assignment) { + assert(this->current_assignment == ir); + + unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask); + this->assignment[channel] = ir; + this->channels++; + + this->last_assignment = this->current_assignment; + } + this->current_assignment = NULL; + this->has_swizzle = false; + return visit_continue; +} + +/** + * Combines scalar assignments of the same expression (modulo swizzle) to + * multiple channels of the same variable into a single vectorized expression + * and assignment. + */ +bool +do_vectorize(exec_list *instructions) +{ + ir_vectorize_visitor v; + + v.run(instructions); + + /* Try to vectorize the last assignments seen. */ + v.try_vectorize(); + + return v.progress; +} diff --git a/src/compiler/glsl/program.h b/src/compiler/glsl/program.h new file mode 100644 index 00000000000..64f54635f62 --- /dev/null +++ b/src/compiler/glsl/program.h @@ -0,0 +1,55 @@ +/* + * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "main/core.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern void +_mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, + bool dump_ast, bool dump_hir); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +extern void +link_shaders(struct gl_context *ctx, struct gl_shader_program *prog); + +extern void +build_program_resource_list(struct gl_shader_program *shProg); + +extern void +linker_error(struct gl_shader_program *prog, const char *fmt, ...) + PRINTFLIKE(2, 3); + +extern void +linker_warning(struct gl_shader_program *prog, const char *fmt, ...) 
+ PRINTFLIKE(2, 3); + +extern long +parse_program_resource_name(const GLchar *name, + const GLchar **out_base_name_end); diff --git a/src/compiler/glsl/s_expression.cpp b/src/compiler/glsl/s_expression.cpp new file mode 100644 index 00000000000..f82e155a6b6 --- /dev/null +++ b/src/compiler/glsl/s_expression.cpp @@ -0,0 +1,218 @@ +/* -*- c++ -*- */ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include +#include "s_expression.h" + +s_symbol::s_symbol(const char *str, size_t n) +{ + /* Assume the given string is already nul-terminated and in memory that + * will live as long as this node. + */ + assert(str[n] == '\0'); + this->str = str; +} + +s_list::s_list() +{ +} + +static void +skip_whitespace(const char *&src, char *&symbol_buffer) +{ + size_t n = strspn(src, " \v\t\r\n"); + src += n; + symbol_buffer += n; + /* Also skip Scheme-style comments: semi-colon 'til end of line */ + if (src[0] == ';') { + n = strcspn(src, "\n"); + src += n; + symbol_buffer += n; + skip_whitespace(src, symbol_buffer); + } +} + +static s_expression * +read_atom(void *ctx, const char *&src, char *&symbol_buffer) +{ + s_expression *expr = NULL; + + skip_whitespace(src, symbol_buffer); + + size_t n = strcspn(src, "( \v\t\r\n);"); + if (n == 0) + return NULL; // no atom + + // Check for the special symbol '+INF', which means +Infinity. Note: C99 + // requires strtof to parse '+INF' as +Infinity, but we still support some + // non-C99-compliant compilers (e.g. MSVC). + if (n == 4 && strncmp(src, "+INF", 4) == 0) { + expr = new(ctx) s_float(INFINITY); + } else { + // Check if the atom is a number. + char *float_end = NULL; + float f = _mesa_strtof(src, &float_end); + if (float_end != src) { + char *int_end = NULL; + int i = strtol(src, &int_end, 10); + // If strtof matched more characters, it must have a decimal part + if (float_end > int_end) + expr = new(ctx) s_float(f); + else + expr = new(ctx) s_int(i); + } else { + // Not a number; return a symbol. 
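+         // Nul-terminating the copy in place (rather than strndup'ing each
+         // symbol) is safe: read_expression duplicated the entire source
+         // buffer up front, so s_symbol can point directly into it.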
+         symbol_buffer[n] = '\0';
+         expr = new(ctx) s_symbol(symbol_buffer, n);
+      }
+   }
+
+   src += n;
+   symbol_buffer += n;
+
+   return expr;
+}
+
+static s_expression *
+__read_expression(void *ctx, const char *&src, char *&symbol_buffer)
+{
+   s_expression *atom = read_atom(ctx, src, symbol_buffer);
+   if (atom != NULL)
+      return atom;
+
+   skip_whitespace(src, symbol_buffer);
+   if (src[0] == '(') {
+      ++src;
+      ++symbol_buffer;
+
+      s_list *list = new(ctx) s_list;
+      s_expression *expr;
+
+      while ((expr = __read_expression(ctx, src, symbol_buffer)) != NULL) {
+         list->subexpressions.push_tail(expr);
+      }
+      skip_whitespace(src, symbol_buffer);
+      if (src[0] != ')') {
+         printf("Unclosed expression (check your parentheses).\n");
+         return NULL;
+      }
+      ++src;
+      ++symbol_buffer;
+      return list;
+   }
+   return NULL;
+}
+
+s_expression *
+s_expression::read_expression(void *ctx, const char *&src)
+{
+   assert(src != NULL);
+
+   /* When we encounter a Symbol, we need to save a nul-terminated copy of
+    * the string.  However, ralloc_strndup'ing every individual Symbol is
+    * extremely expensive.  We could avoid this by simply overwriting the
+    * next character (guaranteed to be whitespace, parens, or semicolon) with
+    * a nul-byte.  But overwriting non-whitespace would mess up parsing.
+    *
+    * So, just copy the whole buffer ahead of time.  Walk both, leaving the
+    * original source string unmodified, and altering the copy to contain the
+    * necessary nul-bytes whenever we encounter a symbol.
+    */
+   char *symbol_buffer = ralloc_strdup(ctx, src);
+   return __read_expression(ctx, src, symbol_buffer);
+}
+
+void s_int::print()
+{
+   printf("%d", this->val);
+}
+
+void s_float::print()
+{
+   printf("%f", this->val);
+}
+
+void s_symbol::print()
+{
+   printf("%s", this->str);
+}
+
+void s_list::print()
+{
+   printf("(");
+   foreach_in_list(s_expression, expr, &this->subexpressions) {
+      expr->print();
+      if (!expr->next->is_tail_sentinel())
+         printf(" ");
+   }
+   printf(")");
+}
+
+// --------------------------------------------------
+
+bool
+s_pattern::match(s_expression *expr)
+{
+   switch (type)
+   {
+   case EXPR:   *p_expr = expr; break;
+   case LIST:   if (expr->is_list())   *p_list   = (s_list *) expr; break;
+   case SYMBOL: if (expr->is_symbol()) *p_symbol = (s_symbol *) expr; break;
+   case NUMBER: if (expr->is_number()) *p_number = (s_number *) expr; break;
+   case INT:    if (expr->is_int())    *p_int    = (s_int *) expr; break;
+   case STRING:
+      s_symbol *sym = SX_AS_SYMBOL(expr);
+      if (sym != NULL && strcmp(sym->value(), literal) == 0)
+         return true;
+      return false;
+   };
+
+   return *p_expr == expr;
+}
+
+bool
+s_match(s_expression *top, unsigned n, s_pattern *pattern, bool partial)
+{
+   s_list *list = SX_AS_LIST(top);
+   if (list == NULL)
+      return false;
+
+   unsigned i = 0;
+   foreach_in_list(s_expression, expr, &list->subexpressions) {
+      if (i >= n)
+         return partial; /* More actual items than the pattern expected */
+
+      if (expr == NULL || !pattern[i].match(expr))
+         return false;
+
+      i++;
+   }
+
+   if (i < n)
+      return false; /* Fewer actual items than the pattern expected */
+
+   return true;
+}
diff --git a/src/compiler/glsl/s_expression.h b/src/compiler/glsl/s_expression.h
new file mode 100644
index 00000000000..f0dffb1b2f8
--- /dev/null
+++ b/src/compiler/glsl/s_expression.h
@@ -0,0 +1,180 @@
+/* -*- c++ -*- */
+/*
+ * Copyright © 2010 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef S_EXPRESSION_H
+#define S_EXPRESSION_H
+
+#include "main/core.h" /* for Elements */
+#include "util/strtod.h"
+#include "list.h"
+
+/* Type-safe downcasting macros (also safe to pass NULL) */
+#define SX_AS_(t,x) ((x) && ((s_expression*) x)->is_##t()) ? ((s_##t*) (x)) \
+                                                           : NULL
+#define SX_AS_LIST(x)   SX_AS_(list, x)
+#define SX_AS_SYMBOL(x) SX_AS_(symbol, x)
+#define SX_AS_NUMBER(x) SX_AS_(number, x)
+#define SX_AS_INT(x)    SX_AS_(int, x)
+
+/* Pattern matching macros */
+#define MATCH(list, pat) s_match(list, ARRAY_SIZE(pat), pat, false)
+#define PARTIAL_MATCH(list, pat) s_match(list, ARRAY_SIZE(pat), pat, true)
+
+/* For our purposes, S-Expressions are:
+ * - <int>
+ * - <float>
+ * - symbol
+ * - (expr1 expr2 ... exprN) where exprN is an S-Expression
+ *
+ * Unlike LISP/Scheme, we do not support (foo . bar) pairs.
+ */
+class s_expression : public exec_node
+{
+public:
+   /**
+    * Read an S-Expression from the given string.
+    * Advances the supplied pointer to just after the expression read.
+    *
+    * Any allocation will be performed with 'ctx' as the ralloc owner.
+    */
+   static s_expression *read_expression(void *ctx, const char *&src);
+
+   /**
+    * Print out an S-Expression. Useful for debugging.
+    */
+   virtual void print() = 0;
+
+   virtual bool is_list() const { return false; }
+   virtual bool is_symbol() const { return false; }
+   virtual bool is_number() const { return false; }
+   virtual bool is_int() const { return false; }
+
+protected:
+   s_expression() { }
+};
+
+/* Atoms */
+
+class s_number : public s_expression
+{
+public:
+   bool is_number() const { return true; }
+
+   virtual float fvalue() = 0;
+
+protected:
+   s_number() { }
+};
+
+class s_int : public s_number
+{
+public:
+   s_int(int x) : val(x) { }
+
+   bool is_int() const { return true; }
+
+   float fvalue() { return float(this->val); }
+   int value() { return this->val; }
+
+   void print();
+
+private:
+   int val;
+};
+
+class s_float : public s_number
+{
+public:
+   s_float(float x) : val(x) { }
+
+   float fvalue() { return this->val; }
+
+   void print();
+
+private:
+   float val;
+};
+
+class s_symbol : public s_expression
+{
+public:
+   s_symbol(const char *, size_t);
+
+   bool is_symbol() const { return true; }
+
+   const char *value() { return this->str; }
+
+   void print();
+
+private:
+   const char *str;
+};
+
+/* Lists of expressions: (expr1 ...
exprN) */ +class s_list : public s_expression +{ +public: + s_list(); + + virtual bool is_list() const { return true; } + + void print(); + + exec_list subexpressions; +}; + +// ------------------------------------------------------------ + +/** + * Part of a pattern to match - essentially a record holding a pointer to the + * storage for the component to match, along with the appropriate type. + */ +class s_pattern { +public: + s_pattern(s_expression *&s) : p_expr(&s), type(EXPR) { } + s_pattern(s_list *&s) : p_list(&s), type(LIST) { } + s_pattern(s_symbol *&s) : p_symbol(&s), type(SYMBOL) { } + s_pattern(s_number *&s) : p_number(&s), type(NUMBER) { } + s_pattern(s_int *&s) : p_int(&s), type(INT) { } + s_pattern(const char *str) : literal(str), type(STRING) { } + + bool match(s_expression *expr); + +private: + union { + s_expression **p_expr; + s_list **p_list; + s_symbol **p_symbol; + s_number **p_number; + s_int **p_int; + const char *literal; + }; + enum { EXPR, LIST, SYMBOL, NUMBER, INT, STRING } type; +}; + +bool +s_match(s_expression *top, unsigned n, s_pattern *pattern, bool partial); + +#endif /* S_EXPRESSION_H */ diff --git a/src/compiler/glsl/standalone_scaffolding.cpp b/src/compiler/glsl/standalone_scaffolding.cpp new file mode 100644 index 00000000000..d5d214b57cc --- /dev/null +++ b/src/compiler/glsl/standalone_scaffolding.cpp @@ -0,0 +1,221 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +/* This file declares stripped-down versions of functions that + * normally exist outside of the glsl folder, so that they can be used + * when running the GLSL compiler standalone (for unit testing or + * compiling builtins). + */ + +#include "standalone_scaffolding.h" + +#include +#include +#include +#include "util/ralloc.h" +#include "util/strtod.h" + +void +_mesa_warning(struct gl_context *ctx, const char *fmt, ...) +{ + va_list vargs; + (void) ctx; + + va_start(vargs, fmt); + + /* This output is not thread-safe, but that's good enough for the + * standalone compiler. 
+ */ + fprintf(stderr, "Mesa warning: "); + vfprintf(stderr, fmt, vargs); + fprintf(stderr, "\n"); + + va_end(vargs); +} + +void +_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, + struct gl_shader *sh) +{ + (void) ctx; + *ptr = sh; +} + +void +_mesa_shader_debug(struct gl_context *, GLenum, GLuint *, + const char *) +{ +} + +struct gl_shader * +_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type) +{ + struct gl_shader *shader; + + (void) ctx; + + assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER); + shader = rzalloc(NULL, struct gl_shader); + if (shader) { + shader->Type = type; + shader->Stage = _mesa_shader_enum_to_shader_stage(type); + shader->Name = name; + shader->RefCount = 1; + } + return shader; +} + +void +_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh) +{ + free((void *)sh->Source); + free(sh->Label); + ralloc_free(sh); +} + +void +_mesa_clear_shader_program_data(struct gl_shader_program *shProg) +{ + unsigned i; + + shProg->NumUniformStorage = 0; + shProg->UniformStorage = NULL; + shProg->NumUniformRemapTable = 0; + shProg->UniformRemapTable = NULL; + shProg->UniformHash = NULL; + + ralloc_free(shProg->InfoLog); + shProg->InfoLog = ralloc_strdup(shProg, ""); + + ralloc_free(shProg->BufferInterfaceBlocks); + shProg->BufferInterfaceBlocks = NULL; + shProg->NumBufferInterfaceBlocks = 0; + + ralloc_free(shProg->UniformBlocks); + shProg->UniformBlocks = NULL; + shProg->NumUniformBlocks = 0; + + ralloc_free(shProg->ShaderStorageBlocks); + shProg->ShaderStorageBlocks = NULL; + shProg->NumShaderStorageBlocks = 0; + + for (i = 0; i < MESA_SHADER_STAGES; i++) { + ralloc_free(shProg->InterfaceBlockStageIndex[i]); + shProg->InterfaceBlockStageIndex[i] = NULL; + } + + ralloc_free(shProg->UboInterfaceBlockIndex); + shProg->UboInterfaceBlockIndex = NULL; + ralloc_free(shProg->SsboInterfaceBlockIndex); + shProg->SsboInterfaceBlockIndex = NULL; + + ralloc_free(shProg->AtomicBuffers); + shProg->AtomicBuffers = NULL; + shProg->NumAtomicBuffers = 0; +} + +void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) +{ + memset(ctx, 0, sizeof(*ctx)); + + ctx->API = api; + + ctx->Extensions.dummy_false = false; + ctx->Extensions.dummy_true = true; + ctx->Extensions.ARB_compute_shader = true; + ctx->Extensions.ARB_conservative_depth = true; + ctx->Extensions.ARB_draw_instanced = true; + ctx->Extensions.ARB_ES2_compatibility = true; + ctx->Extensions.ARB_ES3_compatibility = true; + ctx->Extensions.ARB_explicit_attrib_location = true; + ctx->Extensions.ARB_fragment_coord_conventions = true; + ctx->Extensions.ARB_fragment_layer_viewport = true; + ctx->Extensions.ARB_gpu_shader5 = true; + ctx->Extensions.ARB_gpu_shader_fp64 = true; + ctx->Extensions.ARB_sample_shading = true; + ctx->Extensions.ARB_shader_bit_encoding = true; + ctx->Extensions.ARB_shader_draw_parameters = true; + ctx->Extensions.ARB_shader_stencil_export = true; + ctx->Extensions.ARB_shader_subroutine = true; + ctx->Extensions.ARB_shader_texture_lod = true; + ctx->Extensions.ARB_shading_language_420pack = true; + ctx->Extensions.ARB_shading_language_packing = true; + ctx->Extensions.ARB_tessellation_shader = true; + ctx->Extensions.ARB_texture_cube_map_array = true; + ctx->Extensions.ARB_texture_gather = true; + ctx->Extensions.ARB_texture_multisample = true; + ctx->Extensions.ARB_texture_query_levels = true; + ctx->Extensions.ARB_texture_query_lod = true; + ctx->Extensions.ARB_uniform_buffer_object = true; + ctx->Extensions.ARB_viewport_array = true; + + 
ctx->Extensions.OES_EGL_image_external = true; + ctx->Extensions.OES_standard_derivatives = true; + + ctx->Extensions.EXT_shader_integer_mix = true; + ctx->Extensions.EXT_texture_array = true; + + ctx->Extensions.NV_texture_rectangle = true; + + ctx->Const.GLSLVersion = 120; + + /* 1.20 minimums. */ + ctx->Const.MaxLights = 8; + ctx->Const.MaxClipPlanes = 6; + ctx->Const.MaxTextureUnits = 2; + ctx->Const.MaxTextureCoordUnits = 2; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; + + ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512; + ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; + ctx->Const.MaxVarying = 8; /* == gl_MaxVaryingFloats / 4 */ + ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; + ctx->Const.MaxCombinedTextureImageUnits = 2; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 2; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64; + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 32; + + ctx->Const.MaxDrawBuffers = 1; + ctx->Const.MaxComputeWorkGroupCount[0] = 65535; + ctx->Const.MaxComputeWorkGroupCount[1] = 65535; + ctx->Const.MaxComputeWorkGroupCount[2] = 65535; + ctx->Const.MaxComputeWorkGroupSize[0] = 1024; + ctx->Const.MaxComputeWorkGroupSize[1] = 1024; + ctx->Const.MaxComputeWorkGroupSize[2] = 64; + ctx->Const.MaxComputeWorkGroupInvocations = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024; + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */ + ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */ + + /* Set up default shader compiler options. */ + struct gl_shader_compiler_options options; + memset(&options, 0, sizeof(options)); + options.MaxUnrollIterations = 32; + options.MaxIfDepth = UINT_MAX; + + for (int sh = 0; sh < MESA_SHADER_STAGES; ++sh) + memcpy(&ctx->Const.ShaderCompilerOptions[sh], &options, sizeof(options)); + + _mesa_locale_init(); +} diff --git a/src/compiler/glsl/standalone_scaffolding.h b/src/compiler/glsl/standalone_scaffolding.h new file mode 100644 index 00000000000..f853a187bf4 --- /dev/null +++ b/src/compiler/glsl/standalone_scaffolding.h @@ -0,0 +1,90 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */
+
+/* This file declares stripped-down versions of functions that
+ * normally exist outside of the glsl folder, so that they can be used
+ * when running the GLSL compiler standalone (for unit testing or
+ * compiling builtins).
+ */
+
+#pragma once
+#ifndef STANDALONE_SCAFFOLDING_H
+#define STANDALONE_SCAFFOLDING_H
+
+#include <assert.h>
+#include "main/mtypes.h"
+
+extern "C" void
+_mesa_warning(struct gl_context *ctx, const char *fmtString, ... );
+
+extern "C" void
+_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
+                       struct gl_shader *sh);
+
+extern "C" struct gl_shader *
+_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type);
+
+extern "C" void
+_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh);
+
+extern "C" void
+_mesa_clear_shader_program_data(struct gl_shader_program *);
+
+extern "C" void
+_mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id,
+                   const char *msg);
+
+static inline gl_shader_stage
+_mesa_shader_enum_to_shader_stage(GLenum v)
+{
+   switch (v) {
+   case GL_VERTEX_SHADER:
+      return MESA_SHADER_VERTEX;
+   case GL_FRAGMENT_SHADER:
+      return MESA_SHADER_FRAGMENT;
+   case GL_GEOMETRY_SHADER:
+      return MESA_SHADER_GEOMETRY;
+   case GL_TESS_CONTROL_SHADER:
+      return MESA_SHADER_TESS_CTRL;
+   case GL_TESS_EVALUATION_SHADER:
+      return MESA_SHADER_TESS_EVAL;
+   case GL_COMPUTE_SHADER:
+      return MESA_SHADER_COMPUTE;
+   default:
+      assert(!"bad value in _mesa_shader_enum_to_shader_stage()");
+      return MESA_SHADER_VERTEX;
+   }
+}
+
+/**
+ * Initialize the given gl_context structure to a reasonable set of
+ * defaults representing the minimum capabilities required by the
+ * OpenGL spec.
+ *
+ * This is used when compiling builtin functions and in testing, when
+ * we don't have a connection to an actual driver.
+ */
+void initialize_context_to_defaults(struct gl_context *ctx, gl_api api);
+
+
+#endif /* STANDALONE_SCAFFOLDING_H */
diff --git a/src/compiler/glsl/test.cpp b/src/compiler/glsl/test.cpp
new file mode 100644
index 00000000000..b1ff92ed1d4
--- /dev/null
+++ b/src/compiler/glsl/test.cpp
@@ -0,0 +1,78 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file test.cpp
+ *
+ * Standalone tests for the GLSL compiler.
+ *
+ * This file provides a standalone executable which can be used to
+ * test components of the GLSL compiler.
+ *
+ * Each test is a function with the same signature as main().  The
+ * main function interprets its first argument as the name of the test
+ * to run, strips out that argument, and then calls the test function.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+#include "test_optpass.h"
+
+/**
+ * Print proper usage and exit with failure.
+ */
+static void
+usage_fail(const char *name)
+{
+   printf("*** usage: %s <command>\n", name);
+   printf("\n");
+   printf("Possible commands are:\n");
+   printf("  optpass: test an optimization pass in isolation\n");
+   exit(EXIT_FAILURE);
+}
+
+static const char *extract_command_from_argv(int *argc, char **argv)
+{
+   if (*argc < 2) {
+      usage_fail(argv[0]);
+   }
+   const char *command = argv[1];
+   --*argc;
+   memmove(&argv[1], &argv[2], (*argc) * sizeof(argv[1]));
+   return command;
+}
+
+int main(int argc, char **argv)
+{
+   const char *command = extract_command_from_argv(&argc, argv);
+   if (strcmp(command, "optpass") == 0) {
+      return test_optpass(argc, argv);
+   } else {
+      usage_fail(argv[0]);
+   }
+
+   /* Execution should never reach here. */
+   return EXIT_FAILURE;
+}
diff --git a/src/compiler/glsl/test_optpass.cpp b/src/compiler/glsl/test_optpass.cpp
new file mode 100644
index 00000000000..fed1fabf301
--- /dev/null
+++ b/src/compiler/glsl/test_optpass.cpp
@@ -0,0 +1,276 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * \file test_optpass.cpp
+ *
+ * Standalone test for optimization passes.
+ *
+ * This file provides the "optpass" command for the standalone
+ * glsl_test app.  It accepts either GLSL or high-level IR as input,
+ * and performs the optimization passes specified on the command line.
+ * It outputs the IR, both before and after optimizations.
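+ *
+ * A typical invocation (the binary name is hypothetical, matching the
+ * standalone glsl_test app mentioned above), reading a shader from stdin:
+ *
+ *    glsl_test optpass --fragment-shader do_dead_code < shader.frag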
+ */ + +#include +#include +#include +#include + +#include "ast.h" +#include "ir_optimization.h" +#include "program.h" +#include "ir_reader.h" +#include "standalone_scaffolding.h" + +using namespace std; + +static string read_stdin_to_eof() +{ + stringbuf sb; + cin.get(sb, '\0'); + return sb.str(); +} + +static GLboolean +do_optimization(struct exec_list *ir, const char *optimization, + const struct gl_shader_compiler_options *options) +{ + int int_0; + int int_1; + int int_2; + int int_3; + int int_4; + + if (sscanf(optimization, "do_common_optimization ( %d ) ", &int_0) == 1) { + return do_common_optimization(ir, int_0 != 0, false, options, true); + } else if (strcmp(optimization, "do_algebraic") == 0) { + return do_algebraic(ir, true, options); + } else if (strcmp(optimization, "do_constant_folding") == 0) { + return do_constant_folding(ir); + } else if (strcmp(optimization, "do_constant_variable") == 0) { + return do_constant_variable(ir); + } else if (strcmp(optimization, "do_constant_variable_unlinked") == 0) { + return do_constant_variable_unlinked(ir); + } else if (strcmp(optimization, "do_copy_propagation") == 0) { + return do_copy_propagation(ir); + } else if (strcmp(optimization, "do_copy_propagation_elements") == 0) { + return do_copy_propagation_elements(ir); + } else if (strcmp(optimization, "do_constant_propagation") == 0) { + return do_constant_propagation(ir); + } else if (strcmp(optimization, "do_dead_code") == 0) { + return do_dead_code(ir, false); + } else if (strcmp(optimization, "do_dead_code_local") == 0) { + return do_dead_code_local(ir); + } else if (strcmp(optimization, "do_dead_code_unlinked") == 0) { + return do_dead_code_unlinked(ir); + } else if (strcmp(optimization, "do_dead_functions") == 0) { + return do_dead_functions(ir); + } else if (strcmp(optimization, "do_function_inlining") == 0) { + return do_function_inlining(ir); + } else if (sscanf(optimization, + "do_lower_jumps ( %d , %d , %d , %d , %d ) ", + &int_0, &int_1, &int_2, &int_3, &int_4) == 5) { + return do_lower_jumps(ir, int_0 != 0, int_1 != 0, int_2 != 0, + int_3 != 0, int_4 != 0); + } else if (strcmp(optimization, "do_lower_texture_projection") == 0) { + return do_lower_texture_projection(ir); + } else if (strcmp(optimization, "do_if_simplification") == 0) { + return do_if_simplification(ir); + } else if (sscanf(optimization, "lower_if_to_cond_assign ( %d ) ", + &int_0) == 1) { + return lower_if_to_cond_assign(ir, int_0); + } else if (strcmp(optimization, "do_mat_op_to_vec") == 0) { + return do_mat_op_to_vec(ir); + } else if (strcmp(optimization, "do_noop_swizzle") == 0) { + return do_noop_swizzle(ir); + } else if (strcmp(optimization, "do_structure_splitting") == 0) { + return do_structure_splitting(ir); + } else if (strcmp(optimization, "do_swizzle_swizzle") == 0) { + return do_swizzle_swizzle(ir); + } else if (strcmp(optimization, "do_tree_grafting") == 0) { + return do_tree_grafting(ir); + } else if (strcmp(optimization, "do_vec_index_to_cond_assign") == 0) { + return do_vec_index_to_cond_assign(ir); + } else if (strcmp(optimization, "do_vec_index_to_swizzle") == 0) { + return do_vec_index_to_swizzle(ir); + } else if (strcmp(optimization, "lower_discard") == 0) { + return lower_discard(ir); + } else if (sscanf(optimization, "lower_instructions ( %d ) ", + &int_0) == 1) { + return lower_instructions(ir, int_0); + } else if (strcmp(optimization, "lower_noise") == 0) { + return lower_noise(ir); + } else if (sscanf(optimization, "lower_variable_index_to_cond_assign " + "( %d , %d , %d , %d ) ", 
+                     &int_0, &int_1, &int_2,
+                     &int_3) == 4) {
+      return lower_variable_index_to_cond_assign(MESA_SHADER_VERTEX, ir,
+                                                 int_0 != 0, int_1 != 0,
+                                                 int_2 != 0, int_3 != 0);
+   } else if (sscanf(optimization, "lower_quadop_vector ( %d ) ",
+                     &int_0) == 1) {
+      return lower_quadop_vector(ir, int_0 != 0);
+   } else if (strcmp(optimization, "optimize_redundant_jumps") == 0) {
+      return optimize_redundant_jumps(ir);
+   } else {
+      printf("Unrecognized optimization %s\n", optimization);
+      exit(EXIT_FAILURE);
+      return false;
+   }
+}
+
+static GLboolean
+do_optimization_passes(struct exec_list *ir, char **optimizations,
+                       int num_optimizations, bool quiet,
+                       const struct gl_shader_compiler_options *options)
+{
+   GLboolean overall_progress = false;
+
+   for (int i = 0; i < num_optimizations; ++i) {
+      const char *optimization = optimizations[i];
+      if (!quiet) {
+         printf("*** Running optimization %s...", optimization);
+      }
+      GLboolean progress = do_optimization(ir, optimization, options);
+      if (!quiet) {
+         printf("%s\n", progress ? "progress" : "no progress");
+      }
+      validate_ir_tree(ir);
+
+      overall_progress = overall_progress || progress;
+   }
+
+   return overall_progress;
+}
+
+int test_optpass(int argc, char **argv)
+{
+   int input_format_ir = 0; /* 0=glsl, 1=ir */
+   int loop = 0;
+   int shader_type = GL_VERTEX_SHADER;
+   int quiet = 0;
+
+   const struct option optpass_opts[] = {
+      { "input-ir", no_argument, &input_format_ir, 1 },
+      { "input-glsl", no_argument, &input_format_ir, 0 },
+      { "loop", no_argument, &loop, 1 },
+      { "vertex-shader", no_argument, &shader_type, GL_VERTEX_SHADER },
+      { "fragment-shader", no_argument, &shader_type, GL_FRAGMENT_SHADER },
+      { "quiet", no_argument, &quiet, 1 },
+      { NULL, 0, NULL, 0 }
+   };
+
+   int idx = 0;
+   int c;
+   while ((c = getopt_long(argc, argv, "", optpass_opts, &idx)) != -1) {
+      if (c != 0) {
+         printf("*** usage: %s optpass <optimizations> <options>\n", argv[0]);
+         printf("\n");
+         printf("Possible options are:\n");
+         printf("  --input-ir: input format is IR\n");
+         printf("  --input-glsl: input format is GLSL (the default)\n");
+         printf("  --loop: run optimizations repeatedly until no progress\n");
+         printf("  --vertex-shader: test with a vertex shader (the default)\n");
+         printf("  --fragment-shader: test with a fragment shader\n");
+         exit(EXIT_FAILURE);
+      }
+   }
+
+   struct gl_context local_ctx;
+   struct gl_context *ctx = &local_ctx;
+   initialize_context_to_defaults(ctx, API_OPENGL_COMPAT);
+
+   ctx->Driver.NewShader = _mesa_new_shader;
+   ir_variable::temporaries_allocate_names = true;
+
+   struct gl_shader *shader = rzalloc(NULL, struct gl_shader);
+   shader->Type = shader_type;
+   shader->Stage = _mesa_shader_enum_to_shader_stage(shader_type);
+
+   string input = read_stdin_to_eof();
+
+   struct _mesa_glsl_parse_state *state
+      = new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader);
+
+   if (input_format_ir) {
+      shader->ir = new(shader) exec_list;
+      _mesa_glsl_initialize_types(state);
+      _mesa_glsl_read_ir(state, shader->ir, input.c_str(), true);
+   } else {
+      shader->Source = input.c_str();
+      const char *source = shader->Source;
+      state->error = glcpp_preprocess(state, &source, &state->info_log,
+                                      state->extensions, ctx) != 0;
+
+      if (!state->error) {
+         _mesa_glsl_lexer_ctor(state, source);
+         _mesa_glsl_parse(state);
+         _mesa_glsl_lexer_dtor(state);
+      }
+
+      shader->ir = new(shader) exec_list;
+      if (!state->error && !state->translation_unit.is_empty())
+         _mesa_ast_to_hir(shader->ir, state);
+   }
+
+   /* Print out the initial IR */
+   if (!state->error && !quiet) {
+      printf("*** pre-optimization IR:\n");
+      _mesa_print_ir(stdout, shader->ir, state);
+      printf("\n--\n");
+   }
+
+   /* Optimization passes */
+   if (!state->error) {
+      GLboolean progress;
+      const struct gl_shader_compiler_options *options =
+         &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader_type)];
+      do {
+         progress = do_optimization_passes(shader->ir, &argv[optind],
+                                           argc - optind, quiet != 0, options);
+      } while (loop && progress);
+   }
+
+   /* Print out the resulting IR */
+   if (!state->error) {
+      if (!quiet) {
+         printf("*** resulting IR:\n");
+      }
+      _mesa_print_ir(stdout, shader->ir, state);
+      if (!quiet) {
+         printf("\n--\n");
+      }
+   }
+
+   if (state->error) {
+      printf("*** error(s) occurred:\n");
+      printf("%s\n", state->info_log);
+      printf("--\n");
+   }
+
+   /* Cache the error flag before freeing: state is ralloc'ed and must not
+    * be read after ralloc_free().
+    */
+   const int ret = state->error;
+
+   ralloc_free(state);
+   ralloc_free(shader);
+
+   return ret;
+}
+
diff --git a/src/compiler/glsl/test_optpass.h b/src/compiler/glsl/test_optpass.h
new file mode 100644
index 00000000000..923ccf3dece
--- /dev/null
+++ b/src/compiler/glsl/test_optpass.h
@@ -0,0 +1,30 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#pragma once
+#ifndef TEST_OPTPASS_H
+#define TEST_OPTPASS_H
+
+int test_optpass(int argc, char **argv);
+
+#endif /* TEST_OPTPASS_H */
diff --git a/src/compiler/glsl/tests/.gitignore b/src/compiler/glsl/tests/.gitignore
new file mode 100644
index 00000000000..13dcdc4ab73
--- /dev/null
+++ b/src/compiler/glsl/tests/.gitignore
@@ -0,0 +1,5 @@
+blob-test
+ralloc-test
+uniform-initializer-test
+sampler-types-test
+general-ir-test
diff --git a/src/compiler/glsl/tests/blob_test.c b/src/compiler/glsl/tests/blob_test.c
new file mode 100644
index 00000000000..4806029bca6
--- /dev/null
+++ b/src/compiler/glsl/tests/blob_test.c
@@ -0,0 +1,320 @@
+/*
+ * Copyright © 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/* A collection of unit tests for blob.c */
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "util/ralloc.h"
+#include "blob.h"
+
+#define bytes_test_str     "bytes_test"
+#define reserve_test_str   "reserve_test"
+
+/* This placeholder must be the same length as the next overwrite_test_str */
+#define placeholder_str    "XXXXXXXXXXXXXX"
+#define overwrite_test_str "overwrite_test"
+#define uint32_test        0x12345678
+#define uint32_placeholder 0xDEADBEEF
+#define uint32_overwrite   0xA1B2C3D4
+#define uint64_test        0x1234567890ABCDEF
+#define string_test_str    "string_test"
+
+bool error = false;
+
+static void
+expect_equal(uint64_t expected, uint64_t actual, const char *test)
+{
+   if (actual != expected) {
+      fprintf (stderr, "Error: Test '%s' failed: Expected=%ld, Actual=%ld\n",
+               test, expected, actual);
+      error = true;
+   }
+}
+
+static void
+expect_unequal(uint64_t expected, uint64_t actual, const char *test)
+{
+   if (actual == expected) {
+      fprintf (stderr, "Error: Test '%s' failed: Result=%ld, but expected something different.\n",
+               test, actual);
+      error = true;
+   }
+}
+
+static void
+expect_equal_str(const char *expected, const char *actual, const char *test)
+{
+   if (strcmp(expected, actual)) {
+      fprintf (stderr, "Error: Test '%s' failed:\n\t"
+               "Expected=\"%s\", Actual=\"%s\"\n",
+               test, expected, actual);
+      error = true;
+   }
+}
+
+static void
+expect_equal_bytes(uint8_t *expected, uint8_t *actual,
+                   size_t num_bytes, const char *test)
+{
+   size_t i;
+
+   if (memcmp(expected, actual, num_bytes)) {
+      fprintf (stderr, "Error: Test '%s' failed:\n\t", test);
+
+      fprintf (stderr, "Expected=[");
+      for (i = 0; i < num_bytes; i++) {
+         if (i != 0)
+            fprintf(stderr, ", ");
+         fprintf(stderr, "0x%02x", expected[i]);
+      }
+      fprintf (stderr, "]");
+
+      fprintf (stderr, "Actual=[");
+      for (i = 0; i < num_bytes; i++) {
+         if (i != 0)
+            fprintf(stderr, ", ");
+         fprintf(stderr, "0x%02x", actual[i]);
+      }
+      fprintf (stderr, "]\n");
+
+      error = true;
+   }
+}
+
+/* Test at least one call of each blob_write_foo and blob_read_foo function,
+ * verifying that we read out everything we wrote, that every byte is
+ * consumed, and that the overrun bit is not set.
+ */
+static void
+test_write_and_read_functions (void)
+{
+   void *ctx = ralloc_context(NULL);
+   struct blob *blob;
+   struct blob_reader reader;
+   uint8_t *reserved;
+   size_t str_offset, uint_offset;
+   uint8_t reserve_buf[sizeof(reserve_test_str)];
+
+   blob = blob_create(ctx);
+
+   /*** Test blob by writing one of every possible kind of value.
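+    * The reads in the second half of this function must mirror these
+    * writes in both order and size.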
+    */
+
+   blob_write_bytes(blob, bytes_test_str, sizeof(bytes_test_str));
+
+   reserved = blob_reserve_bytes(blob, sizeof(reserve_test_str));
+   memcpy(reserved, reserve_test_str, sizeof(reserve_test_str));
+
+   /* Write a placeholder, (to be replaced later via overwrite_bytes) */
+   str_offset = blob->size;
+   blob_write_bytes(blob, placeholder_str, sizeof(placeholder_str));
+
+   blob_write_uint32(blob, uint32_test);
+
+   /* Write a placeholder, (to be replaced later via overwrite_uint32) */
+   uint_offset = blob->size;
+   blob_write_uint32(blob, uint32_placeholder);
+
+   blob_write_uint64(blob, uint64_test);
+
+   blob_write_intptr(blob, (intptr_t) blob);
+
+   blob_write_string(blob, string_test_str);
+
+   /* Finally, overwrite our placeholders. */
+   blob_overwrite_bytes(blob, str_offset, overwrite_test_str,
+                        sizeof(overwrite_test_str));
+   blob_overwrite_uint32(blob, uint_offset, uint32_overwrite);
+
+   /*** Now read each value and verify. */
+   blob_reader_init(&reader, blob->data, blob->size);
+
+   expect_equal_str(bytes_test_str,
+                    blob_read_bytes(&reader, sizeof(bytes_test_str)),
+                    "blob_write/read_bytes");
+
+   blob_copy_bytes(&reader, reserve_buf, sizeof(reserve_buf));
+   expect_equal_str(reserve_test_str, (char *) reserve_buf,
+                    "blob_reserve_bytes/blob_copy_bytes");
+
+   expect_equal_str(overwrite_test_str,
+                    blob_read_bytes(&reader, sizeof(overwrite_test_str)),
+                    "blob_overwrite_bytes");
+
+   expect_equal(uint32_test, blob_read_uint32(&reader),
+                "blob_write/read_uint32");
+   expect_equal(uint32_overwrite, blob_read_uint32(&reader),
+                "blob_overwrite_uint32");
+   expect_equal(uint64_test, blob_read_uint64(&reader),
+                "blob_write/read_uint64");
+   expect_equal((intptr_t) blob, blob_read_intptr(&reader),
+                "blob_write/read_intptr");
+   expect_equal_str(string_test_str, blob_read_string(&reader),
+                    "blob_write/read_string");
+
+   expect_equal(reader.end - reader.data, reader.current - reader.data,
+                "read_consumes_all_bytes");
+   expect_equal(false, reader.overrun, "read_does_not_overrun");
+
+   ralloc_free(ctx);
+}
+
+/* Test that data values are written and read with proper alignment. */
+static void
+test_alignment(void)
+{
+   void *ctx = ralloc_context(NULL);
+   struct blob *blob;
+   struct blob_reader reader;
+   uint8_t bytes[] = "ABCDEFGHIJKLMNOP";
+   size_t delta, last, num_bytes;
+
+   blob = blob_create(ctx);
+
+   /* First, write an intptr value to the blob and capture that size. This is
+    * the expected offset between any pair of intptr values (if written with
+    * alignment).
+    */
+   blob_write_intptr(blob, (intptr_t) blob);
+
+   delta = blob->size;
+   last = blob->size;
+
+   /* Then loop doing the following:
+    *
+    *   1. Write an unaligned number of bytes
+    *   2. Verify that write results in an unaligned size
+    *   3. Write an intptr_t value
+    *   4. Verify that the write results in an aligned size
+    */
+   for (num_bytes = 1; num_bytes < sizeof(intptr_t); num_bytes++) {
+      blob_write_bytes(blob, bytes, num_bytes);
+
+      expect_unequal(delta, blob->size - last, "unaligned write of bytes");
+
+      blob_write_intptr(blob, (intptr_t) blob);
+
+      expect_equal(2 * delta, blob->size - last, "aligned write of intptr");
+
+      last = blob->size;
+   }
+
+   /* Finally, test that reading also does proper alignment. Since we know
+    * that values were written with all the right alignment, all we have to do
+    * here is verify that correct values are read.
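+    * For example (illustrative figures, assuming an 8-byte intptr_t): a
+    * 3-byte unaligned read leaves reader.current at offset 8k+3, so the
+    * following blob_read_intptr must skip ahead to the next multiple of 8
+    * before reading.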
+ */ + blob_reader_init(&reader, blob->data, blob->size); + + expect_equal((intptr_t) blob, blob_read_intptr(&reader), + "read of initial, aligned intptr_t"); + + for (num_bytes = 1; num_bytes < sizeof(intptr_t); num_bytes++) { + expect_equal_bytes(bytes, blob_read_bytes(&reader, num_bytes), + num_bytes, "unaligned read of bytes"); + expect_equal((intptr_t) blob, blob_read_intptr(&reader), + "aligned read of intptr_t"); + } + + ralloc_free(ctx); +} + +/* Test that we detect overrun. */ +static void +test_overrun(void) +{ + void *ctx =ralloc_context(NULL); + struct blob *blob; + struct blob_reader reader; + uint32_t value = 0xdeadbeef; + + blob = blob_create(ctx); + + blob_write_uint32(blob, value); + + blob_reader_init(&reader, blob->data, blob->size); + + expect_equal(value, blob_read_uint32(&reader), "read before overrun"); + expect_equal(false, reader.overrun, "overrun flag not set"); + expect_equal(0, blob_read_uint32(&reader), "read at overrun"); + expect_equal(true, reader.overrun, "overrun flag set"); + + ralloc_free(ctx); +} + +/* Test that we can read and write some large objects, (exercising the code in + * the blob_write functions to realloc blob->data. + */ +static void +test_big_objects(void) +{ + void *ctx = ralloc_context(NULL); + struct blob *blob; + struct blob_reader reader; + int size = 1000; + int count = 1000; + size_t i; + char *buf; + + blob = blob_create(ctx); + + /* Initialize our buffer. */ + buf = ralloc_size(ctx, size); + for (i = 0; i < size; i++) { + buf[i] = i % 256; + } + + /* Write it many times. */ + for (i = 0; i < count; i++) { + blob_write_bytes(blob, buf, size); + } + + blob_reader_init(&reader, blob->data, blob->size); + + /* Read and verify it many times. */ + for (i = 0; i < count; i++) { + expect_equal_bytes((uint8_t *) buf, blob_read_bytes(&reader, size), size, + "read of large objects"); + } + + expect_equal(reader.end - reader.data, reader.current - reader.data, + "number of bytes read reading large objects"); + + expect_equal(false, reader.overrun, + "overrun flag not set reading large objects"); + + ralloc_free(ctx); +} + +int +main (void) +{ + test_write_and_read_functions (); + test_alignment (); + test_overrun (); + test_big_objects (); + + return error ? 1 : 0; +} diff --git a/src/compiler/glsl/tests/builtin_variable_test.cpp b/src/compiler/glsl/tests/builtin_variable_test.cpp new file mode 100644 index 00000000000..11e384a0722 --- /dev/null +++ b/src/compiler/glsl/tests/builtin_variable_test.cpp @@ -0,0 +1,393 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include +#include "standalone_scaffolding.h" +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "ir.h" +#include "glsl_parser_extras.h" +#include "glsl_symbol_table.h" + +class common_builtin : public ::testing::Test { +public: + common_builtin(GLenum shader_type) + : shader_type(shader_type) + { + /* empty */ + } + + virtual void SetUp(); + virtual void TearDown(); + + void string_starts_with_prefix(const char *str, const char *prefix); + void names_start_with_gl(); + void uniforms_and_system_values_dont_have_explicit_location(); + void constants_are_constant(); + void no_invalid_variable_modes(); + + GLenum shader_type; + struct _mesa_glsl_parse_state *state; + struct gl_shader *shader; + void *mem_ctx; + gl_context ctx; + exec_list ir; +}; + +void +common_builtin::SetUp() +{ + this->mem_ctx = ralloc_context(NULL); + this->ir.make_empty(); + + initialize_context_to_defaults(&this->ctx, API_OPENGL_COMPAT); + + this->shader = rzalloc(this->mem_ctx, gl_shader); + this->shader->Type = this->shader_type; + this->shader->Stage = _mesa_shader_enum_to_shader_stage(this->shader_type); + + this->state = + new(mem_ctx) _mesa_glsl_parse_state(&this->ctx, this->shader->Stage, + this->shader); + + _mesa_glsl_initialize_types(this->state); + _mesa_glsl_initialize_variables(&this->ir, this->state); +} + +void +common_builtin::TearDown() +{ + ralloc_free(this->mem_ctx); + this->mem_ctx = NULL; +} + +void +common_builtin::string_starts_with_prefix(const char *str, const char *prefix) +{ + const size_t len = strlen(prefix); + char *const name_prefix = new char[len + 1]; + + strncpy(name_prefix, str, len); + name_prefix[len] = '\0'; + EXPECT_STREQ(prefix, name_prefix) << "Bad name " << str; + + delete [] name_prefix; +} + +void +common_builtin::names_start_with_gl() +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + string_starts_with_prefix(var->name, "gl_"); + } +} + +void +common_builtin::uniforms_and_system_values_dont_have_explicit_location() +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_uniform && var->data.mode != ir_var_system_value) + continue; + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + } +} + +void +common_builtin::constants_are_constant() +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_auto) + continue; + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + EXPECT_TRUE(var->data.read_only); + } +} + +void +common_builtin::no_invalid_variable_modes() +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + switch (var->data.mode) { + case ir_var_auto: + case ir_var_uniform: + case ir_var_shader_in: + case ir_var_shader_out: + case ir_var_system_value: + break; + + default: + ADD_FAILURE() << "Built-in variable " << var->name + << " has an invalid mode " << int(var->data.mode); + break; + } + } +} + +/************************************************************/ + +class vertex_builtin : public common_builtin { +public: + 
vertex_builtin() + : common_builtin(GL_VERTEX_SHADER) + { + /* empty */ + } +}; + +TEST_F(vertex_builtin, names_start_with_gl) +{ + common_builtin::names_start_with_gl(); +} + +TEST_F(vertex_builtin, inputs_have_explicit_location) +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_shader_in) + continue; + + EXPECT_TRUE(var->data.explicit_location); + EXPECT_NE(-1, var->data.location); + EXPECT_GT(VERT_ATTRIB_GENERIC0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + } +} + +TEST_F(vertex_builtin, outputs_have_explicit_location) +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_shader_out) + continue; + + EXPECT_TRUE(var->data.explicit_location); + EXPECT_NE(-1, var->data.location); + EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + + /* Several varyings only exist in the fragment shader. Be sure that no + * outputs with these locations exist. + */ + EXPECT_NE(VARYING_SLOT_PNTC, var->data.location); + EXPECT_NE(VARYING_SLOT_FACE, var->data.location); + EXPECT_NE(VARYING_SLOT_PRIMITIVE_ID, var->data.location); + } +} + +TEST_F(vertex_builtin, uniforms_and_system_values_dont_have_explicit_location) +{ + common_builtin::uniforms_and_system_values_dont_have_explicit_location(); +} + +TEST_F(vertex_builtin, constants_are_constant) +{ + common_builtin::constants_are_constant(); +} + +TEST_F(vertex_builtin, no_invalid_variable_modes) +{ + common_builtin::no_invalid_variable_modes(); +} + +/********************************************************************/ + +class fragment_builtin : public common_builtin { +public: + fragment_builtin() + : common_builtin(GL_FRAGMENT_SHADER) + { + /* empty */ + } +}; + +TEST_F(fragment_builtin, names_start_with_gl) +{ + common_builtin::names_start_with_gl(); +} + +TEST_F(fragment_builtin, inputs_have_explicit_location) +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_shader_in) + continue; + + EXPECT_TRUE(var->data.explicit_location); + EXPECT_NE(-1, var->data.location); + EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + + /* Several varyings only exist in the vertex / geometry shader. Be sure + * that no inputs with these locations exist. + */ + EXPECT_TRUE(_mesa_varying_slot_in_fs((gl_varying_slot) var->data.location)); + } +} + +TEST_F(fragment_builtin, outputs_have_explicit_location) +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_shader_out) + continue; + + EXPECT_TRUE(var->data.explicit_location); + EXPECT_NE(-1, var->data.location); + + /* gl_FragData[] has location FRAG_RESULT_DATA0. Locations beyond that + * are invalid. 
+ */ + EXPECT_GE(FRAG_RESULT_DATA0, var->data.location); + + EXPECT_EQ(0u, var->data.location_frac); + } +} + +TEST_F(fragment_builtin, uniforms_and_system_values_dont_have_explicit_location) +{ + common_builtin::uniforms_and_system_values_dont_have_explicit_location(); +} + +TEST_F(fragment_builtin, constants_are_constant) +{ + common_builtin::constants_are_constant(); +} + +TEST_F(fragment_builtin, no_invalid_variable_modes) +{ + common_builtin::no_invalid_variable_modes(); +} + +/********************************************************************/ + +class geometry_builtin : public common_builtin { +public: + geometry_builtin() + : common_builtin(GL_GEOMETRY_SHADER) + { + /* empty */ + } +}; + +TEST_F(geometry_builtin, names_start_with_gl) +{ + common_builtin::names_start_with_gl(); +} + +TEST_F(geometry_builtin, inputs_have_explicit_location) +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_shader_in) + continue; + + if (var->is_interface_instance()) { + EXPECT_STREQ("gl_in", var->name); + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + ASSERT_TRUE(var->type->is_array()); + + const glsl_type *const instance_type = var->type->fields.array; + + for (unsigned i = 0; i < instance_type->length; i++) { + const glsl_struct_field *const input = + &instance_type->fields.structure[i]; + + string_starts_with_prefix(input->name, "gl_"); + EXPECT_NE(-1, input->location); + EXPECT_GT(VARYING_SLOT_VAR0, input->location); + + /* Several varyings only exist in the fragment shader. Be sure + * that no inputs with these locations exist. + */ + EXPECT_NE(VARYING_SLOT_PNTC, input->location); + EXPECT_NE(VARYING_SLOT_FACE, input->location); + } + } else { + EXPECT_TRUE(var->data.explicit_location); + EXPECT_NE(-1, var->data.location); + EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + } + + /* Several varyings only exist in the fragment shader. Be sure that no + * inputs with these locations exist. + */ + EXPECT_NE(VARYING_SLOT_PNTC, var->data.location); + EXPECT_NE(VARYING_SLOT_FACE, var->data.location); + } +} + +TEST_F(geometry_builtin, outputs_have_explicit_location) +{ + foreach_in_list(ir_instruction, node, &this->ir) { + ir_variable *const var = node->as_variable(); + + if (var->data.mode != ir_var_shader_out) + continue; + + EXPECT_TRUE(var->data.explicit_location); + EXPECT_NE(-1, var->data.location); + EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + + /* Several varyings only exist in the fragment shader. Be sure that no + * outputs with these locations exist. 
+ */ + EXPECT_NE(VARYING_SLOT_PNTC, var->data.location); + EXPECT_NE(VARYING_SLOT_FACE, var->data.location); + } +} + +TEST_F(geometry_builtin, uniforms_and_system_values_dont_have_explicit_location) +{ + common_builtin::uniforms_and_system_values_dont_have_explicit_location(); +} + +TEST_F(geometry_builtin, constants_are_constant) +{ + common_builtin::constants_are_constant(); +} + +TEST_F(geometry_builtin, no_invalid_variable_modes) +{ + common_builtin::no_invalid_variable_modes(); +} diff --git a/src/compiler/glsl/tests/compare_ir b/src/compiler/glsl/tests/compare_ir new file mode 100755 index 00000000000..a40fc810cf3 --- /dev/null +++ b/src/compiler/glsl/tests/compare_ir @@ -0,0 +1,59 @@ +#!/usr/bin/env python +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# Compare two files containing IR code. Ignore formatting differences +# and declaration order. 
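+#
+# Example (hypothetical file names):
+#
+#     ./compare_ir expected.ir actual.ir
+#
+# Exits 0 when the sorted s-expressions match; otherwise prints a unified
+# diff of the two files and exits 1.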
+
+import os
+import os.path
+import subprocess
+import sys
+import tempfile
+
+from sexps import *
+
+if len(sys.argv) != 3:
+    print 'Usage: compare_ir <file1> <file2>'
+    exit(1)
+
+with open(sys.argv[1]) as f:
+    ir1 = sort_decls(parse_sexp(f.read()))
+with open(sys.argv[2]) as f:
+    ir2 = sort_decls(parse_sexp(f.read()))
+
+if ir1 == ir2:
+    exit(0)
+else:
+    file1, path1 = tempfile.mkstemp(os.path.basename(sys.argv[1]))
+    file2, path2 = tempfile.mkstemp(os.path.basename(sys.argv[2]))
+    try:
+        os.write(file1, '{0}\n'.format(sexp_to_string(ir1)))
+        os.close(file1)
+        os.write(file2, '{0}\n'.format(sexp_to_string(ir2)))
+        os.close(file2)
+        subprocess.call(['diff', '-u', path1, path2])
+    finally:
+        os.remove(path1)
+        os.remove(path2)
+    exit(1)
diff --git a/src/compiler/glsl/tests/copy_constant_to_storage_tests.cpp b/src/compiler/glsl/tests/copy_constant_to_storage_tests.cpp
new file mode 100644
index 00000000000..cd48bc523c1
--- /dev/null
+++ b/src/compiler/glsl/tests/copy_constant_to_storage_tests.cpp
@@ -0,0 +1,300 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */ +#include +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "util/ralloc.h" +#include "uniform_initializer_utils.h" + +namespace linker { +extern void +copy_constant_to_storage(union gl_constant_value *storage, + const ir_constant *val, + const enum glsl_base_type base_type, + const unsigned int elements, + unsigned int boolean_true); +} + +class copy_constant_to_storage : public ::testing::Test { +public: + void int_test(unsigned rows); + void uint_test(unsigned rows); + void bool_test(unsigned rows); + void sampler_test(); + void float_test(unsigned columns, unsigned rows); + + virtual void SetUp(); + virtual void TearDown(); + + gl_constant_value storage[17]; + void *mem_ctx; +}; + +void +copy_constant_to_storage::SetUp() +{ + this->mem_ctx = ralloc_context(NULL); +} + +void +copy_constant_to_storage::TearDown() +{ + ralloc_free(this->mem_ctx); + this->mem_ctx = NULL; +} + +void +copy_constant_to_storage::int_test(unsigned rows) +{ + ir_constant *val; + generate_data(mem_ctx, GLSL_TYPE_INT, 1, rows, val); + + const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); + fill_storage_array_with_sentinels(storage, + val->type->components(), + red_zone_size); + + linker::copy_constant_to_storage(storage, + val, + val->type->base_type, + val->type->components(), + 0xF00F); + + verify_data(storage, 0, val, red_zone_size, 0xF00F); +} + +void +copy_constant_to_storage::uint_test(unsigned rows) +{ + ir_constant *val; + generate_data(mem_ctx, GLSL_TYPE_UINT, 1, rows, val); + + const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); + fill_storage_array_with_sentinels(storage, + val->type->components(), + red_zone_size); + + linker::copy_constant_to_storage(storage, + val, + val->type->base_type, + val->type->components(), + 0xF00F); + + verify_data(storage, 0, val, red_zone_size, 0xF00F); +} + +void +copy_constant_to_storage::float_test(unsigned columns, unsigned rows) +{ + ir_constant *val; + generate_data(mem_ctx, GLSL_TYPE_FLOAT, columns, rows, val); + + const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); + fill_storage_array_with_sentinels(storage, + val->type->components(), + red_zone_size); + + linker::copy_constant_to_storage(storage, + val, + val->type->base_type, + val->type->components(), + 0xF00F); + + verify_data(storage, 0, val, red_zone_size, 0xF00F); +} + +void +copy_constant_to_storage::bool_test(unsigned rows) +{ + ir_constant *val; + generate_data(mem_ctx, GLSL_TYPE_BOOL, 1, rows, val); + + const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); + fill_storage_array_with_sentinels(storage, + val->type->components(), + red_zone_size); + + linker::copy_constant_to_storage(storage, + val, + val->type->base_type, + val->type->components(), + 0xF00F); + + verify_data(storage, 0, val, red_zone_size, 0xF00F); +} + +/** + * The only difference between this test and int_test is that the base type + * passed to \c linker::copy_constant_to_storage is hard-coded to \c + * GLSL_TYPE_SAMPLER instead of using the base type from the constant. 
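+ *
+ * (Sampler uniforms are backed by integer indices in uniform storage, so
+ * the integer data generated above is exactly what a sampler slot would
+ * hold.)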
+ */ +void +copy_constant_to_storage::sampler_test(void) +{ + ir_constant *val; + generate_data(mem_ctx, GLSL_TYPE_INT, 1, 1, val); + + const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); + fill_storage_array_with_sentinels(storage, + val->type->components(), + red_zone_size); + + linker::copy_constant_to_storage(storage, + val, + GLSL_TYPE_SAMPLER, + val->type->components(), + 0xF00F); + + verify_data(storage, 0, val, red_zone_size, 0xF00F); +} + +TEST_F(copy_constant_to_storage, bool_uniform) +{ + bool_test(1); +} + +TEST_F(copy_constant_to_storage, bvec2_uniform) +{ + bool_test(2); +} + +TEST_F(copy_constant_to_storage, bvec3_uniform) +{ + bool_test(3); +} + +TEST_F(copy_constant_to_storage, bvec4_uniform) +{ + bool_test(4); +} + +TEST_F(copy_constant_to_storage, int_uniform) +{ + int_test(1); +} + +TEST_F(copy_constant_to_storage, ivec2_uniform) +{ + int_test(2); +} + +TEST_F(copy_constant_to_storage, ivec3_uniform) +{ + int_test(3); +} + +TEST_F(copy_constant_to_storage, ivec4_uniform) +{ + int_test(4); +} + +TEST_F(copy_constant_to_storage, uint_uniform) +{ + uint_test(1); +} + +TEST_F(copy_constant_to_storage, uvec2_uniform) +{ + uint_test(2); +} + +TEST_F(copy_constant_to_storage, uvec3_uniform) +{ + uint_test(3); +} + +TEST_F(copy_constant_to_storage, uvec4_uniform) +{ + uint_test(4); +} + +TEST_F(copy_constant_to_storage, float_uniform) +{ + float_test(1, 1); +} + +TEST_F(copy_constant_to_storage, vec2_uniform) +{ + float_test(1, 2); +} + +TEST_F(copy_constant_to_storage, vec3_uniform) +{ + float_test(1, 3); +} + +TEST_F(copy_constant_to_storage, vec4_uniform) +{ + float_test(1, 4); +} + +TEST_F(copy_constant_to_storage, mat2x2_uniform) +{ + float_test(2, 2); +} + +TEST_F(copy_constant_to_storage, mat2x3_uniform) +{ + float_test(2, 3); +} + +TEST_F(copy_constant_to_storage, mat2x4_uniform) +{ + float_test(2, 4); +} + +TEST_F(copy_constant_to_storage, mat3x2_uniform) +{ + float_test(3, 2); +} + +TEST_F(copy_constant_to_storage, mat3x3_uniform) +{ + float_test(3, 3); +} + +TEST_F(copy_constant_to_storage, mat3x4_uniform) +{ + float_test(3, 4); +} + +TEST_F(copy_constant_to_storage, mat4x2_uniform) +{ + float_test(4, 2); +} + +TEST_F(copy_constant_to_storage, mat4x3_uniform) +{ + float_test(4, 3); +} + +TEST_F(copy_constant_to_storage, mat4x4_uniform) +{ + float_test(4, 4); +} + +TEST_F(copy_constant_to_storage, sampler_uniform) +{ + sampler_test(); +} diff --git a/src/compiler/glsl/tests/general_ir_test.cpp b/src/compiler/glsl/tests/general_ir_test.cpp new file mode 100644 index 00000000000..217305bf847 --- /dev/null +++ b/src/compiler/glsl/tests/general_ir_test.cpp @@ -0,0 +1,80 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "ir.h" + +TEST(ir_variable_constructor, interface) +{ + void *mem_ctx = ralloc_context(NULL); + + static const glsl_struct_field f[] = { + glsl_struct_field(glsl_type::vec(4), "v") + }; + + const glsl_type *const interface = + glsl_type::get_interface_instance(f, + ARRAY_SIZE(f), + GLSL_INTERFACE_PACKING_STD140, + "simple_interface"); + + static const char name[] = "named_instance"; + + ir_variable *const v = + new(mem_ctx) ir_variable(interface, name, ir_var_uniform); + + EXPECT_STREQ(name, v->name); + EXPECT_NE(name, v->name); + EXPECT_EQ(interface, v->type); + EXPECT_EQ(interface, v->get_interface_type()); +} + +TEST(ir_variable_constructor, interface_array) +{ + void *mem_ctx = ralloc_context(NULL); + + static const glsl_struct_field f[] = { + glsl_struct_field(glsl_type::vec(4), "v") + }; + + const glsl_type *const interface = + glsl_type::get_interface_instance(f, + ARRAY_SIZE(f), + GLSL_INTERFACE_PACKING_STD140, + "simple_interface"); + + const glsl_type *const interface_array = + glsl_type::get_array_instance(interface, 2); + + static const char name[] = "array_instance"; + + ir_variable *const v = + new(mem_ctx) ir_variable(interface_array, name, ir_var_uniform); + + EXPECT_STREQ(name, v->name); + EXPECT_NE(name, v->name); + EXPECT_EQ(interface_array, v->type); + EXPECT_EQ(interface, v->get_interface_type()); +} diff --git a/src/compiler/glsl/tests/invalidate_locations_test.cpp b/src/compiler/glsl/tests/invalidate_locations_test.cpp new file mode 100644 index 00000000000..ba94d7e3a21 --- /dev/null +++ b/src/compiler/glsl/tests/invalidate_locations_test.cpp @@ -0,0 +1,196 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. 
+ */ +#include +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "util/ralloc.h" +#include "ir.h" +#include "linker.h" + +/** + * \file varyings_test.cpp + * + * Test various aspects of linking shader stage inputs and outputs. + */ + +class invalidate_locations : public ::testing::Test { +public: + virtual void SetUp(); + virtual void TearDown(); + + void *mem_ctx; + exec_list ir; +}; + +void +invalidate_locations::SetUp() +{ + this->mem_ctx = ralloc_context(NULL); + this->ir.make_empty(); +} + +void +invalidate_locations::TearDown() +{ + ralloc_free(this->mem_ctx); + this->mem_ctx = NULL; +} + +TEST_F(invalidate_locations, simple_vertex_in_generic) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "a", + ir_var_shader_in); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VERT_ATTRIB_GENERIC0; + var->data.location_frac = 2; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(-1, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + EXPECT_FALSE(var->data.explicit_location); + EXPECT_TRUE(var->data.is_unmatched_generic_inout); +} + +TEST_F(invalidate_locations, explicit_location_vertex_in_generic) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "a", + ir_var_shader_in); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VERT_ATTRIB_GENERIC0; + var->data.explicit_location = true; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(VERT_ATTRIB_GENERIC0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + EXPECT_TRUE(var->data.explicit_location); + EXPECT_FALSE(var->data.is_unmatched_generic_inout); +} + +TEST_F(invalidate_locations, explicit_location_frac_vertex_in_generic) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "a", + ir_var_shader_in); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VERT_ATTRIB_GENERIC0; + var->data.location_frac = 2; + var->data.explicit_location = true; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(VERT_ATTRIB_GENERIC0, var->data.location); + EXPECT_EQ(2u, var->data.location_frac); + EXPECT_TRUE(var->data.explicit_location); + EXPECT_FALSE(var->data.is_unmatched_generic_inout); +} + +TEST_F(invalidate_locations, vertex_in_builtin) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "gl_Vertex", + ir_var_shader_in); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VERT_ATTRIB_POS; + var->data.explicit_location = true; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(VERT_ATTRIB_POS, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + EXPECT_TRUE(var->data.explicit_location); + EXPECT_FALSE(var->data.is_unmatched_generic_inout); +} + +TEST_F(invalidate_locations, simple_vertex_out_generic) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "a", + ir_var_shader_out); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VARYING_SLOT_VAR0; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(-1, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + EXPECT_FALSE(var->data.explicit_location); + 
EXPECT_TRUE(var->data.is_unmatched_generic_inout); +} + +TEST_F(invalidate_locations, vertex_out_builtin) +{ + ir_variable *const var = + new(mem_ctx) ir_variable(glsl_type::vec(4), + "gl_FrontColor", + ir_var_shader_out); + + EXPECT_FALSE(var->data.explicit_location); + EXPECT_EQ(-1, var->data.location); + + var->data.location = VARYING_SLOT_COL0; + var->data.explicit_location = true; + + ir.push_tail(var); + + link_invalidate_variable_locations(&ir); + + EXPECT_EQ(VARYING_SLOT_COL0, var->data.location); + EXPECT_EQ(0u, var->data.location_frac); + EXPECT_TRUE(var->data.explicit_location); + EXPECT_FALSE(var->data.is_unmatched_generic_inout); +} diff --git a/src/compiler/glsl/tests/lower_jumps/.gitignore b/src/compiler/glsl/tests/lower_jumps/.gitignore new file mode 100644 index 00000000000..e98df627fd8 --- /dev/null +++ b/src/compiler/glsl/tests/lower_jumps/.gitignore @@ -0,0 +1,3 @@ +*.opt_test +*.expected +*.out diff --git a/src/compiler/glsl/tests/lower_jumps/create_test_cases.py b/src/compiler/glsl/tests/lower_jumps/create_test_cases.py new file mode 100644 index 00000000000..3be1079bc14 --- /dev/null +++ b/src/compiler/glsl/tests/lower_jumps/create_test_cases.py @@ -0,0 +1,643 @@ +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +import os +import os.path +import re +import subprocess +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py, which is in parent dir +from sexps import * + +def make_test_case(f_name, ret_type, body): + """Create a simple optimization test case consisting of a single + function with the given name, return type, and body. + + Global declarations are automatically created for any undeclared + variables that are referenced by the function. All undeclared + variables are assumed to be floats. 
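+
+    For example (illustrative), a body of [['return', ['var_ref', 'x']]]
+    yields a (declare (in) float x) declaration followed by the function
+    signature s-expression for f_name.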
+    """
+    check_sexp(body)
+    declarations = {}
+    def make_declarations(sexp, already_declared = ()):
+        if isinstance(sexp, list):
+            if len(sexp) == 2 and sexp[0] == 'var_ref':
+                if sexp[1] not in already_declared:
+                    declarations[sexp[1]] = [
+                        'declare', ['in'], 'float', sexp[1]]
+            elif len(sexp) == 4 and sexp[0] == 'assign':
+                assert sexp[2][0] == 'var_ref'
+                if sexp[2][1] not in already_declared:
+                    declarations[sexp[2][1]] = [
+                        'declare', ['out'], 'float', sexp[2][1]]
+                make_declarations(sexp[3], already_declared)
+            else:
+                already_declared = set(already_declared)
+                for s in sexp:
+                    if isinstance(s, list) and len(s) >= 4 and \
+                            s[0] == 'declare':
+                        already_declared.add(s[3])
+                    else:
+                        make_declarations(s, already_declared)
+    make_declarations(body)
+    return declarations.values() + \
+        [['function', f_name, ['signature', ret_type, ['parameters'], body]]]
+
+
+# The following functions can be used to build expressions.
+
+def const_float(value):
+    """Create an expression representing the given floating point value."""
+    return ['constant', 'float', ['{0:.6f}'.format(value)]]
+
+def const_bool(value):
+    """Create an expression representing the given boolean value.
+
+    If value is not a boolean, it is converted to a boolean.  So, for
+    instance, const_bool(1) is equivalent to const_bool(True).
+    """
+    return ['constant', 'bool', ['{0}'.format(1 if value else 0)]]
+
+def gt_zero(var_name):
+    """Construct the expression var_name > 0"""
+    return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)]
+
+
+# The following functions can be used to build complex control flow
+# statements.  All of these functions return statement lists (even
+# those which only create a single statement), so that statements can
+# be sequenced together using the '+' operator.
+
+def return_(value = None):
+    """Create a return statement."""
+    if value is not None:
+        return [['return', value]]
+    else:
+        return [['return']]

+def break_():
+    """Create a break statement."""
+    return ['break']
+
+def continue_():
+    """Create a continue statement."""
+    return ['continue']
+
+def simple_if(var_name, then_statements, else_statements = None):
+    """Create a statement of the form
+
+    if (var_name > 0.0) {
+       <then_statements>
+    } else {
+       <else_statements>
+    }
+
+    else_statements may be omitted.
+    """
+    if else_statements is None:
+        else_statements = []
+    check_sexp(then_statements)
+    check_sexp(else_statements)
+    return [['if', gt_zero(var_name), then_statements, else_statements]]
+
+def loop(statements):
+    """Create a loop containing the given statements as its loop
+    body.
+    """
+    check_sexp(statements)
+    return [['loop', statements]]
+
+def declare_temp(var_type, var_name):
+    """Create a declaration of the form
+
+       (declare (temporary) <var_type> <var_name>)
+    """
+    return [['declare', ['temporary'], var_type, var_name]]
+
+def assign_x(var_name, value):
+    """Create an assignment to the variable <var_name> of the form
+
+       (assign (x) (var_ref <var_name>) <value>)
+
+    The assignment uses the mask (x).
+    """
+    check_sexp(value)
+    return [['assign', ['x'], ['var_ref', var_name], value]]
+
+def complex_if(var_prefix, statements):
+    """Create a statement of the form
+
+    if (<var_prefix>a > 0.0) {
+       if (<var_prefix>b > 0.0) {
+          <statements>
+       }
+    }
+
+    This is useful in testing jump lowering, because if <statements>
+    ends in a jump, lower_jumps.cpp won't try to combine this
+    construct with the code that follows it, as it might do for a
+    simple if.
+
+    All variables used in the if statement are prefixed with
+    var_prefix.  This can be used to ensure uniqueness.
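+
+    For example (illustrative), complex_if('x_', return_()) wraps the
+    return in "if (x_a > 0.0) { if (x_b > 0.0) { ... } }".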
+ """ + check_sexp(statements) + return simple_if(var_prefix + 'a', simple_if(var_prefix + 'b', statements)) + +def declare_execute_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean execute_flag. + """ + return declare_temp('bool', 'execute_flag') + \ + assign_x('execute_flag', const_bool(True)) + +def declare_return_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean return_flag. + """ + return declare_temp('bool', 'return_flag') + \ + assign_x('return_flag', const_bool(False)) + +def declare_return_value(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary variable return_value. Assume that + return_value is a float. + """ + return declare_temp('float', 'return_value') + +def declare_break_flag(): + """Create the statements that lower_jumps.cpp uses to declare and + initialize the temporary boolean break_flag. + """ + return declare_temp('bool', 'break_flag') + \ + assign_x('break_flag', const_bool(False)) + +def lowered_return_simple(value = None): + """Create the statements that lower_jumps.cpp lowers a return + statement to, in situations where it does not need to clear the + execute flag. + """ + if value: + result = assign_x('return_value', value) + else: + result = [] + return result + assign_x('return_flag', const_bool(True)) + +def lowered_return(value = None): + """Create the statements that lower_jumps.cpp lowers a return + statement to, in situations where it needs to clear the execute + flag. + """ + return lowered_return_simple(value) + \ + assign_x('execute_flag', const_bool(False)) + +def lowered_continue(): + """Create the statement that lower_jumps.cpp lowers a continue + statement to. + """ + return assign_x('execute_flag', const_bool(False)) + +def lowered_break_simple(): + """Create the statement that lower_jumps.cpp lowers a break + statement to, in situations where it does not need to clear the + execute flag. + """ + return assign_x('break_flag', const_bool(True)) + +def lowered_break(): + """Create the statement that lower_jumps.cpp lowers a break + statement to, in situations where it needs to clear the execute + flag. + """ + return lowered_break_simple() + assign_x('execute_flag', const_bool(False)) + +def if_execute_flag(statements): + """Wrap statements in an if test so that they will only execute if + execute_flag is True. + """ + check_sexp(statements) + return [['if', ['var_ref', 'execute_flag'], statements, []]] + +def if_not_return_flag(statements): + """Wrap statements in an if test so that they will only execute if + return_flag is False. + """ + check_sexp(statements) + return [['if', ['var_ref', 'return_flag'], [], statements]] + +def final_return(): + """Create the return statement that lower_jumps.cpp places at the + end of a function when lowering returns. + """ + return [['return', ['var_ref', 'return_value']]] + +def final_break(): + """Create the conditional break statement that lower_jumps.cpp + places at the end of a function when lowering breaks. + """ + return [['if', ['var_ref', 'break_flag'], break_(), []]] + +def bash_quote(*args): + """Quote the arguments appropriately so that bash will understand + each argument as a single word. 
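+
+    For example (illustrative), bash_quote('--input-ir', 'do dead code')
+    returns "--input-ir 'do dead code'": the first word contains only
+    safe characters, while the second is single-quoted because of the
+    spaces.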
+ """ + def quote_word(word): + for c in word: + if not (c.isalpha() or c.isdigit() or c in '@%_-+=:,./'): + break + else: + if not word: + return "''" + return word + return "'{0}'".format(word.replace("'", "'\"'\"'")) + return ' '.join(quote_word(word) for word in args) + +def create_test_case(doc_string, input_sexp, expected_sexp, test_name, + pull_out_jumps=False, lower_sub_return=False, + lower_main_return=False, lower_continue=False, + lower_break=False): + """Create a test case that verifies that do_lower_jumps transforms + the given code in the expected way. + """ + doc_lines = [line.strip() for line in doc_string.splitlines()] + doc_string = ''.join('# {0}\n'.format(line) for line in doc_lines if line != '') + check_sexp(input_sexp) + check_sexp(expected_sexp) + input_str = sexp_to_string(sort_decls(input_sexp)) + expected_output = sexp_to_string(sort_decls(expected_sexp)) + + optimization = ( + 'do_lower_jumps({0:d}, {1:d}, {2:d}, {3:d}, {4:d})'.format( + pull_out_jumps, lower_sub_return, lower_main_return, + lower_continue, lower_break)) + args = ['../../glsl_test', 'optpass', '--quiet', '--input-ir', optimization] + test_file = '{0}.opt_test'.format(test_name) + with open(test_file, 'w') as f: + f.write('#!/usr/bin/env bash\n#\n# This file was generated by create_test_cases.py.\n#\n') + f.write(doc_string) + f.write('{0} < "$test.out" 2>&1 + total=$((total+1)) + if $PYTHON2 $PYTHON_FLAGS $compare_ir "$test.expected" "$test.out" >/dev/null 2>&1; then + echo "PASS" + pass=$((pass+1)) + else + echo "FAIL" + $PYTHON2 $PYTHON_FLAGS $compare_ir "$test.expected" "$test.out" + fi +done + +echo "" +echo "$pass/$total tests returned correct results" +echo "" + +if [[ $pass == $total ]]; then + exit 0 +else + exit 1 +fi diff --git a/src/compiler/glsl/tests/sampler_types_test.cpp b/src/compiler/glsl/tests/sampler_types_test.cpp new file mode 100644 index 00000000000..04dd65e6e8d --- /dev/null +++ b/src/compiler/glsl/tests/sampler_types_test.cpp @@ -0,0 +1,100 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ +#include +#include "main/compiler.h" +#include "main/mtypes.h" +#include "main/macros.h" +#include "ir.h" + +/** + * \file sampler_types_test.cpp + * + * Test that built-in sampler types have the right properties. 
+ */ + +#define ARRAY EXPECT_TRUE(type->sampler_array); +#define NONARRAY EXPECT_FALSE(type->sampler_array); +#define SHADOW EXPECT_TRUE(type->sampler_shadow); +#define COLOR EXPECT_FALSE(type->sampler_shadow); + +#define T(TYPE, DIM, DATA_TYPE, ARR, SHAD, COMPS) \ +TEST(sampler_types, TYPE) \ +{ \ + const glsl_type *type = glsl_type::TYPE##_type; \ + EXPECT_EQ(GLSL_TYPE_SAMPLER, type->base_type); \ + EXPECT_EQ(DIM, type->sampler_dimensionality); \ + EXPECT_EQ(DATA_TYPE, type->sampler_type); \ + ARR; \ + SHAD; \ + EXPECT_EQ(COMPS, type->coordinate_components()); \ +} + +T( sampler1D, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 1) +T( sampler2D, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2) +T( sampler3D, GLSL_SAMPLER_DIM_3D, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 3) +T( samplerCube, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 3) +T( sampler1DArray, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, ARRAY, COLOR, 2) +T( sampler2DArray, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, ARRAY, COLOR, 3) +T( samplerCubeArray, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, ARRAY, COLOR, 4) +T( sampler2DRect, GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2) +T( samplerBuffer, GLSL_SAMPLER_DIM_BUF, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 1) +T( sampler2DMS, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2) +T( sampler2DMSArray, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_FLOAT, ARRAY, COLOR, 3) +T(isampler1D, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_INT, NONARRAY, COLOR, 1) +T(isampler2D, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_INT, NONARRAY, COLOR, 2) +T(isampler3D, GLSL_SAMPLER_DIM_3D, GLSL_TYPE_INT, NONARRAY, COLOR, 3) +T(isamplerCube, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_INT, NONARRAY, COLOR, 3) +T(isampler1DArray, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_INT, ARRAY, COLOR, 2) +T(isampler2DArray, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_INT, ARRAY, COLOR, 3) +T(isamplerCubeArray, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_INT, ARRAY, COLOR, 4) +T(isampler2DRect, GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_INT, NONARRAY, COLOR, 2) +T(isamplerBuffer, GLSL_SAMPLER_DIM_BUF, GLSL_TYPE_INT, NONARRAY, COLOR, 1) +T(isampler2DMS, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_INT, NONARRAY, COLOR, 2) +T(isampler2DMSArray, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_INT, ARRAY, COLOR, 3) +T(usampler1D, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_UINT, NONARRAY, COLOR, 1) +T(usampler2D, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_UINT, NONARRAY, COLOR, 2) +T(usampler3D, GLSL_SAMPLER_DIM_3D, GLSL_TYPE_UINT, NONARRAY, COLOR, 3) +T(usamplerCube, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_UINT, NONARRAY, COLOR, 3) +T(usampler1DArray, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_UINT, ARRAY, COLOR, 2) +T(usampler2DArray, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_UINT, ARRAY, COLOR, 3) +T(usamplerCubeArray, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_UINT, ARRAY, COLOR, 4) +T(usampler2DRect, GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_UINT, NONARRAY, COLOR, 2) +T(usamplerBuffer, GLSL_SAMPLER_DIM_BUF, GLSL_TYPE_UINT, NONARRAY, COLOR, 1) +T(usampler2DMS, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_UINT, NONARRAY, COLOR, 2) +T(usampler2DMSArray, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_UINT, ARRAY, COLOR, 3) + +T(sampler1DShadow, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 1) +T(sampler2DShadow, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 2) +T(samplerCubeShadow, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 3) + +T(sampler1DArrayShadow, + GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, ARRAY, SHADOW, 2) +T(sampler2DArrayShadow, + GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, ARRAY, SHADOW, 3) +T(samplerCubeArrayShadow, + GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, ARRAY, SHADOW, 4) 
+T(sampler2DRectShadow,
+  GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 2)
+
+T(samplerExternalOES,
+  GLSL_SAMPLER_DIM_EXTERNAL, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2)
diff --git a/src/compiler/glsl/tests/set_uniform_initializer_tests.cpp b/src/compiler/glsl/tests/set_uniform_initializer_tests.cpp
new file mode 100644
index 00000000000..0b1f66cb342
--- /dev/null
+++ b/src/compiler/glsl/tests/set_uniform_initializer_tests.cpp
@@ -0,0 +1,594 @@
+/*
+ * Copyright © 2012 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <gtest/gtest.h>
+#include "main/compiler.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "util/ralloc.h"
+#include "uniform_initializer_utils.h"
+
+namespace linker {
+extern void
+set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
+                        const char *name, const glsl_type *type,
+                        ir_constant *val, unsigned int boolean_true);
+}
+
+class set_uniform_initializer : public ::testing::Test {
+public:
+   virtual void SetUp();
+   virtual void TearDown();
+
+   /**
+    * Index of the uniform to be tested.
+    *
+    * All of the \c set_uniform_initializer tests create several slots for
+    * uniforms.  All but one of the slots is fake.  This field holds the index
+    * of the slot for the uniform being tested.
+    */
+   unsigned actual_index;
+
+   /**
+    * Name of the uniform to be tested.
+    */
+   const char *name;
+
+   /**
+    * Shader program used in the test.
+    */
+   struct gl_shader_program *prog;
+
+   /**
+    * Ralloc memory context used for all temporary allocations.
+    */
+   void *mem_ctx;
+};
+
+void
+set_uniform_initializer::SetUp()
+{
+   this->mem_ctx = ralloc_context(NULL);
+   this->prog = rzalloc(NULL, struct gl_shader_program);
+
+   /* Set default values used by the test cases.
+    */
+   this->actual_index = 1;
+   this->name = "i";
+}
+
+void
+set_uniform_initializer::TearDown()
+{
+   ralloc_free(this->mem_ctx);
+   this->mem_ctx = NULL;
+
+   ralloc_free(this->prog);
+   this->prog = NULL;
+}
+
+/**
+ * Create some uniform storage for a program.
+ *
+ * \param prog          Program to get some storage
+ * \param num_storage   Total number of storage slots
+ * \param index_to_set  Storage slot that will actually get a value
+ * \param name          Name for the actual storage slot
+ * \param type          Type for the elements of the actual storage slot
+ * \param array_size    Size for the array of the actual storage slot.  This
+ *                      should be zero for non-arrays.
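 *
 * A worked instance of the sizing arithmetic below, for reference: for a
 * single non-array vec4 uniform, elements = MAX2(1, 0) = 1 and
 * data_components = 1 * 4 = 4, so total_components = MAX2(17, 4 + 4) = 17
 * and red_zone_components = 13.  The first 4 gl_constant_value slots carry
 * the data sentinels and the trailing 13 form the red zone that catches
 * out-of-bounds writes.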
+ */ +static unsigned +establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage, + unsigned index_to_set, const char *name, + const glsl_type *type, unsigned array_size) +{ + const unsigned elements = MAX2(1, array_size); + const unsigned data_components = elements * type->components(); + const unsigned total_components = MAX2(17, (data_components + + type->components())); + const unsigned red_zone_components = total_components - data_components; + + prog->UniformStorage = rzalloc_array(prog, struct gl_uniform_storage, + num_storage); + prog->NumUniformStorage = num_storage; + + prog->UniformStorage[index_to_set].name = (char *) name; + prog->UniformStorage[index_to_set].type = type; + prog->UniformStorage[index_to_set].array_elements = array_size; + prog->UniformStorage[index_to_set].initialized = false; + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + prog->UniformStorage[index_to_set].opaque[sh].index = ~0; + prog->UniformStorage[index_to_set].opaque[sh].active = false; + } + prog->UniformStorage[index_to_set].num_driver_storage = 0; + prog->UniformStorage[index_to_set].driver_storage = NULL; + prog->UniformStorage[index_to_set].storage = + rzalloc_array(prog, union gl_constant_value, total_components); + + fill_storage_array_with_sentinels(prog->UniformStorage[index_to_set].storage, + data_components, + red_zone_components); + + for (unsigned i = 0; i < num_storage; i++) { + if (i == index_to_set) + continue; + + prog->UniformStorage[i].name = (char *) "invalid slot"; + prog->UniformStorage[i].type = glsl_type::void_type; + prog->UniformStorage[i].array_elements = 0; + prog->UniformStorage[i].initialized = false; + for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { + prog->UniformStorage[i].opaque[sh].index = ~0; + prog->UniformStorage[i].opaque[sh].active = false; + } + prog->UniformStorage[i].num_driver_storage = 0; + prog->UniformStorage[i].driver_storage = NULL; + prog->UniformStorage[i].storage = NULL; + } + + return red_zone_components; +} + +/** + * Verify that the correct uniform is marked as having been initialized. 
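 *
 * Only the slot at \c actual_index may be marked; every other slot created
 * by establish_uniform_storage must still read back as uninitialized.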
+ */
+static void
+verify_initialization(struct gl_shader_program *prog, unsigned actual_index)
+{
+   for (unsigned i = 0; i < prog->NumUniformStorage; i++) {
+      if (i == actual_index) {
+         EXPECT_TRUE(prog->UniformStorage[actual_index].initialized);
+      } else {
+         EXPECT_FALSE(prog->UniformStorage[i].initialized);
+      }
+   }
+}
+
+static void
+non_array_test(void *mem_ctx, struct gl_shader_program *prog,
+               unsigned actual_index, const char *name,
+               enum glsl_base_type base_type,
+               unsigned columns, unsigned rows)
+{
+   const glsl_type *const type =
+      glsl_type::get_instance(base_type, rows, columns);
+
+   unsigned red_zone_components =
+      establish_uniform_storage(prog, 3, actual_index, name, type, 0);
+
+   ir_constant *val;
+   generate_data(mem_ctx, base_type, columns, rows, val);
+
+   linker::set_uniform_initializer(mem_ctx, prog, name, type, val, 0xF00F);
+
+   verify_initialization(prog, actual_index);
+   verify_data(prog->UniformStorage[actual_index].storage, 0, val,
+               red_zone_components, 0xF00F);
+}
+
+TEST_F(set_uniform_initializer, int_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 1);
+}
+
+TEST_F(set_uniform_initializer, ivec2_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 2);
+}
+
+TEST_F(set_uniform_initializer, ivec3_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 3);
+}
+
+TEST_F(set_uniform_initializer, ivec4_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 4);
+}
+
+TEST_F(set_uniform_initializer, uint_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 1);
+}
+
+TEST_F(set_uniform_initializer, uvec2_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 2);
+}
+
+TEST_F(set_uniform_initializer, uvec3_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 3);
+}
+
+TEST_F(set_uniform_initializer, uvec4_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 4);
+}
+
+TEST_F(set_uniform_initializer, bool_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 1);
+}
+
+TEST_F(set_uniform_initializer, bvec2_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 2);
+}
+
+TEST_F(set_uniform_initializer, bvec3_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 3);
+}
+
+TEST_F(set_uniform_initializer, bvec4_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 4);
+}
+
+TEST_F(set_uniform_initializer, float_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 1);
+}
+
+TEST_F(set_uniform_initializer, vec2_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 2);
+}
+
+TEST_F(set_uniform_initializer, vec3_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 3);
+}
+
+TEST_F(set_uniform_initializer, vec4_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 4);
+}
+
+TEST_F(set_uniform_initializer, mat2x2_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 2);
+}
+
+TEST_F(set_uniform_initializer, mat2x3_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 3);
+}
+
+TEST_F(set_uniform_initializer, mat2x4_uniform)
+{
+   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 4);
+}
+
+TEST_F(set_uniform_initializer, mat3x2_uniform)
+{
non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 2); +} + +TEST_F(set_uniform_initializer, mat3x3_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 3); +} + +TEST_F(set_uniform_initializer, mat3x4_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 4); +} + +TEST_F(set_uniform_initializer, mat4x2_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 2); +} + +TEST_F(set_uniform_initializer, mat4x3_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 3); +} + +TEST_F(set_uniform_initializer, mat4x4_uniform) +{ + non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 4); +} + +static void +array_test(void *mem_ctx, struct gl_shader_program *prog, + unsigned actual_index, const char *name, + enum glsl_base_type base_type, + unsigned columns, unsigned rows, unsigned array_size, + unsigned excess_data_size) +{ + const glsl_type *const element_type = + glsl_type::get_instance(base_type, rows, columns); + + const unsigned red_zone_components = + establish_uniform_storage(prog, 3, actual_index, name, element_type, + array_size); + + /* The constant value generated may have more array elements than the + * uniform that it initializes. In the real compiler and linker this can + * happen when a uniform array is compacted because some of the tail + * elements are not used. In this case, the type of the uniform will be + * modified, but the initializer will not. + */ + ir_constant *val; + generate_array_data(mem_ctx, base_type, columns, rows, + array_size + excess_data_size, val); + + linker::set_uniform_initializer(mem_ctx, prog, name, element_type, val, + 0xF00F); + + verify_initialization(prog, actual_index); + verify_data(prog->UniformStorage[actual_index].storage, array_size, + val, red_zone_components, 0xF00F); +} + +TEST_F(set_uniform_initializer, int_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 1, 4, 0); +} + +TEST_F(set_uniform_initializer, ivec2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, ivec3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, ivec4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, uint_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 1, 4, 0); +} + +TEST_F(set_uniform_initializer, uvec2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, uvec3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, uvec4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, bool_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 1, 4, 0); +} + +TEST_F(set_uniform_initializer, bvec2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, bvec3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, bvec4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 4, 4, 
0); +} + +TEST_F(set_uniform_initializer, float_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 1, 4, 0); +} + +TEST_F(set_uniform_initializer, vec2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, vec3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, vec4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, mat2x2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, mat2x3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, mat2x4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, mat3x2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, mat3x3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, mat3x4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, mat4x2_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 2, 4, 0); +} + +TEST_F(set_uniform_initializer, mat4x3_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 3, 4, 0); +} + +TEST_F(set_uniform_initializer, mat4x4_array_uniform) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 4, 4, 0); +} + +TEST_F(set_uniform_initializer, int_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 1, 4, 5); +} + +TEST_F(set_uniform_initializer, ivec2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, ivec3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, ivec4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, uint_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 1, 4, 5); +} + +TEST_F(set_uniform_initializer, uvec2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, uvec3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, uvec4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, bool_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 1, 4, 5); +} + +TEST_F(set_uniform_initializer, bvec2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, bvec3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 3, 
4, 5); +} + +TEST_F(set_uniform_initializer, bvec4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, float_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 1, 4, 5); +} + +TEST_F(set_uniform_initializer, vec2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, vec3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, vec4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, mat2x2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, mat2x3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, mat2x4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, mat3x2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, mat3x3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, mat3x4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 4, 4, 5); +} + +TEST_F(set_uniform_initializer, mat4x2_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 2, 4, 5); +} + +TEST_F(set_uniform_initializer, mat4x3_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 3, 4, 5); +} + +TEST_F(set_uniform_initializer, mat4x4_array_uniform_excess_initializer) +{ + array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 4, 4, 5); +} diff --git a/src/compiler/glsl/tests/sexps.py b/src/compiler/glsl/tests/sexps.py new file mode 100644 index 00000000000..a714af8d236 --- /dev/null +++ b/src/compiler/glsl/tests/sexps.py @@ -0,0 +1,103 @@ +# coding=utf-8 +# +# Copyright © 2011 Intel Corporation +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# the rights to use, copy, modify, merge, publish, distribute, sublicense, +# and/or sell copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL +# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +# DEALINGS IN THE SOFTWARE. + +# This file contains helper functions for manipulating sexps in Python. +# +# We represent a sexp in Python using nested lists containing strings. +# So, for example, the sexp (constant float (1.000000)) is represented +# as ['constant', 'float', ['1.000000']]. + +import re + +def check_sexp(sexp): + """Verify that the argument is a proper sexp. + + That is, raise an exception if the argument is not a string or a + list, or if it contains anything that is not a string or a list at + any nesting level. + """ + if isinstance(sexp, list): + for s in sexp: + check_sexp(s) + elif not isinstance(sexp, basestring): + raise Exception('Not a sexp: {0!r}'.format(sexp)) + +def parse_sexp(sexp): + """Convert a string, of the form that would be output by mesa, + into a sexp represented as nested lists containing strings. + """ + sexp_token_regexp = re.compile( + '[a-zA-Z_]+(@[0-9]+)?|[0-9]+(\\.[0-9]+)?|[^ \n]') + stack = [[]] + for match in sexp_token_regexp.finditer(sexp): + token = match.group(0) + if token == '(': + stack.append([]) + elif token == ')': + if len(stack) == 1: + raise Exception('Unmatched )') + sexp = stack.pop() + stack[-1].append(sexp) + else: + stack[-1].append(token) + if len(stack) != 1: + raise Exception('Unmatched (') + if len(stack[0]) != 1: + raise Exception('Multiple sexps') + return stack[0][0] + +def sexp_to_string(sexp): + """Convert a sexp, represented as nested lists containing strings, + into a single string of the form parseable by mesa. + """ + if isinstance(sexp, basestring): + return sexp + assert isinstance(sexp, list) + result = '' + for s in sexp: + sub_result = sexp_to_string(s) + if result == '': + result = sub_result + elif '\n' not in result and '\n' not in sub_result and \ + len(result) + len(sub_result) + 1 <= 70: + result += ' ' + sub_result + else: + result += '\n' + sub_result + return '({0})'.format(result.replace('\n', '\n ')) + +def sort_decls(sexp): + """Sort all toplevel variable declarations in sexp. + + This is used to work around the fact that + ir_reader::read_instructions reorders declarations. + """ + assert isinstance(sexp, list) + decls = [] + other_code = [] + for s in sexp: + if isinstance(s, list) and len(s) >= 4 and s[0] == 'declare': + decls.append(s) + else: + other_code.append(s) + return sorted(decls) + other_code + diff --git a/src/compiler/glsl/tests/uniform_initializer_utils.cpp b/src/compiler/glsl/tests/uniform_initializer_utils.cpp new file mode 100644 index 00000000000..5006387036f --- /dev/null +++ b/src/compiler/glsl/tests/uniform_initializer_utils.cpp @@ -0,0 +1,255 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <gtest/gtest.h>
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "util/ralloc.h"
+#include "uniform_initializer_utils.h"
+#include <stdio.h>
+
+void
+fill_storage_array_with_sentinels(gl_constant_value *storage,
+                                  unsigned data_size,
+                                  unsigned red_zone_size)
+{
+   for (unsigned i = 0; i < data_size; i++)
+      storage[i].u = 0xDEADBEEF;
+
+   for (unsigned i = 0; i < red_zone_size; i++)
+      storage[data_size + i].u = 0xBADDC0DE;
+}
+
+/**
+ * Verify that markers past the end of the real uniform are unmodified
+ */
+static ::testing::AssertionResult
+red_zone_is_intact(gl_constant_value *storage,
+                   unsigned data_size,
+                   unsigned red_zone_size)
+{
+   for (unsigned i = 0; i < red_zone_size; i++) {
+      const unsigned idx = data_size + i;
+
+      if (storage[idx].u != 0xBADDC0DE)
+         return ::testing::AssertionFailure()
+            << "storage[" << idx << "].u = " << storage[idx].u
+            << ", expected data values = " << data_size
+            << ", red-zone size = " << red_zone_size;
+   }
+
+   return ::testing::AssertionSuccess();
+}
+
+static const int values[] = {
+   2, 0, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53
+};
+
+/**
+ * Generate a single data element.
+ *
+ * This is used by both \c generate_data and \c generate_array_data to create
+ * the data.
+ */
+static void
+generate_data_element(void *mem_ctx, const glsl_type *type,
+                      ir_constant *&val, unsigned data_index_base)
+{
+   /* Set the initial data values for the generated constant.
+    */
+   ir_constant_data data;
+   memset(&data, 0, sizeof(data));
+   for (unsigned i = 0; i < type->components(); i++) {
+      const unsigned idx = (i + data_index_base) % ARRAY_SIZE(values);
+      switch (type->base_type) {
+      case GLSL_TYPE_UINT:
+      case GLSL_TYPE_INT:
+      case GLSL_TYPE_SAMPLER:
+      case GLSL_TYPE_IMAGE:
+         data.i[i] = values[idx];
+         break;
+      case GLSL_TYPE_FLOAT:
+         data.f[i] = float(values[idx]);
+         break;
+      case GLSL_TYPE_BOOL:
+         data.b[i] = bool(values[idx]);
+         break;
+      case GLSL_TYPE_DOUBLE:
+         data.d[i] = double(values[idx]);
+         break;
+      case GLSL_TYPE_ATOMIC_UINT:
+      case GLSL_TYPE_STRUCT:
+      case GLSL_TYPE_ARRAY:
+      case GLSL_TYPE_VOID:
+      case GLSL_TYPE_ERROR:
+      case GLSL_TYPE_INTERFACE:
+      case GLSL_TYPE_SUBROUTINE:
+         ASSERT_TRUE(false);
+         break;
+      }
+   }
+
+   /* Generate and verify the constant.
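    * For reference, the fill loop above with data_index_base == 0 gives a
    * float vec3 the contents { 2.0f, 0.0f, 3.0f }, i.e. values[0..2] from
    * the table above.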
+    */
+   val = new(mem_ctx) ir_constant(type, &data);
+
+   for (unsigned i = 0; i < type->components(); i++) {
+      switch (type->base_type) {
+      case GLSL_TYPE_UINT:
+      case GLSL_TYPE_INT:
+      case GLSL_TYPE_SAMPLER:
+      case GLSL_TYPE_IMAGE:
+         ASSERT_EQ(data.i[i], val->value.i[i]);
+         break;
+      case GLSL_TYPE_FLOAT:
+         ASSERT_EQ(data.f[i], val->value.f[i]);
+         break;
+      case GLSL_TYPE_BOOL:
+         ASSERT_EQ(data.b[i], val->value.b[i]);
+         break;
+      case GLSL_TYPE_DOUBLE:
+         ASSERT_EQ(data.d[i], val->value.d[i]);
+         break;
+      case GLSL_TYPE_ATOMIC_UINT:
+      case GLSL_TYPE_STRUCT:
+      case GLSL_TYPE_ARRAY:
+      case GLSL_TYPE_VOID:
+      case GLSL_TYPE_ERROR:
+      case GLSL_TYPE_INTERFACE:
+      case GLSL_TYPE_SUBROUTINE:
+         ASSERT_TRUE(false);
+         break;
+      }
+   }
+}
+
+void
+generate_data(void *mem_ctx, enum glsl_base_type base_type,
+              unsigned columns, unsigned rows,
+              ir_constant *&val)
+{
+   /* Determine what the type of the generated constant should be.
+    */
+   const glsl_type *const type =
+      glsl_type::get_instance(base_type, rows, columns);
+   ASSERT_FALSE(type->is_error());
+
+   generate_data_element(mem_ctx, type, val, 0);
+}
+
+void
+generate_array_data(void *mem_ctx, enum glsl_base_type base_type,
+                    unsigned columns, unsigned rows, unsigned array_size,
+                    ir_constant *&val)
+{
+   /* Determine what the type of the generated constant should be.
+    */
+   const glsl_type *const element_type =
+      glsl_type::get_instance(base_type, rows, columns);
+   ASSERT_FALSE(element_type->is_error());
+
+   const glsl_type *const array_type =
+      glsl_type::get_array_instance(element_type, array_size);
+   ASSERT_FALSE(array_type->is_error());
+
+   /* Set the initial data values for the generated constant.
+    */
+   exec_list values_for_array;
+   for (unsigned i = 0; i < array_size; i++) {
+      ir_constant *element;
+
+      generate_data_element(mem_ctx, element_type, element, i);
+      values_for_array.push_tail(element);
+   }
+
+   val = new(mem_ctx) ir_constant(array_type, &values_for_array);
+}
+
+/**
+ * Verify that the data stored for the uniform matches the initializer
+ *
+ * \param storage             Backing storage for the uniform
+ * \param storage_array_size  Array size of the backing storage.  This must be
+ *                            less than or equal to the array size of the type
+ *                            of \c val.  If \c val is not an array, this must
+ *                            be zero.
+ * \param val                 Value of the initializer for the uniform.
+ * \param red_zone_size       Number of sentinel components that follow the
+ *                            real data in \c storage.
+ * \param boolean_true        Value stored in \c storage to represent \c true.
+ */
+void
+verify_data(gl_constant_value *storage, unsigned storage_array_size,
+            ir_constant *val, unsigned red_zone_size,
+            unsigned int boolean_true)
+{
+   if (val->type->base_type == GLSL_TYPE_ARRAY) {
+      const glsl_type *const element_type = val->array_elements[0]->type;
+
+      for (unsigned i = 0; i < storage_array_size; i++) {
+         verify_data(storage + (i * element_type->components()), 0,
+                     val->array_elements[i], 0, boolean_true);
+      }
+
+      const unsigned components = element_type->components();
+
+      if (red_zone_size > 0) {
+         EXPECT_TRUE(red_zone_is_intact(storage,
+                                        storage_array_size * components,
+                                        red_zone_size));
+      }
+   } else {
+      ASSERT_EQ(0u, storage_array_size);
+      for (unsigned i = 0; i < val->type->components(); i++) {
+         switch (val->type->base_type) {
+         case GLSL_TYPE_UINT:
+         case GLSL_TYPE_INT:
+         case GLSL_TYPE_SAMPLER:
+         case GLSL_TYPE_IMAGE:
+            EXPECT_EQ(val->value.i[i], storage[i].i);
+            break;
+         case GLSL_TYPE_FLOAT:
+            EXPECT_EQ(val->value.f[i], storage[i].f);
+            break;
+         case GLSL_TYPE_BOOL:
+            EXPECT_EQ(val->value.b[i] ?
boolean_true : 0, storage[i].i); + break; + case GLSL_TYPE_DOUBLE: + EXPECT_EQ(val->value.d[i], *(double *)&storage[i*2].i); + break; + case GLSL_TYPE_ATOMIC_UINT: + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + case GLSL_TYPE_VOID: + case GLSL_TYPE_ERROR: + case GLSL_TYPE_INTERFACE: + case GLSL_TYPE_SUBROUTINE: + ASSERT_TRUE(false); + break; + } + } + + if (red_zone_size > 0) { + EXPECT_TRUE(red_zone_is_intact(storage, + val->type->components(), + red_zone_size)); + } + } +} diff --git a/src/compiler/glsl/tests/uniform_initializer_utils.h b/src/compiler/glsl/tests/uniform_initializer_utils.h new file mode 100644 index 00000000000..b4d0c10220f --- /dev/null +++ b/src/compiler/glsl/tests/uniform_initializer_utils.h @@ -0,0 +1,48 @@ +/* + * Copyright © 2012 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#pragma once + +#include "program/prog_parameter.h" +#include "ir.h" +#include "ir_uniform.h" + +extern void +fill_storage_array_with_sentinels(gl_constant_value *storage, + unsigned data_size, + unsigned red_zone_size); + +extern void +generate_data(void *mem_ctx, enum glsl_base_type base_type, + unsigned columns, unsigned rows, + ir_constant *&val); + +extern void +generate_array_data(void *mem_ctx, enum glsl_base_type base_type, + unsigned columns, unsigned rows, unsigned array_size, + ir_constant *&val); + +extern void +verify_data(gl_constant_value *storage, unsigned storage_array_size, + ir_constant *val, unsigned red_zone_size, + unsigned int boolean_true); diff --git a/src/compiler/glsl/tests/varyings_test.cpp b/src/compiler/glsl/tests/varyings_test.cpp new file mode 100644 index 00000000000..0c4e0a471b8 --- /dev/null +++ b/src/compiler/glsl/tests/varyings_test.cpp @@ -0,0 +1,349 @@ +/* + * Copyright © 2013 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include <gtest/gtest.h>
+#include "main/compiler.h"
+#include "main/mtypes.h"
+#include "main/macros.h"
+#include "util/ralloc.h"
+#include "ir.h"
+#include "program/hash_table.h"
+
+/**
+ * \file varyings_test.cpp
+ *
+ * Test various aspects of linking shader stage inputs and outputs.
+ */
+
+namespace linker {
+bool
+populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
+                             hash_table *consumer_inputs,
+                             hash_table *consumer_interface_inputs,
+                             ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX]);
+
+ir_variable *
+get_matching_input(void *mem_ctx,
+                   const ir_variable *output_var,
+                   hash_table *consumer_inputs,
+                   hash_table *consumer_interface_inputs,
+                   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX]);
+}
+
+class link_varyings : public ::testing::Test {
+public:
+   link_varyings();
+
+   virtual void SetUp();
+   virtual void TearDown();
+
+   char *interface_field_name(const glsl_type *iface, unsigned field = 0)
+   {
+      return ralloc_asprintf(mem_ctx,
+                             "%s.%s",
+                             iface->name,
+                             iface->fields.structure[field].name);
+   }
+
+   void *mem_ctx;
+   exec_list ir;
+   hash_table *consumer_inputs;
+   hash_table *consumer_interface_inputs;
+
+   const glsl_type *simple_interface;
+   ir_variable *junk[VARYING_SLOT_TESS_MAX];
+};
+
+link_varyings::link_varyings()
+{
+   static const glsl_struct_field f[] = {
+      glsl_struct_field(glsl_type::vec(4), "v")
+   };
+
+   this->simple_interface =
+      glsl_type::get_interface_instance(f,
+                                        ARRAY_SIZE(f),
+                                        GLSL_INTERFACE_PACKING_STD140,
+                                        "simple_interface");
+}
+
+void
+link_varyings::SetUp()
+{
+   this->mem_ctx = ralloc_context(NULL);
+   this->ir.make_empty();
+
+   this->consumer_inputs
+      = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
+
+   this->consumer_interface_inputs
+      = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
+}
+
+void
+link_varyings::TearDown()
+{
+   ralloc_free(this->mem_ctx);
+   this->mem_ctx = NULL;
+
+   hash_table_dtor(this->consumer_inputs);
+   this->consumer_inputs = NULL;
+   hash_table_dtor(this->consumer_interface_inputs);
+   this->consumer_interface_inputs = NULL;
+}
+
+/**
+ * Hash table callback function that counts the elements in the table
+ *
+ * \sa num_elements
+ */
+static void
+ht_count_callback(const void *, void *, void *closure)
+{
+   unsigned int *counter = (unsigned int *) closure;
+
+   (*counter)++;
+}
+
+/**
+ * Helper function to count the number of elements in a hash table.
+ */
+static unsigned
+num_elements(hash_table *ht)
+{
+   unsigned int counter = 0;
+
+   hash_table_call_foreach(ht, ht_count_callback, (void *) &counter);
+
+   return counter;
+}
+
+/**
+ * Helper function to determine whether a hash table is empty.
+ */
+static bool
+is_empty(hash_table *ht)
+{
+   return num_elements(ht) == 0;
+}
+
+TEST_F(link_varyings, single_simple_input)
+{
+   ir_variable *const v =
+      new(mem_ctx) ir_variable(glsl_type::vec(4),
+                               "a",
+                               ir_var_shader_in);
+
+
+   ir.push_tail(v);
+
+   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
+                                                    &ir,
+                                                    consumer_inputs,
+                                                    consumer_interface_inputs,
+                                                    junk));
+
+   EXPECT_EQ((void *) v, hash_table_find(consumer_inputs, "a"));
+   EXPECT_EQ(1u, num_elements(consumer_inputs));
+   EXPECT_TRUE(is_empty(consumer_interface_inputs));
+}
+
+TEST_F(link_varyings, gl_ClipDistance)
+{
+   const glsl_type *const array_8_of_float =
+      glsl_type::get_array_instance(glsl_type::vec(1), 8);
+
+   ir_variable *const clipdistance =
+      new(mem_ctx) ir_variable(array_8_of_float,
+                               "gl_ClipDistance",
+                               ir_var_shader_in);
+
+   clipdistance->data.explicit_location = true;
+   clipdistance->data.location = VARYING_SLOT_CLIP_DIST0;
+   clipdistance->data.explicit_index = 0;
+
+   ir.push_tail(clipdistance);
+
+   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
+                                                    &ir,
+                                                    consumer_inputs,
+                                                    consumer_interface_inputs,
+                                                    junk));
+
+   EXPECT_EQ(clipdistance, junk[VARYING_SLOT_CLIP_DIST0]);
+   EXPECT_TRUE(is_empty(consumer_inputs));
+   EXPECT_TRUE(is_empty(consumer_interface_inputs));
+}
+
+TEST_F(link_varyings, single_interface_input)
+{
+   ir_variable *const v =
+      new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type,
+                               simple_interface->fields.structure[0].name,
+                               ir_var_shader_in);
+
+   v->init_interface_type(simple_interface);
+
+   ir.push_tail(v);
+
+   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
+                                                    &ir,
+                                                    consumer_inputs,
+                                                    consumer_interface_inputs,
+                                                    junk));
+   char *const full_name = interface_field_name(simple_interface);
+
+   EXPECT_EQ((void *) v, hash_table_find(consumer_interface_inputs, full_name));
+   EXPECT_EQ(1u, num_elements(consumer_interface_inputs));
+   EXPECT_TRUE(is_empty(consumer_inputs));
+}
+
+TEST_F(link_varyings, one_interface_and_one_simple_input)
+{
+   ir_variable *const v =
+      new(mem_ctx) ir_variable(glsl_type::vec(4),
+                               "a",
+                               ir_var_shader_in);
+
+
+   ir.push_tail(v);
+
+   ir_variable *const iface =
+      new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type,
+                               simple_interface->fields.structure[0].name,
+                               ir_var_shader_in);
+
+   iface->init_interface_type(simple_interface);
+
+   ir.push_tail(iface);
+
+   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
+                                                    &ir,
+                                                    consumer_inputs,
+                                                    consumer_interface_inputs,
+                                                    junk));
+
+   char *const iface_field_name = interface_field_name(simple_interface);
+
+   EXPECT_EQ((void *) iface, hash_table_find(consumer_interface_inputs,
+                                             iface_field_name));
+   EXPECT_EQ(1u, num_elements(consumer_interface_inputs));
+
+   EXPECT_EQ((void *) v, hash_table_find(consumer_inputs, "a"));
+   EXPECT_EQ(1u, num_elements(consumer_inputs));
+}
+
+TEST_F(link_varyings, invalid_interface_input)
+{
+   ir_variable *const v =
+      new(mem_ctx) ir_variable(simple_interface,
+                               "named_interface",
+                               ir_var_shader_in);
+
+   ASSERT_EQ(simple_interface, v->get_interface_type());
+
+   ir.push_tail(v);
+
+   EXPECT_FALSE(linker::populate_consumer_input_sets(mem_ctx,
+                                                     &ir,
+                                                     consumer_inputs,
+                                                     consumer_interface_inputs,
+                                                     junk));
+}
+
+TEST_F(link_varyings, interface_field_doesnt_match_noninterface)
+{
+   char *const iface_field_name = interface_field_name(simple_interface);
+
+   /* The input shader has a single input variable named "a.v".
+    */
+   ir_variable *const in_v =
+      new(mem_ctx) ir_variable(glsl_type::vec(4),
+                               iface_field_name,
+                               ir_var_shader_in);
+
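   /* Queue the bare "a.v" input so that populate_consumer_input_sets below
    * registers it as an ordinary, non-interface input.
    */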
ir.push_tail(in_v);
+
+   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
+                                                    &ir,
+                                                    consumer_inputs,
+                                                    consumer_interface_inputs,
+                                                    junk));
+
+   /* Create an output variable, "v", that is part of an interface block named
+    * "a".  They should not match.
+    */
+   ir_variable *const out_v =
+      new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type,
+                               simple_interface->fields.structure[0].name,
+                               ir_var_shader_in);
+
+   out_v->init_interface_type(simple_interface);
+
+   ir_variable *const match =
+      linker::get_matching_input(mem_ctx,
+                                 out_v,
+                                 consumer_inputs,
+                                 consumer_interface_inputs,
+                                 junk);
+
+   EXPECT_EQ(NULL, match);
+}
+
+TEST_F(link_varyings, interface_field_doesnt_match_noninterface_vice_versa)
+{
+   char *const iface_field_name = interface_field_name(simple_interface);
+
+   /* The input shader has a single variable, "v", that is part of an
+    * interface block named "a".
+    */
+   ir_variable *const in_v =
+      new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type,
+                               simple_interface->fields.structure[0].name,
+                               ir_var_shader_in);
+
+   in_v->init_interface_type(simple_interface);
+
+   ir.push_tail(in_v);
+
+   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
+                                                    &ir,
+                                                    consumer_inputs,
+                                                    consumer_interface_inputs,
+                                                    junk));
+
+   /* Create an output variable "a.v".  They should not match.
+    */
+   ir_variable *const out_v =
+      new(mem_ctx) ir_variable(glsl_type::vec(4),
+                               iface_field_name,
+                               ir_var_shader_out);
+
+   ir_variable *const match =
+      linker::get_matching_input(mem_ctx,
+                                 out_v,
+                                 consumer_inputs,
+                                 consumer_interface_inputs,
+                                 junk);
+
+   EXPECT_EQ(NULL, match);
+}
diff --git a/src/compiler/glsl_types.cpp b/src/compiler/glsl_types.cpp
index 7e60e7c1098..17ebf07acbc 100644
--- a/src/compiler/glsl_types.cpp
+++ b/src/compiler/glsl_types.cpp
@@ -23,7 +23,7 @@
 
 #include <stdio.h>
 #include "main/macros.h"
-#include "glsl/glsl_parser_extras.h"
+#include "compiler/glsl/glsl_parser_extras.h"
 #include "glsl_types.h"
 #include "util/hash_table.h"
 
diff --git a/src/compiler/nir/Makefile.sources b/src/compiler/nir/Makefile.sources
new file mode 100644
index 00000000000..0755a100e65
--- /dev/null
+++ b/src/compiler/nir/Makefile.sources
@@ -0,0 +1,71 @@
+NIR_GENERATED_FILES = \
+	nir_builder_opcodes.h \
+	nir_constant_expressions.c \
+	nir_opcodes.c \
+	nir_opcodes.h \
+	nir_opt_algebraic.c
+
+NIR_FILES = \
+	glsl_to_nir.cpp \
+	glsl_to_nir.h \
+	nir.c \
+	nir.h \
+	nir_array.h \
+	nir_builder.h \
+	nir_clone.c \
+	nir_constant_expressions.h \
+	nir_control_flow.c \
+	nir_control_flow.h \
+	nir_control_flow_private.h \
+	nir_dominance.c \
+	nir_from_ssa.c \
+	nir_gs_count_vertices.c \
+	nir_intrinsics.c \
+	nir_intrinsics.h \
+	nir_instr_set.c \
+	nir_instr_set.h \
+	nir_liveness.c \
+	nir_lower_alu_to_scalar.c \
+	nir_lower_atomics.c \
+	nir_lower_clip.c \
+	nir_lower_global_vars_to_local.c \
+	nir_lower_gs_intrinsics.c \
+	nir_lower_load_const_to_scalar.c \
+	nir_lower_locals_to_regs.c \
+	nir_lower_idiv.c \
+	nir_lower_io.c \
+	nir_lower_outputs_to_temporaries.c \
+	nir_lower_phis_to_scalar.c \
+	nir_lower_samplers.c \
+	nir_lower_system_values.c \
+	nir_lower_tex.c \
+	nir_lower_to_source_mods.c \
+	nir_lower_two_sided_color.c \
+	nir_lower_vars_to_ssa.c \
+	nir_lower_var_copies.c \
+	nir_lower_vec_to_movs.c \
+	nir_metadata.c \
+	nir_move_vec_src_uses_to_dest.c \
+	nir_normalize_cubemap_coords.c \
+	nir_opt_constant_folding.c \
+	nir_opt_copy_propagate.c \
+	nir_opt_cse.c \
+	nir_opt_dce.c \
+	nir_opt_dead_cf.c \
+	nir_opt_gcm.c \
+	nir_opt_global_to_local.c \
nir_opt_peephole_select.c \ + nir_opt_remove_phis.c \ + nir_opt_undef.c \ + nir_print.c \ + nir_remove_dead_variables.c \ + nir_search.c \ + nir_search.h \ + nir_split_var_copies.c \ + nir_sweep.c \ + nir_to_ssa.c \ + nir_validate.c \ + nir_vla.h \ + nir_worklist.c \ + nir_worklist.h + diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp index 33b1f5c7b9e..4b76d234420 100644 --- a/src/compiler/nir/glsl_to_nir.cpp +++ b/src/compiler/nir/glsl_to_nir.cpp @@ -28,9 +28,9 @@ #include "glsl_to_nir.h" #include "nir_control_flow.h" #include "nir_builder.h" -#include "glsl/ir_visitor.h" -#include "glsl/ir_hierarchical_visitor.h" -#include "glsl/ir.h" +#include "compiler/glsl/ir_visitor.h" +#include "compiler/glsl/ir_hierarchical_visitor.h" +#include "compiler/glsl/ir.h" #include "main/imports.h" /* diff --git a/src/compiler/nir/glsl_to_nir.h b/src/compiler/nir/glsl_to_nir.h index 20d2a380a26..e3fe9b0246a 100644 --- a/src/compiler/nir/glsl_to_nir.h +++ b/src/compiler/nir/glsl_to_nir.h @@ -26,7 +26,7 @@ */ #include "nir.h" -#include "glsl/glsl_parser_extras.h" +#include "compiler/glsl/glsl_parser_extras.h" #ifdef __cplusplus extern "C" { diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 54e23eb4754..aec75fb930c 100644 --- a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -28,7 +28,7 @@ #pragma once #include "util/hash_table.h" -#include "glsl/list.h" +#include "compiler/glsl/list.h" #include "GL/gl.h" /* GLenum */ #include "util/list.h" #include "util/ralloc.h" diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c index 2cbc1b75348..1a4458d4f84 100644 --- a/src/compiler/nir/nir_lower_atomics.c +++ b/src/compiler/nir/nir_lower_atomics.c @@ -25,7 +25,7 @@ * */ -#include "glsl/ir_uniform.h" +#include "compiler/glsl/ir_uniform.h" #include "nir.h" #include "main/config.h" #include diff --git a/src/compiler/nir/nir_lower_samplers.c b/src/compiler/nir/nir_lower_samplers.c index 9c912129f09..96e82914014 100644 --- a/src/compiler/nir/nir_lower_samplers.c +++ b/src/compiler/nir/nir_lower_samplers.c @@ -26,7 +26,7 @@ #include "nir.h" #include "nir_builder.h" #include "program/hash_table.h" -#include "glsl/ir_uniform.h" +#include "compiler/glsl/ir_uniform.h" #include "main/compiler.h" #include "main/mtypes.h" diff --git a/src/compiler/nir_types.cpp b/src/compiler/nir_types.cpp index ddc43becf9a..a87dcd8dc6a 100644 --- a/src/compiler/nir_types.cpp +++ b/src/compiler/nir_types.cpp @@ -26,7 +26,7 @@ */ #include "nir_types.h" -#include "glsl/ir.h" +#include "compiler/glsl/ir.h" void glsl_print_type(const glsl_type *type, FILE *fp) diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c index 6649e403d08..3e7d69f73ed 100644 --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c @@ -26,7 +26,7 @@ #include "compiler/nir/nir.h" #include "compiler/nir/nir_control_flow.h" #include "compiler/nir/nir_builder.h" -#include "glsl/list.h" +#include "compiler/glsl/list.h" #include "compiler/shader_enums.h" #include "tgsi_to_nir.h" diff --git a/src/glsl/.gitignore b/src/glsl/.gitignore deleted file mode 100644 index dda423f83db..00000000000 --- a/src/glsl/.gitignore +++ /dev/null @@ -1,10 +0,0 @@ -glsl_compiler -glsl_lexer.cpp -glsl_parser.cpp -glsl_parser.h -glsl_parser.output -glsl_test -subtest-cr/ -subtest-lf/ -subtest-cr-lf/ -subtest-lf-cr/ diff --git a/src/glsl/Android.gen.mk b/src/glsl/Android.gen.mk deleted file mode 100644 index c5741b40bc5..00000000000 --- 
a/src/glsl/Android.gen.mk +++ /dev/null @@ -1,76 +0,0 @@ -# Mesa 3-D graphics library -# -# Copyright (C) 2010-2011 Chia-I Wu -# Copyright (C) 2010-2011 LunarG Inc. -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# included by glsl Android.mk for source generation - -ifeq ($(LOCAL_MODULE_CLASS),) -LOCAL_MODULE_CLASS := STATIC_LIBRARIES -endif - -intermediates := $(call local-generated-sources-dir) - -LOCAL_SRC_FILES := $(LOCAL_SRC_FILES) - -LOCAL_C_INCLUDES += \ - $(intermediates)/glcpp \ - $(MESA_TOP)/src/glsl/glcpp \ - -LOCAL_GENERATED_SOURCES += $(addprefix $(intermediates)/, \ - $(LIBGLCPP_GENERATED_FILES) \ - $(LIBGLSL_GENERATED_CXX_FILES)) - -define local-l-or-ll-to-c-or-cpp - @mkdir -p $(dir $@) - @echo "Mesa Lex: $(PRIVATE_MODULE) <= $<" - $(hide) $(LEX) --nounistd -o$@ $< -endef - -define glsl_local-y-to-c-and-h - @mkdir -p $(dir $@) - @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" - $(hide) $(YACC) -o $@ -p "glcpp_parser_" $< -endef - -define local-yy-to-cpp-and-h - @mkdir -p $(dir $@) - @echo "Mesa Yacc: $(PRIVATE_MODULE) <= $<" - $(hide) $(YACC) -p "_mesa_glsl_" -o $@ $< - touch $(@:$1=$(YACC_HEADER_SUFFIX)) - echo '#ifndef '$(@F:$1=_h) > $(@:$1=.h) - echo '#define '$(@F:$1=_h) >> $(@:$1=.h) - cat $(@:$1=$(YACC_HEADER_SUFFIX)) >> $(@:$1=.h) - echo '#endif' >> $(@:$1=.h) - rm -f $(@:$1=$(YACC_HEADER_SUFFIX)) -endef - -$(intermediates)/glsl_lexer.cpp: $(LOCAL_PATH)/glsl_lexer.ll - $(call local-l-or-ll-to-c-or-cpp) - -$(intermediates)/glsl_parser.cpp: $(LOCAL_PATH)/glsl_parser.yy - $(call local-yy-to-cpp-and-h,.cpp) - -$(intermediates)/glcpp/glcpp-lex.c: $(LOCAL_PATH)/glcpp/glcpp-lex.l - $(call local-l-or-ll-to-c-or-cpp) - -$(intermediates)/glcpp/glcpp-parse.c: $(LOCAL_PATH)/glcpp/glcpp-parse.y - $(call glsl_local-y-to-c-and-h) diff --git a/src/glsl/Android.mk b/src/glsl/Android.mk deleted file mode 100644 index 9cbb9a339a1..00000000000 --- a/src/glsl/Android.mk +++ /dev/null @@ -1,76 +0,0 @@ -# Mesa 3-D graphics library -# -# Copyright (C) 2010-2011 Chia-I Wu -# Copyright (C) 2010-2011 LunarG Inc. 
-# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included -# in all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# Android.mk for glsl - -LOCAL_PATH := $(call my-dir) - -include $(LOCAL_PATH)/Makefile.sources - -# --------------------------------------- -# Build libmesa_glsl -# --------------------------------------- - -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := \ - $(LIBGLCPP_FILES) \ - $(LIBGLSL_FILES) \ - $(NIR_FILES) - -LOCAL_C_INCLUDES := \ - $(MESA_TOP)/src/mapi \ - $(MESA_TOP)/src/mesa \ - $(MESA_TOP)/src/gallium/include \ - $(MESA_TOP)/src/gallium/auxiliary - -LOCAL_STATIC_LIBRARIES := libmesa_compiler - -LOCAL_MODULE := libmesa_glsl - -include $(LOCAL_PATH)/Android.gen.mk -include $(MESA_COMMON_MK) -include $(BUILD_STATIC_LIBRARY) - -# --------------------------------------- -# Build glsl_compiler -# --------------------------------------- - -include $(CLEAR_VARS) - -LOCAL_SRC_FILES := \ - $(GLSL_COMPILER_CXX_FILES) - -LOCAL_C_INCLUDES := \ - $(MESA_TOP)/src/mapi \ - $(MESA_TOP)/src/mesa \ - $(MESA_TOP)/src/gallium/include \ - $(MESA_TOP)/src/gallium/auxiliary - -LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_glsl_utils libmesa_util - -LOCAL_MODULE_TAGS := eng -LOCAL_MODULE := glsl_compiler - -include $(MESA_COMMON_MK) -include $(BUILD_EXECUTABLE) diff --git a/src/glsl/Makefile.am b/src/glsl/Makefile.am deleted file mode 100644 index 9954b812403..00000000000 --- a/src/glsl/Makefile.am +++ /dev/null @@ -1,228 +0,0 @@ -# Copyright © 2012 Jon TURNEY -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS -# IN THE SOFTWARE. 
- -AM_CPPFLAGS = \ - -I$(top_srcdir)/include \ - -I$(top_srcdir)/src \ - -I$(top_srcdir)/src/mapi \ - -I$(top_srcdir)/src/mesa/ \ - -I$(top_srcdir)/src/gallium/include \ - -I$(top_srcdir)/src/gallium/auxiliary \ - -I$(top_srcdir)/src/glsl/glcpp \ - -I$(top_srcdir)/src/gtest/include \ - $(DEFINES) -AM_CFLAGS = \ - $(VISIBILITY_CFLAGS) \ - $(MSVC2013_COMPAT_CFLAGS) -AM_CXXFLAGS = \ - $(VISIBILITY_CXXFLAGS) \ - $(MSVC2013_COMPAT_CXXFLAGS) - -EXTRA_DIST = tests glcpp/tests README TODO glcpp/README \ - glsl_lexer.ll \ - glsl_parser.yy \ - glcpp/glcpp-lex.l \ - glcpp/glcpp-parse.y \ - SConscript - -include Makefile.sources - -TESTS = glcpp/tests/glcpp-test \ - glcpp/tests/glcpp-test-cr-lf \ - tests/blob-test \ - tests/general-ir-test \ - tests/optimization-test \ - tests/sampler-types-test \ - tests/uniform-initializer-test - -TESTS_ENVIRONMENT= \ - export PYTHON2=$(PYTHON2); \ - export PYTHON_FLAGS=$(PYTHON_FLAGS); - -noinst_LTLIBRARIES = libglsl.la libglcpp.la -check_PROGRAMS = \ - glcpp/glcpp \ - glsl_test \ - tests/blob-test \ - tests/general-ir-test \ - tests/sampler-types-test \ - tests/uniform-initializer-test - -noinst_PROGRAMS = glsl_compiler - -tests_blob_test_SOURCES = \ - tests/blob_test.c -tests_blob_test_LDADD = \ - $(top_builddir)/src/glsl/libglsl.la - -tests_general_ir_test_SOURCES = \ - standalone_scaffolding.cpp \ - tests/builtin_variable_test.cpp \ - tests/invalidate_locations_test.cpp \ - tests/general_ir_test.cpp \ - tests/varyings_test.cpp -tests_general_ir_test_CFLAGS = \ - $(PTHREAD_CFLAGS) -tests_general_ir_test_LDADD = \ - $(top_builddir)/src/gtest/libgtest.la \ - $(top_builddir)/src/glsl/libglsl.la \ - $(top_builddir)/src/libglsl_util.la \ - $(PTHREAD_LIBS) - -tests_uniform_initializer_test_SOURCES = \ - tests/copy_constant_to_storage_tests.cpp \ - tests/set_uniform_initializer_tests.cpp \ - tests/uniform_initializer_utils.cpp \ - tests/uniform_initializer_utils.h -tests_uniform_initializer_test_CFLAGS = \ - $(PTHREAD_CFLAGS) -tests_uniform_initializer_test_LDADD = \ - $(top_builddir)/src/gtest/libgtest.la \ - $(top_builddir)/src/glsl/libglsl.la \ - $(top_builddir)/src/libglsl_util.la \ - $(PTHREAD_LIBS) - -tests_sampler_types_test_SOURCES = \ - tests/sampler_types_test.cpp -tests_sampler_types_test_CFLAGS = \ - $(PTHREAD_CFLAGS) -tests_sampler_types_test_LDADD = \ - $(top_builddir)/src/gtest/libgtest.la \ - $(top_builddir)/src/glsl/libglsl.la \ - $(top_builddir)/src/libglsl_util.la \ - $(PTHREAD_LIBS) - -libglcpp_la_LIBADD = \ - $(top_builddir)/src/util/libmesautil.la -libglcpp_la_SOURCES = \ - glcpp/glcpp-lex.c \ - glcpp/glcpp-parse.c \ - glcpp/glcpp-parse.h \ - $(LIBGLCPP_FILES) - -glcpp_glcpp_SOURCES = \ - glcpp/glcpp.c -glcpp_glcpp_LDADD = \ - libglcpp.la \ - $(top_builddir)/src/libglsl_util.la \ - -lm - -libglsl_la_LIBADD = \ - $(top_builddir)/src/compiler/nir/libnir.la \ - libglcpp.la - -libglsl_la_SOURCES = \ - glsl_lexer.cpp \ - glsl_parser.cpp \ - glsl_parser.h \ - $(LIBGLSL_FILES) - - -glsl_compiler_SOURCES = \ - $(GLSL_COMPILER_CXX_FILES) - -glsl_compiler_LDADD = \ - libglsl.la \ - $(top_builddir)/src/libglsl_util.la \ - $(top_builddir)/src/util/libmesautil.la \ - $(PTHREAD_LIBS) - -glsl_test_SOURCES = \ - standalone_scaffolding.cpp \ - test.cpp \ - test_optpass.cpp \ - test_optpass.h - -glsl_test_LDADD = \ - libglsl.la \ - $(top_builddir)/src/libglsl_util.la \ - $(PTHREAD_LIBS) - -# We write our own rules for yacc and lex below. 
We'd rather use automake,
-# but automake makes it especially difficult for a number of reasons:
-#
-#  * < automake-1.12 generates .h files from .yy and .ypp files, but
-#    >=automake-1.12 generates .hh and .hpp files respectively.  There's no
-#    good way of making a project that uses C++ yacc files compatible with
-#    both versions of automake.  Strong work automake developers.
-#
-#  * Since we're generating code from .l/.y files in a subdirectory (glcpp/)
-#    we'd like the resulting generated code to also go in glcpp/ for purposes
-#    of distribution.  Automake gives no way to do this.
-#
-#  * Since we're building multiple yacc parsers into one library (and via one
-#    Makefile) we have to use per-target YFLAGS.  Using per-target YFLAGS causes
-#    automake to name the resulting generated code as <library-name>_filename.c.
-#    Frankly, that's ugly and we don't want a libglcpp_glcpp_parser.h file.
-
-# In order to make build output print "LEX" and "YACC", we reproduce the
-# automake variables below.
-
-AM_V_LEX = $(am__v_LEX_$(V))
-am__v_LEX_ = $(am__v_LEX_$(AM_DEFAULT_VERBOSITY))
-am__v_LEX_0 = @echo "  LEX     " $@;
-am__v_LEX_1 =
-
-AM_V_YACC = $(am__v_YACC_$(V))
-am__v_YACC_ = $(am__v_YACC_$(AM_DEFAULT_VERBOSITY))
-am__v_YACC_0 = @echo "  YACC    " $@;
-am__v_YACC_1 =
-
-MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
-YACC_GEN = $(AM_V_YACC)$(YACC) $(YFLAGS)
-LEX_GEN = $(AM_V_LEX)$(LEX) $(LFLAGS)
-
-glsl_parser.cpp glsl_parser.h: glsl_parser.yy
-	$(YACC_GEN) -o $@ -p "_mesa_glsl_" --defines=$(builddir)/glsl_parser.h $(srcdir)/glsl_parser.yy
-
-glsl_lexer.cpp: glsl_lexer.ll
-	$(LEX_GEN) -o $@ $(srcdir)/glsl_lexer.ll
-
-glcpp/glcpp-parse.c glcpp/glcpp-parse.h: glcpp/glcpp-parse.y
-	$(MKDIR_GEN)
-	$(YACC_GEN) -o $@ -p "glcpp_parser_" --defines=$(builddir)/glcpp/glcpp-parse.h $(srcdir)/glcpp/glcpp-parse.y
-
-glcpp/glcpp-lex.c: glcpp/glcpp-lex.l
-	$(MKDIR_GEN)
-	$(LEX_GEN) -o $@ $(srcdir)/glcpp/glcpp-lex.l
-
-# Only the parsers (specifically the header files generated at the same time)
-# need to be in BUILT_SOURCES.  Though if we list the parser headers, YACC is
-# called for both the .c/.cpp file and the .h files.  By listing the .c/.cpp
-# files, YACC is only executed once for each parser.  The rest of the generated
-# code will be created at the appropriate times according to standard automake
-# dependency rules.
-BUILT_SOURCES = \ - glsl_parser.cpp \ - glsl_lexer.cpp \ - glcpp/glcpp-parse.c \ - glcpp/glcpp-lex.c -CLEANFILES = \ - glcpp/glcpp-parse.h \ - glsl_parser.h \ - $(BUILT_SOURCES) - -clean-local: - $(RM) -r subtest-cr subtest-cr-lf subtest-lf subtest-lf-cr - -dist-hook: - $(RM) glcpp/tests/*.out - $(RM) glcpp/tests/subtest*/*.out diff --git a/src/glsl/Makefile.sources b/src/glsl/Makefile.sources deleted file mode 100644 index 08b40c5cc8f..00000000000 --- a/src/glsl/Makefile.sources +++ /dev/null @@ -1,222 +0,0 @@ -# shared source lists for Makefile, SConscript, and Android.mk - -# libglcpp - -LIBGLCPP_FILES = \ - glcpp/glcpp.h \ - glcpp/pp.c - -LIBGLCPP_GENERATED_FILES = \ - glcpp/glcpp-lex.c \ - glcpp/glcpp-parse.c - -NIR_GENERATED_FILES = \ - nir/nir_builder_opcodes.h \ - nir/nir_constant_expressions.c \ - nir/nir_opcodes.c \ - nir/nir_opcodes.h \ - nir/nir_opt_algebraic.c - -NIR_FILES = \ - nir/nir.c \ - nir/nir.h \ - nir/nir_array.h \ - nir/nir_builder.h \ - nir/nir_clone.c \ - nir/nir_constant_expressions.h \ - nir/nir_control_flow.c \ - nir/nir_control_flow.h \ - nir/nir_control_flow_private.h \ - nir/nir_dominance.c \ - nir/nir_from_ssa.c \ - nir/nir_gs_count_vertices.c \ - nir/nir_intrinsics.c \ - nir/nir_intrinsics.h \ - nir/nir_instr_set.c \ - nir/nir_instr_set.h \ - nir/nir_liveness.c \ - nir/nir_lower_alu_to_scalar.c \ - nir/nir_lower_atomics.c \ - nir/nir_lower_clip.c \ - nir/nir_lower_global_vars_to_local.c \ - nir/nir_lower_gs_intrinsics.c \ - nir/nir_lower_load_const_to_scalar.c \ - nir/nir_lower_locals_to_regs.c \ - nir/nir_lower_idiv.c \ - nir/nir_lower_io.c \ - nir/nir_lower_outputs_to_temporaries.c \ - nir/nir_lower_phis_to_scalar.c \ - nir/nir_lower_samplers.c \ - nir/nir_lower_system_values.c \ - nir/nir_lower_tex.c \ - nir/nir_lower_to_source_mods.c \ - nir/nir_lower_two_sided_color.c \ - nir/nir_lower_vars_to_ssa.c \ - nir/nir_lower_var_copies.c \ - nir/nir_lower_vec_to_movs.c \ - nir/nir_metadata.c \ - nir/nir_move_vec_src_uses_to_dest.c \ - nir/nir_normalize_cubemap_coords.c \ - nir/nir_opt_constant_folding.c \ - nir/nir_opt_copy_propagate.c \ - nir/nir_opt_cse.c \ - nir/nir_opt_dce.c \ - nir/nir_opt_dead_cf.c \ - nir/nir_opt_gcm.c \ - nir/nir_opt_global_to_local.c \ - nir/nir_opt_peephole_select.c \ - nir/nir_opt_remove_phis.c \ - nir/nir_opt_undef.c \ - nir/nir_print.c \ - nir/nir_remove_dead_variables.c \ - nir/nir_search.c \ - nir/nir_search.h \ - nir/nir_split_var_copies.c \ - nir/nir_sweep.c \ - nir/nir_to_ssa.c \ - nir/nir_validate.c \ - nir/nir_vla.h \ - nir/nir_worklist.c \ - nir/nir_worklist.h - -# libglsl - -LIBGLSL_FILES = \ - ast.h \ - ast_array_index.cpp \ - ast_expr.cpp \ - ast_function.cpp \ - ast_to_hir.cpp \ - ast_type.cpp \ - blob.c \ - blob.h \ - builtin_functions.cpp \ - builtin_types.cpp \ - builtin_variables.cpp \ - glsl_parser_extras.cpp \ - glsl_parser_extras.h \ - glsl_symbol_table.cpp \ - glsl_symbol_table.h \ - hir_field_selection.cpp \ - ir_basic_block.cpp \ - ir_basic_block.h \ - ir_builder.cpp \ - ir_builder.h \ - ir_clone.cpp \ - ir_constant_expression.cpp \ - ir.cpp \ - ir.h \ - ir_equals.cpp \ - ir_expression_flattening.cpp \ - ir_expression_flattening.h \ - ir_function_can_inline.cpp \ - ir_function_detect_recursion.cpp \ - ir_function_inlining.h \ - ir_function.cpp \ - ir_hierarchical_visitor.cpp \ - ir_hierarchical_visitor.h \ - ir_hv_accept.cpp \ - ir_import_prototypes.cpp \ - ir_optimization.h \ - ir_print_visitor.cpp \ - ir_print_visitor.h \ - ir_reader.cpp \ - ir_reader.h \ - ir_rvalue_visitor.cpp \ - ir_rvalue_visitor.h \ - 
ir_set_program_inouts.cpp \ - ir_uniform.h \ - ir_validate.cpp \ - ir_variable_refcount.cpp \ - ir_variable_refcount.h \ - ir_visitor.h \ - linker.cpp \ - linker.h \ - link_atomics.cpp \ - link_functions.cpp \ - link_interface_blocks.cpp \ - link_uniforms.cpp \ - link_uniform_initializers.cpp \ - link_uniform_block_active_visitor.cpp \ - link_uniform_block_active_visitor.h \ - link_uniform_blocks.cpp \ - link_varyings.cpp \ - link_varyings.h \ - list.h \ - loop_analysis.cpp \ - loop_analysis.h \ - loop_controls.cpp \ - loop_unroll.cpp \ - lower_buffer_access.cpp \ - lower_buffer_access.h \ - lower_clip_distance.cpp \ - lower_const_arrays_to_uniforms.cpp \ - lower_discard.cpp \ - lower_discard_flow.cpp \ - lower_if_to_cond_assign.cpp \ - lower_instructions.cpp \ - lower_jumps.cpp \ - lower_mat_op_to_vec.cpp \ - lower_noise.cpp \ - lower_offset_array.cpp \ - lower_packed_varyings.cpp \ - lower_named_interface_blocks.cpp \ - lower_packing_builtins.cpp \ - lower_subroutine.cpp \ - lower_tess_level.cpp \ - lower_texture_projection.cpp \ - lower_variable_index_to_cond_assign.cpp \ - lower_vec_index_to_cond_assign.cpp \ - lower_vec_index_to_swizzle.cpp \ - lower_vector.cpp \ - lower_vector_derefs.cpp \ - lower_vector_insert.cpp \ - lower_vertex_id.cpp \ - lower_output_reads.cpp \ - lower_shared_reference.cpp \ - lower_ubo_reference.cpp \ - opt_algebraic.cpp \ - opt_array_splitting.cpp \ - opt_conditional_discard.cpp \ - opt_constant_folding.cpp \ - opt_constant_propagation.cpp \ - opt_constant_variable.cpp \ - opt_copy_propagation.cpp \ - opt_copy_propagation_elements.cpp \ - opt_dead_builtin_variables.cpp \ - opt_dead_builtin_varyings.cpp \ - opt_dead_code.cpp \ - opt_dead_code_local.cpp \ - opt_dead_functions.cpp \ - opt_flatten_nested_if_blocks.cpp \ - opt_flip_matrices.cpp \ - opt_function_inlining.cpp \ - opt_if_simplification.cpp \ - opt_minmax.cpp \ - opt_noop_swizzle.cpp \ - opt_rebalance_tree.cpp \ - opt_redundant_jumps.cpp \ - opt_structure_splitting.cpp \ - opt_swizzle_swizzle.cpp \ - opt_tree_grafting.cpp \ - opt_vectorize.cpp \ - program.h \ - s_expression.cpp \ - s_expression.h - -# glsl to nir pass -GLSL_TO_NIR_FILES = \ - nir/glsl_to_nir.cpp \ - nir/glsl_to_nir.h - -# glsl_compiler - -GLSL_COMPILER_CXX_FILES = \ - standalone_scaffolding.cpp \ - standalone_scaffolding.h \ - main.cpp - -# libglsl generated sources -LIBGLSL_GENERATED_CXX_FILES = \ - glsl_lexer.cpp \ - glsl_parser.cpp diff --git a/src/glsl/README b/src/glsl/README deleted file mode 100644 index bfcf69f903a..00000000000 --- a/src/glsl/README +++ /dev/null @@ -1,228 +0,0 @@ -Welcome to Mesa's GLSL compiler. A brief overview of how things flow: - -1) lex and yacc-based preprocessor takes the incoming shader string -and produces a new string containing the preprocessed shader. This -takes care of things like #if, #ifdef, #define, and preprocessor macro -invocations. Note that #version, #extension, and some others are -passed straight through. See glcpp/* - -2) lex and yacc-based parser takes the preprocessed string and -generates the AST (abstract syntax tree). Almost no checking is -performed in this stage. See glsl_lexer.ll and glsl_parser.yy. - -3) The AST is converted to "HIR". This is the intermediate -representation of the compiler. Constructors are generated, function -calls are resolved to particular function signatures, and all the -semantic checking is performed. See ast_*.cpp for the conversion, and -ir.h for the IR structures. 
- -4) The driver (Mesa, or main.cpp for the standalone binary) performs -optimizations. These include copy propagation, dead code elimination, -constant folding, and others. Generally the driver will call -optimizations in a loop, as each may open up opportunities for other -optimizations to do additional work. See most files called ir_*.cpp - -5) linking is performed. This does checking to ensure that the -outputs of the vertex shader match the inputs of the fragment shader, -and assigns locations to uniforms, attributes, and varyings. See -linker.cpp. - -6) The driver may perform additional optimization at this point, as -for example dead code elimination previously couldn't remove functions -or global variable usage when we didn't know what other code would be -linked in. - -7) The driver performs code generation out of the IR, taking a linked -shader program and producing a compiled program for each stage. See -../mesa/program/ir_to_mesa.cpp for Mesa IR code generation. - -FAQ: - -Q: What is HIR versus IR versus LIR? - -A: The idea behind the naming was that ast_to_hir would produce a -high-level IR ("HIR"), with things like matrix operations, structure -assignments, etc., present. A series of lowering passes would occur -that do things like break matrix multiplication into a series of dot -products/MADs, make structure assignment be a series of assignment of -components, flatten if statements into conditional moves, and such, -producing a low level IR ("LIR"). - -However, it now appears that each driver will have different -requirements from a LIR. A 915-generation chipset wants all functions -inlined, all loops unrolled, all ifs flattened, no variable array -accesses, and matrix multiplication broken down. The Mesa IR backend -for swrast would like matrices and structure assignment broken down, -but it can support function calls and dynamic branching. A 965 vertex -shader IR backend could potentially even handle some matrix operations -without breaking them down, but the 965 fragment shader IR backend -would want to break to have (almost) all operations down channel-wise -and perform optimization on that. As a result, there's no single -low-level IR that will make everyone happy. So that usage has fallen -out of favor, and each driver will perform a series of lowering passes -to take the HIR down to whatever restrictions it wants to impose -before doing codegen. - -Q: How is the IR structured? - -A: The best way to get started seeing it would be to run the -standalone compiler against a shader: - -./glsl_compiler --dump-lir \ - ~/src/piglit/tests/shaders/glsl-orangebook-ch06-bump.frag - -So for example one of the ir_instructions in main() contains: - -(assign (constant bool (1)) (var_ref litColor) (expression vec3 * (var_ref Surf -aceColor) (var_ref __retval) ) ) - -Or more visually: - (assign) - / | \ - (var_ref) (expression *) (constant bool 1) - / / \ -(litColor) (var_ref) (var_ref) - / \ - (SurfaceColor) (__retval) - -which came from: - -litColor = SurfaceColor * max(dot(normDelta, LightDir), 0.0); - -(the max call is not represented in this expression tree, as it was a -function call that got inlined but not brought into this expression -tree) - -Each of those nodes is a subclass of ir_instruction. A particular -ir_instruction instance may only appear once in the whole IR tree with -the exception of ir_variables, which appear once as variable -declarations: - -(declare () vec3 normDelta) - -and multiple times as the targets of variable dereferences: -... 
-(assign (constant bool (1)) (var_ref __retval) (expression float dot
- (var_ref normDelta) (var_ref LightDir) ) )
-...
-(assign (constant bool (1)) (var_ref __retval) (expression vec3 -
- (var_ref LightDir) (expression vec3 * (constant float (2.000000))
- (expression vec3 * (expression float dot (var_ref normDelta) (var_ref
- LightDir) ) (var_ref normDelta) ) ) ) )
-...
-
-Each node has a type. Expressions may involve several different types:
-(declare (uniform ) mat4 gl_ModelViewMatrix)
-((assign (constant bool (1)) (var_ref constructor_tmp) (expression
- vec4 * (var_ref gl_ModelViewMatrix) (var_ref gl_Vertex) ) )
-
-An expression tree can be arbitrarily deep, and the compiler tries to
-keep them structured like that so that things like algebraic
-optimizations ((color * 1.0 == color) and ((mat1 * mat2) * vec == mat1
-* (mat2 * vec))) or recognizing operation patterns for code generation
-(vec1 * vec2 + vec3 == mad(vec1, vec2, vec3)) are easier. This comes
-at the expense of additional trickery in implementing some
-optimizations like CSE where one must navigate an expression tree.
-
-Q: Why no SSA representation?
-
-A: Converting an IR tree to SSA form makes dead code elimination,
-common subexpression elimination, and many other optimizations much
-easier. However, in our primarily vector-based language, there are some
-major questions as to how it would work. Do we do SSA on the scalar
-or vector level? If we do it at the vector level, we're going to end
-up with many different versions of the variable when encountering code
-like:
-
-(assign (constant bool (1)) (swiz x (var_ref __retval) )  (var_ref a) )
-(assign (constant bool (1)) (swiz y (var_ref __retval) )  (var_ref b) )
-(assign (constant bool (1)) (swiz z (var_ref __retval) )  (var_ref c) )
-
-If every masked update of a component relies on the previous value of
-the variable, then we're probably going to be quite limited in our
-dead code elimination wins, and recognizing common expressions may
-just not happen. On the other hand, if we operate channel-wise, then
-we'll be prone to optimizing the operation on one of the channels at
-the expense of making its instruction flow different from the other
-channels, and a vector-based GPU would end up with worse code than if
-we didn't optimize operations on that channel!
-
-Once again, it appears that our optimization requirements are driven
-significantly by the target architecture. For now, targeting the Mesa
-IR backend, SSA does not appear to be that important to producing
-excellent code, but we do expect to do some SSA-based optimizations
-for the 965 fragment shader backend when that is developed.
-
-Q: How should I expand instructions that take multiple backend instructions?
-
-Sometimes you'll have to do the expansion in your code generation --
-see, for example, ir_to_mesa.cpp's handling of ir_unop_sqrt. However,
-in many cases you'll want to do a pass over the IR to convert
-non-native instructions to a series of native instructions. For
-example, for the Mesa backend we have ir_div_to_mul_rcp.cpp because
-Mesa IR (and many hardware backends) only have a reciprocal
-instruction, not a divide. Implementing non-native instructions this
-way gives the chance for constant folding to occur, so (a / 2.0)
-becomes (a * 0.5) after codegen instead of (a * (1.0 / 2.0)).
-
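To make the shape of such a lowering pass concrete, here is a condensed
sketch of an in-place div-to-mul-rcp rewrite. It is modeled on the pattern
used by the in-tree lowering passes; the visitor and expression calls are
the ones declared in ir.h and ir_hierarchical_visitor.h, while the pass
name and the do_lower_div() entry point are illustrative only.

/* Rewrite a / b into a * rcp(b), in place.  Allocating the new rcp
 * node with new(ir) makes it a ralloc child of the expression it
 * feeds, matching the hierarchical ownership scheme discussed in the
 * memory management answer below.
 */
#include "ir.h"
#include "ir_hierarchical_visitor.h"

namespace {

class lower_div_visitor : public ir_hierarchical_visitor {
public:
   lower_div_visitor() : progress(false) {}

   virtual ir_visitor_status visit_leave(ir_expression *ir)
   {
      if (ir->operation == ir_binop_div && ir->type->is_float()) {
         ir->operands[1] = new(ir) ir_expression(ir_unop_rcp,
                                                 ir->operands[1]->type,
                                                 ir->operands[1], NULL);
         ir->operation = ir_binop_mul;
         this->progress = true;
      }
      return visit_continue;
   }

   bool progress;
};

} /* anonymous namespace */

bool
do_lower_div(exec_list *instructions)
{
   lower_div_visitor v;

   visit_list_elements(&v, instructions);
   return v.progress;
}

Running such a pass inside the usual progress loop lets constant folding
clean up rcp of a constant afterwards, which is exactly how (a / 2.0)
ends up as (a * 0.5).
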
-Q: How should I handle my special hardware instructions with respect to IR?
-
-Our current theory is that if multiple targets have an instruction for
-some operation, then we should probably be able to represent that in
-the IR. Generally this is in the form of an ir_{bin,un}op expression
-type. For example, we initially implemented fract() using (a -
-floor(a)), but both 945 and 965 have instructions to give that result,
-and it would also simplify the implementation of mod(), so
-ir_unop_fract was added. The following areas need updating to add a
-new expression type:
-
-ir.h (new enum)
-ir.cpp:operator_strs (used for ir_reader)
-ir_constant_expression.cpp (you probably want to be able to constant fold)
-ir_validate.cpp (check users have the right types)
-
-You may also need to update the backends if they will see the new expr type:
-
-../mesa/program/ir_to_mesa.cpp
-
-You can then use the new expression from builtins (if all backends
-would rather see it), or scan the IR and convert to use your new
-expression type (see ir_mod_to_floor, for example).
-
-Q: How is memory management handled in the compiler?
-
-The hierarchical memory allocator "talloc" developed for the Samba
-project is used, so that things like optimization passes don't have to
-worry about their garbage collection so much. It has a few nice
-features, including low performance overhead and good debugging
-support that's trivially available.
-
-Generally, each stage of the compile creates a talloc context and
-allocates its memory out of that or children of it. At the end of the
-stage, the pieces still live are stolen to a new context and the old
-one freed, or the whole context is kept for use by the next stage.
-
-For IR transformations, a temporary context is used, then at the end
-of all transformations, reparent_ir reparents all live nodes under the
-shader's IR list, and the old context full of dead nodes is freed.
-When developing a single IR transformation pass, this means that you
-want to allocate instruction nodes out of the temporary context, so if
-it becomes dead it doesn't live on as the child of a live node. At
-the moment, optimization passes aren't passed that temporary context,
-so they find it by calling talloc_parent() on a nearby IR node. The
-talloc_parent() call is expensive, so many passes will cache the
-result of the first talloc_parent(). Cleaning up all the optimization
-passes to take a context argument and not call talloc_parent() is left
-as an exercise.
-
-Q: What is the file naming convention in this directory?
-
-Initially, there really wasn't one. We have since adopted one:
-
- - Files that implement code lowering passes should be named lower_*
-   (e.g., lower_noise.cpp).
- - Files that implement optimization passes should be named opt_*.
- - Files that implement a class that is used throughout the code should
-   take the name of that class (e.g., ir_hierarchical_visitor.cpp).
- - Files that contain code not fitting in one of the previous
-   categories should have a sensible name (e.g., glsl_parser.yy).
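Returning to the memory-management answer above: the README still says
"talloc", but the in-tree allocator implementing this scheme is ralloc
(src/util/ralloc.h), which keeps the same hierarchical model. The
per-stage pattern looks roughly like the following sketch; run_stage()
and struct survivor are illustrative names, while the ralloc calls are
the real entry points.

#include "util/ralloc.h"

struct survivor {
   int value;
};

/* Allocate freely into a stage-local context, steal what must outlive
 * the stage into the long-lived context, and free everything else in
 * a single call.
 */
static struct survivor *
run_stage(void *permanent_ctx)
{
   void *tmp = ralloc_context(NULL);

   struct survivor *keep = rzalloc(tmp, struct survivor);
   keep->value = 42;

   /* Reparent the survivor, then drop the dead nodes wholesale. */
   ralloc_steal(permanent_ctx, keep);
   ralloc_free(tmp);

   return keep;
}

ralloc_parent() exists as well, and is how passes today recover a context
from a nearby IR node, with the same cost caveat described above.
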
diff --git a/src/glsl/SConscript b/src/glsl/SConscript deleted file mode 100644 index ef82a9d317a..00000000000 --- a/src/glsl/SConscript +++ /dev/null @@ -1,122 +0,0 @@ -import common - -Import('*') - -from sys import executable as python_cmd - -env = env.Clone() - -env.MSVC2013Compat() - -env.Prepend(CPPPATH = [ - '#include', - '#src', - '#src/mapi', - '#src/mesa', - '#src/gallium/include', - '#src/gallium/auxiliary', - '#src/glsl', - '#src/glsl/glcpp', -]) - -env.Prepend(LIBS = [mesautil]) - -# Make glcpp-parse.h and glsl_parser.h reachable from the include path. -env.Append(CPPPATH = [Dir('.').abspath, Dir('glcpp').abspath]) - -glcpp_env = env.Clone() -glcpp_env.Append(YACCFLAGS = [ - '-d', - '-p', 'glcpp_parser_' -]) - -glsl_env = env.Clone() -glsl_env.Append(YACCFLAGS = [ - '--defines=%s' % File('glsl_parser.h').abspath, - '-p', '_mesa_glsl_', -]) - -# without this line scons will expect "glsl_parser.hpp" instead of -# "glsl_parser.h", causing glsl_parser.cpp to be regenerated every time -glsl_env['YACCHXXFILESUFFIX'] = '.h' - -glcpp_lexer = glcpp_env.CFile('glcpp/glcpp-lex.c', 'glcpp/glcpp-lex.l') -glcpp_parser = glcpp_env.CFile('glcpp/glcpp-parse.c', 'glcpp/glcpp-parse.y') -glsl_lexer = glsl_env.CXXFile('glsl_lexer.cpp', 'glsl_lexer.ll') -glsl_parser = glsl_env.CXXFile('glsl_parser.cpp', 'glsl_parser.yy') - -# common generated sources -glsl_sources = [ - glcpp_lexer, - glcpp_parser[0], - glsl_lexer, - glsl_parser[0], -] - -# parse Makefile.sources -source_lists = env.ParseSourceList('Makefile.sources') - -# add non-generated sources -for l in ('LIBGLCPP_FILES', 'LIBGLSL_FILES'): - glsl_sources += source_lists[l] - -if env['msvc']: - env.Prepend(CPPPATH = ['#/src/getopt']) - env.PrependUnique(LIBS = [getopt]) - -# Copy these files to avoid generation object files into src/mesa/program -env.Prepend(CPPPATH = ['#src/mesa/main']) -env.Command('imports.c', '#src/mesa/main/imports.c', Copy('$TARGET', '$SOURCE')) -# Copy these files to avoid generation object files into src/mesa/program -env.Prepend(CPPPATH = ['#src/mesa/program']) -env.Command('prog_hash_table.c', '#src/mesa/program/prog_hash_table.c', Copy('$TARGET', '$SOURCE')) -env.Command('symbol_table.c', '#src/mesa/program/symbol_table.c', Copy('$TARGET', '$SOURCE')) -env.Command('dummy_errors.c', '#src/mesa/program/dummy_errors.c', Copy('$TARGET', '$SOURCE')) - -compiler_objs = env.StaticObject(source_lists['GLSL_COMPILER_CXX_FILES']) - -mesa_objs = env.StaticObject([ - 'imports.c', - 'prog_hash_table.c', - 'symbol_table.c', - 'dummy_errors.c', -]) - -compiler_objs += mesa_objs - -glsl = env.ConvenienceLibrary( - target = 'glsl', - source = glsl_sources, -) - -# SCons builtin dependency scanner doesn't detect that glsl_lexer.ll depends on -# glsl_parser.h -env.Depends(glsl, glsl_parser) - -Export('glsl') - -# Skip building these programs as they will cause SCons error "Two environments -# with different actions were specified for the same target" -if env['crosscompile'] or env['embedded']: - Return() - -env = env.Clone() - -if env['platform'] == 'windows': - env.PrependUnique(LIBS = [ - 'user32', - ]) - -env.Prepend(LIBS = [compiler, glsl]) - -glsl_compiler = env.Program( - target = 'glsl_compiler', - source = compiler_objs, -) -env.Alias('glsl_compiler', glsl_compiler) - -glcpp = env.Program( - target = 'glcpp/glcpp', - source = ['glcpp/glcpp.c'] + mesa_objs, -) -env.Alias('glcpp', glcpp) diff --git a/src/glsl/TODO b/src/glsl/TODO deleted file mode 100644 index bd077a85678..00000000000 --- a/src/glsl/TODO +++ /dev/null @@ -1,12 
+0,0 @@
-- Detect code paths in non-void functions that don't reach a return statement
-
-- Improve handling of constants and their initializers. Constant initializers
-  should never generate any code. This is trivial for scalar constants. It is
-  also trivial for arrays, matrices, and vectors that are accessed with
-  constant index values. For others it is more complicated. Perhaps these
-  cases should be silently converted to uniforms?
-
-- Track source locations throughout the IR. There are currently several
-  places where we cannot emit line numbers for errors (and currently emit 0:0)
-  because we've "lost" the line number information. This is particularly
-  noticeable at link time.
diff --git a/src/glsl/ast.h b/src/glsl/ast.h
deleted file mode 100644
index 03df6c08b2b..00000000000
--- a/src/glsl/ast.h
+++ /dev/null
@@ -1,1204 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright © 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#pragma once
-#ifndef AST_H
-#define AST_H
-
-#include "list.h"
-#include "glsl_parser_extras.h"
-
-struct _mesa_glsl_parse_state;
-
-struct YYLTYPE;
-
-/**
- * \defgroup AST Abstract syntax tree node definitions
- *
- * An abstract syntax tree is generated by the parser.  This is a fairly
- * direct representation of the grammar derivation for the source program.
- * No semantic checking is done during the generation of the AST.  Only
- * syntactic checking is done.  Semantic checking is performed by a later
- * stage that converts the AST to a more generic intermediate representation.
- *
- *@{
- */
-/**
- * Base class of all abstract syntax tree nodes
- */
-class ast_node {
-public:
-   DECLARE_RALLOC_CXX_OPERATORS(ast_node);
-
-   /**
-    * Print an AST node in something approximating the original GLSL code
-    */
-   virtual void print(void) const;
-
-   /**
-    * Convert the AST node to the high-level intermediate representation
-    */
-   virtual ir_rvalue *hir(exec_list *instructions,
-                          struct _mesa_glsl_parse_state *state);
-
-   virtual bool has_sequence_subexpression() const;
-
-   /**
-    * Retrieve the source location of an AST node
-    *
-    * This function is primarily used to get the source position of an AST node
-    * into a form that can be passed to \c _mesa_glsl_error.
- * - * \sa _mesa_glsl_error, ast_node::set_location - */ - struct YYLTYPE get_location(void) const - { - struct YYLTYPE locp; - - locp.source = this->location.source; - locp.first_line = this->location.first_line; - locp.first_column = this->location.first_column; - locp.last_line = this->location.last_line; - locp.last_column = this->location.last_column; - - return locp; - } - - /** - * Set the source location of an AST node from a parser location - * - * \sa ast_node::get_location - */ - void set_location(const struct YYLTYPE &locp) - { - this->location.source = locp.source; - this->location.first_line = locp.first_line; - this->location.first_column = locp.first_column; - this->location.last_line = locp.last_line; - this->location.last_column = locp.last_column; - } - - /** - * Set the source location range of an AST node using two location nodes - * - * \sa ast_node::set_location - */ - void set_location_range(const struct YYLTYPE &begin, const struct YYLTYPE &end) - { - this->location.source = begin.source; - this->location.first_line = begin.first_line; - this->location.last_line = end.last_line; - this->location.first_column = begin.first_column; - this->location.last_column = end.last_column; - } - - /** - * Source location of the AST node. - */ - struct { - unsigned source; /**< GLSL source number. */ - unsigned first_line; /**< First line number within the source string. */ - unsigned first_column; /**< First column in the first line. */ - unsigned last_line; /**< Last line number within the source string. */ - unsigned last_column; /**< Last column in the last line. */ - } location; - - exec_node link; - -protected: - /** - * The only constructor is protected so that only derived class objects can - * be created. - */ - ast_node(void); -}; - - -/** - * Operators for AST expression nodes. - */ -enum ast_operators { - ast_assign, - ast_plus, /**< Unary + operator. */ - ast_neg, - ast_add, - ast_sub, - ast_mul, - ast_div, - ast_mod, - ast_lshift, - ast_rshift, - ast_less, - ast_greater, - ast_lequal, - ast_gequal, - ast_equal, - ast_nequal, - ast_bit_and, - ast_bit_xor, - ast_bit_or, - ast_bit_not, - ast_logic_and, - ast_logic_xor, - ast_logic_or, - ast_logic_not, - - ast_mul_assign, - ast_div_assign, - ast_mod_assign, - ast_add_assign, - ast_sub_assign, - ast_ls_assign, - ast_rs_assign, - ast_and_assign, - ast_xor_assign, - ast_or_assign, - - ast_conditional, - - ast_pre_inc, - ast_pre_dec, - ast_post_inc, - ast_post_dec, - ast_field_selection, - ast_array_index, - ast_unsized_array_dim, - - ast_function_call, - - ast_identifier, - ast_int_constant, - ast_uint_constant, - ast_float_constant, - ast_bool_constant, - ast_double_constant, - - ast_sequence, - ast_aggregate -}; - -/** - * Representation of any sort of expression. 
- */ -class ast_expression : public ast_node { -public: - ast_expression(int oper, ast_expression *, - ast_expression *, ast_expression *); - - ast_expression(const char *identifier) : - oper(ast_identifier) - { - subexpressions[0] = NULL; - subexpressions[1] = NULL; - subexpressions[2] = NULL; - primary_expression.identifier = identifier; - this->non_lvalue_description = NULL; - } - - static const char *operator_string(enum ast_operators op); - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - virtual void hir_no_rvalue(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - virtual bool has_sequence_subexpression() const; - - ir_rvalue *do_hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state, - bool needs_rvalue); - - virtual void print(void) const; - - enum ast_operators oper; - - ast_expression *subexpressions[3]; - - union { - const char *identifier; - int int_constant; - float float_constant; - unsigned uint_constant; - int bool_constant; - double double_constant; - } primary_expression; - - - /** - * List of expressions for an \c ast_sequence or parameters for an - * \c ast_function_call - */ - exec_list expressions; - - /** - * For things that can't be l-values, this describes what it is. - * - * This text is used by the code that generates IR for assignments to - * detect and emit useful messages for assignments to some things that - * can't be l-values. For example, pre- or post-incerement expressions. - * - * \note - * This pointer may be \c NULL. - */ - const char *non_lvalue_description; -}; - -class ast_expression_bin : public ast_expression { -public: - ast_expression_bin(int oper, ast_expression *, ast_expression *); - - virtual void print(void) const; -}; - -/** - * Subclass of expressions for function calls - */ -class ast_function_expression : public ast_expression { -public: - ast_function_expression(ast_expression *callee) - : ast_expression(ast_function_call, callee, - NULL, NULL), - cons(false) - { - /* empty */ - } - - ast_function_expression(class ast_type_specifier *type) - : ast_expression(ast_function_call, (ast_expression *) type, - NULL, NULL), - cons(true) - { - /* empty */ - } - - bool is_constructor() const - { - return cons; - } - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - virtual void hir_no_rvalue(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - virtual bool has_sequence_subexpression() const; - -private: - /** - * Is this function call actually a constructor? - */ - bool cons; - ir_rvalue * - handle_method(exec_list *instructions, - struct _mesa_glsl_parse_state *state); -}; - -class ast_subroutine_list : public ast_node -{ -public: - virtual void print(void) const; - exec_list declarations; -}; - -class ast_array_specifier : public ast_node { -public: - ast_array_specifier(const struct YYLTYPE &locp, ast_expression *dim) - { - set_location(locp); - array_dimensions.push_tail(&dim->link); - } - - void add_dimension(ast_expression *dim) - { - array_dimensions.push_tail(&dim->link); - } - - bool is_single_dimension() const - { - return this->array_dimensions.tail_pred->prev != NULL && - this->array_dimensions.tail_pred->prev->is_head_sentinel(); - } - - virtual void print(void) const; - - /* This list contains objects of type ast_node containing the - * array dimensions in outermost-to-innermost order. 
- */ - exec_list array_dimensions; -}; - -class ast_layout_expression : public ast_node { -public: - ast_layout_expression(const struct YYLTYPE &locp, ast_expression *expr) - { - set_location(locp); - layout_const_expressions.push_tail(&expr->link); - } - - bool process_qualifier_constant(struct _mesa_glsl_parse_state *state, - const char *qual_indentifier, - unsigned *value, bool can_be_zero); - - void merge_qualifier(ast_layout_expression *l_expr) - { - layout_const_expressions.append_list(&l_expr->layout_const_expressions); - } - - exec_list layout_const_expressions; -}; - -/** - * C-style aggregate initialization class - * - * Represents C-style initializers of vectors, matrices, arrays, and - * structures. E.g., vec3 pos = {1.0, 0.0, -1.0} is equivalent to - * vec3 pos = vec3(1.0, 0.0, -1.0). - * - * Specified in GLSL 4.20 and GL_ARB_shading_language_420pack. - * - * \sa _mesa_ast_set_aggregate_type - */ -class ast_aggregate_initializer : public ast_expression { -public: - ast_aggregate_initializer() - : ast_expression(ast_aggregate, NULL, NULL, NULL), - constructor_type(NULL) - { - /* empty */ - } - - /** - * glsl_type of the aggregate, which is inferred from the LHS of whatever - * the aggregate is being used to initialize. This can't be inferred at - * parse time (since the parser deals with ast_type_specifiers, not - * glsl_types), so the parser leaves it NULL. However, the ast-to-hir - * conversion code makes sure to fill it in with the appropriate type - * before hir() is called. - */ - const glsl_type *constructor_type; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - virtual void hir_no_rvalue(exec_list *instructions, - struct _mesa_glsl_parse_state *state); -}; - -/** - * Number of possible operators for an ast_expression - * - * This is done as a define instead of as an additional value in the enum so - * that the compiler won't generate spurious messages like "warning: - * enumeration value ‘ast_num_operators’ not handled in switch" - */ -#define AST_NUM_OPERATORS (ast_sequence + 1) - - -class ast_compound_statement : public ast_node { -public: - ast_compound_statement(int new_scope, ast_node *statements); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - int new_scope; - exec_list statements; -}; - -class ast_declaration : public ast_node { -public: - ast_declaration(const char *identifier, - ast_array_specifier *array_specifier, - ast_expression *initializer); - virtual void print(void) const; - - const char *identifier; - - ast_array_specifier *array_specifier; - - ast_expression *initializer; -}; - - -enum { - ast_precision_none = 0, /**< Absence of precision qualifier. */ - ast_precision_high, - ast_precision_medium, - ast_precision_low -}; - -struct ast_type_qualifier { - DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier); - - union { - struct { - unsigned invariant:1; - unsigned precise:1; - unsigned constant:1; - unsigned attribute:1; - unsigned varying:1; - unsigned in:1; - unsigned out:1; - unsigned centroid:1; - unsigned sample:1; - unsigned patch:1; - unsigned uniform:1; - unsigned buffer:1; - unsigned shared_storage:1; - unsigned smooth:1; - unsigned flat:1; - unsigned noperspective:1; - - /** \name Layout qualifiers for GL_ARB_fragment_coord_conventions */ - /*@{*/ - unsigned origin_upper_left:1; - unsigned pixel_center_integer:1; - /*@}*/ - - /** - * Flag set if GL_ARB_explicit_attrib_location "location" layout - * qualifier is used. 
- */ - unsigned explicit_location:1; - /** - * Flag set if GL_ARB_explicit_attrib_location "index" layout - * qualifier is used. - */ - unsigned explicit_index:1; - - /** - * Flag set if GL_ARB_shading_language_420pack "binding" layout - * qualifier is used. - */ - unsigned explicit_binding:1; - - /** - * Flag set if GL_ARB_shader_atomic counter "offset" layout - * qualifier is used. - */ - unsigned explicit_offset:1; - - /** \name Layout qualifiers for GL_AMD_conservative_depth */ - /** \{ */ - unsigned depth_any:1; - unsigned depth_greater:1; - unsigned depth_less:1; - unsigned depth_unchanged:1; - /** \} */ - - /** \name Layout qualifiers for GL_ARB_uniform_buffer_object */ - /** \{ */ - unsigned std140:1; - unsigned std430:1; - unsigned shared:1; - unsigned packed:1; - unsigned column_major:1; - unsigned row_major:1; - /** \} */ - - /** \name Layout qualifiers for GLSL 1.50 geometry shaders */ - /** \{ */ - unsigned prim_type:1; - unsigned max_vertices:1; - /** \} */ - - /** - * local_size_{x,y,z} flags for compute shaders. Bit 0 represents - * local_size_x, and so on. - */ - unsigned local_size:3; - - /** \name Layout and memory qualifiers for ARB_shader_image_load_store. */ - /** \{ */ - unsigned early_fragment_tests:1; - unsigned explicit_image_format:1; - unsigned coherent:1; - unsigned _volatile:1; - unsigned restrict_flag:1; - unsigned read_only:1; /**< "readonly" qualifier. */ - unsigned write_only:1; /**< "writeonly" qualifier. */ - /** \} */ - - /** \name Layout qualifiers for GL_ARB_gpu_shader5 */ - /** \{ */ - unsigned invocations:1; - unsigned stream:1; /**< Has stream value assigned */ - unsigned explicit_stream:1; /**< stream value assigned explicitly by shader code */ - /** \} */ - - /** \name Layout qualifiers for GL_ARB_tessellation_shader */ - /** \{ */ - /* tess eval input layout */ - /* gs prim_type reused for primitive mode */ - unsigned vertex_spacing:1; - unsigned ordering:1; - unsigned point_mode:1; - /* tess control output layout */ - unsigned vertices:1; - /** \} */ - - /** \name Qualifiers for GL_ARB_shader_subroutine */ - /** \{ */ - unsigned subroutine:1; /**< Is this marked 'subroutine' */ - unsigned subroutine_def:1; /**< Is this marked 'subroutine' with a list of types */ - /** \} */ - } - /** \brief Set of flags, accessed by name. */ - q; - - /** \brief Set of flags, accessed as a bitmask. */ - uint64_t i; - } flags; - - /** Precision of the type (highp/medium/lowp). */ - unsigned precision:2; - - /** Geometry shader invocations for GL_ARB_gpu_shader5. */ - ast_layout_expression *invocations; - - /** - * Location specified via GL_ARB_explicit_attrib_location layout - * - * \note - * This field is only valid if \c explicit_location is set. - */ - ast_expression *location; - /** - * Index specified via GL_ARB_explicit_attrib_location layout - * - * \note - * This field is only valid if \c explicit_index is set. - */ - ast_expression *index; - - /** Maximum output vertices in GLSL 1.50 geometry shaders. */ - ast_layout_expression *max_vertices; - - /** Stream in GLSL 1.50 geometry shaders. */ - ast_expression *stream; - - /** - * Input or output primitive type in GLSL 1.50 geometry shaders - * and tessellation shaders. - */ - GLenum prim_type; - - /** - * Binding specified via GL_ARB_shading_language_420pack's "binding" keyword. - * - * \note - * This field is only valid if \c explicit_binding is set. - */ - ast_expression *binding; - - /** - * Offset specified via GL_ARB_shader_atomic_counter's "offset" - * keyword. 
- * - * \note - * This field is only valid if \c explicit_offset is set. - */ - ast_expression *offset; - - /** - * Local size specified via GL_ARB_compute_shader's "local_size_{x,y,z}" - * layout qualifier. Element i of this array is only valid if - * flags.q.local_size & (1 << i) is set. - */ - ast_layout_expression *local_size[3]; - - /** Tessellation evaluation shader: vertex spacing (equal, fractional even/odd) */ - GLenum vertex_spacing; - - /** Tessellation evaluation shader: vertex ordering (CW or CCW) */ - GLenum ordering; - - /** Tessellation evaluation shader: point mode */ - bool point_mode; - - /** Tessellation control shader: number of output vertices */ - ast_layout_expression *vertices; - - /** - * Image format specified with an ARB_shader_image_load_store - * layout qualifier. - * - * \note - * This field is only valid if \c explicit_image_format is set. - */ - GLenum image_format; - - /** - * Base type of the data read from or written to this image. Only - * the following enumerants are allowed: GLSL_TYPE_UINT, - * GLSL_TYPE_INT, GLSL_TYPE_FLOAT. - * - * \note - * This field is only valid if \c explicit_image_format is set. - */ - glsl_base_type image_base_type; - - /** Flag to know if this represents a default value for a qualifier */ - bool is_default_qualifier; - - /** - * Return true if and only if an interpolation qualifier is present. - */ - bool has_interpolation() const; - - /** - * Return whether a layout qualifier is present. - */ - bool has_layout() const; - - /** - * Return whether a storage qualifier is present. - */ - bool has_storage() const; - - /** - * Return whether an auxiliary storage qualifier is present. - */ - bool has_auxiliary_storage() const; - - /** - * \brief Return string representation of interpolation qualifier. - * - * If an interpolation qualifier is present, then return that qualifier's - * string representation. Otherwise, return null. For example, if the - * noperspective bit is set, then this returns "noperspective". - * - * If multiple interpolation qualifiers are somehow present, then the - * returned string is undefined but not null. 
- */ - const char *interpolation_string() const; - - bool merge_qualifier(YYLTYPE *loc, - _mesa_glsl_parse_state *state, - const ast_type_qualifier &q, - bool is_single_layout_merge); - - bool merge_out_qualifier(YYLTYPE *loc, - _mesa_glsl_parse_state *state, - const ast_type_qualifier &q, - ast_node* &node, bool create_node); - - bool merge_in_qualifier(YYLTYPE *loc, - _mesa_glsl_parse_state *state, - const ast_type_qualifier &q, - ast_node* &node, bool create_node); - - ast_subroutine_list *subroutine_list; -}; - -class ast_declarator_list; - -class ast_struct_specifier : public ast_node { -public: - ast_struct_specifier(const char *identifier, - ast_declarator_list *declarator_list); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - const char *name; - ast_type_qualifier *layout; - /* List of ast_declarator_list * */ - exec_list declarations; - bool is_declaration; -}; - - - -class ast_type_specifier : public ast_node { -public: - /** Construct a type specifier from a type name */ - ast_type_specifier(const char *name) - : type_name(name), structure(NULL), array_specifier(NULL), - default_precision(ast_precision_none) - { - /* empty */ - } - - /** Construct a type specifier from a structure definition */ - ast_type_specifier(ast_struct_specifier *s) - : type_name(s->name), structure(s), array_specifier(NULL), - default_precision(ast_precision_none) - { - /* empty */ - } - - const struct glsl_type *glsl_type(const char **name, - struct _mesa_glsl_parse_state *state) - const; - - virtual void print(void) const; - - ir_rvalue *hir(exec_list *, struct _mesa_glsl_parse_state *); - - const char *type_name; - ast_struct_specifier *structure; - - ast_array_specifier *array_specifier; - - /** For precision statements, this is the given precision; otherwise none. */ - unsigned default_precision:2; -}; - - -class ast_fully_specified_type : public ast_node { -public: - virtual void print(void) const; - bool has_qualifiers(_mesa_glsl_parse_state *state) const; - - ast_fully_specified_type() : qualifier(), specifier(NULL) - { - } - - const struct glsl_type *glsl_type(const char **name, - struct _mesa_glsl_parse_state *state) - const; - - ast_type_qualifier qualifier; - ast_type_specifier *specifier; -}; - - -class ast_declarator_list : public ast_node { -public: - ast_declarator_list(ast_fully_specified_type *); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - ast_fully_specified_type *type; - /** List of 'ast_declaration *' */ - exec_list declarations; - - /** - * Flags for redeclarations. In these cases, no type is specified, to - * `type` is allowed to be NULL. In all other cases, this would be an error. 
- */ - int invariant; /** < `invariant` redeclaration */ - int precise; /** < `precise` redeclaration */ -}; - - -class ast_parameter_declarator : public ast_node { -public: - ast_parameter_declarator() : - type(NULL), - identifier(NULL), - array_specifier(NULL), - formal_parameter(false), - is_void(false) - { - /* empty */ - } - - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - ast_fully_specified_type *type; - const char *identifier; - ast_array_specifier *array_specifier; - - static void parameters_to_hir(exec_list *ast_parameters, - bool formal, exec_list *ir_parameters, - struct _mesa_glsl_parse_state *state); - -private: - /** Is this parameter declaration part of a formal parameter list? */ - bool formal_parameter; - - /** - * Is this parameter 'void' type? - * - * This field is set by \c ::hir. - */ - bool is_void; -}; - - -class ast_function : public ast_node { -public: - ast_function(void); - - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - ast_fully_specified_type *return_type; - const char *identifier; - - exec_list parameters; - -private: - /** - * Is this prototype part of the function definition? - * - * Used by ast_function_definition::hir to process the parameters, etc. - * of the function. - * - * \sa ::hir - */ - bool is_definition; - - /** - * Function signature corresponding to this function prototype instance - * - * Used by ast_function_definition::hir to process the parameters, etc. - * of the function. - * - * \sa ::hir - */ - class ir_function_signature *signature; - - friend class ast_function_definition; -}; - - -class ast_expression_statement : public ast_node { -public: - ast_expression_statement(ast_expression *); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - ast_expression *expression; -}; - - -class ast_case_label : public ast_node { -public: - ast_case_label(ast_expression *test_value); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - /** - * An test value of NULL means 'default'. - */ - ast_expression *test_value; -}; - - -class ast_case_label_list : public ast_node { -public: - ast_case_label_list(void); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - /** - * A list of case labels. - */ - exec_list labels; -}; - - -class ast_case_statement : public ast_node { -public: - ast_case_statement(ast_case_label_list *labels); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - ast_case_label_list *labels; - - /** - * A list of statements. - */ - exec_list stmts; -}; - - -class ast_case_statement_list : public ast_node { -public: - ast_case_statement_list(void); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - /** - * A list of cases. 
- */ - exec_list cases; -}; - - -class ast_switch_body : public ast_node { -public: - ast_switch_body(ast_case_statement_list *stmts); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - ast_case_statement_list *stmts; -}; - - -class ast_selection_statement : public ast_node { -public: - ast_selection_statement(ast_expression *condition, - ast_node *then_statement, - ast_node *else_statement); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - ast_expression *condition; - ast_node *then_statement; - ast_node *else_statement; -}; - - -class ast_switch_statement : public ast_node { -public: - ast_switch_statement(ast_expression *test_expression, - ast_node *body); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - ast_expression *test_expression; - ast_node *body; - -protected: - void test_to_hir(exec_list *, struct _mesa_glsl_parse_state *); -}; - -class ast_iteration_statement : public ast_node { -public: - ast_iteration_statement(int mode, ast_node *init, ast_node *condition, - ast_expression *rest_expression, ast_node *body); - - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *, struct _mesa_glsl_parse_state *); - - enum ast_iteration_modes { - ast_for, - ast_while, - ast_do_while - } mode; - - - ast_node *init_statement; - ast_node *condition; - ast_expression *rest_expression; - - ast_node *body; - - /** - * Generate IR from the condition of a loop - * - * This is factored out of ::hir because some loops have the condition - * test at the top (for and while), and others have it at the end (do-while). - */ - void condition_to_hir(exec_list *, struct _mesa_glsl_parse_state *); -}; - - -class ast_jump_statement : public ast_node { -public: - ast_jump_statement(int mode, ast_expression *return_value); - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - enum ast_jump_modes { - ast_continue, - ast_break, - ast_return, - ast_discard - } mode; - - ast_expression *opt_return_value; -}; - - -class ast_function_definition : public ast_node { -public: - ast_function_definition() : prototype(NULL), body(NULL) - { - } - - virtual void print(void) const; - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - ast_function *prototype; - ast_compound_statement *body; -}; - -class ast_interface_block : public ast_node { -public: - ast_interface_block(ast_type_qualifier layout, - const char *instance_name, - ast_array_specifier *array_specifier) - : layout(layout), block_name(NULL), instance_name(instance_name), - array_specifier(array_specifier) - { - } - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - - ast_type_qualifier layout; - const char *block_name; - - /** - * Declared name of the block instance, if specified. - * - * If the block does not have an instance name, this field will be - * \c NULL. - */ - const char *instance_name; - - /** List of ast_declarator_list * */ - exec_list declarations; - - /** - * Declared array size of the block instance - * - * If the block is not declared as an array or if the block instance array - * is unsized, this field will be \c NULL. 
- */ - ast_array_specifier *array_specifier; -}; - - -/** - * AST node representing a declaration of the output layout for tessellation - * control shaders. - */ -class ast_tcs_output_layout : public ast_node -{ -public: - ast_tcs_output_layout(const struct YYLTYPE &locp) - { - set_location(locp); - } - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); -}; - - -/** - * AST node representing a declaration of the input layout for geometry - * shaders. - */ -class ast_gs_input_layout : public ast_node -{ -public: - ast_gs_input_layout(const struct YYLTYPE &locp, GLenum prim_type) - : prim_type(prim_type) - { - set_location(locp); - } - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - -private: - const GLenum prim_type; -}; - - -/** - * AST node representing a decalaration of the input layout for compute - * shaders. - */ -class ast_cs_input_layout : public ast_node -{ -public: - ast_cs_input_layout(const struct YYLTYPE &locp, - ast_layout_expression *const *local_size) - { - for (int i = 0; i < 3; i++) { - this->local_size[i] = local_size[i]; - } - set_location(locp); - } - - virtual ir_rvalue *hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - -private: - ast_layout_expression *local_size[3]; -}; - -/*@}*/ - -extern void -_mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state); - -extern ir_rvalue * -_mesa_ast_field_selection_to_hir(const ast_expression *expr, - exec_list *instructions, - struct _mesa_glsl_parse_state *state); - -extern ir_rvalue * -_mesa_ast_array_index_to_hir(void *mem_ctx, - struct _mesa_glsl_parse_state *state, - ir_rvalue *array, ir_rvalue *idx, - YYLTYPE &loc, YYLTYPE &idx_loc); - -extern void -_mesa_ast_set_aggregate_type(const glsl_type *type, - ast_expression *expr); - -void -emit_function(_mesa_glsl_parse_state *state, ir_function *f); - -extern void -check_builtin_array_max_size(const char *name, unsigned size, - YYLTYPE loc, struct _mesa_glsl_parse_state *state); - -extern void _mesa_ast_process_interface_block(YYLTYPE *locp, - _mesa_glsl_parse_state *state, - ast_interface_block *const block, - const struct ast_type_qualifier &q); - -#endif /* AST_H */ diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp deleted file mode 100644 index f5baeb9ea32..00000000000 --- a/src/glsl/ast_array_index.cpp +++ /dev/null @@ -1,333 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "ast.h" -#include "compiler/glsl_types.h" -#include "ir.h" - -void -ast_array_specifier::print(void) const -{ - foreach_list_typed (ast_node, array_dimension, link, &this->array_dimensions) { - printf("[ "); - if (((ast_expression*)array_dimension)->oper != ast_unsized_array_dim) - array_dimension->print(); - printf("] "); - } -} - -/** - * If \c ir is a reference to an array for which we are tracking the max array - * element accessed, track that the given element has been accessed. - * Otherwise do nothing. - * - * This function also checks whether the array is a built-in array whose - * maximum size is too small to accommodate the given index, and if so uses - * loc and state to report the error. - */ -static void -update_max_array_access(ir_rvalue *ir, int idx, YYLTYPE *loc, - struct _mesa_glsl_parse_state *state) -{ - if (ir_dereference_variable *deref_var = ir->as_dereference_variable()) { - ir_variable *var = deref_var->var; - if (idx > (int)var->data.max_array_access) { - var->data.max_array_access = idx; - - /* Check whether this access will, as a side effect, implicitly cause - * the size of a built-in array to be too large. - */ - check_builtin_array_max_size(var->name, idx+1, *loc, state); - } - } else if (ir_dereference_record *deref_record = - ir->as_dereference_record()) { - /* There are three possibilities we need to consider: - * - * - Accessing an element of an array that is a member of a named - * interface block (e.g. ifc.foo[i]) - * - * - Accessing an element of an array that is a member of a named - * interface block array (e.g. ifc[j].foo[i]). - * - * - Accessing an element of an array that is a member of a named - * interface block array of arrays (e.g. ifc[j][k].foo[i]). - */ - ir_dereference_variable *deref_var = - deref_record->record->as_dereference_variable(); - if (deref_var == NULL) { - ir_dereference_array *deref_array = - deref_record->record->as_dereference_array(); - ir_dereference_array *deref_array_prev = NULL; - while (deref_array != NULL) { - deref_array_prev = deref_array; - deref_array = deref_array->array->as_dereference_array(); - } - if (deref_array_prev != NULL) - deref_var = deref_array_prev->array->as_dereference_variable(); - } - - if (deref_var != NULL) { - if (deref_var->var->is_interface_instance()) { - unsigned field_index = - deref_record->record->type->field_index(deref_record->field); - assert(field_index < deref_var->var->get_interface_type()->length); - - unsigned *const max_ifc_array_access = - deref_var->var->get_max_ifc_array_access(); - - assert(max_ifc_array_access != NULL); - - if (idx > (int)max_ifc_array_access[field_index]) { - max_ifc_array_access[field_index] = idx; - - /* Check whether this access will, as a side effect, implicitly - * cause the size of a built-in array to be too large. - */ - check_builtin_array_max_size(deref_record->field, idx+1, *loc, - state); - } - } - } - } -} - - -static int -get_implicit_array_size(struct _mesa_glsl_parse_state *state, - ir_rvalue *array) -{ - ir_variable *var = array->variable_referenced(); - - /* Inputs in control shader are implicitly sized - * to the maximum patch size. 
- */ - if (state->stage == MESA_SHADER_TESS_CTRL && - var->data.mode == ir_var_shader_in) { - return state->Const.MaxPatchVertices; - } - - /* Non-patch inputs in evaluation shader are implicitly sized - * to the maximum patch size. - */ - if (state->stage == MESA_SHADER_TESS_EVAL && - var->data.mode == ir_var_shader_in && - !var->data.patch) { - return state->Const.MaxPatchVertices; - } - - return 0; -} - - -ir_rvalue * -_mesa_ast_array_index_to_hir(void *mem_ctx, - struct _mesa_glsl_parse_state *state, - ir_rvalue *array, ir_rvalue *idx, - YYLTYPE &loc, YYLTYPE &idx_loc) -{ - if (!array->type->is_error() - && !array->type->is_array() - && !array->type->is_matrix() - && !array->type->is_vector()) { - _mesa_glsl_error(& idx_loc, state, - "cannot dereference non-array / non-matrix / " - "non-vector"); - } - - if (!idx->type->is_error()) { - if (!idx->type->is_integer()) { - _mesa_glsl_error(& idx_loc, state, "array index must be integer type"); - } else if (!idx->type->is_scalar()) { - _mesa_glsl_error(& idx_loc, state, "array index must be scalar"); - } - } - - /* If the array index is a constant expression and the array has a - * declared size, ensure that the access is in-bounds. If the array - * index is not a constant expression, ensure that the array has a - * declared size. - */ - ir_constant *const const_index = idx->constant_expression_value(); - if (const_index != NULL && idx->type->is_integer()) { - const int idx = const_index->value.i[0]; - const char *type_name = "error"; - unsigned bound = 0; - - /* From page 24 (page 30 of the PDF) of the GLSL 1.50 spec: - * - * "It is illegal to declare an array with a size, and then - * later (in the same shader) index the same array with an - * integral constant expression greater than or equal to the - * declared size. It is also illegal to index an array with a - * negative constant expression." - */ - if (array->type->is_matrix()) { - if (array->type->row_type()->vector_elements <= idx) { - type_name = "matrix"; - bound = array->type->row_type()->vector_elements; - } - } else if (array->type->is_vector()) { - if (array->type->vector_elements <= idx) { - type_name = "vector"; - bound = array->type->vector_elements; - } - } else { - /* glsl_type::array_size() returns -1 for non-array types. This means - * that we don't need to verify that the type is an array before - * doing the bounds checking. - */ - if ((array->type->array_size() > 0) - && (array->type->array_size() <= idx)) { - type_name = "array"; - bound = array->type->array_size(); - } - } - - if (bound > 0) { - _mesa_glsl_error(& loc, state, "%s index must be < %u", - type_name, bound); - } else if (idx < 0) { - _mesa_glsl_error(& loc, state, "%s index must be >= 0", - type_name); - } - - if (array->type->is_array()) - update_max_array_access(array, idx, &loc, state); - } else if (const_index == NULL && array->type->is_array()) { - if (array->type->is_unsized_array()) { - int implicit_size = get_implicit_array_size(state, array); - if (implicit_size) { - ir_variable *v = array->whole_variable_referenced(); - if (v != NULL) - v->data.max_array_access = implicit_size - 1; - } - else if (state->stage == MESA_SHADER_TESS_CTRL && - array->variable_referenced()->data.mode == ir_var_shader_out && - !array->variable_referenced()->data.patch) { - /* Tessellation control shader output non-patch arrays are - * initially unsized. Despite that, they are allowed to be - * indexed with a non-constant expression (typically - * "gl_InvocationID"). 
The array size will be determined - * by the linker. - */ - } - else if (array->variable_referenced()->data.mode != - ir_var_shader_storage) { - _mesa_glsl_error(&loc, state, "unsized array index must be constant"); - } - } else if (array->type->without_array()->is_interface() - && (array->variable_referenced()->data.mode == ir_var_uniform || - array->variable_referenced()->data.mode == ir_var_shader_storage) - && !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) { - /* Page 50 in section 4.3.9 of the OpenGL ES 3.10 spec says: - * - * "All indices used to index a uniform or shader storage block - * array must be constant integral expressions." - */ - _mesa_glsl_error(&loc, state, "%s block array index must be constant", - array->variable_referenced()->data.mode - == ir_var_uniform ? "uniform" : "shader storage"); - } else { - /* whole_variable_referenced can return NULL if the array is a - * member of a structure. In this case it is safe to not update - * the max_array_access field because it is never used for fields - * of structures. - */ - ir_variable *v = array->whole_variable_referenced(); - if (v != NULL) - v->data.max_array_access = array->type->array_size() - 1; - } - - /* From page 23 (29 of the PDF) of the GLSL 1.30 spec: - * - * "Samplers aggregated into arrays within a shader (using square - * brackets [ ]) can only be indexed with integral constant - * expressions [...]." - * - * This restriction was added in GLSL 1.30. Shaders using earlier - * versions of the language should not be rejected by the compiler - * front-end for using this construct. This allows useful things such - * as using a loop counter as the index to an array of samplers. If the - * loop is unrolled, the code should compile correctly. Instead of an - * error, emit a warning. - * - * In GLSL 4.00 / ARB_gpu_shader5, this requirement is relaxed again to allow - * indexing with dynamically uniform expressions. Note that these are not - * required to be uniforms or expressions based on them, but merely that the - * values must not diverge between shader invocations run together. If the - * values *do* diverge, then the behavior of the operation requiring a - * dynamically uniform expression is undefined. - */ - if (array->type->without_array()->is_sampler()) { - if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) { - if (state->is_version(130, 300)) - _mesa_glsl_error(&loc, state, - "sampler arrays indexed with non-constant " - "expressions are forbidden in GLSL %s " - "and later", - state->es_shader ? "ES 3.00" : "1.30"); - else if (state->es_shader) - _mesa_glsl_warning(&loc, state, - "sampler arrays indexed with non-constant " - "expressions will be forbidden in GLSL " - "3.00 and later"); - else - _mesa_glsl_warning(&loc, state, - "sampler arrays indexed with non-constant " - "expressions will be forbidden in GLSL " - "1.30 and later"); - } - } - - /* From page 27 of the GLSL ES 3.1 specification: - * - * "When aggregated into arrays within a shader, images can only be - * indexed with a constant integral expression." - * - * On the other hand, the desktop GL specification allows - * non-constant indexing of image arrays, but behavior is left undefined - * in cases where the indexing expression is not dynamically uniform.
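- * - * An assumed illustration (not in the original source), with p an - * ivec2: - * - *    layout(rgba8) uniform image2D imgs[4]; - *    uniform int which;                      // dynamically uniform - *    vec4 t = imageLoad(imgs[which], p);     // allowed in desktop GL, - *                                            // an error in GLSL ES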
- */ - if (state->es_shader && array->type->without_array()->is_image()) { - _mesa_glsl_error(&loc, state, - "image arrays indexed with non-constant " - "expressions are forbidden in GLSL ES."); - } - } - - /* After performing all of the error checking, generate the IR for the - * expression. - */ - if (array->type->is_array() - || array->type->is_matrix() - || array->type->is_vector()) { - return new(mem_ctx) ir_dereference_array(array, idx); - } else if (array->type->is_error()) { - return array; - } else { - ir_rvalue *result = new(mem_ctx) ir_dereference_array(array, idx); - result->type = glsl_type::error_type; - - return result; - } -} diff --git a/src/glsl/ast_expr.cpp b/src/glsl/ast_expr.cpp deleted file mode 100644 index e624d11cf3b..00000000000 --- a/src/glsl/ast_expr.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ -#include <assert.h> -#include "ast.h" - -const char * -ast_expression::operator_string(enum ast_operators op) -{ - static const char *const operators[] = { - "=", - "+", - "-", - "+", - "-", - "*", - "/", - "%", - "<<", - ">>", - "<", - ">", - "<=", - ">=", - "==", - "!=", - "&", - "^", - "|", - "~", - "&&", - "^^", - "||", - "!", - - "*=", - "/=", - "%=", - "+=", - "-=", - "<<=", - ">>=", - "&=", - "^=", - "|=", - - "?:", - - "++", - "--", - "++", - "--", - ".", - }; - - assert((unsigned int)op < sizeof(operators) / sizeof(operators[0])); - - return operators[op]; -} - - -ast_expression_bin::ast_expression_bin(int oper, ast_expression *ex0, - ast_expression *ex1) : - ast_expression(oper, ex0, ex1, NULL) -{ - assert((oper >= ast_plus) && (oper <= ast_logic_not)); -} - - -void -ast_expression_bin::print(void) const -{ - subexpressions[0]->print(); - printf("%s ", operator_string(oper)); - subexpressions[1]->print(); -} diff --git a/src/glsl/ast_function.cpp b/src/glsl/ast_function.cpp deleted file mode 100644 index 0eb456a2b1f..00000000000 --- a/src/glsl/ast_function.cpp +++ /dev/null @@ -1,2098 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "glsl_symbol_table.h" -#include "ast.h" -#include "compiler/glsl_types.h" -#include "ir.h" -#include "main/core.h" /* for MIN2 */ -#include "main/shaderobj.h" - -static ir_rvalue * -convert_component(ir_rvalue *src, const glsl_type *desired_type); - -bool -apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from, - struct _mesa_glsl_parse_state *state); - -static unsigned -process_parameters(exec_list *instructions, exec_list *actual_parameters, - exec_list *parameters, - struct _mesa_glsl_parse_state *state) -{ - unsigned count = 0; - - foreach_list_typed(ast_node, ast, link, parameters) { - ir_rvalue *result = ast->hir(instructions, state); - - ir_constant *const constant = result->constant_expression_value(); - if (constant != NULL) - result = constant; - - actual_parameters->push_tail(result); - count++; - } - - return count; -} - - -/** - * Generate a source prototype for a function signature - * - * \param return_type Return type of the function. May be \c NULL. - * \param name Name of the function. - * \param parameters List of \c ir_instruction nodes representing the - * parameter list for the function. This may be either a - * formal (\c ir_variable) or actual (\c ir_rvalue) - * parameter list.
Only the type is used. - * - * \return - * A ralloced string representing the prototype of the function. - */ -char * -prototype_string(const glsl_type *return_type, const char *name, - exec_list *parameters) -{ - char *str = NULL; - - if (return_type != NULL) - str = ralloc_asprintf(NULL, "%s ", return_type->name); - - ralloc_asprintf_append(&str, "%s(", name); - - const char *comma = ""; - foreach_in_list(const ir_variable, param, parameters) { - ralloc_asprintf_append(&str, "%s%s", comma, param->type->name); - comma = ", "; - } - - ralloc_strcat(&str, ")"); - return str; -} - -static bool -verify_image_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, - const ir_variable *formal, const ir_variable *actual) -{ - /** - * From the ARB_shader_image_load_store specification: - * - * "The values of image variables qualified with coherent, - * volatile, restrict, readonly, or writeonly may not be passed - * to functions whose formal parameters lack such - * qualifiers. [...] It is legal to have additional qualifiers - * on a formal parameter, but not to have fewer." - */ - if (actual->data.image_coherent && !formal->data.image_coherent) { - _mesa_glsl_error(loc, state, - "function call parameter `%s' drops " - "`coherent' qualifier", formal->name); - return false; - } - - if (actual->data.image_volatile && !formal->data.image_volatile) { - _mesa_glsl_error(loc, state, - "function call parameter `%s' drops " - "`volatile' qualifier", formal->name); - return false; - } - - if (actual->data.image_restrict && !formal->data.image_restrict) { - _mesa_glsl_error(loc, state, - "function call parameter `%s' drops " - "`restrict' qualifier", formal->name); - return false; - } - - if (actual->data.image_read_only && !formal->data.image_read_only) { - _mesa_glsl_error(loc, state, - "function call parameter `%s' drops " - "`readonly' qualifier", formal->name); - return false; - } - - if (actual->data.image_write_only && !formal->data.image_write_only) { - _mesa_glsl_error(loc, state, - "function call parameter `%s' drops " - "`writeonly' qualifier", formal->name); - return false; - } - - return true; -} - -static bool -verify_first_atomic_parameter(YYLTYPE *loc, _mesa_glsl_parse_state *state, - ir_variable *var) -{ - if (!var || - (!var->is_in_shader_storage_block() && - var->data.mode != ir_var_shader_shared)) { - _mesa_glsl_error(loc, state, "First argument to atomic function " - "must be a buffer or shared variable"); - return false; - } - return true; -} - -static bool -is_atomic_function(const char *func_name) -{ - return !strcmp(func_name, "atomicAdd") || - !strcmp(func_name, "atomicMin") || - !strcmp(func_name, "atomicMax") || - !strcmp(func_name, "atomicAnd") || - !strcmp(func_name, "atomicOr") || - !strcmp(func_name, "atomicXor") || - !strcmp(func_name, "atomicExchange") || - !strcmp(func_name, "atomicCompSwap"); -} - -/** - * Verify that 'out' and 'inout' actual parameters are lvalues. Also, verify - * that 'const_in' formal parameters (an extension in our IR) correspond to - * ir_constant actual parameters. - */ -static bool -verify_parameter_modes(_mesa_glsl_parse_state *state, - ir_function_signature *sig, - exec_list &actual_ir_parameters, - exec_list &actual_ast_parameters) -{ - exec_node *actual_ir_node = actual_ir_parameters.head; - exec_node *actual_ast_node = actual_ast_parameters.head; - - foreach_in_list(const ir_variable, formal, &sig->parameters) { - /* The lists must be the same length. 
*/ - assert(!actual_ir_node->is_tail_sentinel()); - assert(!actual_ast_node->is_tail_sentinel()); - - const ir_rvalue *const actual = (ir_rvalue *) actual_ir_node; - const ast_expression *const actual_ast = - exec_node_data(ast_expression, actual_ast_node, link); - - /* FIXME: 'loc' is incorrect (as of 2011-01-21). It is always - * FIXME: 0:0(0). - */ - YYLTYPE loc = actual_ast->get_location(); - - /* Verify that 'const_in' parameters are ir_constants. */ - if (formal->data.mode == ir_var_const_in && - actual->ir_type != ir_type_constant) { - _mesa_glsl_error(&loc, state, - "parameter `in %s' must be a constant expression", - formal->name); - return false; - } - - /* Verify that shader_in parameters are shader inputs */ - if (formal->data.must_be_shader_input) { - ir_variable *var = actual->variable_referenced(); - if (var && var->data.mode != ir_var_shader_in) { - _mesa_glsl_error(&loc, state, - "parameter `%s` must be a shader input", - formal->name); - return false; - } - - if (actual->ir_type == ir_type_swizzle) { - _mesa_glsl_error(&loc, state, - "parameter `%s` must not be swizzled", - formal->name); - return false; - } - } - - /* Verify that 'out' and 'inout' actual parameters are lvalues. */ - if (formal->data.mode == ir_var_function_out - || formal->data.mode == ir_var_function_inout) { - const char *mode = NULL; - switch (formal->data.mode) { - case ir_var_function_out: mode = "out"; break; - case ir_var_function_inout: mode = "inout"; break; - default: assert(false); break; - } - - /* This AST-based check catches errors like f(i++). The IR-based - * is_lvalue() is insufficient because the actual parameter at the - * IR-level is just a temporary value, which is an l-value. - */ - if (actual_ast->non_lvalue_description != NULL) { - _mesa_glsl_error(&loc, state, - "function parameter '%s %s' references a %s", - mode, formal->name, - actual_ast->non_lvalue_description); - return false; - } - - ir_variable *var = actual->variable_referenced(); - if (var) - var->data.assigned = true; - - if (var && var->data.read_only) { - _mesa_glsl_error(&loc, state, - "function parameter '%s %s' references the " - "read-only variable '%s'", - mode, formal->name, - actual->variable_referenced()->name); - return false; - } else if (!actual->is_lvalue()) { - _mesa_glsl_error(&loc, state, - "function parameter '%s %s' is not an lvalue", - mode, formal->name); - return false; - } - } - - if (formal->type->is_image() && - actual->variable_referenced()) { - if (!verify_image_parameter(&loc, state, formal, - actual->variable_referenced())) - return false; - } - - actual_ir_node = actual_ir_node->next; - actual_ast_node = actual_ast_node->next; - } - - /* The first parameter of atomic functions must be a buffer variable */ - const char *func_name = sig->function_name(); - bool is_atomic = is_atomic_function(func_name); - if (is_atomic) { - const ir_rvalue *const actual = (ir_rvalue *) actual_ir_parameters.head; - - const ast_expression *const actual_ast = - exec_node_data(ast_expression, actual_ast_parameters.head, link); - YYLTYPE loc = actual_ast->get_location(); - - if (!verify_first_atomic_parameter(&loc, state, - actual->variable_referenced())) { - return false; - } - } - - return true; -} - -static void -fix_parameter(void *mem_ctx, ir_rvalue *actual, const glsl_type *formal_type, - exec_list *before_instructions, exec_list *after_instructions, - bool parameter_is_inout) -{ - ir_expression *const expr = actual->as_expression(); - - /* If the types match exactly and the parameter is not a 
vector-extract, - * nothing needs to be done to fix the parameter. - */ - if (formal_type == actual->type - && (expr == NULL || expr->operation != ir_binop_vector_extract)) - return; - - /* To convert an out parameter, we need to create a temporary variable to - * hold the value before conversion, and then perform the conversion after - * the function call returns. - * - * This has the effect of transforming code like this: - * - * void f(out int x); - * float value; - * f(value); - * - * Into IR that's equivalent to this: - * - * void f(out int x); - * float value; - * int out_parameter_conversion; - * f(out_parameter_conversion); - * value = float(out_parameter_conversion); - * - * If the parameter is an ir_expression of ir_binop_vector_extract, - * additional conversion is needed in the post-call re-write. - */ - ir_variable *tmp = - new(mem_ctx) ir_variable(formal_type, "inout_tmp", ir_var_temporary); - - before_instructions->push_tail(tmp); - - /* If the parameter is an inout parameter, copy the value of the actual - * parameter to the new temporary. Note that no type conversion is allowed - * here because inout parameters must match types exactly. - */ - if (parameter_is_inout) { - /* Inout parameters should never require conversion, since that would - * require an implicit conversion to exist both to and from the formal - * parameter type, and there are no bidirectional implicit conversions. - */ - assert (actual->type == formal_type); - - ir_dereference_variable *const deref_tmp_1 = - new(mem_ctx) ir_dereference_variable(tmp); - ir_assignment *const assignment = - new(mem_ctx) ir_assignment(deref_tmp_1, actual); - before_instructions->push_tail(assignment); - } - - /* Replace the parameter in the call with a dereference of the new - * temporary. - */ - ir_dereference_variable *const deref_tmp_2 = - new(mem_ctx) ir_dereference_variable(tmp); - actual->replace_with(deref_tmp_2); - - - /* Copy the temporary variable to the actual parameter with optional - * type conversion applied. - */ - ir_rvalue *rhs = new(mem_ctx) ir_dereference_variable(tmp); - if (actual->type != formal_type) - rhs = convert_component(rhs, actual->type); - - ir_rvalue *lhs = actual; - if (expr != NULL && expr->operation == ir_binop_vector_extract) { - lhs = new(mem_ctx) ir_dereference_array(expr->operands[0]->clone(mem_ctx, NULL), - expr->operands[1]->clone(mem_ctx, NULL)); - } - - ir_assignment *const assignment_2 = new(mem_ctx) ir_assignment(lhs, rhs); - after_instructions->push_tail(assignment_2); -} - -/** - * Generate a function call. - * - * For non-void functions, this returns a dereference of the temporary variable - * which stores the return value for the call. For void functions, this returns - * NULL. - */ -static ir_rvalue * -generate_call(exec_list *instructions, ir_function_signature *sig, - exec_list *actual_parameters, - ir_variable *sub_var, - ir_rvalue *array_idx, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - exec_list post_call_conversions; - - /* Perform implicit conversion of arguments. For out parameters, we need - * to place them in a temporary variable and do the conversion after the - * call takes place. Since we haven't emitted the call yet, we'll place - * the post-call conversions in a temporary exec_list, and emit them later. 
- */ - foreach_two_lists(formal_node, &sig->parameters, - actual_node, actual_parameters) { - ir_rvalue *actual = (ir_rvalue *) actual_node; - ir_variable *formal = (ir_variable *) formal_node; - - if (formal->type->is_numeric() || formal->type->is_boolean()) { - switch (formal->data.mode) { - case ir_var_const_in: - case ir_var_function_in: { - ir_rvalue *converted - = convert_component(actual, formal->type); - actual->replace_with(converted); - break; - } - case ir_var_function_out: - case ir_var_function_inout: - fix_parameter(ctx, actual, formal->type, - instructions, &post_call_conversions, - formal->data.mode == ir_var_function_inout); - break; - default: - assert (!"Illegal formal parameter mode"); - break; - } - } - } - - /* Section 4.3.2 (Const) of the GLSL 1.10.59 spec says: - * - * "Initializers for const declarations must be formed from literal - * values, other const variables (not including function call - * paramaters), or expressions of these. - * - * Constructors may be used in such expressions, but function calls may - * not." - * - * Section 4.3.3 (Constant Expressions) of the GLSL 1.20.8 spec says: - * - * "A constant expression is one of - * - * ... - * - * - a built-in function call whose arguments are all constant - * expressions, with the exception of the texture lookup - * functions, the noise functions, and ftransform. The built-in - * functions dFdx, dFdy, and fwidth must return 0 when evaluated - * inside an initializer with an argument that is a constant - * expression." - * - * Section 5.10 (Constant Expressions) of the GLSL ES 1.00.17 spec says: - * - * "A constant expression is one of - * - * ... - * - * - a built-in function call whose arguments are all constant - * expressions, with the exception of the texture lookup - * functions." - * - * Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec says: - * - * "A constant expression is one of - * - * ... - * - * - a built-in function call whose arguments are all constant - * expressions, with the exception of the texture lookup - * functions. The built-in functions dFdx, dFdy, and fwidth must - * return 0 when evaluated inside an initializer with an argument - * that is a constant expression." - * - * If the function call is a constant expression, don't generate any - * instructions; just generate an ir_constant. - */ - if (state->is_version(120, 100)) { - ir_constant *value = sig->constant_expression_value(actual_parameters, NULL); - if (value != NULL) { - return value; - } - } - - ir_dereference_variable *deref = NULL; - if (!sig->return_type->is_void()) { - /* Create a new temporary to hold the return value. */ - char *const name = ir_variable::temporaries_allocate_names - ? ralloc_asprintf(ctx, "%s_retval", sig->function_name()) - : NULL; - - ir_variable *var; - - var = new(ctx) ir_variable(sig->return_type, name, ir_var_temporary); - instructions->push_tail(var); - - ralloc_free(name); - - deref = new(ctx) ir_dereference_variable(var); - } - - ir_call *call = new(ctx) ir_call(sig, deref, actual_parameters, sub_var, array_idx); - instructions->push_tail(call); - - /* Also emit any necessary out-parameter conversions. */ - instructions->append_list(&post_call_conversions); - - return deref ? deref->clone(ctx, NULL) : NULL; -} - -/** - * Given a function name and parameter list, find the matching signature. 
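- * - * Hypothetical illustration (not from the original source): after - * - *    float foo(float x); - *    struct foo { float f; }; - * - * the type name hides the function, so a call foo(1.0) is a - * constructor and the lookup below must report no match.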
- */ -static ir_function_signature * -match_function_by_name(const char *name, - exec_list *actual_parameters, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - ir_function *f = state->symbols->get_function(name); - ir_function_signature *local_sig = NULL; - ir_function_signature *sig = NULL; - - /* Is the function hidden by a record type constructor? */ - if (state->symbols->get_type(name)) - goto done; /* no match */ - - /* Is the function hidden by a variable (impossible in 1.10)? */ - if (!state->symbols->separate_function_namespace - && state->symbols->get_variable(name)) - goto done; /* no match */ - - if (f != NULL) { - /* In desktop GL, the presence of a user-defined signature hides any - * built-in signatures, so we must ignore them. In contrast, in ES2 - * user-defined signatures add new overloads, so we must consider them. - */ - bool allow_builtins = state->es_shader || !f->has_user_signature(); - - /* Look for a match in the local shader. If exact, we're done. */ - bool is_exact = false; - sig = local_sig = f->matching_signature(state, actual_parameters, - allow_builtins, &is_exact); - if (is_exact) - goto done; - - if (!allow_builtins) - goto done; - } - - /* Local shader has no exact candidates; check the built-ins. */ - _mesa_glsl_initialize_builtin_functions(); - sig = _mesa_glsl_find_builtin_function(state, name, actual_parameters); - -done: - if (sig != NULL) { - /* If the match is from a linked built-in shader, import the prototype. */ - if (sig != local_sig) { - if (f == NULL) { - f = new(ctx) ir_function(name); - state->symbols->add_global_function(f); - emit_function(state, f); - } - f->add_signature(sig->clone_prototype(f, NULL)); - } - } - return sig; -} - -static ir_function_signature * -match_subroutine_by_name(const char *name, - exec_list *actual_parameters, - struct _mesa_glsl_parse_state *state, - ir_variable **var_r) -{ - void *ctx = state; - ir_function_signature *sig = NULL; - ir_function *f, *found = NULL; - const char *new_name; - ir_variable *var; - bool is_exact = false; - - new_name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), name); - var = state->symbols->get_variable(new_name); - if (!var) - return NULL; - - for (int i = 0; i < state->num_subroutine_types; i++) { - f = state->subroutine_types[i]; - if (strcmp(f->name, var->type->without_array()->name)) - continue; - found = f; - break; - } - - if (!found) - return NULL; - *var_r = var; - sig = found->matching_signature(state, actual_parameters, - false, &is_exact); - return sig; -} - -static ir_rvalue * -generate_array_index(void *mem_ctx, exec_list *instructions, - struct _mesa_glsl_parse_state *state, YYLTYPE loc, - const ast_expression *array, ast_expression *idx, - const char **function_name, exec_list *actual_parameters) -{ - if (array->oper == ast_array_index) { - /* This handles arrays of arrays */ - ir_rvalue *outer_array = generate_array_index(mem_ctx, instructions, - state, loc, - array->subexpressions[0], - array->subexpressions[1], - function_name, actual_parameters); - ir_rvalue *outer_array_idx = idx->hir(instructions, state); - - YYLTYPE index_loc = idx->get_location(); - return _mesa_ast_array_index_to_hir(mem_ctx, state, outer_array, - outer_array_idx, loc, - index_loc); - } else { - ir_variable *sub_var = NULL; - *function_name = array->primary_expression.identifier; - - match_subroutine_by_name(*function_name, actual_parameters, - state, &sub_var); - - ir_rvalue *outer_array_idx = idx->hir(instructions, state); - return 
new(mem_ctx) ir_dereference_array(sub_var, outer_array_idx); - } -} - -static void -print_function_prototypes(_mesa_glsl_parse_state *state, YYLTYPE *loc, - ir_function *f) -{ - if (f == NULL) - return; - - foreach_in_list(ir_function_signature, sig, &f->signatures) { - if (sig->is_builtin() && !sig->is_builtin_available(state)) - continue; - - char *str = prototype_string(sig->return_type, f->name, &sig->parameters); - _mesa_glsl_error(loc, state, " %s", str); - ralloc_free(str); - } -} - -/** - * Raise a "no matching function" error, listing all possible overloads the - * compiler considered so developers can figure out what went wrong. - */ -static void -no_matching_function_error(const char *name, - YYLTYPE *loc, - exec_list *actual_parameters, - _mesa_glsl_parse_state *state) -{ - gl_shader *sh = _mesa_glsl_get_builtin_function_shader(); - - if (state->symbols->get_function(name) == NULL - && (!state->uses_builtin_functions - || sh->symbols->get_function(name) == NULL)) { - _mesa_glsl_error(loc, state, "no function with name '%s'", name); - } else { - char *str = prototype_string(NULL, name, actual_parameters); - _mesa_glsl_error(loc, state, - "no matching function for call to `%s'; candidates are:", - str); - ralloc_free(str); - - print_function_prototypes(state, loc, state->symbols->get_function(name)); - - if (state->uses_builtin_functions) { - print_function_prototypes(state, loc, sh->symbols->get_function(name)); - } - } -} - -/** - * Perform automatic type conversion of constructor parameters - * - * This implements the rules in the "Conversion and Scalar Constructors" - * section (GLSL 1.10 section 5.4.1), not the "Implicit Conversions" rules. - */ -static ir_rvalue * -convert_component(ir_rvalue *src, const glsl_type *desired_type) -{ - void *ctx = ralloc_parent(src); - const unsigned a = desired_type->base_type; - const unsigned b = src->type->base_type; - ir_expression *result = NULL; - - if (src->type->is_error()) - return src; - - assert(a <= GLSL_TYPE_BOOL); - assert(b <= GLSL_TYPE_BOOL); - - if (a == b) - return src; - - switch (a) { - case GLSL_TYPE_UINT: - switch (b) { - case GLSL_TYPE_INT: - result = new(ctx) ir_expression(ir_unop_i2u, src); - break; - case GLSL_TYPE_FLOAT: - result = new(ctx) ir_expression(ir_unop_f2u, src); - break; - case GLSL_TYPE_BOOL: - result = new(ctx) ir_expression(ir_unop_i2u, - new(ctx) ir_expression(ir_unop_b2i, src)); - break; - case GLSL_TYPE_DOUBLE: - result = new(ctx) ir_expression(ir_unop_d2u, src); - break; - } - break; - case GLSL_TYPE_INT: - switch (b) { - case GLSL_TYPE_UINT: - result = new(ctx) ir_expression(ir_unop_u2i, src); - break; - case GLSL_TYPE_FLOAT: - result = new(ctx) ir_expression(ir_unop_f2i, src); - break; - case GLSL_TYPE_BOOL: - result = new(ctx) ir_expression(ir_unop_b2i, src); - break; - case GLSL_TYPE_DOUBLE: - result = new(ctx) ir_expression(ir_unop_d2i, src); - break; - } - break; - case GLSL_TYPE_FLOAT: - switch (b) { - case GLSL_TYPE_UINT: - result = new(ctx) ir_expression(ir_unop_u2f, desired_type, src, NULL); - break; - case GLSL_TYPE_INT: - result = new(ctx) ir_expression(ir_unop_i2f, desired_type, src, NULL); - break; - case GLSL_TYPE_BOOL: - result = new(ctx) ir_expression(ir_unop_b2f, desired_type, src, NULL); - break; - case GLSL_TYPE_DOUBLE: - result = new(ctx) ir_expression(ir_unop_d2f, desired_type, src, NULL); - break; - } - break; - case GLSL_TYPE_BOOL: - switch (b) { - case GLSL_TYPE_UINT: - result = new(ctx) ir_expression(ir_unop_i2b, - new(ctx) ir_expression(ir_unop_u2i, src)); - break; - 
case GLSL_TYPE_INT: - result = new(ctx) ir_expression(ir_unop_i2b, desired_type, src, NULL); - break; - case GLSL_TYPE_FLOAT: - result = new(ctx) ir_expression(ir_unop_f2b, desired_type, src, NULL); - break; - case GLSL_TYPE_DOUBLE: - result = new(ctx) ir_expression(ir_unop_d2b, desired_type, src, NULL); - break; - } - break; - case GLSL_TYPE_DOUBLE: - switch (b) { - case GLSL_TYPE_INT: - result = new(ctx) ir_expression(ir_unop_i2d, src); - break; - case GLSL_TYPE_UINT: - result = new(ctx) ir_expression(ir_unop_u2d, src); - break; - case GLSL_TYPE_BOOL: - result = new(ctx) ir_expression(ir_unop_f2d, - new(ctx) ir_expression(ir_unop_b2f, src)); - break; - case GLSL_TYPE_FLOAT: - result = new(ctx) ir_expression(ir_unop_f2d, desired_type, src, NULL); - break; - } - } - - assert(result != NULL); - assert(result->type == desired_type); - - /* Try constant folding; it may fold in the conversion we just added. */ - ir_constant *const constant = result->constant_expression_value(); - return (constant != NULL) ? (ir_rvalue *) constant : (ir_rvalue *) result; -} - -/** - * Dereference a specific component from a scalar, vector, or matrix - */ -static ir_rvalue * -dereference_component(ir_rvalue *src, unsigned component) -{ - void *ctx = ralloc_parent(src); - assert(component < src->type->components()); - - /* If the source is a constant, just create a new constant instead of a - * dereference of the existing constant. - */ - ir_constant *constant = src->as_constant(); - if (constant) - return new(ctx) ir_constant(constant, component); - - if (src->type->is_scalar()) { - return src; - } else if (src->type->is_vector()) { - return new(ctx) ir_swizzle(src, component, 0, 0, 0, 1); - } else { - assert(src->type->is_matrix()); - - /* Dereference a row of the matrix, then call this function again to get - * a specific element from that row. - */ - const int c = component / src->type->column_type()->vector_elements; - const int r = component % src->type->column_type()->vector_elements; - ir_constant *const col_index = new(ctx) ir_constant(c); - ir_dereference *const col = new(ctx) ir_dereference_array(src, col_index); - - col->type = src->type->column_type(); - - return dereference_component(col, r); - } - - assert(!"Should not get here."); - return NULL; -} - - -static ir_rvalue * -process_vec_mat_constructor(exec_list *instructions, - const glsl_type *constructor_type, - YYLTYPE *loc, exec_list *parameters, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - - /* The ARB_shading_language_420pack spec says: - * - * "If an initializer is a list of initializers enclosed in curly braces, - * the variable being declared must be a vector, a matrix, an array, or a - * structure. - * - * int i = { 1 }; // illegal, i is not an aggregate" - */ - if (constructor_type->vector_elements <= 1) { - _mesa_glsl_error(loc, state, "aggregates can only initialize vectors, " - "matrices, arrays, and structs"); - return ir_rvalue::error_value(ctx); - } - - exec_list actual_parameters; - const unsigned parameter_count = - process_parameters(instructions, &actual_parameters, parameters, state); - - if (parameter_count == 0 - || (constructor_type->is_vector() && - constructor_type->vector_elements != parameter_count) - || (constructor_type->is_matrix() && - constructor_type->matrix_columns != parameter_count)) { - _mesa_glsl_error(loc, state, "%s constructor must have %u parameters", - constructor_type->is_vector() ? 
"vector" : "matrix", - constructor_type->vector_elements); - return ir_rvalue::error_value(ctx); - } - - bool all_parameters_are_constant = true; - - /* Type cast each parameter and, if possible, fold constants. */ - foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) { - ir_rvalue *result = ir; - - /* Apply implicit conversions (not the scalar constructor rules!). See - * the spec quote above. */ - if (constructor_type->base_type != result->type->base_type) { - const glsl_type *desired_type = - glsl_type::get_instance(constructor_type->base_type, - ir->type->vector_elements, - ir->type->matrix_columns); - if (result->type->can_implicitly_convert_to(desired_type, state)) { - /* Even though convert_component() implements the constructor - * conversion rules (not the implicit conversion rules), its safe - * to use it here because we already checked that the implicit - * conversion is legal. - */ - result = convert_component(ir, desired_type); - } - } - - if (constructor_type->is_matrix()) { - if (result->type != constructor_type->column_type()) { - _mesa_glsl_error(loc, state, "type error in matrix constructor: " - "expected: %s, found %s", - constructor_type->column_type()->name, - result->type->name); - return ir_rvalue::error_value(ctx); - } - } else if (result->type != constructor_type->get_scalar_type()) { - _mesa_glsl_error(loc, state, "type error in vector constructor: " - "expected: %s, found %s", - constructor_type->get_scalar_type()->name, - result->type->name); - return ir_rvalue::error_value(ctx); - } - - /* Attempt to convert the parameter to a constant valued expression. - * After doing so, track whether or not all the parameters to the - * constructor are trivially constant valued expressions. - */ - ir_rvalue *const constant = result->constant_expression_value(); - - if (constant != NULL) - result = constant; - else - all_parameters_are_constant = false; - - ir->replace_with(result); - } - - if (all_parameters_are_constant) - return new(ctx) ir_constant(constructor_type, &actual_parameters); - - ir_variable *var = new(ctx) ir_variable(constructor_type, "vec_mat_ctor", - ir_var_temporary); - instructions->push_tail(var); - - int i = 0; - - foreach_in_list(ir_rvalue, rhs, &actual_parameters) { - ir_instruction *assignment = NULL; - - if (var->type->is_matrix()) { - ir_rvalue *lhs = new(ctx) ir_dereference_array(var, - new(ctx) ir_constant(i)); - assignment = new(ctx) ir_assignment(lhs, rhs, NULL); - } else { - /* use writemask rather than index for vector */ - assert(var->type->is_vector()); - assert(i < 4); - ir_dereference *lhs = new(ctx) ir_dereference_variable(var); - assignment = new(ctx) ir_assignment(lhs, rhs, NULL, (unsigned)(1 << i)); - } - - instructions->push_tail(assignment); - - i++; - } - - return new(ctx) ir_dereference_variable(var); -} - - -static ir_rvalue * -process_array_constructor(exec_list *instructions, - const glsl_type *constructor_type, - YYLTYPE *loc, exec_list *parameters, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - /* Array constructors come in two forms: sized and unsized. Sized array - * constructors look like 'vec4[2](a, b)', where 'a' and 'b' are vec4 - * variables. In this case the number of parameters must exactly match the - * specified size of the array. - * - * Unsized array constructors look like 'vec4[](a, b)', where 'a' and 'b' - * are vec4 variables. In this case the size of the array being constructed - * is determined by the number of parameters. 
- * - * From page 52 (page 58 of the PDF) of the GLSL 1.50 spec: - * - * "There must be exactly the same number of arguments as the size of - * the array being constructed. If no size is present in the - * constructor, then the array is explicitly sized to the number of - * arguments provided. The arguments are assigned in order, starting at - * element 0, to the elements of the constructed array. Each argument - * must be the same type as the element type of the array, or be a type - * that can be converted to the element type of the array according to - * Section 4.1.10 "Implicit Conversions."" - */ - exec_list actual_parameters; - const unsigned parameter_count = - process_parameters(instructions, &actual_parameters, parameters, state); - bool is_unsized_array = constructor_type->is_unsized_array(); - - if ((parameter_count == 0) || - (!is_unsized_array && (constructor_type->length != parameter_count))) { - const unsigned min_param = is_unsized_array - ? 1 : constructor_type->length; - - _mesa_glsl_error(loc, state, "array constructor must have %s %u " - "parameter%s", - is_unsized_array ? "at least" : "exactly", - min_param, (min_param <= 1) ? "" : "s"); - return ir_rvalue::error_value(ctx); - } - - if (is_unsized_array) { - constructor_type = - glsl_type::get_array_instance(constructor_type->fields.array, - parameter_count); - assert(constructor_type != NULL); - assert(constructor_type->length == parameter_count); - } - - bool all_parameters_are_constant = true; - const glsl_type *element_type = constructor_type->fields.array; - - /* Type cast each parameter and, if possible, fold constants. */ - foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) { - ir_rvalue *result = ir; - - const glsl_base_type element_base_type = - constructor_type->fields.array->base_type; - - /* Apply implicit conversions (not the scalar constructor rules!). See - * the spec quote above. */ - if (element_base_type != result->type->base_type) { - const glsl_type *desired_type = - glsl_type::get_instance(element_base_type, - ir->type->vector_elements, - ir->type->matrix_columns); - - if (result->type->can_implicitly_convert_to(desired_type, state)) { - /* Even though convert_component() implements the constructor - * conversion rules (not the implicit conversion rules), its safe - * to use it here because we already checked that the implicit - * conversion is legal. - */ - result = convert_component(ir, desired_type); - } - } - - if (constructor_type->fields.array->is_unsized_array()) { - /* As the inner parameters of the constructor are created without - * knowledge of each other we need to check to make sure unsized - * parameters of unsized constructors all end up with the same size. 
- * - * e.g. we make sure to fail for a constructor like this: - * vec4[][] a = vec4[][](vec4[](vec4(0.0), vec4(1.0)), - * vec4[](vec4(0.0), vec4(1.0), vec4(1.0)), - * vec4[](vec4(0.0), vec4(1.0))); - */ - if (element_type->is_unsized_array()) { - /* This is the first parameter so just get the type */ - element_type = result->type; - } else if (element_type != result->type) { - _mesa_glsl_error(loc, state, "type error in array constructor: " - "expected: %s, found %s", - element_type->name, - result->type->name); - return ir_rvalue::error_value(ctx); - } - } else if (result->type != constructor_type->fields.array) { - _mesa_glsl_error(loc, state, "type error in array constructor: " - "expected: %s, found %s", - constructor_type->fields.array->name, - result->type->name); - return ir_rvalue::error_value(ctx); - } else { - element_type = result->type; - } - - /* Attempt to convert the parameter to a constant valued expression. - * After doing so, track whether or not all the parameters to the - * constructor are trivially constant valued expressions. - */ - ir_rvalue *const constant = result->constant_expression_value(); - - if (constant != NULL) - result = constant; - else - all_parameters_are_constant = false; - - ir->replace_with(result); - } - - if (constructor_type->fields.array->is_unsized_array()) { - constructor_type = - glsl_type::get_array_instance(element_type, - parameter_count); - assert(constructor_type != NULL); - assert(constructor_type->length == parameter_count); - } - - if (all_parameters_are_constant) - return new(ctx) ir_constant(constructor_type, &actual_parameters); - - ir_variable *var = new(ctx) ir_variable(constructor_type, "array_ctor", - ir_var_temporary); - instructions->push_tail(var); - - int i = 0; - foreach_in_list(ir_rvalue, rhs, &actual_parameters) { - ir_rvalue *lhs = new(ctx) ir_dereference_array(var, - new(ctx) ir_constant(i)); - - ir_instruction *assignment = new(ctx) ir_assignment(lhs, rhs, NULL); - instructions->push_tail(assignment); - - i++; - } - - return new(ctx) ir_dereference_variable(var); -} - - -/** - * Try to convert a record constructor to a constant expression - */ -static ir_constant * -constant_record_constructor(const glsl_type *constructor_type, - exec_list *parameters, void *mem_ctx) -{ - foreach_in_list(ir_instruction, node, parameters) { - ir_constant *constant = node->as_constant(); - if (constant == NULL) - return NULL; - node->replace_with(constant); - } - - return new(mem_ctx) ir_constant(constructor_type, parameters); -} - - -/** - * Determine if a list consists of a single scalar r-value - */ -bool -single_scalar_parameter(exec_list *parameters) -{ - const ir_rvalue *const p = (ir_rvalue *) parameters->head; - assert(((ir_rvalue *)p)->as_rvalue() != NULL); - - return (p->type->is_scalar() && p->next->is_tail_sentinel()); -} - - -/** - * Generate inline code for a vector constructor - * - * The generated constructor code will consist of a temporary variable - * declaration of the same type as the constructor. A sequence of assignments - * from constructor parameters to the temporary will follow. - * - * \return - * An \c ir_dereference_variable of the temporary generated in the constructor - * body.
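- * - * Rough sketch of the generated IR (an assumed example, not in the - * original source) for vec4(a, b) with non-constant vec2 a and b: - * - *    (declare (temporary) vec4 vec_ctor) - *    (assign (xy) (var_ref vec_ctor) (swiz xy (var_ref a))) - *    (assign (zw) (var_ref vec_ctor) (swiz xy (var_ref b)))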
- */ -ir_rvalue * -emit_inline_vector_constructor(const glsl_type *type, - exec_list *instructions, - exec_list *parameters, - void *ctx) -{ - assert(!parameters->is_empty()); - - ir_variable *var = new(ctx) ir_variable(type, "vec_ctor", ir_var_temporary); - instructions->push_tail(var); - - /* There are three kinds of vector constructors. - * - * - Construct a vector from a single scalar by replicating that scalar to - * all components of the vector. - * - * - Construct a vector from at least a matrix. This case should already - * have been taken care of in ast_function_expression::hir by breaking - * down the matrix into a series of column vectors. - * - * - Construct a vector from an arbirary combination of vectors and - * scalars. The components of the constructor parameters are assigned - * to the vector in order until the vector is full. - */ - const unsigned lhs_components = type->components(); - if (single_scalar_parameter(parameters)) { - ir_rvalue *first_param = (ir_rvalue *)parameters->head; - ir_rvalue *rhs = new(ctx) ir_swizzle(first_param, 0, 0, 0, 0, - lhs_components); - ir_dereference_variable *lhs = new(ctx) ir_dereference_variable(var); - const unsigned mask = (1U << lhs_components) - 1; - - assert(rhs->type == lhs->type); - - ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL, mask); - instructions->push_tail(inst); - } else { - unsigned base_component = 0; - unsigned base_lhs_component = 0; - ir_constant_data data; - unsigned constant_mask = 0, constant_components = 0; - - memset(&data, 0, sizeof(data)); - - foreach_in_list(ir_rvalue, param, parameters) { - unsigned rhs_components = param->type->components(); - - /* Do not try to assign more components to the vector than it has! - */ - if ((rhs_components + base_lhs_component) > lhs_components) { - rhs_components = lhs_components - base_lhs_component; - } - - const ir_constant *const c = param->as_constant(); - if (c != NULL) { - for (unsigned i = 0; i < rhs_components; i++) { - switch (c->type->base_type) { - case GLSL_TYPE_UINT: - data.u[i + base_component] = c->get_uint_component(i); - break; - case GLSL_TYPE_INT: - data.i[i + base_component] = c->get_int_component(i); - break; - case GLSL_TYPE_FLOAT: - data.f[i + base_component] = c->get_float_component(i); - break; - case GLSL_TYPE_DOUBLE: - data.d[i + base_component] = c->get_double_component(i); - break; - case GLSL_TYPE_BOOL: - data.b[i + base_component] = c->get_bool_component(i); - break; - default: - assert(!"Should not get here."); - break; - } - } - - /* Mask of fields to be written in the assignment. - */ - constant_mask |= ((1U << rhs_components) - 1) << base_lhs_component; - constant_components += rhs_components; - - base_component += rhs_components; - } - /* Advance the component index by the number of components - * that were just assigned. - */ - base_lhs_component += rhs_components; - } - - if (constant_mask != 0) { - ir_dereference *lhs = new(ctx) ir_dereference_variable(var); - const glsl_type *rhs_type = glsl_type::get_instance(var->type->base_type, - constant_components, - 1); - ir_rvalue *rhs = new(ctx) ir_constant(rhs_type, &data); - - ir_instruction *inst = - new(ctx) ir_assignment(lhs, rhs, NULL, constant_mask); - instructions->push_tail(inst); - } - - base_component = 0; - foreach_in_list(ir_rvalue, param, parameters) { - unsigned rhs_components = param->type->components(); - - /* Do not try to assign more components to the vector than it has! 
- */ - if ((rhs_components + base_component) > lhs_components) { - rhs_components = lhs_components - base_component; - } - - /* If we do not have any components left to copy, break out of the - * loop. This can happen when initializing a vec4 with a mat3 as the - * mat3 would have been broken into a series of column vectors. - */ - if (rhs_components == 0) { - break; - } - - const ir_constant *const c = param->as_constant(); - if (c == NULL) { - /* Mask of fields to be written in the assignment. - */ - const unsigned write_mask = ((1U << rhs_components) - 1) - << base_component; - - ir_dereference *lhs = new(ctx) ir_dereference_variable(var); - - /* Generate a swizzle so that LHS and RHS sizes match. - */ - ir_rvalue *rhs = - new(ctx) ir_swizzle(param, 0, 1, 2, 3, rhs_components); - - ir_instruction *inst = - new(ctx) ir_assignment(lhs, rhs, NULL, write_mask); - instructions->push_tail(inst); - } - - /* Advance the component index by the number of components that were - * just assigned. - */ - base_component += rhs_components; - } - } - return new(ctx) ir_dereference_variable(var); -} - - -/** - * Generate assignment of a portion of a vector to a portion of a matrix column - * - * \param src_base First component of the source to be used in assignment - * \param column Column of destination to be assigned - * \param row_base First component of the destination column to be assigned - * \param count Number of components to be assigned - * - * \note - * \c src_base + \c count must be less than or equal to the number of components - * in the source vector. - */ -ir_instruction * -assign_to_matrix_column(ir_variable *var, unsigned column, unsigned row_base, - ir_rvalue *src, unsigned src_base, unsigned count, - void *mem_ctx) -{ - ir_constant *col_idx = new(mem_ctx) ir_constant(column); - ir_dereference *column_ref = new(mem_ctx) ir_dereference_array(var, col_idx); - - assert(column_ref->type->components() >= (row_base + count)); - assert(src->type->components() >= (src_base + count)); - - /* Generate a swizzle that extracts the number of components from the source - * that are to be assigned to the column of the matrix. - */ - if (count < src->type->vector_elements) { - src = new(mem_ctx) ir_swizzle(src, - src_base + 0, src_base + 1, - src_base + 2, src_base + 3, - count); - } - - /* Mask of fields to be written in the assignment. - */ - const unsigned write_mask = ((1U << count) - 1) << row_base; - - return new(mem_ctx) ir_assignment(column_ref, src, NULL, write_mask); -} - - -/** - * Generate inline code for a matrix constructor - * - * The generated constructor code will consist of a temporary variable - * declaration of the same type as the constructor. A sequence of assignments - * from constructor parameters to the temporary will follow. - * - * \return - * An \c ir_dereference_variable of the temporary generated in the constructor - * body. - */ -ir_rvalue * -emit_inline_matrix_constructor(const glsl_type *type, - exec_list *instructions, - exec_list *parameters, - void *ctx) -{ - assert(!parameters->is_empty()); - - ir_variable *var = new(ctx) ir_variable(type, "mat_ctor", ir_var_temporary); - instructions->push_tail(var); - - /* There are three kinds of matrix constructors. - * - * - Construct a matrix from a single scalar by replicating that scalar - * along the diagonal of the matrix and setting all other components to - * zero. - * - * - Construct a matrix from an arbitrary combination of vectors and - * scalars.
The components of the constructor parameters are assigned - * to the matrix in column-major order until the matrix is full. - * - * - Construct a matrix from a single matrix. The source matrix is copied - * to the upper left portion of the constructed matrix, and the remaining - * elements take values from the identity matrix. - */ - ir_rvalue *const first_param = (ir_rvalue *) parameters->head; - if (single_scalar_parameter(parameters)) { - /* Assign the scalar to the X component of a vec4, and fill the remaining - * components with zero. - */ - glsl_base_type param_base_type = first_param->type->base_type; - assert(param_base_type == GLSL_TYPE_FLOAT || - param_base_type == GLSL_TYPE_DOUBLE); - ir_variable *rhs_var = - new(ctx) ir_variable(glsl_type::get_instance(param_base_type, 4, 1), - "mat_ctor_vec", - ir_var_temporary); - instructions->push_tail(rhs_var); - - ir_constant_data zero; - for (unsigned i = 0; i < 4; i++) - if (param_base_type == GLSL_TYPE_FLOAT) - zero.f[i] = 0.0; - else - zero.d[i] = 0.0; - - ir_instruction *inst = - new(ctx) ir_assignment(new(ctx) ir_dereference_variable(rhs_var), - new(ctx) ir_constant(rhs_var->type, &zero), - NULL); - instructions->push_tail(inst); - - ir_dereference *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); - - inst = new(ctx) ir_assignment(rhs_ref, first_param, NULL, 0x01); - instructions->push_tail(inst); - - /* Assign the temporary vector to each column of the destination matrix - * with a swizzle that puts the X component on the diagonal of the - * matrix. In some cases this may mean that the X component does not - * get assigned into the column at all (i.e., when the matrix has more - * columns than rows). - */ - static const unsigned rhs_swiz[4][4] = { - { 0, 1, 1, 1 }, - { 1, 0, 1, 1 }, - { 1, 1, 0, 1 }, - { 1, 1, 1, 0 } - }; - - const unsigned cols_to_init = MIN2(type->matrix_columns, - type->vector_elements); - for (unsigned i = 0; i < cols_to_init; i++) { - ir_constant *const col_idx = new(ctx) ir_constant(i); - ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx); - - ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); - ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, rhs_swiz[i], - type->vector_elements); - - inst = new(ctx) ir_assignment(col_ref, rhs, NULL); - instructions->push_tail(inst); - } - - for (unsigned i = cols_to_init; i < type->matrix_columns; i++) { - ir_constant *const col_idx = new(ctx) ir_constant(i); - ir_rvalue *const col_ref = new(ctx) ir_dereference_array(var, col_idx); - - ir_rvalue *const rhs_ref = new(ctx) ir_dereference_variable(rhs_var); - ir_rvalue *const rhs = new(ctx) ir_swizzle(rhs_ref, 1, 1, 1, 1, - type->vector_elements); - - inst = new(ctx) ir_assignment(col_ref, rhs, NULL); - instructions->push_tail(inst); - } - } else if (first_param->type->is_matrix()) { - /* From page 50 (56 of the PDF) of the GLSL 1.50 spec: - * - * "If a matrix is constructed from a matrix, then each component - * (column i, row j) in the result that has a corresponding - * component (column i, row j) in the argument will be initialized - * from there. All other components will be initialized to the - * identity matrix. If a matrix argument is given to a matrix - * constructor, it is an error to have any other arguments." - */ - assert(first_param->next->is_tail_sentinel()); - ir_rvalue *const src_matrix = first_param; - - /* If the source matrix is smaller, pre-initialize the relevant parts of - * the destination matrix to the identity matrix.
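- * - * For example (an illustration, not in the original source), for - * mat4(m) with mat2 m, every column is first written as an identity - * column, and the two columns of m then overwrite the upper-left - * 2x2 block below.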
- */ - if ((src_matrix->type->matrix_columns < var->type->matrix_columns) - || (src_matrix->type->vector_elements < var->type->vector_elements)) { - - /* If the source matrix has fewer rows, every column of the destination - * must be initialized. Otherwise only the columns in the destination - * that do not exist in the source must be initialized. - */ - unsigned col = - (src_matrix->type->vector_elements < var->type->vector_elements) - ? 0 : src_matrix->type->matrix_columns; - - const glsl_type *const col_type = var->type->column_type(); - for (/* empty */; col < var->type->matrix_columns; col++) { - ir_constant_data ident; - - ident.f[0] = 0.0; - ident.f[1] = 0.0; - ident.f[2] = 0.0; - ident.f[3] = 0.0; - - ident.f[col] = 1.0; - - ir_rvalue *const rhs = new(ctx) ir_constant(col_type, &ident); - - ir_rvalue *const lhs = - new(ctx) ir_dereference_array(var, new(ctx) ir_constant(col)); - - ir_instruction *inst = new(ctx) ir_assignment(lhs, rhs, NULL); - instructions->push_tail(inst); - } - } - - /* Assign columns from the source matrix to the destination matrix. - * - * Since the parameter will be used in the RHS of multiple assignments, - * generate a temporary and copy the parameter there. - */ - ir_variable *const rhs_var = - new(ctx) ir_variable(first_param->type, "mat_ctor_mat", - ir_var_temporary); - instructions->push_tail(rhs_var); - - ir_dereference *const rhs_var_ref = - new(ctx) ir_dereference_variable(rhs_var); - ir_instruction *const inst = - new(ctx) ir_assignment(rhs_var_ref, first_param, NULL); - instructions->push_tail(inst); - - const unsigned last_row = MIN2(src_matrix->type->vector_elements, - var->type->vector_elements); - const unsigned last_col = MIN2(src_matrix->type->matrix_columns, - var->type->matrix_columns); - - unsigned swiz[4] = { 0, 0, 0, 0 }; - for (unsigned i = 1; i < last_row; i++) - swiz[i] = i; - - const unsigned write_mask = (1U << last_row) - 1; - - for (unsigned i = 0; i < last_col; i++) { - ir_dereference *const lhs = - new(ctx) ir_dereference_array(var, new(ctx) ir_constant(i)); - ir_rvalue *const rhs_col = - new(ctx) ir_dereference_array(rhs_var, new(ctx) ir_constant(i)); - - /* If one matrix has columns that are smaller than the columns of the - * other matrix, wrap the column access of the larger with a swizzle - * so that the LHS and RHS of the assignment have the same size (and - * therefore have the same type). - * - * It would be perfectly valid to unconditionally generate the - * swizzles, but this will typically result in a more compact IR tree. - */ - ir_rvalue *rhs; - if (lhs->type->vector_elements != rhs_col->type->vector_elements) { - rhs = new(ctx) ir_swizzle(rhs_col, swiz, last_row); - } else { - rhs = rhs_col; - } - - ir_instruction *inst = - new(ctx) ir_assignment(lhs, rhs, NULL, write_mask); - instructions->push_tail(inst); - } - } else { - const unsigned cols = type->matrix_columns; - const unsigned rows = type->vector_elements; - unsigned remaining_slots = rows * cols; - unsigned col_idx = 0; - unsigned row_idx = 0; - - foreach_in_list(ir_rvalue, rhs, parameters) { - unsigned rhs_components = rhs->type->components(); - unsigned rhs_base = 0; - - if (remaining_slots == 0) - break; - - /* Since the parameter might be used in the RHS of two assignments, - * generate a temporary and copy the parameter there.
- */ - ir_variable *rhs_var = - new(ctx) ir_variable(rhs->type, "mat_ctor_vec", ir_var_temporary); - instructions->push_tail(rhs_var); - - ir_dereference *rhs_var_ref = - new(ctx) ir_dereference_variable(rhs_var); - ir_instruction *inst = new(ctx) ir_assignment(rhs_var_ref, rhs, NULL); - instructions->push_tail(inst); - - do { - /* Assign the current parameter to as many components of the matrix - * as it will fill. - * - * NOTE: A single vector parameter can span two matrix columns. A - * single vec4, for example, can completely fill a mat2. - */ - unsigned count = MIN2(rows - row_idx, - rhs_components - rhs_base); - - rhs_var_ref = new(ctx) ir_dereference_variable(rhs_var); - ir_instruction *inst = assign_to_matrix_column(var, col_idx, - row_idx, - rhs_var_ref, - rhs_base, - count, ctx); - instructions->push_tail(inst); - rhs_base += count; - row_idx += count; - remaining_slots -= count; - - /* Sometimes, there is still data left in the parameters and - * components left to be set in the destination but in other - * column. - */ - if (row_idx >= rows) { - row_idx = 0; - col_idx++; - } - } while(remaining_slots > 0 && rhs_base < rhs_components); - } - } - - return new(ctx) ir_dereference_variable(var); -} - - -ir_rvalue * -emit_inline_record_constructor(const glsl_type *type, - exec_list *instructions, - exec_list *parameters, - void *mem_ctx) -{ - ir_variable *const var = - new(mem_ctx) ir_variable(type, "record_ctor", ir_var_temporary); - ir_dereference_variable *const d = new(mem_ctx) ir_dereference_variable(var); - - instructions->push_tail(var); - - exec_node *node = parameters->head; - for (unsigned i = 0; i < type->length; i++) { - assert(!node->is_tail_sentinel()); - - ir_dereference *const lhs = - new(mem_ctx) ir_dereference_record(d->clone(mem_ctx, NULL), - type->fields.structure[i].name); - - ir_rvalue *const rhs = ((ir_instruction *) node)->as_rvalue(); - assert(rhs != NULL); - - ir_instruction *const assign = new(mem_ctx) ir_assignment(lhs, rhs, NULL); - - instructions->push_tail(assign); - node = node->next; - } - - return d; -} - - -static ir_rvalue * -process_record_constructor(exec_list *instructions, - const glsl_type *constructor_type, - YYLTYPE *loc, exec_list *parameters, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - exec_list actual_parameters; - - process_parameters(instructions, &actual_parameters, - parameters, state); - - exec_node *node = actual_parameters.head; - for (unsigned i = 0; i < constructor_type->length; i++) { - ir_rvalue *ir = (ir_rvalue *) node; - - if (node->is_tail_sentinel()) { - _mesa_glsl_error(loc, state, - "insufficient parameters to constructor for `%s'", - constructor_type->name); - return ir_rvalue::error_value(ctx); - } - - if (apply_implicit_conversion(constructor_type->fields.structure[i].type, - ir, state)) { - node->replace_with(ir); - } else { - _mesa_glsl_error(loc, state, - "parameter type mismatch in constructor for `%s.%s' " - "(%s vs %s)", - constructor_type->name, - constructor_type->fields.structure[i].name, - ir->type->name, - constructor_type->fields.structure[i].type->name); - return ir_rvalue::error_value(ctx);; - } - - node = node->next; - } - - if (!node->is_tail_sentinel()) { - _mesa_glsl_error(loc, state, "too many parameters in constructor " - "for `%s'", constructor_type->name); - return ir_rvalue::error_value(ctx); - } - - ir_rvalue *const constant = - constant_record_constructor(constructor_type, &actual_parameters, - state); - - return (constant != NULL) - ? 
constant - : emit_inline_record_constructor(constructor_type, instructions, - &actual_parameters, state); -} - -ir_rvalue * -ast_function_expression::handle_method(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - const ast_expression *field = subexpressions[0]; - ir_rvalue *op; - ir_rvalue *result; - void *ctx = state; - /* Handle "method calls" in GLSL 1.20 - namely, array.length() */ - YYLTYPE loc = get_location(); - state->check_version(120, 300, &loc, "methods not supported"); - - const char *method; - method = field->primary_expression.identifier; - - op = field->subexpressions[0]->hir(instructions, state); - if (strcmp(method, "length") == 0) { - if (!this->expressions.is_empty()) { - _mesa_glsl_error(&loc, state, "length method takes no arguments"); - goto fail; - } - - if (op->type->is_array()) { - if (op->type->is_unsized_array()) { - if (!state->has_shader_storage_buffer_objects()) { - _mesa_glsl_error(&loc, state, "length called on unsized array" - " only available with " - "ARB_shader_storage_buffer_object"); - } - /* Calculate length of an unsized array in run-time */ - result = new(ctx) ir_expression(ir_unop_ssbo_unsized_array_length, op); - } else { - result = new(ctx) ir_constant(op->type->array_size()); - } - } else if (op->type->is_vector()) { - if (state->has_420pack()) { - /* .length() returns int. */ - result = new(ctx) ir_constant((int) op->type->vector_elements); - } else { - _mesa_glsl_error(&loc, state, "length method on matrix only available" - "with ARB_shading_language_420pack"); - goto fail; - } - } else if (op->type->is_matrix()) { - if (state->has_420pack()) { - /* .length() returns int. */ - result = new(ctx) ir_constant((int) op->type->matrix_columns); - } else { - _mesa_glsl_error(&loc, state, "length method on matrix only available" - "with ARB_shading_language_420pack"); - goto fail; - } - } else { - _mesa_glsl_error(&loc, state, "length called on scalar."); - goto fail; - } - } else { - _mesa_glsl_error(&loc, state, "unknown method: `%s'", method); - goto fail; - } - return result; -fail: - return ir_rvalue::error_value(ctx); -} - -ir_rvalue * -ast_function_expression::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - /* There are three sorts of function calls. - * - * 1. constructors - The first subexpression is an ast_type_specifier. - * 2. methods - Only the .length() method of array types. - * 3. functions - Calls to regular old functions. - * - */ - if (is_constructor()) { - const ast_type_specifier *type = (ast_type_specifier *) subexpressions[0]; - YYLTYPE loc = type->get_location(); - const char *name; - - const glsl_type *const constructor_type = type->glsl_type(& name, state); - - /* constructor_type can be NULL if a variable with the same name as the - * structure has come into scope. - */ - if (constructor_type == NULL) { - _mesa_glsl_error(& loc, state, "unknown type `%s' (structure name " - "may be shadowed by a variable with the same name)", - type->type_name); - return ir_rvalue::error_value(ctx); - } - - - /* Constructors for opaque types are illegal. 
- */ - if (constructor_type->contains_opaque()) { - _mesa_glsl_error(& loc, state, "cannot construct opaque type `%s'", - constructor_type->name); - return ir_rvalue::error_value(ctx); - } - - if (constructor_type->is_array()) { - if (!state->check_version(120, 300, &loc, - "array constructors forbidden")) { - return ir_rvalue::error_value(ctx); - } - - return process_array_constructor(instructions, constructor_type, - & loc, &this->expressions, state); - } - - - /* There are two kinds of constructor calls. Constructors for arrays and - * structures must have the exact number of arguments with matching types - * in the correct order. These constructors follow essentially the same - * type matching rules as functions. - * - * Constructors for built-in language types, such as mat4 and vec2, are - * free form. The only requirements are that the parameters must provide - * enough values of the correct scalar type and that no arguments are - * given past the last used argument. - * - * When using the C-style initializer syntax from GLSL 4.20, constructors - * must have the exact number of arguments with matching types in the - * correct order. - */ - if (constructor_type->is_record()) { - return process_record_constructor(instructions, constructor_type, - &loc, &this->expressions, - state); - } - - if (!constructor_type->is_numeric() && !constructor_type->is_boolean()) - return ir_rvalue::error_value(ctx); - - /* Total number of components of the type being constructed. */ - const unsigned type_components = constructor_type->components(); - - /* Number of components from parameters that have actually been - * consumed. This is used to perform several kinds of error checking. - */ - unsigned components_used = 0; - - unsigned matrix_parameters = 0; - unsigned nonmatrix_parameters = 0; - exec_list actual_parameters; - - foreach_list_typed(ast_node, ast, link, &this->expressions) { - ir_rvalue *result = ast->hir(instructions, state); - - /* From page 50 (page 56 of the PDF) of the GLSL 1.50 spec: - * - * "It is an error to provide extra arguments beyond this - * last used argument." - */ - if (components_used >= type_components) { - _mesa_glsl_error(& loc, state, "too many parameters to `%s' " - "constructor", - constructor_type->name); - return ir_rvalue::error_value(ctx); - } - - if (!result->type->is_numeric() && !result->type->is_boolean()) { - _mesa_glsl_error(& loc, state, "cannot construct `%s' from a " - "non-numeric data type", - constructor_type->name); - return ir_rvalue::error_value(ctx); - } - - /* Count the number of matrix and nonmatrix parameters. This - * is used below to enforce some of the constructor rules. - */ - if (result->type->is_matrix()) - matrix_parameters++; - else - nonmatrix_parameters++; - - actual_parameters.push_tail(result); - components_used += result->type->components(); - } - - /* From page 28 (page 34 of the PDF) of the GLSL 1.10 spec: - * - * "It is an error to construct matrices from other matrices. This - * is reserved for future use." - */ - if (matrix_parameters > 0 - && constructor_type->is_matrix() - && !state->check_version(120, 100, &loc, - "cannot construct `%s' from a matrix", - constructor_type->name)) { - return ir_rvalue::error_value(ctx); - } - - /* From page 50 (page 56 of the PDF) of the GLSL 1.50 spec: - * - * "If a matrix argument is given to a matrix constructor, it is - * an error to have any other arguments." 
- */ - if ((matrix_parameters > 0) - && ((matrix_parameters + nonmatrix_parameters) > 1) - && constructor_type->is_matrix()) { - _mesa_glsl_error(& loc, state, "for matrix `%s' constructor, " - "matrix must be only parameter", - constructor_type->name); - return ir_rvalue::error_value(ctx); - } - - /* From page 28 (page 34 of the PDF) of the GLSL 1.10 spec: - * - * "In these cases, there must be enough components provided in the - * arguments to provide an initializer for every component in the - * constructed value." - */ - if (components_used < type_components && components_used != 1 - && matrix_parameters == 0) { - _mesa_glsl_error(& loc, state, "too few components to construct " - "`%s'", - constructor_type->name); - return ir_rvalue::error_value(ctx); - } - - /* Matrices can never be consumed as is by any constructor but matrix - * constructors. If the constructor type is not matrix, always break the - * matrix up into a series of column vectors. - */ - if (!constructor_type->is_matrix()) { - foreach_in_list_safe(ir_rvalue, matrix, &actual_parameters) { - if (!matrix->type->is_matrix()) - continue; - - /* Create a temporary containing the matrix. */ - ir_variable *var = new(ctx) ir_variable(matrix->type, "matrix_tmp", - ir_var_temporary); - instructions->push_tail(var); - instructions->push_tail(new(ctx) ir_assignment(new(ctx) - ir_dereference_variable(var), matrix, NULL)); - var->constant_value = matrix->constant_expression_value(); - - /* Replace the matrix with dereferences of its columns. */ - for (int i = 0; i < matrix->type->matrix_columns; i++) { - matrix->insert_before(new (ctx) ir_dereference_array(var, - new(ctx) ir_constant(i))); - } - matrix->remove(); - } - } - - bool all_parameters_are_constant = true; - - /* Type cast each parameter and, if possible, fold constants.*/ - foreach_in_list_safe(ir_rvalue, ir, &actual_parameters) { - const glsl_type *desired_type = - glsl_type::get_instance(constructor_type->base_type, - ir->type->vector_elements, - ir->type->matrix_columns); - ir_rvalue *result = convert_component(ir, desired_type); - - /* Attempt to convert the parameter to a constant valued expression. - * After doing so, track whether or not all the parameters to the - * constructor are trivially constant valued expressions. - */ - ir_rvalue *const constant = result->constant_expression_value(); - - if (constant != NULL) - result = constant; - else - all_parameters_are_constant = false; - - if (result != ir) { - ir->replace_with(result); - } - } - - /* If all of the parameters are trivially constant, create a - * constant representing the complete collection of parameters. 
- */ - if (all_parameters_are_constant) { - return new(ctx) ir_constant(constructor_type, &actual_parameters); - } else if (constructor_type->is_scalar()) { - return dereference_component((ir_rvalue *) actual_parameters.head, - 0); - } else if (constructor_type->is_vector()) { - return emit_inline_vector_constructor(constructor_type, - instructions, - &actual_parameters, - ctx); - } else { - assert(constructor_type->is_matrix()); - return emit_inline_matrix_constructor(constructor_type, - instructions, - &actual_parameters, - ctx); - } - } else if (subexpressions[0]->oper == ast_field_selection) { - return handle_method(instructions, state); - } else { - const ast_expression *id = subexpressions[0]; - const char *func_name; - YYLTYPE loc = get_location(); - exec_list actual_parameters; - ir_variable *sub_var = NULL; - ir_rvalue *array_idx = NULL; - - process_parameters(instructions, &actual_parameters, &this->expressions, - state); - - if (id->oper == ast_array_index) { - array_idx = generate_array_index(ctx, instructions, state, loc, - id->subexpressions[0], - id->subexpressions[1], &func_name, - &actual_parameters); - } else { - func_name = id->primary_expression.identifier; - } - - ir_function_signature *sig = - match_function_by_name(func_name, &actual_parameters, state); - - ir_rvalue *value = NULL; - if (sig == NULL) { - sig = match_subroutine_by_name(func_name, &actual_parameters, state, &sub_var); - } - - if (sig == NULL) { - no_matching_function_error(func_name, &loc, &actual_parameters, state); - value = ir_rvalue::error_value(ctx); - } else if (!verify_parameter_modes(state, sig, actual_parameters, this->expressions)) { - /* an error has already been emitted */ - value = ir_rvalue::error_value(ctx); - } else { - value = generate_call(instructions, sig, &actual_parameters, sub_var, array_idx, state); - if (!value) { - ir_variable *const tmp = new(ctx) ir_variable(glsl_type::void_type, - "void_var", - ir_var_temporary); - instructions->push_tail(tmp); - value = new(ctx) ir_dereference_variable(tmp); - } - } - - return value; - } - - unreachable("not reached"); -} - -bool -ast_function_expression::has_sequence_subexpression() const -{ - foreach_list_typed(const ast_node, ast, link, &this->expressions) { - if (ast->has_sequence_subexpression()) - return true; - } - - return false; -} - -ir_rvalue * -ast_aggregate_initializer::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - YYLTYPE loc = this->get_location(); - - if (!this->constructor_type) { - _mesa_glsl_error(&loc, state, "type of C-style initializer unknown"); - return ir_rvalue::error_value(ctx); - } - const glsl_type *const constructor_type = this->constructor_type; - - if (!state->has_420pack()) { - _mesa_glsl_error(&loc, state, "C-style initialization requires the " - "GL_ARB_shading_language_420pack extension"); - return ir_rvalue::error_value(ctx); - } - - if (constructor_type->is_array()) { - return process_array_constructor(instructions, constructor_type, &loc, - &this->expressions, state); - } - - if (constructor_type->is_record()) { - return process_record_constructor(instructions, constructor_type, &loc, - &this->expressions, state); - } - - return process_vec_mat_constructor(instructions, constructor_type, &loc, - &this->expressions, state); -} diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp deleted file mode 100644 index dfd31966eb0..00000000000 --- a/src/glsl/ast_to_hir.cpp +++ /dev/null @@ -1,7583 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - 
* Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ast_to_hir.c - * Convert abstract syntax to to high-level intermediate reprensentation (HIR). - * - * During the conversion to HIR, the majority of the symantic checking is - * preformed on the program. This includes: - * - * * Symbol table management - * * Type checking - * * Function binding - * - * The majority of this work could be done during parsing, and the parser could - * probably generate HIR directly. However, this results in frequent changes - * to the parser code. Since we do not assume that every system this complier - * is built on will have Flex and Bison installed, we have to store the code - * generated by these tools in our version control system. In other parts of - * the system we've seen problems where a parser was changed but the generated - * code was not committed, merge conflicts where created because two developers - * had slightly different versions of Bison installed, etc. - * - * I have also noticed that running Bison generated parsers in GDB is very - * irritating. When you get a segfault on '$$ = $1->foo', you can't very - * well 'print $1' in GDB. - * - * As a result, my preference is to put as little C code as possible in the - * parser (and lexer) sources. 
- */ - -#include "glsl_symbol_table.h" -#include "glsl_parser_extras.h" -#include "ast.h" -#include "compiler/glsl_types.h" -#include "program/hash_table.h" -#include "main/shaderobj.h" -#include "ir.h" -#include "ir_builder.h" - -using namespace ir_builder; - -static void -detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, - exec_list *instructions); -static void -remove_per_vertex_blocks(exec_list *instructions, - _mesa_glsl_parse_state *state, ir_variable_mode mode); - -/** - * Visitor class that finds the first instance of any write-only variable that - * is ever read, if any - */ -class read_from_write_only_variable_visitor : public ir_hierarchical_visitor -{ -public: - read_from_write_only_variable_visitor() : found(NULL) - { - } - - virtual ir_visitor_status visit(ir_dereference_variable *ir) - { - if (this->in_assignee) - return visit_continue; - - ir_variable *var = ir->variable_referenced(); - /* We can have image_write_only set on both images and buffer variables, - * but in the former there is a distinction between reads from - * the variable itself (write_only) and from the memory they point to - * (image_write_only), while in the case of buffer variables there is - * no such distinction, that is why this check here is limited to - * buffer variables alone. - */ - if (!var || var->data.mode != ir_var_shader_storage) - return visit_continue; - - if (var->data.image_write_only) { - found = var; - return visit_stop; - } - - return visit_continue; - } - - ir_variable *get_variable() { - return found; - } - - virtual ir_visitor_status visit_enter(ir_expression *ir) - { - /* .length() doesn't actually read anything */ - if (ir->operation == ir_unop_ssbo_unsized_array_length) - return visit_continue_with_parent; - - return visit_continue; - } - -private: - ir_variable *found; -}; - -void -_mesa_ast_to_hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) -{ - _mesa_glsl_initialize_variables(instructions, state); - - state->symbols->separate_function_namespace = state->language_version == 110; - - state->current_function = NULL; - - state->toplevel_ir = instructions; - - state->gs_input_prim_type_specified = false; - state->tcs_output_vertices_specified = false; - state->cs_input_local_size_specified = false; - - /* Section 4.2 of the GLSL 1.20 specification states: - * "The built-in functions are scoped in a scope outside the global scope - * users declare global variables in. That is, a shader's global scope, - * available for user-defined functions and global variables, is nested - * inside the scope containing the built-in functions." - * - * Since built-in functions like ftransform() access built-in variables, - * it follows that those must be in the outer scope as well. - * - * We push scope here to create this nesting effect...but don't pop. - * This way, a shader's globals are still in the symbol table for use - * by the linker. - */ - state->symbols->push_scope(); - - foreach_list_typed (ast_node, ast, link, & state->translation_unit) - ast->hir(instructions, state); - - detect_recursion_unlinked(state, instructions); - detect_conflicting_assignments(state, instructions); - - state->toplevel_ir = NULL; - - /* Move all of the variable declarations to the front of the IR list, and - * reverse the order. This has the (intended!) side effect that vertex - * shader inputs and fragment shader outputs will appear in the IR in the - * same order that they appeared in the shader code. 
This results in the - * locations being assigned in the declared order. Many (arguably buggy) - * applications depend on this behavior, and it matches what nearly all - * other drivers do. - */ - foreach_in_list_safe(ir_instruction, node, instructions) { - ir_variable *const var = node->as_variable(); - - if (var == NULL) - continue; - - var->remove(); - instructions->push_head(var); - } - - /* Figure out if gl_FragCoord is actually used in fragment shader */ - ir_variable *const var = state->symbols->get_variable("gl_FragCoord"); - if (var != NULL) - state->fs_uses_gl_fragcoord = var->data.used; - - /* From section 7.1 (Built-In Language Variables) of the GLSL 4.10 spec: - * - * If multiple shaders using members of a built-in block belonging to - * the same interface are linked together in the same program, they - * must all redeclare the built-in block in the same way, as described - * in section 4.3.7 "Interface Blocks" for interface block matching, or - * a link error will result. - * - * The phrase "using members of a built-in block" implies that if two - * shaders are linked together and one of them *does not use* any members - * of the built-in block, then that shader does not need to have a matching - * redeclaration of the built-in block. - * - * This appears to be a clarification to the behaviour established for - * gl_PerVertex by GLSL 1.50, therefore implement it regardless of GLSL - * version. - * - * The definition of "interface" in section 4.3.7 that applies here is as - * follows: - * - * The boundary between adjacent programmable pipeline stages: This - * spans all the outputs in all compilation units of the first stage - * and all the inputs in all compilation units of the second stage. - * - * Therefore this rule applies to both inter- and intra-stage linking. - * - * The easiest way to implement this is to check whether the shader uses - * gl_PerVertex right after ast-to-ir conversion, and if it doesn't, simply - * remove all the relevant variable declaration from the IR, so that the - * linker won't see them and complain about mismatches. 
- */ - remove_per_vertex_blocks(instructions, state, ir_var_shader_in); - remove_per_vertex_blocks(instructions, state, ir_var_shader_out); - - /* Check that we don't have reads from write-only variables */ - read_from_write_only_variable_visitor v; - v.run(instructions); - ir_variable *error_var = v.get_variable(); - if (error_var) { - /* It would be nice to have proper location information, but for that - * we would need to check this as we process each kind of AST node - */ - YYLTYPE loc; - memset(&loc, 0, sizeof(loc)); - _mesa_glsl_error(&loc, state, "Read from write-only variable `%s'", - error_var->name); - } -} - - -static ir_expression_operation -get_conversion_operation(const glsl_type *to, const glsl_type *from, - struct _mesa_glsl_parse_state *state) -{ - switch (to->base_type) { - case GLSL_TYPE_FLOAT: - switch (from->base_type) { - case GLSL_TYPE_INT: return ir_unop_i2f; - case GLSL_TYPE_UINT: return ir_unop_u2f; - case GLSL_TYPE_DOUBLE: return ir_unop_d2f; - default: return (ir_expression_operation)0; - } - - case GLSL_TYPE_UINT: - if (!state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) - return (ir_expression_operation)0; - switch (from->base_type) { - case GLSL_TYPE_INT: return ir_unop_i2u; - default: return (ir_expression_operation)0; - } - - case GLSL_TYPE_DOUBLE: - if (!state->has_double()) - return (ir_expression_operation)0; - switch (from->base_type) { - case GLSL_TYPE_INT: return ir_unop_i2d; - case GLSL_TYPE_UINT: return ir_unop_u2d; - case GLSL_TYPE_FLOAT: return ir_unop_f2d; - default: return (ir_expression_operation)0; - } - - default: return (ir_expression_operation)0; - } -} - - -/** - * If a conversion is available, convert one operand to a different type - * - * The \c from \c ir_rvalue is converted "in place". - * - * \param to Type that the operand it to be converted to - * \param from Operand that is being converted - * \param state GLSL compiler state - * - * \return - * If a conversion is possible (or unnecessary), \c true is returned. - * Otherwise \c false is returned. - */ -bool -apply_implicit_conversion(const glsl_type *to, ir_rvalue * &from, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - if (to->base_type == from->type->base_type) - return true; - - /* Prior to GLSL 1.20, there are no implicit conversions */ - if (!state->is_version(120, 0)) - return false; - - /* From page 27 (page 33 of the PDF) of the GLSL 1.50 spec: - * - * "There are no implicit array or structure conversions. For - * example, an array of int cannot be implicitly converted to an - * array of float. - */ - if (!to->is_numeric() || !from->type->is_numeric()) - return false; - - /* We don't actually want the specific type `to`, we want a type - * with the same base type as `to`, but the same vector width as - * `from`. 
- */ - to = glsl_type::get_instance(to->base_type, from->type->vector_elements, - from->type->matrix_columns); - - ir_expression_operation op = get_conversion_operation(to, from->type, state); - if (op) { - from = new(ctx) ir_expression(op, to, from, NULL); - return true; - } else { - return false; - } -} - - -static const struct glsl_type * -arithmetic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, - bool multiply, - struct _mesa_glsl_parse_state *state, YYLTYPE *loc) -{ - const glsl_type *type_a = value_a->type; - const glsl_type *type_b = value_b->type; - - /* From GLSL 1.50 spec, page 56: - * - * "The arithmetic binary operators add (+), subtract (-), - * multiply (*), and divide (/) operate on integer and - * floating-point scalars, vectors, and matrices." - */ - if (!type_a->is_numeric() || !type_b->is_numeric()) { - _mesa_glsl_error(loc, state, - "operands to arithmetic operators must be numeric"); - return glsl_type::error_type; - } - - - /* "If one operand is floating-point based and the other is - * not, then the conversions from Section 4.1.10 "Implicit - * Conversions" are applied to the non-floating-point-based operand." - */ - if (!apply_implicit_conversion(type_a, value_b, state) - && !apply_implicit_conversion(type_b, value_a, state)) { - _mesa_glsl_error(loc, state, - "could not implicitly convert operands to " - "arithmetic operator"); - return glsl_type::error_type; - } - type_a = value_a->type; - type_b = value_b->type; - - /* "If the operands are integer types, they must both be signed or - * both be unsigned." - * - * From this rule and the preceeding conversion it can be inferred that - * both types must be GLSL_TYPE_FLOAT, or GLSL_TYPE_UINT, or GLSL_TYPE_INT. - * The is_numeric check above already filtered out the case where either - * type is not one of these, so now the base types need only be tested for - * equality. - */ - if (type_a->base_type != type_b->base_type) { - _mesa_glsl_error(loc, state, - "base type mismatch for arithmetic operator"); - return glsl_type::error_type; - } - - /* "All arithmetic binary operators result in the same fundamental type - * (signed integer, unsigned integer, or floating-point) as the - * operands they operate on, after operand type conversion. After - * conversion, the following cases are valid - * - * * The two operands are scalars. In this case the operation is - * applied, resulting in a scalar." - */ - if (type_a->is_scalar() && type_b->is_scalar()) - return type_a; - - /* "* One operand is a scalar, and the other is a vector or matrix. - * In this case, the scalar operation is applied independently to each - * component of the vector or matrix, resulting in the same size - * vector or matrix." - */ - if (type_a->is_scalar()) { - if (!type_b->is_scalar()) - return type_b; - } else if (type_b->is_scalar()) { - return type_a; - } - - /* All of the combinations of , , - * , , and have been - * handled. - */ - assert(!type_a->is_scalar()); - assert(!type_b->is_scalar()); - - /* "* The two operands are vectors of the same size. In this case, the - * operation is done component-wise resulting in the same size - * vector." - */ - if (type_a->is_vector() && type_b->is_vector()) { - if (type_a == type_b) { - return type_a; - } else { - _mesa_glsl_error(loc, state, - "vector size mismatch for arithmetic operator"); - return glsl_type::error_type; - } - } - - /* All of the combinations of , , - * , , , and - * have been handled. At least one of the operands must - * be matrix. 
Further, since there are no integer matrix types, the base - * type of both operands must be float. - */ - assert(type_a->is_matrix() || type_b->is_matrix()); - assert(type_a->base_type == GLSL_TYPE_FLOAT || - type_a->base_type == GLSL_TYPE_DOUBLE); - assert(type_b->base_type == GLSL_TYPE_FLOAT || - type_b->base_type == GLSL_TYPE_DOUBLE); - - /* "* The operator is add (+), subtract (-), or divide (/), and the - * operands are matrices with the same number of rows and the same - * number of columns. In this case, the operation is done component- - * wise resulting in the same size matrix." - * * The operator is multiply (*), where both operands are matrices or - * one operand is a vector and the other a matrix. A right vector - * operand is treated as a column vector and a left vector operand as a - * row vector. In all these cases, it is required that the number of - * columns of the left operand is equal to the number of rows of the - * right operand. Then, the multiply (*) operation does a linear - * algebraic multiply, yielding an object that has the same number of - * rows as the left operand and the same number of columns as the right - * operand. Section 5.10 "Vector and Matrix Operations" explains in - * more detail how vectors and matrices are operated on." - */ - if (! multiply) { - if (type_a == type_b) - return type_a; - } else { - const glsl_type *type = glsl_type::get_mul_type(type_a, type_b); - - if (type == glsl_type::error_type) { - _mesa_glsl_error(loc, state, - "size mismatch for matrix multiplication"); - } - - return type; - } - - - /* "All other cases are illegal." - */ - _mesa_glsl_error(loc, state, "type mismatch"); - return glsl_type::error_type; -} - - -static const struct glsl_type * -unary_arithmetic_result_type(const struct glsl_type *type, - struct _mesa_glsl_parse_state *state, YYLTYPE *loc) -{ - /* From GLSL 1.50 spec, page 57: - * - * "The arithmetic unary operators negate (-), post- and pre-increment - * and decrement (-- and ++) operate on integer or floating-point - * values (including vectors and matrices). All unary operators work - * component-wise on their operands. These result with the same type - * they operated on." - */ - if (!type->is_numeric()) { - _mesa_glsl_error(loc, state, - "operands to arithmetic operators must be numeric"); - return glsl_type::error_type; - } - - return type; -} - -/** - * \brief Return the result type of a bit-logic operation. - * - * If the given types to the bit-logic operator are invalid, return - * glsl_type::error_type. - * - * \param value_a LHS of bit-logic op - * \param value_b RHS of bit-logic op - */ -static const struct glsl_type * -bit_logic_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, - ast_operators op, - struct _mesa_glsl_parse_state *state, YYLTYPE *loc) -{ - const glsl_type *type_a = value_a->type; - const glsl_type *type_b = value_b->type; - - if (!state->check_bitwise_operations_allowed(loc)) { - return glsl_type::error_type; - } - - /* From page 50 (page 56 of PDF) of GLSL 1.30 spec: - * - * "The bitwise operators and (&), exclusive-or (^), and inclusive-or - * (|). The operands must be of type signed or unsigned integers or - * integer vectors." 
- */ - if (!type_a->is_integer()) { - _mesa_glsl_error(loc, state, "LHS of `%s' must be an integer", - ast_expression::operator_string(op)); - return glsl_type::error_type; - } - if (!type_b->is_integer()) { - _mesa_glsl_error(loc, state, "RHS of `%s' must be an integer", - ast_expression::operator_string(op)); - return glsl_type::error_type; - } - - /* Prior to GLSL 4.0 / GL_ARB_gpu_shader5, implicit conversions didn't - * make sense for bitwise operations, as they don't operate on floats. - * - * GLSL 4.0 added implicit int -> uint conversions, which are relevant - * here. It wasn't clear whether or not we should apply them to bitwise - * operations. However, Khronos has decided that they should in future - * language revisions. Applications also rely on this behavior. We opt - * to apply them in general, but issue a portability warning. - * - * See https://www.khronos.org/bugzilla/show_bug.cgi?id=1405 - */ - if (type_a->base_type != type_b->base_type) { - if (!apply_implicit_conversion(type_a, value_b, state) - && !apply_implicit_conversion(type_b, value_a, state)) { - _mesa_glsl_error(loc, state, - "could not implicitly convert operands to " - "`%s` operator", - ast_expression::operator_string(op)); - return glsl_type::error_type; - } else { - _mesa_glsl_warning(loc, state, - "some implementations may not support implicit " - "int -> uint conversions for `%s' operators; " - "consider casting explicitly for portability", - ast_expression::operator_string(op)); - } - type_a = value_a->type; - type_b = value_b->type; - } - - /* "The fundamental types of the operands (signed or unsigned) must - * match," - */ - if (type_a->base_type != type_b->base_type) { - _mesa_glsl_error(loc, state, "operands of `%s' must have the same " - "base type", ast_expression::operator_string(op)); - return glsl_type::error_type; - } - - /* "The operands cannot be vectors of differing size." */ - if (type_a->is_vector() && - type_b->is_vector() && - type_a->vector_elements != type_b->vector_elements) { - _mesa_glsl_error(loc, state, "operands of `%s' cannot be vectors of " - "different sizes", ast_expression::operator_string(op)); - return glsl_type::error_type; - } - - /* "If one operand is a scalar and the other a vector, the scalar is - * applied component-wise to the vector, resulting in the same type as - * the vector. The fundamental types of the operands [...] will be the - * resulting fundamental type." - */ - if (type_a->is_scalar()) - return type_b; - else - return type_a; -} - -static const struct glsl_type * -modulus_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, - struct _mesa_glsl_parse_state *state, YYLTYPE *loc) -{ - const glsl_type *type_a = value_a->type; - const glsl_type *type_b = value_b->type; - - if (!state->check_version(130, 300, loc, "operator '%%' is reserved")) { - return glsl_type::error_type; - } - - /* Section 5.9 (Expressions) of the GLSL 4.00 specification says: - * - * "The operator modulus (%) operates on signed or unsigned integers or - * integer vectors." - */ - if (!type_a->is_integer()) { - _mesa_glsl_error(loc, state, "LHS of operator %% must be an integer"); - return glsl_type::error_type; - } - if (!type_b->is_integer()) { - _mesa_glsl_error(loc, state, "RHS of operator %% must be an integer"); - return glsl_type::error_type; - } - - /* "If the fundamental types in the operands do not match, then the - * conversions from section 4.1.10 "Implicit Conversions" are applied - * to create matching types." 
- * - * Note that GLSL 4.00 (and GL_ARB_gpu_shader5) introduced implicit - * int -> uint conversion rules. Prior to that, there were no implicit - * conversions. So it's harmless to apply them universally - no implicit - * conversions will exist. If the types don't match, we'll receive false, - * and raise an error, satisfying the GLSL 1.50 spec, page 56: - * - * "The operand types must both be signed or unsigned." - */ - if (!apply_implicit_conversion(type_a, value_b, state) && - !apply_implicit_conversion(type_b, value_a, state)) { - _mesa_glsl_error(loc, state, - "could not implicitly convert operands to " - "modulus (%%) operator"); - return glsl_type::error_type; - } - type_a = value_a->type; - type_b = value_b->type; - - /* "The operands cannot be vectors of differing size. If one operand is - * a scalar and the other vector, then the scalar is applied component- - * wise to the vector, resulting in the same type as the vector. If both - * are vectors of the same size, the result is computed component-wise." - */ - if (type_a->is_vector()) { - if (!type_b->is_vector() - || (type_a->vector_elements == type_b->vector_elements)) - return type_a; - } else - return type_b; - - /* "The operator modulus (%) is not defined for any other data types - * (non-integer types)." - */ - _mesa_glsl_error(loc, state, "type mismatch"); - return glsl_type::error_type; -} - - -static const struct glsl_type * -relational_result_type(ir_rvalue * &value_a, ir_rvalue * &value_b, - struct _mesa_glsl_parse_state *state, YYLTYPE *loc) -{ - const glsl_type *type_a = value_a->type; - const glsl_type *type_b = value_b->type; - - /* From GLSL 1.50 spec, page 56: - * "The relational operators greater than (>), less than (<), greater - * than or equal (>=), and less than or equal (<=) operate only on - * scalar integer and scalar floating-point expressions." - */ - if (!type_a->is_numeric() - || !type_b->is_numeric() - || !type_a->is_scalar() - || !type_b->is_scalar()) { - _mesa_glsl_error(loc, state, - "operands to relational operators must be scalar and " - "numeric"); - return glsl_type::error_type; - } - - /* "Either the operands' types must match, or the conversions from - * Section 4.1.10 "Implicit Conversions" will be applied to the integer - * operand, after which the types must match." - */ - if (!apply_implicit_conversion(type_a, value_b, state) - && !apply_implicit_conversion(type_b, value_a, state)) { - _mesa_glsl_error(loc, state, - "could not implicitly convert operands to " - "relational operator"); - return glsl_type::error_type; - } - type_a = value_a->type; - type_b = value_b->type; - - if (type_a->base_type != type_b->base_type) { - _mesa_glsl_error(loc, state, "base type mismatch"); - return glsl_type::error_type; - } - - /* "The result is scalar Boolean." - */ - return glsl_type::bool_type; -} - -/** - * \brief Return the result type of a bit-shift operation. - * - * If the given types to the bit-shift operator are invalid, return - * glsl_type::error_type. - * - * \param type_a Type of LHS of bit-shift op - * \param type_b Type of RHS of bit-shift op - */ -static const struct glsl_type * -shift_result_type(const struct glsl_type *type_a, - const struct glsl_type *type_b, - ast_operators op, - struct _mesa_glsl_parse_state *state, YYLTYPE *loc) -{ - if (!state->check_bitwise_operations_allowed(loc)) { - return glsl_type::error_type; - } - - /* From page 50 (page 56 of the PDF) of the GLSL 1.30 spec: - * - * "The shift operators (<<) and (>>). 
For both operators, the operands - * must be signed or unsigned integers or integer vectors. One operand - * can be signed while the other is unsigned." - */ - if (!type_a->is_integer()) { - _mesa_glsl_error(loc, state, "LHS of operator %s must be an integer or " - "integer vector", ast_expression::operator_string(op)); - return glsl_type::error_type; - - } - if (!type_b->is_integer()) { - _mesa_glsl_error(loc, state, "RHS of operator %s must be an integer or " - "integer vector", ast_expression::operator_string(op)); - return glsl_type::error_type; - } - - /* "If the first operand is a scalar, the second operand has to be - * a scalar as well." - */ - if (type_a->is_scalar() && !type_b->is_scalar()) { - _mesa_glsl_error(loc, state, "if the first operand of %s is scalar, the " - "second must be scalar as well", - ast_expression::operator_string(op)); - return glsl_type::error_type; - } - - /* If both operands are vectors, check that they have same number of - * elements. - */ - if (type_a->is_vector() && - type_b->is_vector() && - type_a->vector_elements != type_b->vector_elements) { - _mesa_glsl_error(loc, state, "vector operands to operator %s must " - "have same number of elements", - ast_expression::operator_string(op)); - return glsl_type::error_type; - } - - /* "In all cases, the resulting type will be the same type as the left - * operand." - */ - return type_a; -} - -/** - * Returns the innermost array index expression in an rvalue tree. - * This is the largest indexing level -- if an array of blocks, then - * it is the block index rather than an indexing expression for an - * array-typed member of an array of blocks. - */ -static ir_rvalue * -find_innermost_array_index(ir_rvalue *rv) -{ - ir_dereference_array *last = NULL; - while (rv) { - if (rv->as_dereference_array()) { - last = rv->as_dereference_array(); - rv = last->array; - } else if (rv->as_dereference_record()) - rv = rv->as_dereference_record()->record; - else if (rv->as_swizzle()) - rv = rv->as_swizzle()->val; - else - rv = NULL; - } - - if (last) - return last->array_index; - - return NULL; -} - -/** - * Validates that a value can be assigned to a location with a specified type - * - * Validates that \c rhs can be assigned to some location. If the types are - * not an exact match but an automatic conversion is possible, \c rhs will be - * converted. - * - * \return - * \c NULL if \c rhs cannot be assigned to a location with type \c lhs_type. - * Otherwise the actual RHS to be assigned will be returned. This may be - * \c rhs, or it may be \c rhs after some type conversion. - * - * \note - * In addition to being used for assignments, this function is used to - * type-check return values. - */ -static ir_rvalue * -validate_assignment(struct _mesa_glsl_parse_state *state, - YYLTYPE loc, ir_rvalue *lhs, - ir_rvalue *rhs, bool is_initializer) -{ - /* If there is already some error in the RHS, just return it. Anything - * else will lead to an avalanche of error message back to the user. - */ - if (rhs->type->is_error()) - return rhs; - - /* In the Tessellation Control Shader: - * If a per-vertex output variable is used as an l-value, it is an error - * if the expression indicating the vertex number is not the identifier - * `gl_InvocationID`. - */ - if (state->stage == MESA_SHADER_TESS_CTRL) { - ir_variable *var = lhs->variable_referenced(); - if (var->data.mode == ir_var_shader_out && !var->data.patch) { - ir_rvalue *index = find_innermost_array_index(lhs); - ir_variable *index_var = index ? 
index->variable_referenced() : NULL; - if (!index_var || strcmp(index_var->name, "gl_InvocationID") != 0) { - _mesa_glsl_error(&loc, state, - "Tessellation control shader outputs can only " - "be indexed by gl_InvocationID"); - return NULL; - } - } - } - - /* If the types are identical, the assignment can trivially proceed. - */ - if (rhs->type == lhs->type) - return rhs; - - /* If the array element types are the same and the LHS is unsized, - * the assignment is okay for initializers embedded in variable - * declarations. - * - * Note: Whole-array assignments are not permitted in GLSL 1.10, but this - * is handled by ir_dereference::is_lvalue. - */ - const glsl_type *lhs_t = lhs->type; - const glsl_type *rhs_t = rhs->type; - bool unsized_array = false; - while(lhs_t->is_array()) { - if (rhs_t == lhs_t) - break; /* the rest of the inner arrays match so break out early */ - if (!rhs_t->is_array()) { - unsized_array = false; - break; /* number of dimensions mismatch */ - } - if (lhs_t->length == rhs_t->length) { - lhs_t = lhs_t->fields.array; - rhs_t = rhs_t->fields.array; - continue; - } else if (lhs_t->is_unsized_array()) { - unsized_array = true; - } else { - unsized_array = false; - break; /* sized array mismatch */ - } - lhs_t = lhs_t->fields.array; - rhs_t = rhs_t->fields.array; - } - if (unsized_array) { - if (is_initializer) { - return rhs; - } else { - _mesa_glsl_error(&loc, state, - "implicitly sized arrays cannot be assigned"); - return NULL; - } - } - - /* Check for implicit conversion in GLSL 1.20 */ - if (apply_implicit_conversion(lhs->type, rhs, state)) { - if (rhs->type == lhs->type) - return rhs; - } - - _mesa_glsl_error(&loc, state, - "%s of type %s cannot be assigned to " - "variable of type %s", - is_initializer ? "initializer" : "value", - rhs->type->name, lhs->type->name); - - return NULL; -} - -static void -mark_whole_array_access(ir_rvalue *access) -{ - ir_dereference_variable *deref = access->as_dereference_variable(); - - if (deref && deref->var) { - deref->var->data.max_array_access = deref->type->length - 1; - } -} - -static bool -do_assignment(exec_list *instructions, struct _mesa_glsl_parse_state *state, - const char *non_lvalue_description, - ir_rvalue *lhs, ir_rvalue *rhs, - ir_rvalue **out_rvalue, bool needs_rvalue, - bool is_initializer, - YYLTYPE lhs_loc) -{ - void *ctx = state; - bool error_emitted = (lhs->type->is_error() || rhs->type->is_error()); - - ir_variable *lhs_var = lhs->variable_referenced(); - if (lhs_var) - lhs_var->data.assigned = true; - - if (!error_emitted) { - if (non_lvalue_description != NULL) { - _mesa_glsl_error(&lhs_loc, state, - "assignment to %s", - non_lvalue_description); - error_emitted = true; - } else if (lhs_var != NULL && (lhs_var->data.read_only || - (lhs_var->data.mode == ir_var_shader_storage && - lhs_var->data.image_read_only))) { - /* We can have image_read_only set on both images and buffer variables, - * but in the former there is a distinction between assignments to - * the variable itself (read_only) and to the memory they point to - * (image_read_only), while in the case of buffer variables there is - * no such distinction, that is why this check here is limited to - * buffer variables alone. 
- */ - _mesa_glsl_error(&lhs_loc, state, - "assignment to read-only variable '%s'", - lhs_var->name); - error_emitted = true; - } else if (lhs->type->is_array() && - !state->check_version(120, 300, &lhs_loc, - "whole array assignment forbidden")) { - /* From page 32 (page 38 of the PDF) of the GLSL 1.10 spec: - * - * "Other binary or unary expressions, non-dereferenced - * arrays, function names, swizzles with repeated fields, - * and constants cannot be l-values." - * - * The restriction on arrays is lifted in GLSL 1.20 and GLSL ES 3.00. - */ - error_emitted = true; - } else if (!lhs->is_lvalue()) { - _mesa_glsl_error(& lhs_loc, state, "non-lvalue in assignment"); - error_emitted = true; - } - } - - ir_rvalue *new_rhs = - validate_assignment(state, lhs_loc, lhs, rhs, is_initializer); - if (new_rhs != NULL) { - rhs = new_rhs; - - /* If the LHS array was not declared with a size, it takes it size from - * the RHS. If the LHS is an l-value and a whole array, it must be a - * dereference of a variable. Any other case would require that the LHS - * is either not an l-value or not a whole array. - */ - if (lhs->type->is_unsized_array()) { - ir_dereference *const d = lhs->as_dereference(); - - assert(d != NULL); - - ir_variable *const var = d->variable_referenced(); - - assert(var != NULL); - - if (var->data.max_array_access >= unsigned(rhs->type->array_size())) { - /* FINISHME: This should actually log the location of the RHS. */ - _mesa_glsl_error(& lhs_loc, state, "array size must be > %u due to " - "previous access", - var->data.max_array_access); - } - - var->type = glsl_type::get_array_instance(lhs->type->fields.array, - rhs->type->array_size()); - d->type = var->type; - } - if (lhs->type->is_array()) { - mark_whole_array_access(rhs); - mark_whole_array_access(lhs); - } - } - - /* Most callers of do_assignment (assign, add_assign, pre_inc/dec, - * but not post_inc) need the converted assigned value as an rvalue - * to handle things like: - * - * i = j += 1; - */ - if (needs_rvalue) { - ir_variable *var = new(ctx) ir_variable(rhs->type, "assignment_tmp", - ir_var_temporary); - instructions->push_tail(var); - instructions->push_tail(assign(var, rhs)); - - if (!error_emitted) { - ir_dereference_variable *deref_var = new(ctx) ir_dereference_variable(var); - instructions->push_tail(new(ctx) ir_assignment(lhs, deref_var)); - } - ir_rvalue *rvalue = new(ctx) ir_dereference_variable(var); - - *out_rvalue = rvalue; - } else { - if (!error_emitted) - instructions->push_tail(new(ctx) ir_assignment(lhs, rhs)); - *out_rvalue = NULL; - } - - return error_emitted; -} - -static ir_rvalue * -get_lvalue_copy(exec_list *instructions, ir_rvalue *lvalue) -{ - void *ctx = ralloc_parent(lvalue); - ir_variable *var; - - var = new(ctx) ir_variable(lvalue->type, "_post_incdec_tmp", - ir_var_temporary); - instructions->push_tail(var); - - instructions->push_tail(new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var), - lvalue)); - - return new(ctx) ir_dereference_variable(var); -} - - -ir_rvalue * -ast_node::hir(exec_list *instructions, struct _mesa_glsl_parse_state *state) -{ - (void) instructions; - (void) state; - - return NULL; -} - -bool -ast_node::has_sequence_subexpression() const -{ - return false; -} - -void -ast_function_expression::hir_no_rvalue(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - (void)hir(instructions, state); -} - -void -ast_aggregate_initializer::hir_no_rvalue(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - (void)hir(instructions, state); 
-} - -static ir_rvalue * -do_comparison(void *mem_ctx, int operation, ir_rvalue *op0, ir_rvalue *op1) -{ - int join_op; - ir_rvalue *cmp = NULL; - - if (operation == ir_binop_all_equal) - join_op = ir_binop_logic_and; - else - join_op = ir_binop_logic_or; - - switch (op0->type->base_type) { - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_BOOL: - case GLSL_TYPE_DOUBLE: - return new(mem_ctx) ir_expression(operation, op0, op1); - - case GLSL_TYPE_ARRAY: { - for (unsigned int i = 0; i < op0->type->length; i++) { - ir_rvalue *e0, *e1, *result; - - e0 = new(mem_ctx) ir_dereference_array(op0->clone(mem_ctx, NULL), - new(mem_ctx) ir_constant(i)); - e1 = new(mem_ctx) ir_dereference_array(op1->clone(mem_ctx, NULL), - new(mem_ctx) ir_constant(i)); - result = do_comparison(mem_ctx, operation, e0, e1); - - if (cmp) { - cmp = new(mem_ctx) ir_expression(join_op, cmp, result); - } else { - cmp = result; - } - } - - mark_whole_array_access(op0); - mark_whole_array_access(op1); - break; - } - - case GLSL_TYPE_STRUCT: { - for (unsigned int i = 0; i < op0->type->length; i++) { - ir_rvalue *e0, *e1, *result; - const char *field_name = op0->type->fields.structure[i].name; - - e0 = new(mem_ctx) ir_dereference_record(op0->clone(mem_ctx, NULL), - field_name); - e1 = new(mem_ctx) ir_dereference_record(op1->clone(mem_ctx, NULL), - field_name); - result = do_comparison(mem_ctx, operation, e0, e1); - - if (cmp) { - cmp = new(mem_ctx) ir_expression(join_op, cmp, result); - } else { - cmp = result; - } - } - break; - } - - case GLSL_TYPE_ERROR: - case GLSL_TYPE_VOID: - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_ATOMIC_UINT: - case GLSL_TYPE_SUBROUTINE: - /* I assume a comparison of a struct containing a sampler just - * ignores the sampler present in the type. - */ - break; - } - - if (cmp == NULL) - cmp = new(mem_ctx) ir_constant(true); - - return cmp; -} - -/* For logical operations, we want to ensure that the operands are - * scalar booleans. If it isn't, emit an error and return a constant - * boolean to avoid triggering cascading error messages. - */ -ir_rvalue * -get_scalar_boolean_operand(exec_list *instructions, - struct _mesa_glsl_parse_state *state, - ast_expression *parent_expr, - int operand, - const char *operand_name, - bool *error_emitted) -{ - ast_expression *expr = parent_expr->subexpressions[operand]; - void *ctx = state; - ir_rvalue *val = expr->hir(instructions, state); - - if (val->type->is_boolean() && val->type->is_scalar()) - return val; - - if (!*error_emitted) { - YYLTYPE loc = expr->get_location(); - _mesa_glsl_error(&loc, state, "%s of `%s' must be scalar boolean", - operand_name, - parent_expr->operator_string(parent_expr->oper)); - *error_emitted = true; - } - - return new(ctx) ir_constant(true); -} - -/** - * If name refers to a builtin array whose maximum allowed size is less than - * size, report an error and return true. Otherwise return false. - */ -void -check_builtin_array_max_size(const char *name, unsigned size, - YYLTYPE loc, struct _mesa_glsl_parse_state *state) -{ - if ((strcmp("gl_TexCoord", name) == 0) - && (size > state->Const.MaxTextureCoords)) { - /* From page 54 (page 60 of the PDF) of the GLSL 1.20 spec: - * - * "The size [of gl_TexCoord] can be at most - * gl_MaxTextureCoords." 
- */ - _mesa_glsl_error(&loc, state, "`gl_TexCoord' array size cannot " - "be larger than gl_MaxTextureCoords (%u)", - state->Const.MaxTextureCoords); - } else if (strcmp("gl_ClipDistance", name) == 0 - && size > state->Const.MaxClipPlanes) { - /* From section 7.1 (Vertex Shader Special Variables) of the - * GLSL 1.30 spec: - * - * "The gl_ClipDistance array is predeclared as unsized and - * must be sized by the shader either redeclaring it with a - * size or indexing it only with integral constant - * expressions. ... The size can be at most - * gl_MaxClipDistances." - */ - _mesa_glsl_error(&loc, state, "`gl_ClipDistance' array size cannot " - "be larger than gl_MaxClipDistances (%u)", - state->Const.MaxClipPlanes); - } -} - -/** - * Create the constant 1, of a which is appropriate for incrementing and - * decrementing values of the given GLSL type. For example, if type is vec4, - * this creates a constant value of 1.0 having type float. - * - * If the given type is invalid for increment and decrement operators, return - * a floating point 1--the error will be detected later. - */ -static ir_rvalue * -constant_one_for_inc_dec(void *ctx, const glsl_type *type) -{ - switch (type->base_type) { - case GLSL_TYPE_UINT: - return new(ctx) ir_constant((unsigned) 1); - case GLSL_TYPE_INT: - return new(ctx) ir_constant(1); - default: - case GLSL_TYPE_FLOAT: - return new(ctx) ir_constant(1.0f); - } -} - -ir_rvalue * -ast_expression::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - return do_hir(instructions, state, true); -} - -void -ast_expression::hir_no_rvalue(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - do_hir(instructions, state, false); -} - -ir_rvalue * -ast_expression::do_hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state, - bool needs_rvalue) -{ - void *ctx = state; - static const int operations[AST_NUM_OPERATORS] = { - -1, /* ast_assign doesn't convert to ir_expression. */ - -1, /* ast_plus doesn't convert to ir_expression. */ - ir_unop_neg, - ir_binop_add, - ir_binop_sub, - ir_binop_mul, - ir_binop_div, - ir_binop_mod, - ir_binop_lshift, - ir_binop_rshift, - ir_binop_less, - ir_binop_greater, - ir_binop_lequal, - ir_binop_gequal, - ir_binop_all_equal, - ir_binop_any_nequal, - ir_binop_bit_and, - ir_binop_bit_xor, - ir_binop_bit_or, - ir_unop_bit_not, - ir_binop_logic_and, - ir_binop_logic_xor, - ir_binop_logic_or, - ir_unop_logic_not, - - /* Note: The following block of expression types actually convert - * to multiple IR instructions. - */ - ir_binop_mul, /* ast_mul_assign */ - ir_binop_div, /* ast_div_assign */ - ir_binop_mod, /* ast_mod_assign */ - ir_binop_add, /* ast_add_assign */ - ir_binop_sub, /* ast_sub_assign */ - ir_binop_lshift, /* ast_ls_assign */ - ir_binop_rshift, /* ast_rs_assign */ - ir_binop_bit_and, /* ast_and_assign */ - ir_binop_bit_xor, /* ast_xor_assign */ - ir_binop_bit_or, /* ast_or_assign */ - - -1, /* ast_conditional doesn't convert to ir_expression. */ - ir_binop_add, /* ast_pre_inc. */ - ir_binop_sub, /* ast_pre_dec. */ - ir_binop_add, /* ast_post_inc. */ - ir_binop_sub, /* ast_post_dec. */ - -1, /* ast_field_selection doesn't conv to ir_expression. */ - -1, /* ast_array_index doesn't convert to ir_expression. */ - -1, /* ast_function_call doesn't conv to ir_expression. */ - -1, /* ast_identifier doesn't convert to ir_expression. */ - -1, /* ast_int_constant doesn't convert to ir_expression. */ - -1, /* ast_uint_constant doesn't conv to ir_expression. 
*/ - -1, /* ast_float_constant doesn't conv to ir_expression. */ - -1, /* ast_bool_constant doesn't conv to ir_expression. */ - -1, /* ast_sequence doesn't convert to ir_expression. */ - }; - ir_rvalue *result = NULL; - ir_rvalue *op[3]; - const struct glsl_type *type; /* a temporary variable for switch cases */ - bool error_emitted = false; - YYLTYPE loc; - - loc = this->get_location(); - - switch (this->oper) { - case ast_aggregate: - assert(!"ast_aggregate: Should never get here."); - break; - - case ast_assign: { - op[0] = this->subexpressions[0]->hir(instructions, state); - op[1] = this->subexpressions[1]->hir(instructions, state); - - error_emitted = - do_assignment(instructions, state, - this->subexpressions[0]->non_lvalue_description, - op[0], op[1], &result, needs_rvalue, false, - this->subexpressions[0]->get_location()); - break; - } - - case ast_plus: - op[0] = this->subexpressions[0]->hir(instructions, state); - - type = unary_arithmetic_result_type(op[0]->type, state, & loc); - - error_emitted = type->is_error(); - - result = op[0]; - break; - - case ast_neg: - op[0] = this->subexpressions[0]->hir(instructions, state); - - type = unary_arithmetic_result_type(op[0]->type, state, & loc); - - error_emitted = type->is_error(); - - result = new(ctx) ir_expression(operations[this->oper], type, - op[0], NULL); - break; - - case ast_add: - case ast_sub: - case ast_mul: - case ast_div: - op[0] = this->subexpressions[0]->hir(instructions, state); - op[1] = this->subexpressions[1]->hir(instructions, state); - - type = arithmetic_result_type(op[0], op[1], - (this->oper == ast_mul), - state, & loc); - error_emitted = type->is_error(); - - result = new(ctx) ir_expression(operations[this->oper], type, - op[0], op[1]); - break; - - case ast_mod: - op[0] = this->subexpressions[0]->hir(instructions, state); - op[1] = this->subexpressions[1]->hir(instructions, state); - - type = modulus_result_type(op[0], op[1], state, &loc); - - assert(operations[this->oper] == ir_binop_mod); - - result = new(ctx) ir_expression(operations[this->oper], type, - op[0], op[1]); - error_emitted = type->is_error(); - break; - - case ast_lshift: - case ast_rshift: - if (!state->check_bitwise_operations_allowed(&loc)) { - error_emitted = true; - } - - op[0] = this->subexpressions[0]->hir(instructions, state); - op[1] = this->subexpressions[1]->hir(instructions, state); - type = shift_result_type(op[0]->type, op[1]->type, this->oper, state, - &loc); - result = new(ctx) ir_expression(operations[this->oper], type, - op[0], op[1]); - error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); - break; - - case ast_less: - case ast_greater: - case ast_lequal: - case ast_gequal: - op[0] = this->subexpressions[0]->hir(instructions, state); - op[1] = this->subexpressions[1]->hir(instructions, state); - - type = relational_result_type(op[0], op[1], state, & loc); - - /* The relational operators must either generate an error or result - * in a scalar boolean. See page 57 of the GLSL 1.50 spec. 
- */ - assert(type->is_error() - || ((type->base_type == GLSL_TYPE_BOOL) - && type->is_scalar())); - - result = new(ctx) ir_expression(operations[this->oper], type, - op[0], op[1]); - error_emitted = type->is_error(); - break; - - case ast_nequal: - case ast_equal: - op[0] = this->subexpressions[0]->hir(instructions, state); - op[1] = this->subexpressions[1]->hir(instructions, state); - - /* From page 58 (page 64 of the PDF) of the GLSL 1.50 spec: - * - * "The equality operators equal (==), and not equal (!=) - * operate on all types. They result in a scalar Boolean. If - * the operand types do not match, then there must be a - * conversion from Section 4.1.10 "Implicit Conversions" - * applied to one operand that can make them match, in which - * case this conversion is done." - */ - - if (op[0]->type == glsl_type::void_type || op[1]->type == glsl_type::void_type) { - _mesa_glsl_error(& loc, state, "`%s': wrong operand types: " - "no operation `%1$s' exists that takes a left-hand " - "operand of type 'void' or a right operand of type " - "'void'", (this->oper == ast_equal) ? "==" : "!="); - error_emitted = true; - } else if ((!apply_implicit_conversion(op[0]->type, op[1], state) - && !apply_implicit_conversion(op[1]->type, op[0], state)) - || (op[0]->type != op[1]->type)) { - _mesa_glsl_error(& loc, state, "operands of `%s' must have the same " - "type", (this->oper == ast_equal) ? "==" : "!="); - error_emitted = true; - } else if ((op[0]->type->is_array() || op[1]->type->is_array()) && - !state->check_version(120, 300, &loc, - "array comparisons forbidden")) { - error_emitted = true; - } else if ((op[0]->type->contains_opaque() || - op[1]->type->contains_opaque())) { - _mesa_glsl_error(&loc, state, "opaque type comparisons forbidden"); - error_emitted = true; - } - - if (error_emitted) { - result = new(ctx) ir_constant(false); - } else { - result = do_comparison(ctx, operations[this->oper], op[0], op[1]); - assert(result->type == glsl_type::bool_type); - } - break; - - case ast_bit_and: - case ast_bit_xor: - case ast_bit_or: - op[0] = this->subexpressions[0]->hir(instructions, state); - op[1] = this->subexpressions[1]->hir(instructions, state); - type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc); - result = new(ctx) ir_expression(operations[this->oper], type, - op[0], op[1]); - error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); - break; - - case ast_bit_not: - op[0] = this->subexpressions[0]->hir(instructions, state); - - if (!state->check_bitwise_operations_allowed(&loc)) { - error_emitted = true; - } - - if (!op[0]->type->is_integer()) { - _mesa_glsl_error(&loc, state, "operand of `~' must be an integer"); - error_emitted = true; - } - - type = error_emitted ? 
glsl_type::error_type : op[0]->type; - result = new(ctx) ir_expression(ir_unop_bit_not, type, op[0], NULL); - break; - - case ast_logic_and: { - exec_list rhs_instructions; - op[0] = get_scalar_boolean_operand(instructions, state, this, 0, - "LHS", &error_emitted); - op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1, - "RHS", &error_emitted); - - if (rhs_instructions.is_empty()) { - result = new(ctx) ir_expression(ir_binop_logic_and, op[0], op[1]); - type = result->type; - } else { - ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type, - "and_tmp", - ir_var_temporary); - instructions->push_tail(tmp); - - ir_if *const stmt = new(ctx) ir_if(op[0]); - instructions->push_tail(stmt); - - stmt->then_instructions.append_list(&rhs_instructions); - ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp); - ir_assignment *const then_assign = - new(ctx) ir_assignment(then_deref, op[1]); - stmt->then_instructions.push_tail(then_assign); - - ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp); - ir_assignment *const else_assign = - new(ctx) ir_assignment(else_deref, new(ctx) ir_constant(false)); - stmt->else_instructions.push_tail(else_assign); - - result = new(ctx) ir_dereference_variable(tmp); - type = tmp->type; - } - break; - } - - case ast_logic_or: { - exec_list rhs_instructions; - op[0] = get_scalar_boolean_operand(instructions, state, this, 0, - "LHS", &error_emitted); - op[1] = get_scalar_boolean_operand(&rhs_instructions, state, this, 1, - "RHS", &error_emitted); - - if (rhs_instructions.is_empty()) { - result = new(ctx) ir_expression(ir_binop_logic_or, op[0], op[1]); - type = result->type; - } else { - ir_variable *const tmp = new(ctx) ir_variable(glsl_type::bool_type, - "or_tmp", - ir_var_temporary); - instructions->push_tail(tmp); - - ir_if *const stmt = new(ctx) ir_if(op[0]); - instructions->push_tail(stmt); - - ir_dereference *const then_deref = new(ctx) ir_dereference_variable(tmp); - ir_assignment *const then_assign = - new(ctx) ir_assignment(then_deref, new(ctx) ir_constant(true)); - stmt->then_instructions.push_tail(then_assign); - - stmt->else_instructions.append_list(&rhs_instructions); - ir_dereference *const else_deref = new(ctx) ir_dereference_variable(tmp); - ir_assignment *const else_assign = - new(ctx) ir_assignment(else_deref, op[1]); - stmt->else_instructions.push_tail(else_assign); - - result = new(ctx) ir_dereference_variable(tmp); - type = tmp->type; - } - break; - } - - case ast_logic_xor: - /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec: - * - * "The logical binary operators and (&&), or ( | | ), and - * exclusive or (^^). They operate only on two Boolean - * expressions and result in a Boolean expression." 
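- *
- * A hypothetical example of the only operand shape `^^' accepts:
- *
- *    bool p, q;
- *    bool r = p ^^ q;    // OK: two scalar Booleans
- *    // vec2 v; v ^^ v;  // error: not Boolean expressions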
- */
- op[0] = get_scalar_boolean_operand(instructions, state, this, 0, "LHS",
- &error_emitted);
- op[1] = get_scalar_boolean_operand(instructions, state, this, 1, "RHS",
- &error_emitted);
-
- result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type,
- op[0], op[1]);
- break;
-
- case ast_logic_not:
- op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
- "operand", &error_emitted);
-
- result = new(ctx) ir_expression(operations[this->oper], glsl_type::bool_type,
- op[0], NULL);
- break;
-
- case ast_mul_assign:
- case ast_div_assign:
- case ast_add_assign:
- case ast_sub_assign: {
- op[0] = this->subexpressions[0]->hir(instructions, state);
- op[1] = this->subexpressions[1]->hir(instructions, state);
-
- type = arithmetic_result_type(op[0], op[1],
- (this->oper == ast_mul_assign),
- state, & loc);
-
- ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
- op[0], op[1]);
-
- error_emitted =
- do_assignment(instructions, state,
- this->subexpressions[0]->non_lvalue_description,
- op[0]->clone(ctx, NULL), temp_rhs,
- &result, needs_rvalue, false,
- this->subexpressions[0]->get_location());
-
- /* GLSL 1.10 does not allow array assignment. However, we don't have to
- * explicitly test for this because none of the binary expression
- * operators allow array operands either.
- */
-
- break;
- }
-
- case ast_mod_assign: {
- op[0] = this->subexpressions[0]->hir(instructions, state);
- op[1] = this->subexpressions[1]->hir(instructions, state);
-
- type = modulus_result_type(op[0], op[1], state, &loc);
-
- assert(operations[this->oper] == ir_binop_mod);
-
- ir_rvalue *temp_rhs;
- temp_rhs = new(ctx) ir_expression(operations[this->oper], type,
- op[0], op[1]);
-
- error_emitted =
- do_assignment(instructions, state,
- this->subexpressions[0]->non_lvalue_description,
- op[0]->clone(ctx, NULL), temp_rhs,
- &result, needs_rvalue, false,
- this->subexpressions[0]->get_location());
- break;
- }
-
- case ast_ls_assign:
- case ast_rs_assign: {
- op[0] = this->subexpressions[0]->hir(instructions, state);
- op[1] = this->subexpressions[1]->hir(instructions, state);
- type = shift_result_type(op[0]->type, op[1]->type, this->oper, state,
- &loc);
- ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper],
- type, op[0], op[1]);
- error_emitted =
- do_assignment(instructions, state,
- this->subexpressions[0]->non_lvalue_description,
- op[0]->clone(ctx, NULL), temp_rhs,
- &result, needs_rvalue, false,
- this->subexpressions[0]->get_location());
- break;
- }
-
- case ast_and_assign:
- case ast_xor_assign:
- case ast_or_assign: {
- op[0] = this->subexpressions[0]->hir(instructions, state);
- op[1] = this->subexpressions[1]->hir(instructions, state);
- type = bit_logic_result_type(op[0], op[1], this->oper, state, &loc);
- ir_rvalue *temp_rhs = new(ctx) ir_expression(operations[this->oper],
- type, op[0], op[1]);
- error_emitted =
- do_assignment(instructions, state,
- this->subexpressions[0]->non_lvalue_description,
- op[0]->clone(ctx, NULL), temp_rhs,
- &result, needs_rvalue, false,
- this->subexpressions[0]->get_location());
- break;
- }
-
- case ast_conditional: {
- /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec:
- *
- * "The ternary selection operator (?:). It operates on three
- * expressions (exp1 ? exp2 : exp3). This operator evaluates the
- * first expression, which must result in a scalar Boolean."
- */
- op[0] = get_scalar_boolean_operand(instructions, state, this, 0,
- "condition", &error_emitted);
-
- /* The ?: 
operator is implemented by generating an anonymous temporary - * followed by an if-statement. The last instruction in each branch of - * the if-statement assigns a value to the anonymous temporary. This - * temporary is the r-value of the expression. - */ - exec_list then_instructions; - exec_list else_instructions; - - op[1] = this->subexpressions[1]->hir(&then_instructions, state); - op[2] = this->subexpressions[2]->hir(&else_instructions, state); - - /* From page 59 (page 65 of the PDF) of the GLSL 1.50 spec: - * - * "The second and third expressions can be any type, as - * long their types match, or there is a conversion in - * Section 4.1.10 "Implicit Conversions" that can be applied - * to one of the expressions to make their types match. This - * resulting matching type is the type of the entire - * expression." - */ - if ((!apply_implicit_conversion(op[1]->type, op[2], state) - && !apply_implicit_conversion(op[2]->type, op[1], state)) - || (op[1]->type != op[2]->type)) { - YYLTYPE loc = this->subexpressions[1]->get_location(); - - _mesa_glsl_error(& loc, state, "second and third operands of ?: " - "operator must have matching types"); - error_emitted = true; - type = glsl_type::error_type; - } else { - type = op[1]->type; - } - - /* From page 33 (page 39 of the PDF) of the GLSL 1.10 spec: - * - * "The second and third expressions must be the same type, but can - * be of any type other than an array." - */ - if (type->is_array() && - !state->check_version(120, 300, &loc, - "second and third operands of ?: operator " - "cannot be arrays")) { - error_emitted = true; - } - - /* From section 4.1.7 of the GLSL 4.50 spec (Opaque Types): - * - * "Except for array indexing, structure member selection, and - * parentheses, opaque variables are not allowed to be operands in - * expressions; such use results in a compile-time error." - */ - if (type->contains_opaque()) { - _mesa_glsl_error(&loc, state, "opaque variables cannot be operands " - "of the ?: operator"); - error_emitted = true; - } - - ir_constant *cond_val = op[0]->constant_expression_value(); - - if (then_instructions.is_empty() - && else_instructions.is_empty() - && cond_val != NULL) { - result = cond_val->value.b[0] ? op[1] : op[2]; - } else { - /* The copy to conditional_tmp reads the whole array. */ - if (type->is_array()) { - mark_whole_array_access(op[1]); - mark_whole_array_access(op[2]); - } - - ir_variable *const tmp = - new(ctx) ir_variable(type, "conditional_tmp", ir_var_temporary); - instructions->push_tail(tmp); - - ir_if *const stmt = new(ctx) ir_if(op[0]); - instructions->push_tail(stmt); - - then_instructions.move_nodes_to(& stmt->then_instructions); - ir_dereference *const then_deref = - new(ctx) ir_dereference_variable(tmp); - ir_assignment *const then_assign = - new(ctx) ir_assignment(then_deref, op[1]); - stmt->then_instructions.push_tail(then_assign); - - else_instructions.move_nodes_to(& stmt->else_instructions); - ir_dereference *const else_deref = - new(ctx) ir_dereference_variable(tmp); - ir_assignment *const else_assign = - new(ctx) ir_assignment(else_deref, op[2]); - stmt->else_instructions.push_tail(else_assign); - - result = new(ctx) ir_dereference_variable(tmp); - } - break; - } - - case ast_pre_inc: - case ast_pre_dec: { - this->non_lvalue_description = (this->oper == ast_pre_inc) - ? 
"pre-increment operation" : "pre-decrement operation"; - - op[0] = this->subexpressions[0]->hir(instructions, state); - op[1] = constant_one_for_inc_dec(ctx, op[0]->type); - - type = arithmetic_result_type(op[0], op[1], false, state, & loc); - - ir_rvalue *temp_rhs; - temp_rhs = new(ctx) ir_expression(operations[this->oper], type, - op[0], op[1]); - - error_emitted = - do_assignment(instructions, state, - this->subexpressions[0]->non_lvalue_description, - op[0]->clone(ctx, NULL), temp_rhs, - &result, needs_rvalue, false, - this->subexpressions[0]->get_location()); - break; - } - - case ast_post_inc: - case ast_post_dec: { - this->non_lvalue_description = (this->oper == ast_post_inc) - ? "post-increment operation" : "post-decrement operation"; - op[0] = this->subexpressions[0]->hir(instructions, state); - op[1] = constant_one_for_inc_dec(ctx, op[0]->type); - - error_emitted = op[0]->type->is_error() || op[1]->type->is_error(); - - type = arithmetic_result_type(op[0], op[1], false, state, & loc); - - ir_rvalue *temp_rhs; - temp_rhs = new(ctx) ir_expression(operations[this->oper], type, - op[0], op[1]); - - /* Get a temporary of a copy of the lvalue before it's modified. - * This may get thrown away later. - */ - result = get_lvalue_copy(instructions, op[0]->clone(ctx, NULL)); - - ir_rvalue *junk_rvalue; - error_emitted = - do_assignment(instructions, state, - this->subexpressions[0]->non_lvalue_description, - op[0]->clone(ctx, NULL), temp_rhs, - &junk_rvalue, false, false, - this->subexpressions[0]->get_location()); - - break; - } - - case ast_field_selection: - result = _mesa_ast_field_selection_to_hir(this, instructions, state); - break; - - case ast_array_index: { - YYLTYPE index_loc = subexpressions[1]->get_location(); - - op[0] = subexpressions[0]->hir(instructions, state); - op[1] = subexpressions[1]->hir(instructions, state); - - result = _mesa_ast_array_index_to_hir(ctx, state, op[0], op[1], - loc, index_loc); - - if (result->type->is_error()) - error_emitted = true; - - break; - } - - case ast_unsized_array_dim: - assert(!"ast_unsized_array_dim: Should never get here."); - break; - - case ast_function_call: - /* Should *NEVER* get here. ast_function_call should always be handled - * by ast_function_expression::hir. - */ - assert(0); - break; - - case ast_identifier: { - /* ast_identifier can appear several places in a full abstract syntax - * tree. This particular use must be at location specified in the grammar - * as 'variable_identifier'. 
- */
- ir_variable *var =
- state->symbols->get_variable(this->primary_expression.identifier);
-
- if (var != NULL) {
- var->data.used = true;
- result = new(ctx) ir_dereference_variable(var);
- } else {
- _mesa_glsl_error(& loc, state, "`%s' undeclared",
- this->primary_expression.identifier);
-
- result = ir_rvalue::error_value(ctx);
- error_emitted = true;
- }
- break;
- }
-
- case ast_int_constant:
- result = new(ctx) ir_constant(this->primary_expression.int_constant);
- break;
-
- case ast_uint_constant:
- result = new(ctx) ir_constant(this->primary_expression.uint_constant);
- break;
-
- case ast_float_constant:
- result = new(ctx) ir_constant(this->primary_expression.float_constant);
- break;
-
- case ast_bool_constant:
- result = new(ctx) ir_constant(bool(this->primary_expression.bool_constant));
- break;
-
- case ast_double_constant:
- result = new(ctx) ir_constant(this->primary_expression.double_constant);
- break;
-
- case ast_sequence: {
- /* It should not be possible to generate a sequence in the AST without
- * any expressions in it.
- */
- assert(!this->expressions.is_empty());
-
- /* The r-value of a sequence is the last expression in the sequence. If
- * the other expressions in the sequence do not have side-effects (and
- * therefore do not add instructions to the instruction list), they get
- * dropped on the floor.
- */
- exec_node *previous_tail_pred = NULL;
- YYLTYPE previous_operand_loc = loc;
-
- foreach_list_typed (ast_node, ast, link, &this->expressions) {
- /* If one of the operands of comma operator does not generate any
- * code, we want to emit a warning. At each pass through the loop
- * previous_tail_pred will point to the last instruction in the
- * stream *before* processing the previous operand. Naturally,
- * instructions->tail_pred will point to the last instruction in the
- * stream *after* processing the previous operand. If the two
- * pointers match, then the previous operand had no effect.
- *
- * The warning behavior here differs slightly from GCC. GCC will
- * only emit a warning if none of the left-hand operands have an
- * effect. However, it will emit a warning for each. I believe that
- * there are some cases in C (especially with GCC extensions) where
- * it is useful to have an intermediate step in a sequence have no
- * effect, but I don't think these cases exist in GLSL. Either way,
- * it would be a giant hassle to replicate that behavior.
- */
- if (previous_tail_pred == instructions->tail_pred) {
- _mesa_glsl_warning(&previous_operand_loc, state,
- "left-hand operand of comma expression has "
- "no effect");
- }
-
- /* tail_pred is directly accessed instead of using the get_tail()
- * method for performance reasons. get_tail() has extra code to
- * return NULL when the list is empty. We don't care about that
- * here, so using tail_pred directly is fine.
- */
- previous_tail_pred = instructions->tail_pred;
- previous_operand_loc = ast->get_location();
-
- result = ast->hir(instructions, state);
- }
-
- /* Any errors should have already been emitted in the loop above.
- */
- error_emitted = true;
- break;
- }
- }
- type = NULL; /* use result->type, not type. 
*/
- assert(result != NULL || !needs_rvalue);
-
- if (result && result->type->is_error() && !error_emitted)
- _mesa_glsl_error(& loc, state, "type mismatch");
-
- return result;
-}
-
-bool
-ast_expression::has_sequence_subexpression() const
-{
- switch (this->oper) {
- case ast_plus:
- case ast_neg:
- case ast_bit_not:
- case ast_logic_not:
- case ast_pre_inc:
- case ast_pre_dec:
- case ast_post_inc:
- case ast_post_dec:
- return this->subexpressions[0]->has_sequence_subexpression();
-
- case ast_assign:
- case ast_add:
- case ast_sub:
- case ast_mul:
- case ast_div:
- case ast_mod:
- case ast_lshift:
- case ast_rshift:
- case ast_less:
- case ast_greater:
- case ast_lequal:
- case ast_gequal:
- case ast_nequal:
- case ast_equal:
- case ast_bit_and:
- case ast_bit_xor:
- case ast_bit_or:
- case ast_logic_and:
- case ast_logic_or:
- case ast_logic_xor:
- case ast_array_index:
- case ast_mul_assign:
- case ast_div_assign:
- case ast_add_assign:
- case ast_sub_assign:
- case ast_mod_assign:
- case ast_ls_assign:
- case ast_rs_assign:
- case ast_and_assign:
- case ast_xor_assign:
- case ast_or_assign:
- return this->subexpressions[0]->has_sequence_subexpression() ||
- this->subexpressions[1]->has_sequence_subexpression();
-
- case ast_conditional:
- return this->subexpressions[0]->has_sequence_subexpression() ||
- this->subexpressions[1]->has_sequence_subexpression() ||
- this->subexpressions[2]->has_sequence_subexpression();
-
- case ast_sequence:
- return true;
-
- case ast_field_selection:
- case ast_identifier:
- case ast_int_constant:
- case ast_uint_constant:
- case ast_float_constant:
- case ast_bool_constant:
- case ast_double_constant:
- return false;
-
- case ast_aggregate:
- unreachable("ast_aggregate: Should never get here.");
-
- case ast_function_call:
- unreachable("should be handled by ast_function_expression::hir");
-
- case ast_unsized_array_dim:
- unreachable("ast_unsized_array_dim: Should never get here.");
- }
-
- return false;
-}
-
-ir_rvalue *
-ast_expression_statement::hir(exec_list *instructions,
- struct _mesa_glsl_parse_state *state)
-{
- /* It is possible to have expression statements that don't have an
- * expression. This is the solitary semicolon:
- *
- * for (i = 0; i < 5; i++)
- * ;
- *
- * In this case the expression will be NULL. Test for NULL and don't do
- * anything in that case.
- */
- if (expression != NULL)
- expression->hir_no_rvalue(instructions, state);
-
- /* Statements do not have r-values.
- */
- return NULL;
-}
-
-
-ir_rvalue *
-ast_compound_statement::hir(exec_list *instructions,
- struct _mesa_glsl_parse_state *state)
-{
- if (new_scope)
- state->symbols->push_scope();
-
- foreach_list_typed (ast_node, ast, link, &this->statements)
- ast->hir(instructions, state);
-
- if (new_scope)
- state->symbols->pop_scope();
-
- /* Compound statements do not have r-values.
- */
- return NULL;
-}
-
-/**
- * Evaluate the given exec_node (which should be an ast_node representing
- * a single array dimension) and return its integer value.
- */
-static unsigned
-process_array_size(exec_node *node,
- struct _mesa_glsl_parse_state *state)
-{
- exec_list dummy_instructions;
-
- ast_node *array_size = exec_node_data(ast_node, node, link);
-
- /**
- * Dimensions other than the outermost dimension can be unsized if they
- * are immediately sized by a constructor or initializer. 
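- *
- * A hypothetical example of such a dimension, sized by its initializer:
- *
- *    float a[] = float[](1.0, 2.0, 3.0);   // [] becomes [3]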
- */ - if (((ast_expression*)array_size)->oper == ast_unsized_array_dim) - return 0; - - ir_rvalue *const ir = array_size->hir(& dummy_instructions, state); - YYLTYPE loc = array_size->get_location(); - - if (ir == NULL) { - _mesa_glsl_error(& loc, state, - "array size could not be resolved"); - return 0; - } - - if (!ir->type->is_integer()) { - _mesa_glsl_error(& loc, state, - "array size must be integer type"); - return 0; - } - - if (!ir->type->is_scalar()) { - _mesa_glsl_error(& loc, state, - "array size must be scalar type"); - return 0; - } - - ir_constant *const size = ir->constant_expression_value(); - if (size == NULL || array_size->has_sequence_subexpression()) { - _mesa_glsl_error(& loc, state, "array size must be a " - "constant valued expression"); - return 0; - } - - if (size->value.i[0] <= 0) { - _mesa_glsl_error(& loc, state, "array size must be > 0"); - return 0; - } - - assert(size->type == ir->type); - - /* If the array size is const (and we've verified that - * it is) then no instructions should have been emitted - * when we converted it to HIR. If they were emitted, - * then either the array size isn't const after all, or - * we are emitting unnecessary instructions. - */ - assert(dummy_instructions.is_empty()); - - return size->value.u[0]; -} - -static const glsl_type * -process_array_type(YYLTYPE *loc, const glsl_type *base, - ast_array_specifier *array_specifier, - struct _mesa_glsl_parse_state *state) -{ - const glsl_type *array_type = base; - - if (array_specifier != NULL) { - if (base->is_array()) { - - /* From page 19 (page 25) of the GLSL 1.20 spec: - * - * "Only one-dimensional arrays may be declared." - */ - if (!state->check_arrays_of_arrays_allowed(loc)) { - return glsl_type::error_type; - } - } - - for (exec_node *node = array_specifier->array_dimensions.tail_pred; - !node->is_head_sentinel(); node = node->prev) { - unsigned array_size = process_array_size(node, state); - array_type = glsl_type::get_array_instance(array_type, array_size); - } - } - - return array_type; -} - -static bool -precision_qualifier_allowed(const glsl_type *type) -{ - /* Precision qualifiers apply to floating point, integer and opaque - * types. - * - * Section 4.5.2 (Precision Qualifiers) of the GLSL 1.30 spec says: - * "Any floating point or any integer declaration can have the type - * preceded by one of these precision qualifiers [...] Literal - * constants do not have precision qualifiers. Neither do Boolean - * variables. - * - * Section 4.5 (Precision and Precision Qualifiers) of the GLSL 1.30 - * spec also says: - * - * "Precision qualifiers are added for code portability with OpenGL - * ES, not for functionality. They have the same syntax as in OpenGL - * ES." - * - * Section 8 (Built-In Functions) of the GLSL ES 1.00 spec says: - * - * "uniform lowp sampler2D sampler; - * highp vec2 coord; - * ... - * lowp vec4 col = texture2D (sampler, coord); - * // texture2D returns lowp" - * - * From this, we infer that GLSL 1.30 (and later) should allow precision - * qualifiers on sampler types just like float and integer types. 
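- *
- * A hypothetical ES-style snippet of the types that may carry one:
- *
- *    precision mediump float;      // default for float declarations
- *    uniform lowp sampler2D tex;   // opaque type: allowed
- *    // lowp bool flag;            // error: Booleans take no precision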
- */ - return (type->is_float() - || type->is_integer() - || type->contains_opaque()) - && !type->without_array()->is_record(); -} - -const glsl_type * -ast_type_specifier::glsl_type(const char **name, - struct _mesa_glsl_parse_state *state) const -{ - const struct glsl_type *type; - - type = state->symbols->get_type(this->type_name); - *name = this->type_name; - - YYLTYPE loc = this->get_location(); - type = process_array_type(&loc, type, this->array_specifier, state); - - return type; -} - -/** - * From the OpenGL ES 3.0 spec, 4.5.4 Default Precision Qualifiers: - * - * "The precision statement - * - * precision precision-qualifier type; - * - * can be used to establish a default precision qualifier. The type field can - * be either int or float or any of the sampler types, (...) If type is float, - * the directive applies to non-precision-qualified floating point type - * (scalar, vector, and matrix) declarations. If type is int, the directive - * applies to all non-precision-qualified integer type (scalar, vector, signed, - * and unsigned) declarations." - * - * We use the symbol table to keep the values of the default precisions for - * each 'type' in each scope and we use the 'type' string from the precision - * statement as key in the symbol table. When we want to retrieve the default - * precision associated with a given glsl_type we need to know the type string - * associated with it. This is what this function returns. - */ -static const char * -get_type_name_for_precision_qualifier(const glsl_type *type) -{ - switch (type->base_type) { - case GLSL_TYPE_FLOAT: - return "float"; - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - return "int"; - case GLSL_TYPE_ATOMIC_UINT: - return "atomic_uint"; - case GLSL_TYPE_IMAGE: - /* fallthrough */ - case GLSL_TYPE_SAMPLER: { - const unsigned type_idx = - type->sampler_array + 2 * type->sampler_shadow; - const unsigned offset = type->base_type == GLSL_TYPE_SAMPLER ? 
0 : 4; - assert(type_idx < 4); - switch (type->sampler_type) { - case GLSL_TYPE_FLOAT: - switch (type->sampler_dimensionality) { - case GLSL_SAMPLER_DIM_1D: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "sampler1D", "sampler1DArray", - "sampler1DShadow", "sampler1DArrayShadow" - }; - return names[type_idx]; - } - case GLSL_SAMPLER_DIM_2D: { - static const char *const names[8] = { - "sampler2D", "sampler2DArray", - "sampler2DShadow", "sampler2DArrayShadow", - "image2D", "image2DArray", NULL, NULL - }; - return names[offset + type_idx]; - } - case GLSL_SAMPLER_DIM_3D: { - static const char *const names[8] = { - "sampler3D", NULL, NULL, NULL, - "image3D", NULL, NULL, NULL - }; - return names[offset + type_idx]; - } - case GLSL_SAMPLER_DIM_CUBE: { - static const char *const names[8] = { - "samplerCube", "samplerCubeArray", - "samplerCubeShadow", "samplerCubeArrayShadow", - "imageCube", NULL, NULL, NULL - }; - return names[offset + type_idx]; - } - case GLSL_SAMPLER_DIM_MS: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "sampler2DMS", "sampler2DMSArray", NULL, NULL - }; - return names[type_idx]; - } - case GLSL_SAMPLER_DIM_RECT: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "samplerRect", NULL, "samplerRectShadow", NULL - }; - return names[type_idx]; - } - case GLSL_SAMPLER_DIM_BUF: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "samplerBuffer", NULL, NULL, NULL - }; - return names[type_idx]; - } - case GLSL_SAMPLER_DIM_EXTERNAL: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "samplerExternalOES", NULL, NULL, NULL - }; - return names[type_idx]; - } - default: - unreachable("Unsupported sampler/image dimensionality"); - } /* sampler/image float dimensionality */ - break; - case GLSL_TYPE_INT: - switch (type->sampler_dimensionality) { - case GLSL_SAMPLER_DIM_1D: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "isampler1D", "isampler1DArray", NULL, NULL - }; - return names[type_idx]; - } - case GLSL_SAMPLER_DIM_2D: { - static const char *const names[8] = { - "isampler2D", "isampler2DArray", NULL, NULL, - "iimage2D", "iimage2DArray", NULL, NULL - }; - return names[offset + type_idx]; - } - case GLSL_SAMPLER_DIM_3D: { - static const char *const names[8] = { - "isampler3D", NULL, NULL, NULL, - "iimage3D", NULL, NULL, NULL - }; - return names[offset + type_idx]; - } - case GLSL_SAMPLER_DIM_CUBE: { - static const char *const names[8] = { - "isamplerCube", "isamplerCubeArray", NULL, NULL, - "iimageCube", NULL, NULL, NULL - }; - return names[offset + type_idx]; - } - case GLSL_SAMPLER_DIM_MS: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "isampler2DMS", "isampler2DMSArray", NULL, NULL - }; - return names[type_idx]; - } - case GLSL_SAMPLER_DIM_RECT: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "isamplerRect", NULL, "isamplerRectShadow", NULL - }; - return names[type_idx]; - } - case GLSL_SAMPLER_DIM_BUF: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "isamplerBuffer", NULL, NULL, NULL - }; - return names[type_idx]; - } - default: - unreachable("Unsupported isampler/iimage dimensionality"); - } /* sampler/image int dimensionality */ - break; - case GLSL_TYPE_UINT: - switch (type->sampler_dimensionality) { 
- case GLSL_SAMPLER_DIM_1D: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "usampler1D", "usampler1DArray", NULL, NULL - }; - return names[type_idx]; - } - case GLSL_SAMPLER_DIM_2D: { - static const char *const names[8] = { - "usampler2D", "usampler2DArray", NULL, NULL, - "uimage2D", "uimage2DArray", NULL, NULL - }; - return names[offset + type_idx]; - } - case GLSL_SAMPLER_DIM_3D: { - static const char *const names[8] = { - "usampler3D", NULL, NULL, NULL, - "uimage3D", NULL, NULL, NULL - }; - return names[offset + type_idx]; - } - case GLSL_SAMPLER_DIM_CUBE: { - static const char *const names[8] = { - "usamplerCube", "usamplerCubeArray", NULL, NULL, - "uimageCube", NULL, NULL, NULL - }; - return names[offset + type_idx]; - } - case GLSL_SAMPLER_DIM_MS: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "usampler2DMS", "usampler2DMSArray", NULL, NULL - }; - return names[type_idx]; - } - case GLSL_SAMPLER_DIM_RECT: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "usamplerRect", NULL, "usamplerRectShadow", NULL - }; - return names[type_idx]; - } - case GLSL_SAMPLER_DIM_BUF: { - assert(type->base_type == GLSL_TYPE_SAMPLER); - static const char *const names[4] = { - "usamplerBuffer", NULL, NULL, NULL - }; - return names[type_idx]; - } - default: - unreachable("Unsupported usampler/uimage dimensionality"); - } /* sampler/image uint dimensionality */ - break; - default: - unreachable("Unsupported sampler/image type"); - } /* sampler/image type */ - break; - } /* GLSL_TYPE_SAMPLER/GLSL_TYPE_IMAGE */ - break; - default: - unreachable("Unsupported type"); - } /* base type */ -} - -static unsigned -select_gles_precision(unsigned qual_precision, - const glsl_type *type, - struct _mesa_glsl_parse_state *state, YYLTYPE *loc) -{ - /* Precision qualifiers do not have any meaning in Desktop GLSL. - * In GLES we take the precision from the type qualifier if present, - * otherwise, if the type of the variable allows precision qualifiers at - * all, we look for the default precision qualifier for that type in the - * current scope. - */ - assert(state->es_shader); - - unsigned precision = GLSL_PRECISION_NONE; - if (qual_precision) { - precision = qual_precision; - } else if (precision_qualifier_allowed(type)) { - const char *type_name = - get_type_name_for_precision_qualifier(type->without_array()); - assert(type_name != NULL); - - precision = - state->symbols->get_default_precision_qualifier(type_name); - if (precision == ast_precision_none) { - _mesa_glsl_error(loc, state, - "No precision specified in this scope for type `%s'", - type->name); - } - } - return precision; -} - -const glsl_type * -ast_fully_specified_type::glsl_type(const char **name, - struct _mesa_glsl_parse_state *state) const -{ - return this->specifier->glsl_type(name, state); -} - -/** - * Determine whether a toplevel variable declaration declares a varying. This - * function operates by examining the variable's mode and the shader target, - * so it correctly identifies linkage variables regardless of whether they are - * declared using the deprecated "varying" syntax or the new "in/out" syntax. - * - * Passing a non-toplevel variable declaration (e.g. a function parameter) to - * this function will produce undefined results. 
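- *
- * Hypothetical toplevel declarations and how they classify:
- *
- *    out vec4 v_color;    // vertex shader: varying
- *    in  vec4 v_color;    // fragment shader: varying
- *    uniform vec4 tint;   // not a varying in any stage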
- */
-static bool
-is_varying_var(ir_variable *var, gl_shader_stage target)
-{
- switch (target) {
- case MESA_SHADER_VERTEX:
- return var->data.mode == ir_var_shader_out;
- case MESA_SHADER_FRAGMENT:
- return var->data.mode == ir_var_shader_in;
- default:
- return var->data.mode == ir_var_shader_out || var->data.mode == ir_var_shader_in;
- }
-}
-
-
-/**
- * Matrix layout qualifiers are only allowed on certain types
- */
-static void
-validate_matrix_layout_for_type(struct _mesa_glsl_parse_state *state,
- YYLTYPE *loc,
- const glsl_type *type,
- ir_variable *var)
-{
- if (var && !var->is_in_buffer_block()) {
- /* Layout qualifiers may only apply to interface blocks and fields in
- * them.
- */
- _mesa_glsl_error(loc, state,
- "uniform block layout qualifiers row_major and "
- "column_major may not be applied to variables "
- "outside of uniform blocks");
- } else if (!type->without_array()->is_matrix()) {
- /* The OpenGL ES 3.0 conformance tests did not originally allow
- * matrix layout qualifiers on non-matrices. However, the OpenGL
- * 4.4 and OpenGL ES 3.0 (revision TBD) specifications were
- * amended to specifically allow these layouts on all types. Emit
- * a warning so that people know their code may not be portable.
- */
- _mesa_glsl_warning(loc, state,
- "uniform block layout qualifiers row_major and "
- "column_major applied to non-matrix types may "
- "be rejected by older compilers");
- }
-}
-
-static bool
-process_qualifier_constant(struct _mesa_glsl_parse_state *state,
- YYLTYPE *loc,
- const char *qual_identifier,
- ast_expression *const_expression,
- unsigned *value)
-{
- exec_list dummy_instructions;
-
- if (const_expression == NULL) {
- *value = 0;
- return true;
- }
-
- ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state);
-
- ir_constant *const const_int = ir->constant_expression_value();
- if (const_int == NULL || !const_int->type->is_integer()) {
- _mesa_glsl_error(loc, state, "%s must be an integral constant "
- "expression", qual_identifier);
- return false;
- }
-
- if (const_int->value.i[0] < 0) {
- _mesa_glsl_error(loc, state, "%s layout qualifier is invalid (%d < 0)",
- qual_identifier, const_int->value.i[0]);
- return false;
- }
-
- /* If the location is const (and we've verified that
- * it is) then no instructions should have been emitted
- * when we converted it to HIR. If they were emitted,
- * then either the location isn't const after all, or
- * we are emitting unnecessary instructions. 
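- *
- * E.g. (hypothetical), the `3' below folds to a constant with no
- * instructions emitted, which is exactly what the assertion checks:
- *
- *    layout(location = 3) in vec4 position;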
- */ - assert(dummy_instructions.is_empty()); - - *value = const_int->value.u[0]; - return true; -} - -static bool -validate_stream_qualifier(YYLTYPE *loc, struct _mesa_glsl_parse_state *state, - unsigned stream) -{ - if (stream >= state->ctx->Const.MaxVertexStreams) { - _mesa_glsl_error(loc, state, - "invalid stream specified %d is larger than " - "MAX_VERTEX_STREAMS - 1 (%d).", - stream, state->ctx->Const.MaxVertexStreams - 1); - return false; - } - - return true; -} - -static void -apply_explicit_binding(struct _mesa_glsl_parse_state *state, - YYLTYPE *loc, - ir_variable *var, - const glsl_type *type, - const ast_type_qualifier *qual) -{ - if (!qual->flags.q.uniform && !qual->flags.q.buffer) { - _mesa_glsl_error(loc, state, - "the \"binding\" qualifier only applies to uniforms and " - "shader storage buffer objects"); - return; - } - - unsigned qual_binding; - if (!process_qualifier_constant(state, loc, "binding", qual->binding, - &qual_binding)) { - return; - } - - const struct gl_context *const ctx = state->ctx; - unsigned elements = type->is_array() ? type->arrays_of_arrays_size() : 1; - unsigned max_index = qual_binding + elements - 1; - const glsl_type *base_type = type->without_array(); - - if (base_type->is_interface()) { - /* UBOs. From page 60 of the GLSL 4.20 specification: - * "If the binding point for any uniform block instance is less than zero, - * or greater than or equal to the implementation-dependent maximum - * number of uniform buffer bindings, a compilation error will occur. - * When the binding identifier is used with a uniform block instanced as - * an array of size N, all elements of the array from binding through - * binding + N – 1 must be within this range." - * - * The implementation-dependent maximum is GL_MAX_UNIFORM_BUFFER_BINDINGS. - */ - if (qual->flags.q.uniform && - max_index >= ctx->Const.MaxUniformBufferBindings) { - _mesa_glsl_error(loc, state, "layout(binding = %u) for %d UBOs exceeds " - "the maximum number of UBO binding points (%d)", - qual_binding, elements, - ctx->Const.MaxUniformBufferBindings); - return; - } - - /* SSBOs. From page 67 of the GLSL 4.30 specification: - * "If the binding point for any uniform or shader storage block instance - * is less than zero, or greater than or equal to the - * implementation-dependent maximum number of uniform buffer bindings, a - * compile-time error will occur. When the binding identifier is used - * with a uniform or shader storage block instanced as an array of size - * N, all elements of the array from binding through binding + N – 1 must - * be within this range." - */ - if (qual->flags.q.buffer && - max_index >= ctx->Const.MaxShaderStorageBufferBindings) { - _mesa_glsl_error(loc, state, "layout(binding = %u) for %d SSBOs exceeds " - "the maximum number of SSBO binding points (%d)", - qual_binding, elements, - ctx->Const.MaxShaderStorageBufferBindings); - return; - } - } else if (base_type->is_sampler()) { - /* Samplers. From page 63 of the GLSL 4.20 specification: - * "If the binding is less than zero, or greater than or equal to the - * implementation-dependent maximum supported number of units, a - * compilation error will occur. When the binding identifier is used - * with an array of size N, all elements of the array from binding - * through binding + N - 1 must be within this range." 
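- *
- * A hypothetical example of the range rule for the sampler case below:
- *
- *    layout(binding = 2) uniform sampler2D tex[4];   // units 2..5
- *
- * so the check requires MAX_COMBINED_TEXTURE_IMAGE_UNITS >= 6.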
- */ - unsigned limit = ctx->Const.MaxCombinedTextureImageUnits; - - if (max_index >= limit) { - _mesa_glsl_error(loc, state, "layout(binding = %d) for %d samplers " - "exceeds the maximum number of texture image units " - "(%u)", qual_binding, elements, limit); - - return; - } - } else if (base_type->contains_atomic()) { - assert(ctx->Const.MaxAtomicBufferBindings <= MAX_COMBINED_ATOMIC_BUFFERS); - if (qual_binding >= ctx->Const.MaxAtomicBufferBindings) { - _mesa_glsl_error(loc, state, "layout(binding = %d) exceeds the " - " maximum number of atomic counter buffer bindings" - "(%u)", qual_binding, - ctx->Const.MaxAtomicBufferBindings); - - return; - } - } else if ((state->is_version(420, 310) || - state->ARB_shading_language_420pack_enable) && - base_type->is_image()) { - assert(ctx->Const.MaxImageUnits <= MAX_IMAGE_UNITS); - if (max_index >= ctx->Const.MaxImageUnits) { - _mesa_glsl_error(loc, state, "Image binding %d exceeds the " - " maximum number of image units (%d)", max_index, - ctx->Const.MaxImageUnits); - return; - } - - } else { - _mesa_glsl_error(loc, state, - "the \"binding\" qualifier only applies to uniform " - "blocks, opaque variables, or arrays thereof"); - return; - } - - var->data.explicit_binding = true; - var->data.binding = qual_binding; - - return; -} - - -static glsl_interp_qualifier -interpret_interpolation_qualifier(const struct ast_type_qualifier *qual, - ir_variable_mode mode, - struct _mesa_glsl_parse_state *state, - YYLTYPE *loc) -{ - glsl_interp_qualifier interpolation; - if (qual->flags.q.flat) - interpolation = INTERP_QUALIFIER_FLAT; - else if (qual->flags.q.noperspective) - interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; - else if (qual->flags.q.smooth) - interpolation = INTERP_QUALIFIER_SMOOTH; - else - interpolation = INTERP_QUALIFIER_NONE; - - if (interpolation != INTERP_QUALIFIER_NONE) { - if (mode != ir_var_shader_in && mode != ir_var_shader_out) { - _mesa_glsl_error(loc, state, - "interpolation qualifier `%s' can only be applied to " - "shader inputs or outputs.", - interpolation_string(interpolation)); - - } - - if ((state->stage == MESA_SHADER_VERTEX && mode == ir_var_shader_in) || - (state->stage == MESA_SHADER_FRAGMENT && mode == ir_var_shader_out)) { - _mesa_glsl_error(loc, state, - "interpolation qualifier `%s' cannot be applied to " - "vertex shader inputs or fragment shader outputs", - interpolation_string(interpolation)); - } - } - - return interpolation; -} - - -static void -apply_explicit_location(const struct ast_type_qualifier *qual, - ir_variable *var, - struct _mesa_glsl_parse_state *state, - YYLTYPE *loc) -{ - bool fail = false; - - unsigned qual_location; - if (!process_qualifier_constant(state, loc, "location", qual->location, - &qual_location)) { - return; - } - - /* Checks for GL_ARB_explicit_uniform_location. 
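- *
- * A hypothetical example of the declaration this path handles:
- *
- *    layout(location = 7) uniform mat4 mvp;   // consumes location 7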
*/
- if (qual->flags.q.uniform) {
- if (!state->check_explicit_uniform_location_allowed(loc, var))
- return;
-
- const struct gl_context *const ctx = state->ctx;
- unsigned max_loc = qual_location + var->type->uniform_locations() - 1;
-
- if (max_loc >= ctx->Const.MaxUserAssignableUniformLocations) {
- _mesa_glsl_error(loc, state, "location(s) consumed by uniform %s "
- ">= MAX_UNIFORM_LOCATIONS (%u)", var->name,
- ctx->Const.MaxUserAssignableUniformLocations);
- return;
- }
-
- var->data.explicit_location = true;
- var->data.location = qual_location;
- return;
- }
-
- /* Between GL_ARB_explicit_attrib_location and
- * GL_ARB_separate_shader_objects, the inputs and outputs of any shader
- * stage can be assigned explicit locations. The checking here associates
- * the correct extension with the correct stage's input / output:
- *
- * input output
- * ----- ------
- * vertex explicit_loc sso
- * tess control sso sso
- * tess eval sso sso
- * geometry sso sso
- * fragment sso explicit_loc
- */
- switch (state->stage) {
- case MESA_SHADER_VERTEX:
- if (var->data.mode == ir_var_shader_in) {
- if (!state->check_explicit_attrib_location_allowed(loc, var))
- return;
-
- break;
- }
-
- if (var->data.mode == ir_var_shader_out) {
- if (!state->check_separate_shader_objects_allowed(loc, var))
- return;
-
- break;
- }
-
- fail = true;
- break;
-
- case MESA_SHADER_TESS_CTRL:
- case MESA_SHADER_TESS_EVAL:
- case MESA_SHADER_GEOMETRY:
- if (var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) {
- if (!state->check_separate_shader_objects_allowed(loc, var))
- return;
-
- break;
- }
-
- fail = true;
- break;
-
- case MESA_SHADER_FRAGMENT:
- if (var->data.mode == ir_var_shader_in) {
- if (!state->check_separate_shader_objects_allowed(loc, var))
- return;
-
- break;
- }
-
- if (var->data.mode == ir_var_shader_out) {
- if (!state->check_explicit_attrib_location_allowed(loc, var))
- return;
-
- break;
- }
-
- fail = true;
- break;
-
- case MESA_SHADER_COMPUTE:
- _mesa_glsl_error(loc, state,
- "compute shader variables cannot be given "
- "explicit locations");
- return;
- };
-
- if (fail) {
- _mesa_glsl_error(loc, state,
- "%s cannot be given an explicit location in %s shader",
- mode_string(var),
- _mesa_shader_stage_to_string(state->stage));
- } else {
- var->data.explicit_location = true;
-
- switch (state->stage) {
- case MESA_SHADER_VERTEX:
- var->data.location = (var->data.mode == ir_var_shader_in)
- ? (qual_location + VERT_ATTRIB_GENERIC0)
- : (qual_location + VARYING_SLOT_VAR0);
- break;
-
- case MESA_SHADER_TESS_CTRL:
- case MESA_SHADER_TESS_EVAL:
- case MESA_SHADER_GEOMETRY:
- if (var->data.patch)
- var->data.location = qual_location + VARYING_SLOT_PATCH0;
- else
- var->data.location = qual_location + VARYING_SLOT_VAR0;
- break;
-
- case MESA_SHADER_FRAGMENT:
- var->data.location = (var->data.mode == ir_var_shader_out)
- ? 
(qual_location + FRAG_RESULT_DATA0) - : (qual_location + VARYING_SLOT_VAR0); - break; - case MESA_SHADER_COMPUTE: - assert(!"Unexpected shader type"); - break; - } - - /* Check if index was set for the uniform instead of the function */ - if (qual->flags.q.explicit_index && qual->flags.q.subroutine) { - _mesa_glsl_error(loc, state, "an index qualifier can only be " - "used with subroutine functions"); - return; - } - - unsigned qual_index; - if (qual->flags.q.explicit_index && - process_qualifier_constant(state, loc, "index", qual->index, - &qual_index)) { - /* From the GLSL 4.30 specification, section 4.4.2 (Output - * Layout Qualifiers): - * - * "It is also a compile-time error if a fragment shader - * sets a layout index to less than 0 or greater than 1." - * - * Older specifications don't mandate a behavior; we take - * this as a clarification and always generate the error. - */ - if (qual_index > 1) { - _mesa_glsl_error(loc, state, - "explicit index may only be 0 or 1"); - } else { - var->data.explicit_index = true; - var->data.index = qual_index; - } - } - } -} - -static void -apply_image_qualifier_to_variable(const struct ast_type_qualifier *qual, - ir_variable *var, - struct _mesa_glsl_parse_state *state, - YYLTYPE *loc) -{ - const glsl_type *base_type = var->type->without_array(); - - if (base_type->is_image()) { - if (var->data.mode != ir_var_uniform && - var->data.mode != ir_var_function_in) { - _mesa_glsl_error(loc, state, "image variables may only be declared as " - "function parameters or uniform-qualified " - "global variables"); - } - - var->data.image_read_only |= qual->flags.q.read_only; - var->data.image_write_only |= qual->flags.q.write_only; - var->data.image_coherent |= qual->flags.q.coherent; - var->data.image_volatile |= qual->flags.q._volatile; - var->data.image_restrict |= qual->flags.q.restrict_flag; - var->data.read_only = true; - - if (qual->flags.q.explicit_image_format) { - if (var->data.mode == ir_var_function_in) { - _mesa_glsl_error(loc, state, "format qualifiers cannot be " - "used on image function parameters"); - } - - if (qual->image_base_type != base_type->sampler_type) { - _mesa_glsl_error(loc, state, "format qualifier doesn't match the " - "base data type of the image"); - } - - var->data.image_format = qual->image_format; - } else { - if (var->data.mode == ir_var_uniform) { - if (state->es_shader) { - _mesa_glsl_error(loc, state, "all image uniforms " - "must have a format layout qualifier"); - - } else if (!qual->flags.q.write_only) { - _mesa_glsl_error(loc, state, "image uniforms not qualified with " - "`writeonly' must have a format layout " - "qualifier"); - } - } - - var->data.image_format = GL_NONE; - } - - /* From page 70 of the GLSL ES 3.1 specification: - * - * "Except for image variables qualified with the format qualifiers - * r32f, r32i, and r32ui, image variables must specify either memory - * qualifier readonly or the memory qualifier writeonly." 
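- *
- * Hypothetical ES 3.1 declarations showing the rule enforced below:
- *
- *    layout(r32f) uniform highp image2D a;             // exempt format
- *    layout(rgba8) writeonly uniform highp image2D b;  // qualified: OK
- *    // layout(rgba8) uniform highp image2D c;         // error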
- */ - if (state->es_shader && - var->data.image_format != GL_R32F && - var->data.image_format != GL_R32I && - var->data.image_format != GL_R32UI && - !var->data.image_read_only && - !var->data.image_write_only) { - _mesa_glsl_error(loc, state, "image variables of format other than " - "r32f, r32i or r32ui must be qualified `readonly' or " - "`writeonly'"); - } - - } else if (qual->flags.q.read_only || - qual->flags.q.write_only || - qual->flags.q.coherent || - qual->flags.q._volatile || - qual->flags.q.restrict_flag || - qual->flags.q.explicit_image_format) { - _mesa_glsl_error(loc, state, "memory qualifiers may only be applied to " - "images"); - } -} - -static inline const char* -get_layout_qualifier_string(bool origin_upper_left, bool pixel_center_integer) -{ - if (origin_upper_left && pixel_center_integer) - return "origin_upper_left, pixel_center_integer"; - else if (origin_upper_left) - return "origin_upper_left"; - else if (pixel_center_integer) - return "pixel_center_integer"; - else - return " "; -} - -static inline bool -is_conflicting_fragcoord_redeclaration(struct _mesa_glsl_parse_state *state, - const struct ast_type_qualifier *qual) -{ - /* If gl_FragCoord was previously declared, and the qualifiers were - * different in any way, return true. - */ - if (state->fs_redeclares_gl_fragcoord) { - return (state->fs_pixel_center_integer != qual->flags.q.pixel_center_integer - || state->fs_origin_upper_left != qual->flags.q.origin_upper_left); - } - - return false; -} - -static inline void -validate_array_dimensions(const glsl_type *t, - struct _mesa_glsl_parse_state *state, - YYLTYPE *loc) { - if (t->is_array()) { - t = t->fields.array; - while (t->is_array()) { - if (t->is_unsized_array()) { - _mesa_glsl_error(loc, state, - "only the outermost array dimension can " - "be unsized", - t->name); - break; - } - t = t->fields.array; - } - } -} - -static void -apply_layout_qualifier_to_variable(const struct ast_type_qualifier *qual, - ir_variable *var, - struct _mesa_glsl_parse_state *state, - YYLTYPE *loc) -{ - if (var->name != NULL && strcmp(var->name, "gl_FragCoord") == 0) { - - /* Section 4.3.8.1, page 39 of GLSL 1.50 spec says: - * - * "Within any shader, the first redeclarations of gl_FragCoord - * must appear before any use of gl_FragCoord." - * - * Generate a compiler error if above condition is not met by the - * fragment shader. - */ - ir_variable *earlier = state->symbols->get_variable("gl_FragCoord"); - if (earlier != NULL && - earlier->data.used && - !state->fs_redeclares_gl_fragcoord) { - _mesa_glsl_error(loc, state, - "gl_FragCoord used before its first redeclaration " - "in fragment shader"); - } - - /* Make sure all gl_FragCoord redeclarations specify the same layout - * qualifiers. 
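- *
- * E.g. (hypothetical) these two redeclarations conflict and trigger
- * the error below:
- *
- *    layout(origin_upper_left) in vec4 gl_FragCoord;
- *    layout(origin_upper_left, pixel_center_integer) in vec4 gl_FragCoord;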
- */
- if (is_conflicting_fragcoord_redeclaration(state, qual)) {
- const char *const qual_string =
- get_layout_qualifier_string(qual->flags.q.origin_upper_left,
- qual->flags.q.pixel_center_integer);
-
- const char *const state_string =
- get_layout_qualifier_string(state->fs_origin_upper_left,
- state->fs_pixel_center_integer);
-
- _mesa_glsl_error(loc, state,
- "gl_FragCoord redeclared with different layout "
- "qualifiers (%s) and (%s) ",
- state_string,
- qual_string);
- }
- state->fs_origin_upper_left = qual->flags.q.origin_upper_left;
- state->fs_pixel_center_integer = qual->flags.q.pixel_center_integer;
- state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers =
- !qual->flags.q.origin_upper_left && !qual->flags.q.pixel_center_integer;
- state->fs_redeclares_gl_fragcoord =
- state->fs_origin_upper_left ||
- state->fs_pixel_center_integer ||
- state->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers;
- }
-
- var->data.pixel_center_integer = qual->flags.q.pixel_center_integer;
- var->data.origin_upper_left = qual->flags.q.origin_upper_left;
- if ((qual->flags.q.origin_upper_left || qual->flags.q.pixel_center_integer)
- && (strcmp(var->name, "gl_FragCoord") != 0)) {
- const char *const qual_string = (qual->flags.q.origin_upper_left)
- ? "origin_upper_left" : "pixel_center_integer";
-
- _mesa_glsl_error(loc, state,
- "layout qualifier `%s' can only be applied to "
- "fragment shader input `gl_FragCoord'",
- qual_string);
- }
-
- if (qual->flags.q.explicit_location) {
- apply_explicit_location(qual, var, state, loc);
- } else if (qual->flags.q.explicit_index) {
- if (!qual->flags.q.subroutine_def)
- _mesa_glsl_error(loc, state,
- "explicit index requires explicit location");
- }
-
- if (qual->flags.q.explicit_binding) {
- apply_explicit_binding(state, loc, var, var->type, qual);
- }
-
- if (state->stage == MESA_SHADER_GEOMETRY &&
- qual->flags.q.out && qual->flags.q.stream) {
- unsigned qual_stream;
- if (process_qualifier_constant(state, loc, "stream", qual->stream,
- &qual_stream) &&
- validate_stream_qualifier(loc, state, qual_stream)) {
- var->data.stream = qual_stream;
- }
- }
-
- if (var->type->contains_atomic()) {
- if (var->data.mode == ir_var_uniform) {
- if (var->data.explicit_binding) {
- unsigned *offset =
- &state->atomic_counter_offsets[var->data.binding];
-
- if (*offset % ATOMIC_COUNTER_SIZE)
- _mesa_glsl_error(loc, state,
- "misaligned atomic counter offset");
-
- var->data.offset = *offset;
- *offset += var->type->atomic_size();
-
- } else {
- _mesa_glsl_error(loc, state,
- "atomic counters require explicit binding point");
- }
- } else if (var->data.mode != ir_var_function_in) {
- _mesa_glsl_error(loc, state, "atomic counters may only be declared as "
- "function parameters or uniform-qualified "
- "global variables");
- }
- }
-
- /* Is the 'layout' keyword used with parameters that allow relaxed checking?
- * Many implementations of GL_ARB_fragment_coord_conventions and some
- * implementations (only Mesa?) of GL_ARB_explicit_attrib_location allowed
- * the layout qualifier to be used with 'varying' and 'attribute'.
- * These extensions and all following extensions that add the 'layout'
- * keyword have been modified to require the use of 'in' or 'out'. 
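- *
- * A hypothetical declaration affected by this relaxation:
- *
- *    layout(location = 0) attribute vec4 position;
- *
- * With relaxed checking this is only a warning; otherwise it is an
- * error, as implemented below.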
- *
- * The following extensions do not allow the deprecated keywords:
- *
- * GL_AMD_conservative_depth
- * GL_ARB_conservative_depth
- * GL_ARB_gpu_shader5
- * GL_ARB_separate_shader_objects
- * GL_ARB_tessellation_shader
- * GL_ARB_transform_feedback3
- * GL_ARB_uniform_buffer_object
- *
- * It is unknown whether GL_EXT_shader_image_load_store or GL_NV_gpu_shader5
- * allow layout with the deprecated keywords.
- */
- const bool relaxed_layout_qualifier_checking =
- state->ARB_fragment_coord_conventions_enable;
-
- const bool uses_deprecated_qualifier = qual->flags.q.attribute
- || qual->flags.q.varying;
- if (qual->has_layout() && uses_deprecated_qualifier) {
- if (relaxed_layout_qualifier_checking) {
- _mesa_glsl_warning(loc, state,
- "`layout' qualifier may not be used with "
- "`attribute' or `varying'");
- } else {
- _mesa_glsl_error(loc, state,
- "`layout' qualifier may not be used with "
- "`attribute' or `varying'");
- }
- }
-
- /* Layout qualifiers for gl_FragDepth, which are enabled by extension
- * AMD_conservative_depth.
- */
- int depth_layout_count = qual->flags.q.depth_any
- + qual->flags.q.depth_greater
- + qual->flags.q.depth_less
- + qual->flags.q.depth_unchanged;
- if (depth_layout_count > 0
- && !state->AMD_conservative_depth_enable
- && !state->ARB_conservative_depth_enable) {
- _mesa_glsl_error(loc, state,
- "extension GL_AMD_conservative_depth or "
- "GL_ARB_conservative_depth must be enabled "
- "to use depth layout qualifiers");
- } else if (depth_layout_count > 0
- && strcmp(var->name, "gl_FragDepth") != 0) {
- _mesa_glsl_error(loc, state,
- "depth layout qualifiers can be applied only to "
- "gl_FragDepth");
- } else if (depth_layout_count > 1
- && strcmp(var->name, "gl_FragDepth") == 0) {
- _mesa_glsl_error(loc, state,
- "at most one depth layout qualifier can be applied to "
- "gl_FragDepth");
- }
- if (qual->flags.q.depth_any)
- var->data.depth_layout = ir_depth_layout_any;
- else if (qual->flags.q.depth_greater)
- var->data.depth_layout = ir_depth_layout_greater;
- else if (qual->flags.q.depth_less)
- var->data.depth_layout = ir_depth_layout_less;
- else if (qual->flags.q.depth_unchanged)
- var->data.depth_layout = ir_depth_layout_unchanged;
- else
- var->data.depth_layout = ir_depth_layout_none;
-
- if (qual->flags.q.std140 ||
- qual->flags.q.std430 ||
- qual->flags.q.packed ||
- qual->flags.q.shared) {
- _mesa_glsl_error(loc, state,
- "uniform and shader storage block layout qualifiers "
- "std140, std430, packed, and shared can only be "
- "applied to uniform or shader storage blocks, not "
- "members");
- }
-
- if (qual->flags.q.row_major || qual->flags.q.column_major) {
- validate_matrix_layout_for_type(state, loc, var->type, var);
- }
-
- /* From section 4.4.1.3 of the GLSL 4.50 specification (Fragment Shader
- * Inputs):
- *
- * "Fragment shaders also allow the following layout qualifier on in only
- * (not with variable declarations)
- * layout-qualifier-id
- * early_fragment_tests
- * [...]"
- */
- if (qual->flags.q.early_fragment_tests) {
- _mesa_glsl_error(loc, state, "early_fragment_tests layout qualifier only "
- "valid in fragment shader input layout declaration.");
- }
-}
-
-static void
-apply_type_qualifier_to_variable(const struct ast_type_qualifier *qual,
- ir_variable *var,
- struct _mesa_glsl_parse_state *state,
- YYLTYPE *loc,
- bool is_parameter)
-{
- STATIC_ASSERT(sizeof(qual->flags.q) <= sizeof(qual->flags.i));
-
- if (qual->flags.q.invariant) {
- if (var->data.used) {
- _mesa_glsl_error(loc, state,
- "variable `%s' may not be 
redeclared " - "`invariant' after being used", - var->name); - } else { - var->data.invariant = 1; - } - } - - if (qual->flags.q.precise) { - if (var->data.used) { - _mesa_glsl_error(loc, state, - "variable `%s' may not be redeclared " - "`precise' after being used", - var->name); - } else { - var->data.precise = 1; - } - } - - if (qual->flags.q.subroutine && !qual->flags.q.uniform) { - _mesa_glsl_error(loc, state, - "`subroutine' may only be applied to uniforms, " - "subroutine type declarations, or function definitions"); - } - - if (qual->flags.q.constant || qual->flags.q.attribute - || qual->flags.q.uniform - || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT))) - var->data.read_only = 1; - - if (qual->flags.q.centroid) - var->data.centroid = 1; - - if (qual->flags.q.sample) - var->data.sample = 1; - - /* Precision qualifiers do not hold any meaning in Desktop GLSL */ - if (state->es_shader) { - var->data.precision = - select_gles_precision(qual->precision, var->type, state, loc); - } - - if (qual->flags.q.patch) - var->data.patch = 1; - - if (qual->flags.q.attribute && state->stage != MESA_SHADER_VERTEX) { - var->type = glsl_type::error_type; - _mesa_glsl_error(loc, state, - "`attribute' variables may not be declared in the " - "%s shader", - _mesa_shader_stage_to_string(state->stage)); - } - - /* Disallow layout qualifiers which may only appear on layout declarations. */ - if (qual->flags.q.prim_type) { - _mesa_glsl_error(loc, state, - "Primitive type may only be specified on GS input or output " - "layout declaration, not on variables."); - } - - /* Section 6.1.1 (Function Calling Conventions) of the GLSL 1.10 spec says: - * - * "However, the const qualifier cannot be used with out or inout." - * - * The same section of the GLSL 4.40 spec further clarifies this saying: - * - * "The const qualifier cannot be used with out or inout, or a - * compile-time error results." - */ - if (is_parameter && qual->flags.q.constant && qual->flags.q.out) { - _mesa_glsl_error(loc, state, - "`const' may not be applied to `out' or `inout' " - "function parameters"); - } - - /* If there is no qualifier that changes the mode of the variable, leave - * the setting alone. - */ - assert(var->data.mode != ir_var_temporary); - if (qual->flags.q.in && qual->flags.q.out) - var->data.mode = ir_var_function_inout; - else if (qual->flags.q.in) - var->data.mode = is_parameter ? ir_var_function_in : ir_var_shader_in; - else if (qual->flags.q.attribute - || (qual->flags.q.varying && (state->stage == MESA_SHADER_FRAGMENT))) - var->data.mode = ir_var_shader_in; - else if (qual->flags.q.out) - var->data.mode = is_parameter ? ir_var_function_out : ir_var_shader_out; - else if (qual->flags.q.varying && (state->stage == MESA_SHADER_VERTEX)) - var->data.mode = ir_var_shader_out; - else if (qual->flags.q.uniform) - var->data.mode = ir_var_uniform; - else if (qual->flags.q.buffer) - var->data.mode = ir_var_shader_storage; - else if (qual->flags.q.shared_storage) - var->data.mode = ir_var_shader_shared; - - if (!is_parameter && is_varying_var(var, state->stage)) { - /* User-defined ins/outs are not permitted in compute shaders. */ - if (state->stage == MESA_SHADER_COMPUTE) { - _mesa_glsl_error(loc, state, - "user-defined input and output variables are not " - "permitted in compute shaders"); - } - - /* This variable is being used to link data between shader stages (in - * pre-glsl-1.30 parlance, it's a "varying"). Check that it has a type - * that is allowed for such purposes. 
- * - * From page 25 (page 31 of the PDF) of the GLSL 1.10 spec: - * - * "The varying qualifier can be used only with the data types - * float, vec2, vec3, vec4, mat2, mat3, and mat4, or arrays of - * these." - * - * This was relaxed in GLSL version 1.30 and GLSL ES version 3.00. From - * page 31 (page 37 of the PDF) of the GLSL 1.30 spec: - * - * "Fragment inputs can only be signed and unsigned integers and - * integer vectors, float, floating-point vectors, matrices, or - * arrays of these. Structures cannot be input. - * - * Similar text exists in the section on vertex shader outputs. - * - * Similar text exists in the GLSL ES 3.00 spec, except that the GLSL ES - * 3.00 spec allows structs as well. Varying structs are also allowed - * in GLSL 1.50. - */ - switch (var->type->get_scalar_type()->base_type) { - case GLSL_TYPE_FLOAT: - /* Ok in all GLSL versions */ - break; - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - if (state->is_version(130, 300)) - break; - _mesa_glsl_error(loc, state, - "varying variables must be of base type float in %s", - state->get_version_string()); - break; - case GLSL_TYPE_STRUCT: - if (state->is_version(150, 300)) - break; - _mesa_glsl_error(loc, state, - "varying variables may not be of type struct"); - break; - case GLSL_TYPE_DOUBLE: - break; - default: - _mesa_glsl_error(loc, state, "illegal type for a varying variable"); - break; - } - } - - if (state->all_invariant && (state->current_function == NULL)) { - switch (state->stage) { - case MESA_SHADER_VERTEX: - if (var->data.mode == ir_var_shader_out) - var->data.invariant = true; - break; - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - if ((var->data.mode == ir_var_shader_in) - || (var->data.mode == ir_var_shader_out)) - var->data.invariant = true; - break; - case MESA_SHADER_FRAGMENT: - if (var->data.mode == ir_var_shader_in) - var->data.invariant = true; - break; - case MESA_SHADER_COMPUTE: - /* Invariance isn't meaningful in compute shaders. */ - break; - } - } - - var->data.interpolation = - interpret_interpolation_qualifier(qual, (ir_variable_mode) var->data.mode, - state, loc); - - /* Does the declaration use the deprecated 'attribute' or 'varying' - * keywords? - */ - const bool uses_deprecated_qualifier = qual->flags.q.attribute - || qual->flags.q.varying; - - - /* Validate auxiliary storage qualifiers */ - - /* From section 4.3.4 of the GLSL 1.30 spec: - * "It is an error to use centroid in in a vertex shader." - * - * From section 4.3.4 of the GLSL ES 3.00 spec: - * "It is an error to use centroid in or interpolation qualifiers in - * a vertex shader input." - */ - - /* Section 4.3.6 of the GLSL 1.30 specification states: - * "It is an error to use centroid out in a fragment shader." - * - * The GL_ARB_shading_language_420pack extension specification states: - * "It is an error to use auxiliary storage qualifiers or interpolation - * qualifiers on an output in a fragment shader." 
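-    *
-    * Illustrative declarations (mine, not the specs') that the rules
-    * quoted above reject:
-    *
-    *    centroid in vec4 a;    // error in a vertex shader
-    *    centroid out vec4 b;   // error in a fragment shader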
-    */
-   if (qual->flags.q.sample && (!is_varying_var(var, state->stage) || uses_deprecated_qualifier)) {
-      _mesa_glsl_error(loc, state,
-                       "sample qualifier may only be used on `in' or `out' "
-                       "variables between shader stages");
-   }
-   if (qual->flags.q.centroid && !is_varying_var(var, state->stage)) {
-      _mesa_glsl_error(loc, state,
-                       "centroid qualifier may only be used with `in', "
-                       "`out' or `varying' variables between shader stages");
-   }
-
-   if (qual->flags.q.shared_storage && state->stage != MESA_SHADER_COMPUTE) {
-      _mesa_glsl_error(loc, state,
-                       "the shared storage qualifier can only be used with "
-                       "compute shaders");
-   }
-
-   apply_image_qualifier_to_variable(qual, var, state, loc);
-}
-
-/**
- * Get the variable that is being redeclared by this declaration
- *
- * Semantic checks to verify the validity of the redeclaration are also
- * performed. If semantic checks fail, a compilation error will be emitted via
- * \c _mesa_glsl_error, but a non-\c NULL pointer will still be returned.
- *
- * \returns
- * A pointer to an existing variable in the current scope if the declaration
- * is a redeclaration, \c NULL otherwise.
- */
-static ir_variable *
-get_variable_being_redeclared(ir_variable *var, YYLTYPE loc,
-                              struct _mesa_glsl_parse_state *state,
-                              bool allow_all_redeclarations)
-{
-   /* Check if this declaration is actually a re-declaration, either to
-    * resize an array or add qualifiers to an existing variable.
-    *
-    * This is allowed for variables in the current scope, or when at
-    * global scope (for built-ins in the implicit outer scope).
-    */
-   ir_variable *earlier = state->symbols->get_variable(var->name);
-   if (earlier == NULL ||
-       (state->current_function != NULL &&
-        !state->symbols->name_declared_this_scope(var->name))) {
-      return NULL;
-   }
-
-
-   /* From page 24 (page 30 of the PDF) of the GLSL 1.50 spec,
-    *
-    *    "It is legal to declare an array without a size and then
-    *     later re-declare the same name as an array of the same
-    *     type and specify a size."
-    */
-   if (earlier->type->is_unsized_array() && var->type->is_array()
-       && (var->type->fields.array == earlier->type->fields.array)) {
-      /* FINISHME: This doesn't match the qualifiers on the two
-       * FINISHME: declarations.  It's not 100% clear whether this is
-       * FINISHME: required or not.
-       */
-
-      const unsigned size = unsigned(var->type->array_size());
-      check_builtin_array_max_size(var->name, size, loc, state);
-      if ((size > 0) && (size <= earlier->data.max_array_access)) {
-         _mesa_glsl_error(& loc, state, "array size must be > %u due to "
-                          "previous access",
-                          earlier->data.max_array_access);
-      }
-
-      earlier->type = var->type;
-      delete var;
-      var = NULL;
-   } else if ((state->ARB_fragment_coord_conventions_enable ||
-               state->is_version(150, 0))
-              && strcmp(var->name, "gl_FragCoord") == 0
-              && earlier->type == var->type
-              && var->data.mode == ir_var_shader_in) {
-      /* Allow redeclaration of gl_FragCoord for ARB_fcc layout
-       * qualifiers.
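-       *
-       * A sketch of a redeclaration this path accepts (illustrative,
-       * assuming GL_ARB_fragment_coord_conventions is enabled):
-       *
-       *    layout(origin_upper_left, pixel_center_integer)
-       *       in vec4 gl_FragCoord;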
-       */
-      earlier->data.origin_upper_left = var->data.origin_upper_left;
-      earlier->data.pixel_center_integer = var->data.pixel_center_integer;
-
-      /* According to section 4.3.7 of the GLSL 1.30 spec,
-       * the following built-in variables can be redeclared with an
-       * interpolation qualifier:
-       *    * gl_FrontColor
-       *    * gl_BackColor
-       *    * gl_FrontSecondaryColor
-       *    * gl_BackSecondaryColor
-       *    * gl_Color
-       *    * gl_SecondaryColor
-       */
-   } else if (state->is_version(130, 0)
-              && (strcmp(var->name, "gl_FrontColor") == 0
-                  || strcmp(var->name, "gl_BackColor") == 0
-                  || strcmp(var->name, "gl_FrontSecondaryColor") == 0
-                  || strcmp(var->name, "gl_BackSecondaryColor") == 0
-                  || strcmp(var->name, "gl_Color") == 0
-                  || strcmp(var->name, "gl_SecondaryColor") == 0)
-              && earlier->type == var->type
-              && earlier->data.mode == var->data.mode) {
-      earlier->data.interpolation = var->data.interpolation;
-
-      /* Layout qualifiers for gl_FragDepth. */
-   } else if ((state->AMD_conservative_depth_enable ||
-               state->ARB_conservative_depth_enable)
-              && strcmp(var->name, "gl_FragDepth") == 0
-              && earlier->type == var->type
-              && earlier->data.mode == var->data.mode) {
-
-      /** From the AMD_conservative_depth spec:
-       *     Within any shader, the first redeclarations of gl_FragDepth
-       *     must appear before any use of gl_FragDepth.
-       */
-      if (earlier->data.used) {
-         _mesa_glsl_error(&loc, state,
-                          "the first redeclaration of gl_FragDepth "
-                          "must appear before any use of gl_FragDepth");
-      }
-
-      /* Prevent inconsistent redeclaration of depth layout qualifier. */
-      if (earlier->data.depth_layout != ir_depth_layout_none
-          && earlier->data.depth_layout != var->data.depth_layout) {
-         _mesa_glsl_error(&loc, state,
-                          "gl_FragDepth: depth layout is declared here "
-                          "as '%s', but it was previously declared as "
-                          "'%s'",
-                          depth_layout_string(var->data.depth_layout),
-                          depth_layout_string(earlier->data.depth_layout));
-      }
-
-      earlier->data.depth_layout = var->data.depth_layout;
-
-   } else if (allow_all_redeclarations) {
-      if (earlier->data.mode != var->data.mode) {
-         _mesa_glsl_error(&loc, state,
-                          "redeclaration of `%s' with incorrect qualifiers",
-                          var->name);
-      } else if (earlier->type != var->type) {
-         _mesa_glsl_error(&loc, state,
-                          "redeclaration of `%s' has incorrect type",
-                          var->name);
-      }
-   } else {
-      _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name);
-   }
-
-   return earlier;
-}
-
-/**
- * Generate the IR for an initializer in a variable declaration
- */
-ir_rvalue *
-process_initializer(ir_variable *var, ast_declaration *decl,
-                    ast_fully_specified_type *type,
-                    exec_list *initializer_instructions,
-                    struct _mesa_glsl_parse_state *state)
-{
-   ir_rvalue *result = NULL;
-
-   YYLTYPE initializer_loc = decl->initializer->get_location();
-
-   /* From page 24 (page 30 of the PDF) of the GLSL 1.10 spec:
-    *
-    *    "All uniform variables are read-only and are initialized either
-    *     directly by an application via API commands, or indirectly by
-    *     OpenGL."
-    */
-   if (var->data.mode == ir_var_uniform) {
-      state->check_version(120, 0, &initializer_loc,
-                           "cannot initialize uniform %s",
-                           var->name);
-   }
-
-   /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
-    *
-    *    "Buffer variables cannot have initializers."
-    */
-   if (var->data.mode == ir_var_shader_storage) {
-      _mesa_glsl_error(&initializer_loc, state,
-                       "cannot initialize buffer variable %s",
-                       var->name);
-   }
-
-   /* From section 4.1.7 of the GLSL 4.40 spec:
-    *
-    *    "Opaque variables [...]
are initialized only through the - * OpenGL API; they cannot be declared with an initializer in a - * shader." - */ - if (var->type->contains_opaque()) { - _mesa_glsl_error(&initializer_loc, state, - "cannot initialize opaque variable %s", - var->name); - } - - if ((var->data.mode == ir_var_shader_in) && (state->current_function == NULL)) { - _mesa_glsl_error(&initializer_loc, state, - "cannot initialize %s shader input / %s %s", - _mesa_shader_stage_to_string(state->stage), - (state->stage == MESA_SHADER_VERTEX) - ? "attribute" : "varying", - var->name); - } - - if (var->data.mode == ir_var_shader_out && state->current_function == NULL) { - _mesa_glsl_error(&initializer_loc, state, - "cannot initialize %s shader output %s", - _mesa_shader_stage_to_string(state->stage), - var->name); - } - - /* If the initializer is an ast_aggregate_initializer, recursively store - * type information from the LHS into it, so that its hir() function can do - * type checking. - */ - if (decl->initializer->oper == ast_aggregate) - _mesa_ast_set_aggregate_type(var->type, decl->initializer); - - ir_dereference *const lhs = new(state) ir_dereference_variable(var); - ir_rvalue *rhs = decl->initializer->hir(initializer_instructions, state); - - /* Calculate the constant value if this is a const or uniform - * declaration. - * - * Section 4.3 (Storage Qualifiers) of the GLSL ES 1.00.17 spec says: - * - * "Declarations of globals without a storage qualifier, or with - * just the const qualifier, may include initializers, in which case - * they will be initialized before the first line of main() is - * executed. Such initializers must be a constant expression." - * - * The same section of the GLSL ES 3.00.4 spec has similar language. - */ - if (type->qualifier.flags.q.constant - || type->qualifier.flags.q.uniform - || (state->es_shader && state->current_function == NULL)) { - ir_rvalue *new_rhs = validate_assignment(state, initializer_loc, - lhs, rhs, true); - if (new_rhs != NULL) { - rhs = new_rhs; - - /* Section 4.3.3 (Constant Expressions) of the GLSL ES 3.00.4 spec - * says: - * - * "A constant expression is one of - * - * ... - * - * - an expression formed by an operator on operands that are - * all constant expressions, including getting an element of - * a constant array, or a field of a constant structure, or - * components of a constant vector. However, the sequence - * operator ( , ) and the assignment operators ( =, +=, ...) - * are not included in the operators that can create a - * constant expression." - * - * Section 12.43 (Sequence operator and constant expressions) says: - * - * "Should the following construct be allowed? - * - * float a[2,3]; - * - * The expression within the brackets uses the sequence operator - * (',') and returns the integer 3 so the construct is declaring - * a single-dimensional array of size 3. In some languages, the - * construct declares a two-dimensional array. It would be - * preferable to make this construct illegal to avoid confusion. - * - * One possibility is to change the definition of the sequence - * operator so that it does not return a constant-expression and - * hence cannot be used to declare an array size. - * - * RESOLUTION: The result of a sequence operator is not a - * constant-expression." - * - * Section 4.3.3 (Constant Expressions) of the GLSL 4.30.9 spec - * contains language almost identical to the section 4.3.3 in the - * GLSL ES 3.00.4 spec. This is a new limitation for these GLSL - * versions. 
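-          *
-          * Illustrative initializers (mine, not the spec's) for the two
-          * outcomes distinguished below:
-          *
-          *    const float ok  = 1.0 + 2.0;  // constant expression
-          *    const int   bad = (1, 2);     // sequence operator: rejected
-          *                                  // in GLSL 4.30 / ES 3.00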
-          */
-         ir_constant *constant_value = rhs->constant_expression_value();
-         if (!constant_value ||
-             (state->is_version(430, 300) &&
-              decl->initializer->has_sequence_subexpression())) {
-            const char *const variable_mode =
-               (type->qualifier.flags.q.constant)
-               ? "const"
-               : ((type->qualifier.flags.q.uniform) ? "uniform" : "global");
-
-            /* If ARB_shading_language_420pack is enabled, initializers of
-             * const-qualified local variables do not have to be constant
-             * expressions. Const-qualified global variables must still be
-             * initialized with constant expressions.
-             */
-            if (!state->has_420pack()
-                || state->current_function == NULL) {
-               _mesa_glsl_error(& initializer_loc, state,
-                                "initializer of %s variable `%s' must be a "
-                                "constant expression",
-                                variable_mode,
-                                decl->identifier);
-               if (var->type->is_numeric()) {
-                  /* Reduce cascading errors. */
-                  var->constant_value = type->qualifier.flags.q.constant
-                     ? ir_constant::zero(state, var->type) : NULL;
-               }
-            }
-         } else {
-            rhs = constant_value;
-            var->constant_value = type->qualifier.flags.q.constant
-               ? constant_value : NULL;
-         }
-      } else {
-         if (var->type->is_numeric()) {
-            /* Reduce cascading errors. */
-            var->constant_value = type->qualifier.flags.q.constant
-               ? ir_constant::zero(state, var->type) : NULL;
-         }
-      }
-   }
-
-   if (rhs && !rhs->type->is_error()) {
-      bool temp = var->data.read_only;
-      if (type->qualifier.flags.q.constant)
-         var->data.read_only = false;
-
-      /* Never emit code to initialize a uniform.
-       */
-      const glsl_type *initializer_type;
-      if (!type->qualifier.flags.q.uniform) {
-         do_assignment(initializer_instructions, state,
-                       NULL,
-                       lhs, rhs,
-                       &result, true,
-                       true,
-                       type->get_location());
-         initializer_type = result->type;
-      } else
-         initializer_type = rhs->type;
-
-      var->constant_initializer = rhs->constant_expression_value();
-      var->data.has_initializer = true;
-
-      /* If the declared variable is an unsized array, it must inherit
-       * its full type from the initializer.  A declaration such as
-       *
-       *     uniform float a[] = float[](1.0, 2.0, 3.0, 3.0);
-       *
-       * becomes
-       *
-       *     uniform float a[4] = float[](1.0, 2.0, 3.0, 3.0);
-       *
-       * The assignment generated in the if-statement (below) will also
-       * automatically handle this case for non-uniforms.
-       *
-       * If the declared variable is not an array, the types must
-       * already match exactly.  As a result, the type assignment
-       * here can be done unconditionally.  For non-uniforms the call
-       * to do_assignment can change the type of the initializer (via
-       * the implicit conversion rules).  For uniforms the initializer
-       * must be a constant expression, and the type of that expression
-       * was validated above.
-       */
-      var->type = initializer_type;
-
-      var->data.read_only = temp;
-   }
-
-   return result;
-}
-
-static void
-validate_layout_qualifier_vertex_count(struct _mesa_glsl_parse_state *state,
-                                       YYLTYPE loc, ir_variable *var,
-                                       unsigned num_vertices,
-                                       unsigned *size,
-                                       const char *var_category)
-{
-   if (var->type->is_unsized_array()) {
-      /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec says:
-       *
-       *    All geometry shader input unsized array declarations will be
-       *    sized by an earlier input layout qualifier, when present, as per
-       *    the following table.
-       *
-       * Followed by a table mapping each allowed input layout qualifier to
-       * the corresponding input length.
-       *
-       * Similarly for tessellation control shader outputs.
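-       *
-       * For example (illustrative), after "layout(triangles) in;" an
-       * unsized geometry shader input such as
-       *
-       *    in vec4 Color[];
-       *
-       * is sized here as if it had been declared "in vec4 Color[3];".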
-       */
-      if (num_vertices != 0)
-         var->type = glsl_type::get_array_instance(var->type->fields.array,
-                                                   num_vertices);
-   } else {
-      /* Section 4.3.8.1 (Input Layout Qualifiers) of the GLSL 1.50 spec
-       * includes the following examples of compile-time errors:
-       *
-       *    // code sequence within one shader...
-       *    in vec4 Color1[];    // size unknown
-       *    ...Color1.length()...// illegal, length() unknown
-       *    in vec4 Color2[2];   // size is 2
-       *    ...Color1.length()...// illegal, Color1 still has no size
-       *    in vec4 Color3[3];   // illegal, input sizes are inconsistent
-       *    layout(lines) in;    // legal, input size is 2, matching
-       *    in vec4 Color4[3];   // illegal, contradicts layout
-       *    ...
-       *
-       * To detect the case illustrated by Color3, we verify that the size of
-       * an explicitly-sized array matches the size of any previously declared
-       * explicitly-sized array.  To detect the case illustrated by Color4, we
-       * verify that the size of an explicitly-sized array is consistent with
-       * any previously declared input layout.
-       */
-      if (num_vertices != 0 && var->type->length != num_vertices) {
-         _mesa_glsl_error(&loc, state,
-                          "%s size contradicts previously declared layout "
-                          "(size is %u, but layout requires a size of %u)",
-                          var_category, var->type->length, num_vertices);
-      } else if (*size != 0 && var->type->length != *size) {
-         _mesa_glsl_error(&loc, state,
-                          "%s sizes are inconsistent (size is %u, but a "
-                          "previous declaration has size %u)",
-                          var_category, var->type->length, *size);
-      } else {
-         *size = var->type->length;
-      }
-   }
-}
-
-static void
-handle_tess_ctrl_shader_output_decl(struct _mesa_glsl_parse_state *state,
-                                    YYLTYPE loc, ir_variable *var)
-{
-   unsigned num_vertices = 0;
-
-   if (state->tcs_output_vertices_specified) {
-      if (!state->out_qualifier->vertices->
-             process_qualifier_constant(state, "vertices",
-                                        &num_vertices, false)) {
-         return;
-      }
-
-      if (num_vertices > state->Const.MaxPatchVertices) {
-         _mesa_glsl_error(&loc, state, "vertices (%d) exceeds "
-                          "GL_MAX_PATCH_VERTICES", num_vertices);
-         return;
-      }
-   }
-
-   if (!var->type->is_array() && !var->data.patch) {
-      _mesa_glsl_error(&loc, state,
-                       "tessellation control shader outputs must be arrays");
-
-      /* To avoid cascading failures, short circuit the checks below. */
-      return;
-   }
-
-   if (var->data.patch)
-      return;
-
-   validate_layout_qualifier_vertex_count(state, loc, var, num_vertices,
-                                          &state->tcs_output_size,
-                                          "tessellation control shader output");
-}
-
-/**
- * Do additional processing necessary for tessellation control/evaluation shader
- * input declarations. This covers both interface block arrays and bare input
- * variables.
- */
-static void
-handle_tess_shader_input_decl(struct _mesa_glsl_parse_state *state,
-                              YYLTYPE loc, ir_variable *var)
-{
-   if (!var->type->is_array() && !var->data.patch) {
-      _mesa_glsl_error(&loc, state,
-                       "per-vertex tessellation shader inputs must be arrays");
-      /* Avoid cascading failures. */
-      return;
-   }
-
-   if (var->data.patch)
-      return;
-
-   /* Unsized arrays are implicitly sized to gl_MaxPatchVertices. */
-   if (var->type->is_unsized_array()) {
-      var->type = glsl_type::get_array_instance(var->type->fields.array,
-                                                state->Const.MaxPatchVertices);
-   }
-}
-
-
-/**
- * Do additional processing necessary for geometry shader input declarations
- * (this covers both interface block arrays and bare input variables).
- */ -static void -handle_geometry_shader_input_decl(struct _mesa_glsl_parse_state *state, - YYLTYPE loc, ir_variable *var) -{ - unsigned num_vertices = 0; - - if (state->gs_input_prim_type_specified) { - num_vertices = vertices_per_prim(state->in_qualifier->prim_type); - } - - /* Geometry shader input variables must be arrays. Caller should have - * reported an error for this. - */ - if (!var->type->is_array()) { - assert(state->error); - - /* To avoid cascading failures, short circuit the checks below. */ - return; - } - - validate_layout_qualifier_vertex_count(state, loc, var, num_vertices, - &state->gs_input_size, - "geometry shader input"); -} - -void -validate_identifier(const char *identifier, YYLTYPE loc, - struct _mesa_glsl_parse_state *state) -{ - /* From page 15 (page 21 of the PDF) of the GLSL 1.10 spec, - * - * "Identifiers starting with "gl_" are reserved for use by - * OpenGL, and may not be declared in a shader as either a - * variable or a function." - */ - if (is_gl_identifier(identifier)) { - _mesa_glsl_error(&loc, state, - "identifier `%s' uses reserved `gl_' prefix", - identifier); - } else if (strstr(identifier, "__")) { - /* From page 14 (page 20 of the PDF) of the GLSL 1.10 - * spec: - * - * "In addition, all identifiers containing two - * consecutive underscores (__) are reserved as - * possible future keywords." - * - * The intention is that names containing __ are reserved for internal - * use by the implementation, and names prefixed with GL_ are reserved - * for use by Khronos. Names simply containing __ are dangerous to use, - * but should be allowed. - * - * A future version of the GLSL specification will clarify this. - */ - _mesa_glsl_warning(&loc, state, - "identifier `%s' uses reserved `__' string", - identifier); - } -} - -ir_rvalue * -ast_declarator_list::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - const struct glsl_type *decl_type; - const char *type_name = NULL; - ir_rvalue *result = NULL; - YYLTYPE loc = this->get_location(); - - /* From page 46 (page 52 of the PDF) of the GLSL 1.50 spec: - * - * "To ensure that a particular output variable is invariant, it is - * necessary to use the invariant qualifier. It can either be used to - * qualify a previously declared variable as being invariant - * - * invariant gl_Position; // make existing gl_Position be invariant" - * - * In these cases the parser will set the 'invariant' flag in the declarator - * list, and the type will be NULL. - */ - if (this->invariant) { - assert(this->type == NULL); - - if (state->current_function != NULL) { - _mesa_glsl_error(& loc, state, - "all uses of `invariant' keyword must be at global " - "scope"); - } - - foreach_list_typed (ast_declaration, decl, link, &this->declarations) { - assert(decl->array_specifier == NULL); - assert(decl->initializer == NULL); - - ir_variable *const earlier = - state->symbols->get_variable(decl->identifier); - if (earlier == NULL) { - _mesa_glsl_error(& loc, state, - "undeclared variable `%s' cannot be marked " - "invariant", decl->identifier); - } else if (!is_varying_var(earlier, state->stage)) { - _mesa_glsl_error(&loc, state, - "`%s' cannot be marked invariant; interfaces between " - "shader stages only.", decl->identifier); - } else if (earlier->data.used) { - _mesa_glsl_error(& loc, state, - "variable `%s' may not be redeclared " - "`invariant' after being used", - earlier->name); - } else { - earlier->data.invariant = true; - } - } - - /* Invariant redeclarations do not have r-values. 
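-       *
-       * An illustrative example of the redeclaration form handled above:
-       *
-       *    out vec4 Color;
-       *    invariant Color;   // qualifies the earlier declaration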
-       */
-      return NULL;
-   }
-
-   if (this->precise) {
-      assert(this->type == NULL);
-
-      foreach_list_typed (ast_declaration, decl, link, &this->declarations) {
-         assert(decl->array_specifier == NULL);
-         assert(decl->initializer == NULL);
-
-         ir_variable *const earlier =
-            state->symbols->get_variable(decl->identifier);
-         if (earlier == NULL) {
-            _mesa_glsl_error(& loc, state,
-                             "undeclared variable `%s' cannot be marked "
-                             "precise", decl->identifier);
-         } else if (state->current_function != NULL &&
-                    !state->symbols->name_declared_this_scope(decl->identifier)) {
-            /* Note: we have to check if we're in a function, since
-             * builtins are treated as having come from another scope.
-             */
-            _mesa_glsl_error(& loc, state,
-                             "variable `%s' from an outer scope may not be "
-                             "redeclared `precise' in this scope",
-                             earlier->name);
-         } else if (earlier->data.used) {
-            _mesa_glsl_error(& loc, state,
-                             "variable `%s' may not be redeclared "
-                             "`precise' after being used",
-                             earlier->name);
-         } else {
-            earlier->data.precise = true;
-         }
-      }
-
-      /* Precise redeclarations do not have r-values either. */
-      return NULL;
-   }
-
-   assert(this->type != NULL);
-   assert(!this->invariant);
-   assert(!this->precise);
-
-   /* The type specifier may contain a structure definition.  Process that
-    * before any of the variable declarations.
-    */
-   (void) this->type->specifier->hir(instructions, state);
-
-   decl_type = this->type->glsl_type(& type_name, state);
-
-   /* Section 4.3.7 "Buffer Variables" of the GLSL 4.30 spec:
-    *    "Buffer variables may only be declared inside interface blocks
-    *    (section 4.3.9 “Interface Blocks”), which are then referred to as
-    *    shader storage blocks. It is a compile-time error to declare buffer
-    *    variables at global scope (outside a block)."
-    */
-   if (type->qualifier.flags.q.buffer && !decl_type->is_interface()) {
-      _mesa_glsl_error(&loc, state,
-                       "buffer variables cannot be declared outside "
-                       "interface blocks");
-   }
-
-   /* An offset-qualified atomic counter declaration sets the default
-    * offset for the next declaration within the same atomic counter
-    * buffer.
-    */
-   if (decl_type && decl_type->contains_atomic()) {
-      if (type->qualifier.flags.q.explicit_binding &&
-          type->qualifier.flags.q.explicit_offset) {
-         unsigned qual_binding;
-         unsigned qual_offset;
-         if (process_qualifier_constant(state, &loc, "binding",
-                                        type->qualifier.binding,
-                                        &qual_binding)
-             && process_qualifier_constant(state, &loc, "offset",
-                                           type->qualifier.offset,
-                                           &qual_offset)) {
-            state->atomic_counter_offsets[qual_binding] = qual_offset;
-         }
-      }
-   }
-
-   if (this->declarations.is_empty()) {
-      /* If there is no structure involved in the program text, there are a
-       * few possible scenarios:
-       *
-       * - The program text contained something like 'vec4;'.  This is an
-       *   empty declaration.  It is valid but weird.  Emit a warning.
-       *
-       * - The program text contained something like 'S;' and 'S' is not the
-       *   name of a known structure type.  This is both invalid and weird.
-       *   Emit an error.
-       *
-       * - The program text contained something like 'mediump float;'
-       *   when the programmer probably meant 'precision mediump
-       *   float;' Emit a warning with a description of what they
-       *   probably meant to do.
-       *
-       * Note that if decl_type is NULL and there is a structure involved,
-       * there must have been some sort of error with the structure.  In this
-       * case we assume that an error was already generated on this line of
-       * code for the structure.  There is no need to generate an additional,
-       * confusing error.
- */ - assert(this->type->specifier->structure == NULL || decl_type != NULL - || state->error); - - if (decl_type == NULL) { - _mesa_glsl_error(&loc, state, - "invalid type `%s' in empty declaration", - type_name); - } else if (decl_type->base_type == GLSL_TYPE_ATOMIC_UINT) { - /* Empty atomic counter declarations are allowed and useful - * to set the default offset qualifier. - */ - return NULL; - } else if (this->type->qualifier.precision != ast_precision_none) { - if (this->type->specifier->structure != NULL) { - _mesa_glsl_error(&loc, state, - "precision qualifiers can't be applied " - "to structures"); - } else { - static const char *const precision_names[] = { - "highp", - "highp", - "mediump", - "lowp" - }; - - _mesa_glsl_warning(&loc, state, - "empty declaration with precision qualifier, " - "to set the default precision, use " - "`precision %s %s;'", - precision_names[this->type->qualifier.precision], - type_name); - } - } else if (this->type->specifier->structure == NULL) { - _mesa_glsl_warning(&loc, state, "empty declaration"); - } - } - - foreach_list_typed (ast_declaration, decl, link, &this->declarations) { - const struct glsl_type *var_type; - ir_variable *var; - const char *identifier = decl->identifier; - /* FINISHME: Emit a warning if a variable declaration shadows a - * FINISHME: declaration at a higher scope. - */ - - if ((decl_type == NULL) || decl_type->is_void()) { - if (type_name != NULL) { - _mesa_glsl_error(& loc, state, - "invalid type `%s' in declaration of `%s'", - type_name, decl->identifier); - } else { - _mesa_glsl_error(& loc, state, - "invalid type in declaration of `%s'", - decl->identifier); - } - continue; - } - - if (this->type->qualifier.flags.q.subroutine) { - const glsl_type *t; - const char *name; - - t = state->symbols->get_type(this->type->specifier->type_name); - if (!t) - _mesa_glsl_error(& loc, state, - "invalid type in declaration of `%s'", - decl->identifier); - name = ralloc_asprintf(ctx, "%s_%s", _mesa_shader_stage_to_subroutine_prefix(state->stage), decl->identifier); - - identifier = name; - - } - var_type = process_array_type(&loc, decl_type, decl->array_specifier, - state); - - var = new(ctx) ir_variable(var_type, identifier, ir_var_auto); - - /* The 'varying in' and 'varying out' qualifiers can only be used with - * ARB_geometry_shader4 and EXT_geometry_shader4, which we don't support - * yet. - */ - if (this->type->qualifier.flags.q.varying) { - if (this->type->qualifier.flags.q.in) { - _mesa_glsl_error(& loc, state, - "`varying in' qualifier in declaration of " - "`%s' only valid for geometry shaders using " - "ARB_geometry_shader4 or EXT_geometry_shader4", - decl->identifier); - } else if (this->type->qualifier.flags.q.out) { - _mesa_glsl_error(& loc, state, - "`varying out' qualifier in declaration of " - "`%s' only valid for geometry shaders using " - "ARB_geometry_shader4 or EXT_geometry_shader4", - decl->identifier); - } - } - - /* From page 22 (page 28 of the PDF) of the GLSL 1.10 specification; - * - * "Global variables can only use the qualifiers const, - * attribute, uniform, or varying. Only one may be - * specified. - * - * Local variables can only use the qualifier const." - * - * This is relaxed in GLSL 1.30 and GLSL ES 3.00. It is also relaxed by - * any extension that adds the 'layout' keyword. 
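-       *
-       * So, in plain GLSL 1.10 (an illustrative case, not spec text):
-       *
-       *    out vec4 v;   // error: 'out' is only for function parameters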
- */ - if (!state->is_version(130, 300) - && !state->has_explicit_attrib_location() - && !state->has_separate_shader_objects() - && !state->ARB_fragment_coord_conventions_enable) { - if (this->type->qualifier.flags.q.out) { - _mesa_glsl_error(& loc, state, - "`out' qualifier in declaration of `%s' " - "only valid for function parameters in %s", - decl->identifier, state->get_version_string()); - } - if (this->type->qualifier.flags.q.in) { - _mesa_glsl_error(& loc, state, - "`in' qualifier in declaration of `%s' " - "only valid for function parameters in %s", - decl->identifier, state->get_version_string()); - } - /* FINISHME: Test for other invalid qualifiers. */ - } - - apply_type_qualifier_to_variable(& this->type->qualifier, var, state, - & loc, false); - apply_layout_qualifier_to_variable(&this->type->qualifier, var, state, - &loc); - - if (this->type->qualifier.flags.q.invariant) { - if (!is_varying_var(var, state->stage)) { - _mesa_glsl_error(&loc, state, - "`%s' cannot be marked invariant; interfaces between " - "shader stages only", var->name); - } - } - - if (state->current_function != NULL) { - const char *mode = NULL; - const char *extra = ""; - - /* There is no need to check for 'inout' here because the parser will - * only allow that in function parameter lists. - */ - if (this->type->qualifier.flags.q.attribute) { - mode = "attribute"; - } else if (this->type->qualifier.flags.q.subroutine) { - mode = "subroutine uniform"; - } else if (this->type->qualifier.flags.q.uniform) { - mode = "uniform"; - } else if (this->type->qualifier.flags.q.varying) { - mode = "varying"; - } else if (this->type->qualifier.flags.q.in) { - mode = "in"; - extra = " or in function parameter list"; - } else if (this->type->qualifier.flags.q.out) { - mode = "out"; - extra = " or in function parameter list"; - } - - if (mode) { - _mesa_glsl_error(& loc, state, - "%s variable `%s' must be declared at " - "global scope%s", - mode, var->name, extra); - } - } else if (var->data.mode == ir_var_shader_in) { - var->data.read_only = true; - - if (state->stage == MESA_SHADER_VERTEX) { - bool error_emitted = false; - - /* From page 31 (page 37 of the PDF) of the GLSL 1.50 spec: - * - * "Vertex shader inputs can only be float, floating-point - * vectors, matrices, signed and unsigned integers and integer - * vectors. Vertex shader inputs can also form arrays of these - * types, but not structures." - * - * From page 31 (page 27 of the PDF) of the GLSL 1.30 spec: - * - * "Vertex shader inputs can only be float, floating-point - * vectors, matrices, signed and unsigned integers and integer - * vectors. They cannot be arrays or structures." - * - * From page 23 (page 29 of the PDF) of the GLSL 1.20 spec: - * - * "The attribute qualifier can be used only with float, - * floating-point vectors, and matrices. Attribute variables - * cannot be declared as arrays or structures." - * - * From page 33 (page 39 of the PDF) of the GLSL ES 3.00 spec: - * - * "Vertex shader inputs can only be float, floating-point - * vectors, matrices, signed and unsigned integers and integer - * vectors. Vertex shader inputs cannot be arrays or - * structures." 
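-          *
-          * Illustrative vertex shader inputs (mine) under these rules:
-          *
-          *    in vec4  position;   // float types: valid in all versions
-          *    in ivec2 cell;       // integers: GLSL >= 1.30 / ES 3.00
-          *    struct S { float f; };
-          *    in S s;              // structures: never valid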
-          */
-         const glsl_type *check_type = var->type->without_array();
-
-         switch (check_type->base_type) {
-         case GLSL_TYPE_FLOAT:
-            break;
-         case GLSL_TYPE_UINT:
-         case GLSL_TYPE_INT:
-            if (state->is_version(120, 300))
-               break;
-            /* FALLTHROUGH */
-         case GLSL_TYPE_DOUBLE:
-            if (check_type->base_type == GLSL_TYPE_DOUBLE && (state->is_version(410, 0) || state->ARB_vertex_attrib_64bit_enable))
-               break;
-            /* FALLTHROUGH */
-         default:
-            _mesa_glsl_error(& loc, state,
-                             "vertex shader input / attribute cannot have "
-                             "type %s`%s'",
-                             var->type->is_array() ? "array of " : "",
-                             check_type->name);
-            error_emitted = true;
-         }
-
-         if (!error_emitted && var->type->is_array() &&
-             !state->check_version(150, 0, &loc,
-                                   "vertex shader input / attribute "
-                                   "cannot have array type")) {
-            error_emitted = true;
-         }
-      } else if (state->stage == MESA_SHADER_GEOMETRY) {
-         /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec:
-          *
-          *    Geometry shader input variables get the per-vertex values
-          *    written out by vertex shader output variables of the same
-          *    names. Since a geometry shader operates on a set of
-          *    vertices, each input varying variable (or input block, see
-          *    interface blocks below) needs to be declared as an array.
-          */
-         if (!var->type->is_array()) {
-            _mesa_glsl_error(&loc, state,
-                             "geometry shader inputs must be arrays");
-         }
-
-         handle_geometry_shader_input_decl(state, loc, var);
-      } else if (state->stage == MESA_SHADER_FRAGMENT) {
-         /* From section 4.3.4 (Input Variables) of the GLSL ES 3.10 spec:
-          *
-          *    It is a compile-time error to declare a fragment shader
-          *    input with, or that contains, any of the following types:
-          *
-          *    * A boolean type
-          *    * An opaque type
-          *    * An array of arrays
-          *    * An array of structures
-          *    * A structure containing an array
-          *    * A structure containing a structure
-          */
-         if (state->es_shader) {
-            const glsl_type *check_type = var->type->without_array();
-            if (check_type->is_boolean() ||
-                check_type->contains_opaque()) {
-               _mesa_glsl_error(&loc, state,
-                                "fragment shader input cannot have type %s",
-                                check_type->name);
-            }
-            if (var->type->is_array() &&
-                var->type->fields.array->is_array()) {
-               _mesa_glsl_error(&loc, state,
-                                "%s shader input "
-                                "cannot have an array of arrays",
-                                _mesa_shader_stage_to_string(state->stage));
-            }
-            if (var->type->is_array() &&
-                var->type->fields.array->is_record()) {
-               _mesa_glsl_error(&loc, state,
-                                "fragment shader input "
-                                "cannot have an array of structs");
-            }
-            if (var->type->is_record()) {
-               for (unsigned i = 0; i < var->type->length; i++) {
-                  if (var->type->fields.structure[i].type->is_array() ||
-                      var->type->fields.structure[i].type->is_record())
-                     _mesa_glsl_error(&loc, state,
-                                      "fragment shader input cannot have "
-                                      "a struct that contains an "
-                                      "array or struct");
-               }
-            }
-         }
-      } else if (state->stage == MESA_SHADER_TESS_CTRL ||
-                 state->stage == MESA_SHADER_TESS_EVAL) {
-         handle_tess_shader_input_decl(state, loc, var);
-      }
-   } else if (var->data.mode == ir_var_shader_out) {
-      const glsl_type *check_type = var->type->without_array();
-
-      /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec:
-       *
-       *    It is a compile-time error to declare a vertex, tessellation
-       *    evaluation, tessellation control, or geometry shader output
-       *    that contains any of the following:
-       *
-       *    * A Boolean type (bool, bvec2 ...)
- * * An opaque type - */ - if (check_type->is_boolean() || check_type->contains_opaque()) - _mesa_glsl_error(&loc, state, - "%s shader output cannot have type %s", - _mesa_shader_stage_to_string(state->stage), - check_type->name); - - /* From section 4.3.6 (Output variables) of the GLSL 4.40 spec: - * - * It is a compile-time error to declare a fragment shader output - * that contains any of the following: - * - * * A Boolean type (bool, bvec2 ...) - * * A double-precision scalar or vector (double, dvec2 ...) - * * An opaque type - * * Any matrix type - * * A structure - */ - if (state->stage == MESA_SHADER_FRAGMENT) { - if (check_type->is_record() || check_type->is_matrix()) - _mesa_glsl_error(&loc, state, - "fragment shader output " - "cannot have struct or matrix type"); - switch (check_type->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - break; - default: - _mesa_glsl_error(&loc, state, - "fragment shader output cannot have " - "type %s", check_type->name); - } - } - - /* From section 4.3.6 (Output Variables) of the GLSL ES 3.10 spec: - * - * It is a compile-time error to declare a vertex shader output - * with, or that contains, any of the following types: - * - * * A boolean type - * * An opaque type - * * An array of arrays - * * An array of structures - * * A structure containing an array - * * A structure containing a structure - * - * It is a compile-time error to declare a fragment shader output - * with, or that contains, any of the following types: - * - * * A boolean type - * * An opaque type - * * A matrix - * * A structure - * * An array of array - */ - if (state->es_shader) { - if (var->type->is_array() && - var->type->fields.array->is_array()) { - _mesa_glsl_error(&loc, state, - "%s shader output " - "cannot have an array of arrays", - _mesa_shader_stage_to_string(state->stage)); - } - if (state->stage == MESA_SHADER_VERTEX) { - if (var->type->is_array() && - var->type->fields.array->is_record()) { - _mesa_glsl_error(&loc, state, - "vertex shader output " - "cannot have an array of structs"); - } - if (var->type->is_record()) { - for (unsigned i = 0; i < var->type->length; i++) { - if (var->type->fields.structure[i].type->is_array() || - var->type->fields.structure[i].type->is_record()) - _mesa_glsl_error(&loc, state, - "vertex shader output cannot have a " - "struct that contains an " - "array or struct"); - } - } - } - } - - if (state->stage == MESA_SHADER_TESS_CTRL) { - handle_tess_ctrl_shader_output_decl(state, loc, var); - } - } else if (var->type->contains_subroutine()) { - /* declare subroutine uniforms as hidden */ - var->data.how_declared = ir_var_hidden; - } - - /* Integer fragment inputs must be qualified with 'flat'. In GLSL ES, - * so must integer vertex outputs. - * - * From section 4.3.4 ("Inputs") of the GLSL 1.50 spec: - * "Fragment shader inputs that are signed or unsigned integers or - * integer vectors must be qualified with the interpolation qualifier - * flat." - * - * From section 4.3.4 ("Input Variables") of the GLSL 3.00 ES spec: - * "Fragment shader inputs that are, or contain, signed or unsigned - * integers or integer vectors must be qualified with the - * interpolation qualifier flat." - * - * From section 4.3.6 ("Output Variables") of the GLSL 3.00 ES spec: - * "Vertex shader outputs that are, or contain, signed or unsigned - * integers or integer vectors must be qualified with the - * interpolation qualifier flat." 
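-       *
-       * For illustration (not spec text), in a GLSL 1.50 fragment shader:
-       *
-       *    in ivec2 index;        // error: not qualified with 'flat'
-       *    flat in ivec2 index;   // OK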
-       *
-       * Note that prior to GLSL 1.50, this requirement applied to vertex
-       * outputs rather than fragment inputs.  That creates problems in the
-       * presence of geometry shaders, so we adopt the GLSL 1.50 rule for all
-       * desktop GL shaders.  For GLSL ES shaders, we follow the spec and
-       * apply the restriction to both vertex outputs and fragment inputs.
-       *
-       * Note also that the desktop GLSL specs are missing the text "or
-       * contain"; this is presumably an oversight, since there is no
-       * reasonable way to interpolate a fragment shader input that contains
-       * an integer.
-       */
-      if (state->is_version(130, 300) &&
-          var->type->contains_integer() &&
-          var->data.interpolation != INTERP_QUALIFIER_FLAT &&
-          ((state->stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_in)
-           || (state->stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_out
-               && state->es_shader))) {
-         const char *var_type = (state->stage == MESA_SHADER_VERTEX) ?
-            "vertex output" : "fragment input";
-         _mesa_glsl_error(&loc, state, "if a %s is (or contains) "
-                          "an integer, then it must be qualified with 'flat'",
-                          var_type);
-      }
-
-      /* Double fragment inputs must be qualified with 'flat'. */
-      if (var->type->contains_double() &&
-          var->data.interpolation != INTERP_QUALIFIER_FLAT &&
-          state->stage == MESA_SHADER_FRAGMENT &&
-          var->data.mode == ir_var_shader_in) {
-         _mesa_glsl_error(&loc, state, "if a fragment input is (or contains) "
-                          "a double, then it must be qualified with 'flat'");
-      }
-
-      /* Interpolation qualifiers cannot be applied to 'centroid' and
-       * 'centroid varying'.
-       *
-       * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec:
-       *    "interpolation qualifiers may only precede the qualifiers in,
-       *    centroid in, out, or centroid out in a declaration. They do not apply
-       *    to the deprecated storage qualifiers varying or centroid varying."
-       *
-       * These deprecated storage qualifiers do not exist in GLSL ES 3.00.
-       */
-      if (state->is_version(130, 0)
-          && this->type->qualifier.has_interpolation()
-          && this->type->qualifier.flags.q.varying) {
-
-         const char *i = this->type->qualifier.interpolation_string();
-         assert(i != NULL);
-         const char *s;
-         if (this->type->qualifier.flags.q.centroid)
-            s = "centroid varying";
-         else
-            s = "varying";
-
-         _mesa_glsl_error(&loc, state,
-                          "qualifier '%s' cannot be applied to the "
-                          "deprecated storage qualifier '%s'", i, s);
-      }
-
-
-      /* Interpolation qualifiers can only apply to vertex shader outputs and
-       * fragment shader inputs.
-       *
-       * From page 29 (page 35 of the PDF) of the GLSL 1.30 spec:
-       *    "Outputs from a vertex shader (out) and inputs to a fragment
-       *    shader (in) can be further qualified with one or more of these
-       *    interpolation qualifiers"
-       *
-       * From page 31 (page 37 of the PDF) of the GLSL ES 3.00 spec:
-       *    "These interpolation qualifiers may only precede the qualifiers
-       *    in, centroid in, out, or centroid out in a declaration. They do
-       *    not apply to inputs into a vertex shader or outputs from a
-       *    fragment shader."
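-       *
-       * e.g. (illustrative):
-       *
-       *    smooth in vec4 a;    // error in a vertex shader
-       *    smooth out vec4 b;   // error in a fragment shader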
-       */
-      if (state->is_version(130, 300)
-          && this->type->qualifier.has_interpolation()) {
-
-         const char *i = this->type->qualifier.interpolation_string();
-         assert(i != NULL);
-
-         switch (state->stage) {
-         case MESA_SHADER_VERTEX:
-            if (this->type->qualifier.flags.q.in) {
-               _mesa_glsl_error(&loc, state,
-                                "qualifier '%s' cannot be applied to vertex "
-                                "shader inputs", i);
-            }
-            break;
-         case MESA_SHADER_FRAGMENT:
-            if (this->type->qualifier.flags.q.out) {
-               _mesa_glsl_error(&loc, state,
-                                "qualifier '%s' cannot be applied to fragment "
-                                "shader outputs", i);
-            }
-            break;
-         default:
-            break;
-         }
-      }
-
-
-      /* From section 4.3.4 of the GLSL 4.00 spec:
-       *    "Input variables may not be declared using the patch in qualifier
-       *    in tessellation control or geometry shaders."
-       *
-       * From section 4.3.6 of the GLSL 4.00 spec:
-       *    "It is an error to use patch out in a vertex, tessellation
-       *    evaluation, or geometry shader."
-       *
-       * This doesn't explicitly forbid using them in a fragment shader, but
-       * that's probably just an oversight.
-       */
-      if (state->stage != MESA_SHADER_TESS_EVAL
-          && this->type->qualifier.flags.q.patch
-          && this->type->qualifier.flags.q.in) {
-
-         _mesa_glsl_error(&loc, state, "'patch in' can only be used in a "
-                          "tessellation evaluation shader");
-      }
-
-      if (state->stage != MESA_SHADER_TESS_CTRL
-          && this->type->qualifier.flags.q.patch
-          && this->type->qualifier.flags.q.out) {
-
-         _mesa_glsl_error(&loc, state, "'patch out' can only be used in a "
-                          "tessellation control shader");
-      }
-
-      /* Precision qualifiers exist only in GLSL versions 1.00 and >= 1.30.
-       */
-      if (this->type->qualifier.precision != ast_precision_none) {
-         state->check_precision_qualifiers_allowed(&loc);
-      }
-
-
-      /* If a precision qualifier is allowed on a type, it is allowed on
-       * an array of that type.
-       */
-      if (!(this->type->qualifier.precision == ast_precision_none
-            || precision_qualifier_allowed(var->type->without_array()))) {
-
-         _mesa_glsl_error(&loc, state,
-                          "precision qualifiers apply only to floating point"
-                          ", integer and opaque types");
-      }
-
-      /* From section 4.1.7 of the GLSL 4.40 spec:
-       *
-       *    "[Opaque types] can only be declared as function
-       *    parameters or uniform-qualified variables."
-       */
-      if (var_type->contains_opaque() &&
-          !this->type->qualifier.flags.q.uniform) {
-         _mesa_glsl_error(&loc, state,
-                          "opaque variables must be declared uniform");
-      }
-
-      /* Process the initializer and add its instructions to a temporary
-       * list.  This list will be added to the instruction stream (below) after
-       * the declaration is added.  This is done because in some cases (such as
-       * redeclarations) the declaration may not actually be added to the
-       * instruction stream.
-       */
-      exec_list initializer_instructions;
-
-      /* Examine var name here since var may get deleted in the next call */
-      bool var_is_gl_id = is_gl_identifier(var->name);
-
-      ir_variable *earlier =
-         get_variable_being_redeclared(var, decl->get_location(), state,
-                                       false /* allow_all_redeclarations */);
-      if (earlier != NULL) {
-         if (var_is_gl_id &&
-             earlier->data.how_declared == ir_var_declared_in_block) {
-            _mesa_glsl_error(&loc, state,
-                             "`%s' has already been redeclared using "
-                             "gl_PerVertex", earlier->name);
-         }
-         earlier->data.how_declared = ir_var_declared_normally;
-      }
-
-      if (decl->initializer != NULL) {
-         result = process_initializer((earlier == NULL) ?
var : earlier, - decl, this->type, - &initializer_instructions, state); - } else { - validate_array_dimensions(var_type, state, &loc); - } - - /* From page 23 (page 29 of the PDF) of the GLSL 1.10 spec: - * - * "It is an error to write to a const variable outside of - * its declaration, so they must be initialized when - * declared." - */ - if (this->type->qualifier.flags.q.constant && decl->initializer == NULL) { - _mesa_glsl_error(& loc, state, - "const declaration of `%s' must be initialized", - decl->identifier); - } - - if (state->es_shader) { - const glsl_type *const t = (earlier == NULL) - ? var->type : earlier->type; - - if (t->is_unsized_array()) - /* Section 10.17 of the GLSL ES 1.00 specification states that - * unsized array declarations have been removed from the language. - * Arrays that are sized using an initializer are still explicitly - * sized. However, GLSL ES 1.00 does not allow array - * initializers. That is only allowed in GLSL ES 3.00. - * - * Section 4.1.9 (Arrays) of the GLSL ES 3.00 spec says: - * - * "An array type can also be formed without specifying a size - * if the definition includes an initializer: - * - * float x[] = float[2] (1.0, 2.0); // declares an array of size 2 - * float y[] = float[] (1.0, 2.0, 3.0); // declares an array of size 3 - * - * float a[5]; - * float b[] = a;" - */ - _mesa_glsl_error(& loc, state, - "unsized array declarations are not allowed in " - "GLSL ES"); - } - - /* If the declaration is not a redeclaration, there are a few additional - * semantic checks that must be applied. In addition, variable that was - * created for the declaration should be added to the IR stream. - */ - if (earlier == NULL) { - validate_identifier(decl->identifier, loc, state); - - /* Add the variable to the symbol table. Note that the initializer's - * IR was already processed earlier (though it hasn't been emitted - * yet), without the variable in scope. - * - * This differs from most C-like languages, but it follows the GLSL - * specification. From page 28 (page 34 of the PDF) of the GLSL 1.50 - * spec: - * - * "Within a declaration, the scope of a name starts immediately - * after the initializer if present or immediately after the name - * being declared if not." - */ - if (!state->symbols->add_variable(var)) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(&loc, state, "name `%s' already taken in the " - "current scope", decl->identifier); - continue; - } - - /* Push the variable declaration to the top. It means that all the - * variable declarations will appear in a funny last-to-first order, - * but otherwise we run into trouble if a function is prototyped, a - * global var is decled, then the function is defined with usage of - * the global var. See glslparsertest's CorrectModule.frag. - */ - instructions->push_head(var); - } - - instructions->append_list(&initializer_instructions); - } - - - /* Generally, variable declarations do not have r-values. However, - * one is used for the declaration in - * - * while (bool b = some_condition()) { - * ... - * } - * - * so we return the rvalue from the last seen declaration here. 
- */ - return result; -} - - -ir_rvalue * -ast_parameter_declarator::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - const struct glsl_type *type; - const char *name = NULL; - YYLTYPE loc = this->get_location(); - - type = this->type->glsl_type(& name, state); - - if (type == NULL) { - if (name != NULL) { - _mesa_glsl_error(& loc, state, - "invalid type `%s' in declaration of `%s'", - name, this->identifier); - } else { - _mesa_glsl_error(& loc, state, - "invalid type in declaration of `%s'", - this->identifier); - } - - type = glsl_type::error_type; - } - - /* From page 62 (page 68 of the PDF) of the GLSL 1.50 spec: - * - * "Functions that accept no input arguments need not use void in the - * argument list because prototypes (or definitions) are required and - * therefore there is no ambiguity when an empty argument list "( )" is - * declared. The idiom "(void)" as a parameter list is provided for - * convenience." - * - * Placing this check here prevents a void parameter being set up - * for a function, which avoids tripping up checks for main taking - * parameters and lookups of an unnamed symbol. - */ - if (type->is_void()) { - if (this->identifier != NULL) - _mesa_glsl_error(& loc, state, - "named parameter cannot have type `void'"); - - is_void = true; - return NULL; - } - - if (formal_parameter && (this->identifier == NULL)) { - _mesa_glsl_error(& loc, state, "formal parameter lacks a name"); - return NULL; - } - - /* This only handles "vec4 foo[..]". The earlier specifier->glsl_type(...) - * call already handled the "vec4[..] foo" case. - */ - type = process_array_type(&loc, type, this->array_specifier, state); - - if (!type->is_error() && type->is_unsized_array()) { - _mesa_glsl_error(&loc, state, "arrays passed as parameters must have " - "a declared size"); - type = glsl_type::error_type; - } - - is_void = false; - ir_variable *var = new(ctx) - ir_variable(type, this->identifier, ir_var_function_in); - - /* Apply any specified qualifiers to the parameter declaration. Note that - * for function parameters the default mode is 'in'. - */ - apply_type_qualifier_to_variable(& this->type->qualifier, var, state, & loc, - true); - - /* From section 4.1.7 of the GLSL 4.40 spec: - * - * "Opaque variables cannot be treated as l-values; hence cannot - * be used as out or inout function parameters, nor can they be - * assigned into." - */ - if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out) - && type->contains_opaque()) { - _mesa_glsl_error(&loc, state, "out and inout parameters cannot " - "contain opaque variables"); - type = glsl_type::error_type; - } - - /* From page 39 (page 45 of the PDF) of the GLSL 1.10 spec: - * - * "When calling a function, expressions that do not evaluate to - * l-values cannot be passed to parameters declared as out or inout." - * - * From page 32 (page 38 of the PDF) of the GLSL 1.10 spec: - * - * "Other binary or unary expressions, non-dereferenced arrays, - * function names, swizzles with repeated fields, and constants - * cannot be l-values." - * - * So for GLSL 1.10, passing an array as an out or inout parameter is not - * allowed. This restriction is removed in GLSL 1.20, and in GLSL ES. 
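-    *
-    * e.g. (an illustrative prototype, not from the spec):
-    *
-    *    void f(out float a[2]);   // rejected in GLSL 1.10; accepted in
-    *                              // GLSL 1.20+ and in GLSL ES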
- */ - if ((var->data.mode == ir_var_function_inout || var->data.mode == ir_var_function_out) - && type->is_array() - && !state->check_version(120, 100, &loc, - "arrays cannot be out or inout parameters")) { - type = glsl_type::error_type; - } - - instructions->push_tail(var); - - /* Parameter declarations do not have r-values. - */ - return NULL; -} - - -void -ast_parameter_declarator::parameters_to_hir(exec_list *ast_parameters, - bool formal, - exec_list *ir_parameters, - _mesa_glsl_parse_state *state) -{ - ast_parameter_declarator *void_param = NULL; - unsigned count = 0; - - foreach_list_typed (ast_parameter_declarator, param, link, ast_parameters) { - param->formal_parameter = formal; - param->hir(ir_parameters, state); - - if (param->is_void) - void_param = param; - - count++; - } - - if ((void_param != NULL) && (count > 1)) { - YYLTYPE loc = void_param->get_location(); - - _mesa_glsl_error(& loc, state, - "`void' parameter must be only parameter"); - } -} - - -void -emit_function(_mesa_glsl_parse_state *state, ir_function *f) -{ - /* IR invariants disallow function declarations or definitions - * nested within other function definitions. But there is no - * requirement about the relative order of function declarations - * and definitions with respect to one another. So simply insert - * the new ir_function block at the end of the toplevel instruction - * list. - */ - state->toplevel_ir->push_tail(f); -} - - -ir_rvalue * -ast_function::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - ir_function *f = NULL; - ir_function_signature *sig = NULL; - exec_list hir_parameters; - YYLTYPE loc = this->get_location(); - - const char *const name = identifier; - - /* New functions are always added to the top-level IR instruction stream, - * so this instruction list pointer is ignored. See also emit_function - * (called below). - */ - (void) instructions; - - /* From page 21 (page 27 of the PDF) of the GLSL 1.20 spec, - * - * "Function declarations (prototypes) cannot occur inside of functions; - * they must be at global scope, or for the built-in functions, outside - * the global scope." - * - * From page 27 (page 33 of the PDF) of the GLSL ES 1.00.16 spec, - * - * "User defined functions may only be defined within the global scope." - * - * Note that this language does not appear in GLSL 1.10. - */ - if ((state->current_function != NULL) && - state->is_version(120, 100)) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(&loc, state, - "declaration of function `%s' not allowed within " - "function body", name); - } - - validate_identifier(name, this->get_location(), state); - - /* Convert the list of function parameters to HIR now so that they can be - * used below to compare this function's signature with previously seen - * signatures for functions with the same name. - */ - ast_parameter_declarator::parameters_to_hir(& this->parameters, - is_definition, - & hir_parameters, state); - - const char *return_type_name; - const glsl_type *return_type = - this->return_type->glsl_type(& return_type_name, state); - - if (!return_type) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(&loc, state, - "function `%s' has undeclared return type `%s'", - name, return_type_name); - return_type = glsl_type::error_type; - } - - /* ARB_shader_subroutine states: - * "Subroutine declarations cannot be prototyped. It is an error to prepend - * subroutine(...) to a function declaration." 
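-    *
-    * An illustrative pair of declarations (mine, not from the extension
-    * text):
-    *
-    *    subroutine void func_t(void);
-    *    subroutine(func_t) void impl(void) { }   // definition: OK
-    *    subroutine(func_t) void other(void);     // bare prototype: error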
- */ - if (this->return_type->qualifier.flags.q.subroutine_def && !is_definition) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(&loc, state, - "function declaration `%s' cannot have subroutine prepended", - name); - } - - /* From page 56 (page 62 of the PDF) of the GLSL 1.30 spec: - * "No qualifier is allowed on the return type of a function." - */ - if (this->return_type->has_qualifiers(state)) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(& loc, state, - "function `%s' return type has qualifiers", name); - } - - /* Section 6.1 (Function Definitions) of the GLSL 1.20 spec says: - * - * "Arrays are allowed as arguments and as the return type. In both - * cases, the array must be explicitly sized." - */ - if (return_type->is_unsized_array()) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(& loc, state, - "function `%s' return type array must be explicitly " - "sized", name); - } - - /* From section 4.1.7 of the GLSL 4.40 spec: - * - * "[Opaque types] can only be declared as function parameters - * or uniform-qualified variables." - */ - if (return_type->contains_opaque()) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(&loc, state, - "function `%s' return type can't contain an opaque type", - name); - } - - /* Create an ir_function if one doesn't already exist. */ - f = state->symbols->get_function(name); - if (f == NULL) { - f = new(ctx) ir_function(name); - if (!this->return_type->qualifier.flags.q.subroutine) { - if (!state->symbols->add_function(f)) { - /* This function name shadows a non-function use of the same name. */ - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(&loc, state, "function name `%s' conflicts with " - "non-function", name); - return NULL; - } - } - emit_function(state, f); - } - - /* From GLSL ES 3.0 spec, chapter 6.1 "Function Definitions", page 71: - * - * "A shader cannot redefine or overload built-in functions." - * - * While in GLSL ES 1.0 specification, chapter 8 "Built-in Functions": - * - * "User code can overload the built-in functions but cannot redefine - * them." - */ - if (state->es_shader && state->language_version >= 300) { - /* Local shader has no exact candidates; check the built-ins. */ - _mesa_glsl_initialize_builtin_functions(); - if (_mesa_glsl_find_builtin_function_by_name(name)) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(& loc, state, - "A shader cannot redefine or overload built-in " - "function `%s' in GLSL ES 3.00", name); - return NULL; - } - } - - /* Verify that this function's signature either doesn't match a previously - * seen signature for a function with the same name, or, if a match is found, - * that the previously seen signature does not have an associated definition. 
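- *
- * For example (hypothetical), a redundant matching prototype is simply
- * ignored, while a mismatching one is diagnosed below:
- *
- *    float f(float x);
- *    int f(float x) { return 0; }   // error: return type doesn't
- *                                   // match prototype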
- */ - if (state->es_shader || f->has_user_signature()) { - sig = f->exact_matching_signature(state, &hir_parameters); - if (sig != NULL) { - const char *badvar = sig->qualifiers_match(&hir_parameters); - if (badvar != NULL) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(&loc, state, "function `%s' parameter `%s' " - "qualifiers don't match prototype", name, badvar); - } - - if (sig->return_type != return_type) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(&loc, state, "function `%s' return type doesn't " - "match prototype", name); - } - - if (sig->is_defined) { - if (is_definition) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(& loc, state, "function `%s' redefined", name); - } else { - /* We just encountered a prototype that exactly matches a - * function that's already been defined. This is redundant, - * and we should ignore it. - */ - return NULL; - } - } - } - } - - /* Verify the return type of main() */ - if (strcmp(name, "main") == 0) { - if (! return_type->is_void()) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(& loc, state, "main() must return void"); - } - - if (!hir_parameters.is_empty()) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(& loc, state, "main() must not take any parameters"); - } - } - - /* Finish storing the information about this new function in its signature. - */ - if (sig == NULL) { - sig = new(ctx) ir_function_signature(return_type); - f->add_signature(sig); - } - - sig->replace_parameters(&hir_parameters); - signature = sig; - - if (this->return_type->qualifier.flags.q.subroutine_def) { - int idx; - - if (this->return_type->qualifier.flags.q.explicit_index) { - unsigned qual_index; - if (process_qualifier_constant(state, &loc, "index", - this->return_type->qualifier.index, - &qual_index)) { - if (!state->has_explicit_uniform_location()) { - _mesa_glsl_error(&loc, state, "subroutine index requires " - "GL_ARB_explicit_uniform_location or " - "GLSL 4.30"); - } else if (qual_index >= MAX_SUBROUTINES) { - _mesa_glsl_error(&loc, state, - "invalid subroutine index (%d) index must " - "be a number between 0 and " - "GL_MAX_SUBROUTINES - 1 (%d)", qual_index, - MAX_SUBROUTINES - 1); - } else { - f->subroutine_index = qual_index; - } - } - } - - f->num_subroutine_types = this->return_type->qualifier.subroutine_list->declarations.length(); - f->subroutine_types = ralloc_array(state, const struct glsl_type *, - f->num_subroutine_types); - idx = 0; - foreach_list_typed(ast_declaration, decl, link, &this->return_type->qualifier.subroutine_list->declarations) { - const struct glsl_type *type; - /* the subroutine type must be already declared */ - type = state->symbols->get_type(decl->identifier); - if (!type) { - _mesa_glsl_error(& loc, state, "unknown type '%s' in subroutine function definition", decl->identifier); - } - f->subroutine_types[idx++] = type; - } - state->subroutines = (ir_function **)reralloc(state, state->subroutines, - ir_function *, - state->num_subroutines + 1); - state->subroutines[state->num_subroutines] = f; - state->num_subroutines++; - - } - - if (this->return_type->qualifier.flags.q.subroutine) { - if (!state->symbols->add_type(this->identifier, glsl_type::get_subroutine_instance(this->identifier))) { - _mesa_glsl_error(& loc, state, "type '%s' previously defined", this->identifier); - return NULL; - } - state->subroutine_types = (ir_function **)reralloc(state, state->subroutine_types, - ir_function *, - state->num_subroutine_types + 1); - 
state->subroutine_types[state->num_subroutine_types] = f; - state->num_subroutine_types++; - - f->is_subroutine = true; - } - - /* Function declarations (prototypes) do not have r-values. - */ - return NULL; -} - - -ir_rvalue * -ast_function_definition::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - prototype->is_definition = true; - prototype->hir(instructions, state); - - ir_function_signature *signature = prototype->signature; - if (signature == NULL) - return NULL; - - assert(state->current_function == NULL); - state->current_function = signature; - state->found_return = false; - - /* Duplicate parameters declared in the prototype as concrete variables. - * Add these to the symbol table. - */ - state->symbols->push_scope(); - foreach_in_list(ir_variable, var, &signature->parameters) { - assert(var->as_variable() != NULL); - - /* The only way a parameter would "exist" is if two parameters have - * the same name. - */ - if (state->symbols->name_declared_this_scope(var->name)) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(& loc, state, "parameter `%s' redeclared", var->name); - } else { - state->symbols->add_variable(var); - } - } - - /* Convert the body of the function to HIR. */ - this->body->hir(&signature->body, state); - signature->is_defined = true; - - state->symbols->pop_scope(); - - assert(state->current_function == signature); - state->current_function = NULL; - - if (!signature->return_type->is_void() && !state->found_return) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(& loc, state, "function `%s' has non-void return type " - "%s, but no return statement", - signature->function_name(), - signature->return_type->name); - } - - /* Function definitions do not have r-values. - */ - return NULL; -} - - -ir_rvalue * -ast_jump_statement::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - - switch (mode) { - case ast_return: { - ir_return *inst; - assert(state->current_function); - - if (opt_return_value) { - ir_rvalue *ret = opt_return_value->hir(instructions, state); - - /* The value of the return type can be NULL if the shader says - * 'return foo();' and foo() is a function that returns void. - * - * NOTE: The GLSL spec doesn't say that this is an error. The type - * of the return value is void. If the return type of the function is - * also void, then this should compile without error. Seriously. - */ - const glsl_type *const ret_type = - (ret == NULL) ? glsl_type::void_type : ret->type; - - /* Implicit conversions are not allowed for return values prior to - * ARB_shading_language_420pack. 
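- *
- * e.g. (illustrative):
- *
- *    float f() { return 1; }   // int -> float return: error here,
- *                              // but accepted with GLSL 4.20 /
- *                              // ARB_shading_language_420pack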
- */ - if (state->current_function->return_type != ret_type) { - YYLTYPE loc = this->get_location(); - - if (state->has_420pack()) { - if (!apply_implicit_conversion(state->current_function->return_type, - ret, state)) { - _mesa_glsl_error(& loc, state, - "could not implicitly convert return value " - "to %s, in function `%s'", - state->current_function->return_type->name, - state->current_function->function_name()); - } - } else { - _mesa_glsl_error(& loc, state, - "`return' with wrong type %s, in function `%s' " - "returning %s", - ret_type->name, - state->current_function->function_name(), - state->current_function->return_type->name); - } - } else if (state->current_function->return_type->base_type == - GLSL_TYPE_VOID) { - YYLTYPE loc = this->get_location(); - - /* The ARB_shading_language_420pack, GLSL ES 3.0, and GLSL 4.20 - * specs add a clarification: - * - * "A void function can only use return without a return argument, even if - * the return argument has void type. Return statements only accept values: - * - * void func1() { } - * void func2() { return func1(); } // illegal return statement" - */ - _mesa_glsl_error(& loc, state, - "void functions can only use `return' without a " - "return argument"); - } - - inst = new(ctx) ir_return(ret); - } else { - if (state->current_function->return_type->base_type != - GLSL_TYPE_VOID) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(& loc, state, - "`return' with no value, in function %s returning " - "non-void", - state->current_function->function_name()); - } - inst = new(ctx) ir_return; - } - - state->found_return = true; - instructions->push_tail(inst); - break; - } - - case ast_discard: - if (state->stage != MESA_SHADER_FRAGMENT) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(& loc, state, - "`discard' may only appear in a fragment shader"); - } - instructions->push_tail(new(ctx) ir_discard); - break; - - case ast_break: - case ast_continue: - if (mode == ast_continue && - state->loop_nesting_ast == NULL) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(& loc, state, "continue may only appear in a loop"); - } else if (mode == ast_break && - state->loop_nesting_ast == NULL && - state->switch_state.switch_nesting_ast == NULL) { - YYLTYPE loc = this->get_location(); - - _mesa_glsl_error(& loc, state, - "break may only appear in a loop or a switch"); - } else { - /* For a loop, inline the for loop expression again, since we don't - * know where near the end of the loop body the normal copy of it is - * going to be placed. Same goes for the condition for a do-while - * loop. - */ - if (state->loop_nesting_ast != NULL && - mode == ast_continue && !state->switch_state.is_switch_innermost) { - if (state->loop_nesting_ast->rest_expression) { - state->loop_nesting_ast->rest_expression->hir(instructions, - state); - } - if (state->loop_nesting_ast->mode == - ast_iteration_statement::ast_do_while) { - state->loop_nesting_ast->condition_to_hir(instructions, state); - } - } - - if (state->switch_state.is_switch_innermost && - mode == ast_continue) { - /* Set 'continue_inside' to true. */ - ir_rvalue *const true_val = new (ctx) ir_constant(true); - ir_dereference_variable *deref_continue_inside_var = - new(ctx) ir_dereference_variable(state->switch_state.continue_inside); - instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var, - true_val)); - - /* Break out from the switch, continue for the loop will - * be called right after switch. 
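- *
- * In other words, a `continue' nested inside the switch lowers to
- * roughly (GLSL-style sketch using the `continue_inside_tmp'
- * temporary created by the enclosing switch):
- *
- *    continue_inside_tmp = true;
- *    break;   // leaves the loop that implements the switch
- *
- * and the code emitted after the switch re-issues the `continue'
- * for the real enclosing loop.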
*/
- ir_loop_jump *const jump =
- new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
- instructions->push_tail(jump);
-
- } else if (state->switch_state.is_switch_innermost &&
- mode == ast_break) {
- /* Force break out of switch by inserting a break. */
- ir_loop_jump *const jump =
- new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
- instructions->push_tail(jump);
- } else {
- ir_loop_jump *const jump =
- new(ctx) ir_loop_jump((mode == ast_break)
- ? ir_loop_jump::jump_break
- : ir_loop_jump::jump_continue);
- instructions->push_tail(jump);
- }
- }
-
- break;
- }
-
- /* Jump instructions do not have r-values.
- */
- return NULL;
-}
-
-
-ir_rvalue *
-ast_selection_statement::hir(exec_list *instructions,
- struct _mesa_glsl_parse_state *state)
-{
- void *ctx = state;
-
- ir_rvalue *const condition = this->condition->hir(instructions, state);
-
- /* From page 66 (page 72 of the PDF) of the GLSL 1.50 spec:
- *
- * "Any expression whose type evaluates to a Boolean can be used as the
- * conditional expression bool-expression. Vector types are not accepted
- * as the expression to if."
- *
- * The checks are separated so that higher quality diagnostics can be
- * generated for cases where both rules are violated.
- */
- if (!condition->type->is_boolean() || !condition->type->is_scalar()) {
- YYLTYPE loc = this->condition->get_location();
-
- _mesa_glsl_error(& loc, state, "if-statement condition must be scalar "
- "boolean");
- }
-
- ir_if *const stmt = new(ctx) ir_if(condition);
-
- if (then_statement != NULL) {
- state->symbols->push_scope();
- then_statement->hir(& stmt->then_instructions, state);
- state->symbols->pop_scope();
- }
-
- if (else_statement != NULL) {
- state->symbols->push_scope();
- else_statement->hir(& stmt->else_instructions, state);
- state->symbols->pop_scope();
- }
-
- instructions->push_tail(stmt);
-
- /* if-statements do not have r-values.
- */
- return NULL;
-}
-
-
-ir_rvalue *
-ast_switch_statement::hir(exec_list *instructions,
- struct _mesa_glsl_parse_state *state)
-{
- void *ctx = state;
-
- ir_rvalue *const test_expression =
- this->test_expression->hir(instructions, state);
-
- /* From page 66 (page 72 of the PDF) of the GLSL 1.50 spec:
- *
- * "The type of init-expression in a switch statement must be a
- * scalar integer."
- */
- if (!test_expression->type->is_scalar() ||
- !test_expression->type->is_integer()) {
- YYLTYPE loc = this->test_expression->get_location();
-
- _mesa_glsl_error(& loc,
- state,
- "switch-statement expression must be scalar "
- "integer");
- }
-
- /* Track the switch-statement nesting in a stack-like manner.
- */
- struct glsl_switch_state saved = state->switch_state;
-
- state->switch_state.is_switch_innermost = true;
- state->switch_state.switch_nesting_ast = this;
- state->switch_state.labels_ht = hash_table_ctor(0, hash_table_pointer_hash,
- hash_table_pointer_compare);
- state->switch_state.previous_default = NULL;
-
- /* Initialize is_fallthru state to false.
- */
- ir_rvalue *const is_fallthru_val = new (ctx) ir_constant(false);
- state->switch_state.is_fallthru_var =
- new(ctx) ir_variable(glsl_type::bool_type,
- "switch_is_fallthru_tmp",
- ir_var_temporary);
- instructions->push_tail(state->switch_state.is_fallthru_var);
-
- ir_dereference_variable *deref_is_fallthru_var =
- new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var);
- instructions->push_tail(new(ctx) ir_assignment(deref_is_fallthru_var,
- is_fallthru_val));
-
- /* Initialize continue_inside state to false.
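- *
- * This flag is needed for shaders like (illustrative):
- *
- *    for (int i = 0; i < n; i++) {
- *       switch (v) {
- *       case 0:
- *          continue;   // must advance the for-loop, not the loop
- *       }              // that implements this switch
- *    }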
- */ - state->switch_state.continue_inside = - new(ctx) ir_variable(glsl_type::bool_type, - "continue_inside_tmp", - ir_var_temporary); - instructions->push_tail(state->switch_state.continue_inside); - - ir_rvalue *const false_val = new (ctx) ir_constant(false); - ir_dereference_variable *deref_continue_inside_var = - new(ctx) ir_dereference_variable(state->switch_state.continue_inside); - instructions->push_tail(new(ctx) ir_assignment(deref_continue_inside_var, - false_val)); - - state->switch_state.run_default = - new(ctx) ir_variable(glsl_type::bool_type, - "run_default_tmp", - ir_var_temporary); - instructions->push_tail(state->switch_state.run_default); - - /* Loop around the switch is used for flow control. */ - ir_loop * loop = new(ctx) ir_loop(); - instructions->push_tail(loop); - - /* Cache test expression. - */ - test_to_hir(&loop->body_instructions, state); - - /* Emit code for body of switch stmt. - */ - body->hir(&loop->body_instructions, state); - - /* Insert a break at the end to exit loop. */ - ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_break); - loop->body_instructions.push_tail(jump); - - /* If we are inside loop, check if continue got called inside switch. */ - if (state->loop_nesting_ast != NULL) { - ir_dereference_variable *deref_continue_inside = - new(ctx) ir_dereference_variable(state->switch_state.continue_inside); - ir_if *irif = new(ctx) ir_if(deref_continue_inside); - ir_loop_jump *jump = new(ctx) ir_loop_jump(ir_loop_jump::jump_continue); - - if (state->loop_nesting_ast != NULL) { - if (state->loop_nesting_ast->rest_expression) { - state->loop_nesting_ast->rest_expression->hir(&irif->then_instructions, - state); - } - if (state->loop_nesting_ast->mode == - ast_iteration_statement::ast_do_while) { - state->loop_nesting_ast->condition_to_hir(&irif->then_instructions, state); - } - } - irif->then_instructions.push_tail(jump); - instructions->push_tail(irif); - } - - hash_table_dtor(state->switch_state.labels_ht); - - state->switch_state = saved; - - /* Switch statements do not have r-values. */ - return NULL; -} - - -void -ast_switch_statement::test_to_hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - - /* Cache value of test expression. */ - ir_rvalue *const test_val = - test_expression->hir(instructions, - state); - - state->switch_state.test_var = new(ctx) ir_variable(test_val->type, - "switch_test_tmp", - ir_var_temporary); - ir_dereference_variable *deref_test_var = - new(ctx) ir_dereference_variable(state->switch_state.test_var); - - instructions->push_tail(state->switch_state.test_var); - instructions->push_tail(new(ctx) ir_assignment(deref_test_var, test_val)); -} - - -ir_rvalue * -ast_switch_body::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - if (stmts != NULL) - stmts->hir(instructions, state); - - /* Switch bodies do not have r-values. */ - return NULL; -} - -ir_rvalue * -ast_case_statement_list::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - exec_list default_case, after_default, tmp; - - foreach_list_typed (ast_case_statement, case_stmt, link, & this->cases) { - case_stmt->hir(&tmp, state); - - /* Default case. */ - if (state->switch_state.previous_default && default_case.is_empty()) { - default_case.append_list(&tmp); - continue; - } - - /* If default case found, append 'after_default' list. 
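- *
- * e.g. for a hypothetical switch, the three lists receive:
- *
- *    switch (x) {
- *    case 0: ...    // -> instructions (emitted before any default)
- *    default: ...   // -> default_case
- *    case 1: ...    // -> after_default
- *    }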
*/ - if (!default_case.is_empty()) - after_default.append_list(&tmp); - else - instructions->append_list(&tmp); - } - - /* Handle the default case. This is done here because default might not be - * the last case. We need to add checks against following cases first to see - * if default should be chosen or not. - */ - if (!default_case.is_empty()) { - - ir_rvalue *const true_val = new (state) ir_constant(true); - ir_dereference_variable *deref_run_default_var = - new(state) ir_dereference_variable(state->switch_state.run_default); - - /* Choose to run default case initially, following conditional - * assignments might change this. - */ - ir_assignment *const init_var = - new(state) ir_assignment(deref_run_default_var, true_val); - instructions->push_tail(init_var); - - /* Default case was the last one, no checks required. */ - if (after_default.is_empty()) { - instructions->append_list(&default_case); - return NULL; - } - - foreach_in_list(ir_instruction, ir, &after_default) { - ir_assignment *assign = ir->as_assignment(); - - if (!assign) - continue; - - /* Clone the check between case label and init expression. */ - ir_expression *exp = (ir_expression*) assign->condition; - ir_expression *clone = exp->clone(state, NULL); - - ir_dereference_variable *deref_var = - new(state) ir_dereference_variable(state->switch_state.run_default); - ir_rvalue *const false_val = new (state) ir_constant(false); - - ir_assignment *const set_false = - new(state) ir_assignment(deref_var, false_val, clone); - - instructions->push_tail(set_false); - } - - /* Append default case and all cases after it. */ - instructions->append_list(&default_case); - instructions->append_list(&after_default); - } - - /* Case statements do not have r-values. */ - return NULL; -} - -ir_rvalue * -ast_case_statement::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - labels->hir(instructions, state); - - /* Guard case statements depending on fallthru state. */ - ir_dereference_variable *const deref_fallthru_guard = - new(state) ir_dereference_variable(state->switch_state.is_fallthru_var); - ir_if *const test_fallthru = new(state) ir_if(deref_fallthru_guard); - - foreach_list_typed (ast_node, stmt, link, & this->stmts) - stmt->hir(& test_fallthru->then_instructions, state); - - instructions->push_tail(test_fallthru); - - /* Case statements do not have r-values. */ - return NULL; -} - - -ir_rvalue * -ast_case_label_list::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - foreach_list_typed (ast_case_label, label, link, & this->labels) - label->hir(instructions, state); - - /* Case labels do not have r-values. */ - return NULL; -} - -ir_rvalue * -ast_case_label::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - void *ctx = state; - - ir_dereference_variable *deref_fallthru_var = - new(ctx) ir_dereference_variable(state->switch_state.is_fallthru_var); - - ir_rvalue *const true_val = new(ctx) ir_constant(true); - - /* If not default case, ... */ - if (this->test_value != NULL) { - /* Conditionally set fallthru state based on - * comparison of cached test expression value to case label. 
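- *
- * i.e. each case label lowers to roughly (GLSL-style sketch, with
- * LABEL standing in for the case's constant):
- *
- *    if (switch_test_tmp == LABEL)
- *       switch_is_fallthru_tmp = true;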
- */ - ir_rvalue *const label_rval = this->test_value->hir(instructions, state); - ir_constant *label_const = label_rval->constant_expression_value(); - - if (!label_const) { - YYLTYPE loc = this->test_value->get_location(); - - _mesa_glsl_error(& loc, state, - "switch statement case label must be a " - "constant expression"); - - /* Stuff a dummy value in to allow processing to continue. */ - label_const = new(ctx) ir_constant(0); - } else { - ast_expression *previous_label = (ast_expression *) - hash_table_find(state->switch_state.labels_ht, - (void *)(uintptr_t)label_const->value.u[0]); - - if (previous_label) { - YYLTYPE loc = this->test_value->get_location(); - _mesa_glsl_error(& loc, state, "duplicate case value"); - - loc = previous_label->get_location(); - _mesa_glsl_error(& loc, state, "this is the previous case label"); - } else { - hash_table_insert(state->switch_state.labels_ht, - this->test_value, - (void *)(uintptr_t)label_const->value.u[0]); - } - } - - ir_dereference_variable *deref_test_var = - new(ctx) ir_dereference_variable(state->switch_state.test_var); - - ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal, - label_const, - deref_test_var); - - /* - * From GLSL 4.40 specification section 6.2 ("Selection"): - * - * "The type of the init-expression value in a switch statement must - * be a scalar int or uint. The type of the constant-expression value - * in a case label also must be a scalar int or uint. When any pair - * of these values is tested for "equal value" and the types do not - * match, an implicit conversion will be done to convert the int to a - * uint (see section 4.1.10 “Implicit Conversions”) before the compare - * is done." - */ - if (label_const->type != state->switch_state.test_var->type) { - YYLTYPE loc = this->test_value->get_location(); - - const glsl_type *type_a = label_const->type; - const glsl_type *type_b = state->switch_state.test_var->type; - - /* Check if int->uint implicit conversion is supported. */ - bool integer_conversion_supported = - glsl_type::int_type->can_implicitly_convert_to(glsl_type::uint_type, - state); - - if ((!type_a->is_integer() || !type_b->is_integer()) || - !integer_conversion_supported) { - _mesa_glsl_error(&loc, state, "type mismatch with switch " - "init-expression and case label (%s != %s)", - type_a->name, type_b->name); - } else { - /* Conversion of the case label. */ - if (type_a->base_type == GLSL_TYPE_INT) { - if (!apply_implicit_conversion(glsl_type::uint_type, - test_cond->operands[0], state)) - _mesa_glsl_error(&loc, state, "implicit type conversion error"); - } else { - /* Conversion of the init-expression value. */ - if (!apply_implicit_conversion(glsl_type::uint_type, - test_cond->operands[1], state)) - _mesa_glsl_error(&loc, state, "implicit type conversion error"); - } - } - } - - ir_assignment *set_fallthru_on_test = - new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond); - - instructions->push_tail(set_fallthru_on_test); - } else { /* default case */ - if (state->switch_state.previous_default) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(& loc, state, - "multiple default labels in one switch"); - - loc = state->switch_state.previous_default->get_location(); - _mesa_glsl_error(& loc, state, "this is the first default label"); - } - state->switch_state.previous_default = this; - - /* Set fallthru condition on 'run_default' bool. 
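- *
- * i.e. the default label lowers to roughly (GLSL-style sketch):
- *
- *    if (run_default_tmp == true)
- *       switch_is_fallthru_tmp = true;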
*/
- ir_dereference_variable *deref_run_default =
- new(ctx) ir_dereference_variable(state->switch_state.run_default);
- ir_rvalue *const cond_true = new(ctx) ir_constant(true);
- ir_expression *test_cond = new(ctx) ir_expression(ir_binop_all_equal,
- cond_true,
- deref_run_default);
-
- /* Set fallthru state. */
- ir_assignment *set_fallthru =
- new(ctx) ir_assignment(deref_fallthru_var, true_val, test_cond);
-
- instructions->push_tail(set_fallthru);
- }
-
- /* Case statements do not have r-values. */
- return NULL;
-}
-
-void
-ast_iteration_statement::condition_to_hir(exec_list *instructions,
- struct _mesa_glsl_parse_state *state)
-{
- void *ctx = state;
-
- if (condition != NULL) {
- ir_rvalue *const cond =
- condition->hir(instructions, state);
-
- if ((cond == NULL)
- || !cond->type->is_boolean() || !cond->type->is_scalar()) {
- YYLTYPE loc = condition->get_location();
-
- _mesa_glsl_error(& loc, state,
- "loop condition must be scalar boolean");
- } else {
- /* As the first code in the loop body, generate a block that looks
- * like 'if (!condition) break;' as the loop termination condition.
- */
- ir_rvalue *const not_cond =
- new(ctx) ir_expression(ir_unop_logic_not, cond);
-
- ir_if *const if_stmt = new(ctx) ir_if(not_cond);
-
- ir_jump *const break_stmt =
- new(ctx) ir_loop_jump(ir_loop_jump::jump_break);
-
- if_stmt->then_instructions.push_tail(break_stmt);
- instructions->push_tail(if_stmt);
- }
- }
-}
-
-
-ir_rvalue *
-ast_iteration_statement::hir(exec_list *instructions,
- struct _mesa_glsl_parse_state *state)
-{
- void *ctx = state;
-
- /* For-loops and while-loops start a new scope, but do-while loops do not.
- */
- if (mode != ast_do_while)
- state->symbols->push_scope();
-
- if (init_statement != NULL)
- init_statement->hir(instructions, state);
-
- ir_loop *const stmt = new(ctx) ir_loop();
- instructions->push_tail(stmt);
-
- /* Track the current loop nesting. */
- ast_iteration_statement *nesting_ast = state->loop_nesting_ast;
-
- state->loop_nesting_ast = this;
-
- /* Likewise, indicate that following code is closest to a loop,
- * NOT closest to a switch.
- */
- bool saved_is_switch_innermost = state->switch_state.is_switch_innermost;
- state->switch_state.is_switch_innermost = false;
-
- if (mode != ast_do_while)
- condition_to_hir(&stmt->body_instructions, state);
-
- if (body != NULL)
- body->hir(& stmt->body_instructions, state);
-
- if (rest_expression != NULL)
- rest_expression->hir(& stmt->body_instructions, state);
-
- if (mode == ast_do_while)
- condition_to_hir(&stmt->body_instructions, state);
-
- if (mode != ast_do_while)
- state->symbols->pop_scope();
-
- /* Restore previous nesting before returning. */
- state->loop_nesting_ast = nesting_ast;
- state->switch_state.is_switch_innermost = saved_is_switch_innermost;
-
- /* Loops do not have r-values.
- */
- return NULL;
-}
-
-
-/**
- * Determine if the given type is valid for establishing a default precision
- * qualifier.
- *
- * From GLSL ES 3.00 section 4.5.4 ("Default Precision Qualifiers"):
- *
- * "The precision statement
- *
- * precision precision-qualifier type;
- *
- * can be used to establish a default precision qualifier. The type field
- * can be either int or float or any of the sampler types, and the
- * precision-qualifier can be lowp, mediump, or highp."
- *
- * GLSL ES 1.00 has similar language.
GLSL 1.30 doesn't allow precision - * qualifiers on sampler types, but this seems like an oversight (since the - * intention of including these in GLSL 1.30 is to allow compatibility with ES - * shaders). So we allow int, float, and all sampler types regardless of GLSL - * version. - */ -static bool -is_valid_default_precision_type(const struct glsl_type *const type) -{ - if (type == NULL) - return false; - - switch (type->base_type) { - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - /* "int" and "float" are valid, but vectors and matrices are not. */ - return type->vector_elements == 1 && type->matrix_columns == 1; - case GLSL_TYPE_SAMPLER: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_ATOMIC_UINT: - return true; - default: - return false; - } -} - - -ir_rvalue * -ast_type_specifier::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - if (this->default_precision == ast_precision_none && this->structure == NULL) - return NULL; - - YYLTYPE loc = this->get_location(); - - /* If this is a precision statement, check that the type to which it is - * applied is either float or int. - * - * From section 4.5.3 of the GLSL 1.30 spec: - * "The precision statement - * precision precision-qualifier type; - * can be used to establish a default precision qualifier. The type - * field can be either int or float [...]. Any other types or - * qualifiers will result in an error. - */ - if (this->default_precision != ast_precision_none) { - if (!state->check_precision_qualifiers_allowed(&loc)) - return NULL; - - if (this->structure != NULL) { - _mesa_glsl_error(&loc, state, - "precision qualifiers do not apply to structures"); - return NULL; - } - - if (this->array_specifier != NULL) { - _mesa_glsl_error(&loc, state, - "default precision statements do not apply to " - "arrays"); - return NULL; - } - - const struct glsl_type *const type = - state->symbols->get_type(this->type_name); - if (!is_valid_default_precision_type(type)) { - _mesa_glsl_error(&loc, state, - "default precision statements apply only to " - "float, int, and opaque types"); - return NULL; - } - - if (state->es_shader) { - /* Section 4.5.3 (Default Precision Qualifiers) of the GLSL ES 1.00 - * spec says: - * - * "Non-precision qualified declarations will use the precision - * qualifier specified in the most recent precision statement - * that is still in scope. The precision statement has the same - * scoping rules as variable declarations. If it is declared - * inside a compound statement, its effect stops at the end of - * the innermost statement it was declared in. Precision - * statements in nested scopes override precision statements in - * outer scopes. Multiple precision statements for the same basic - * type can appear inside the same scope, with later statements - * overriding earlier statements within that scope." - * - * Default precision specifications follow the same scope rules as - * variables. So, we can track the state of the default precision - * qualifiers in the symbol table, and the rules will just work. This - * is a slight abuse of the symbol table, but it has the semantics - * that we want. - */ - state->symbols->add_default_precision_qualifier(this->type_name, - this->default_precision); - } - - /* FINISHME: Translate precision statements into IR. 
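- *
- * To recap the checks above (illustrative statements):
- *
- *    precision mediump float;    // OK
- *    precision lowp sampler2D;   // OK: opaque types are accepted
- *    precision highp vec4;       // error: only float, int, and
- *                                // opaque types may be defaulted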
*/
- return NULL;
- }
-
- /* _mesa_ast_set_aggregate_type() sets the field so that
- * process_record_constructor() can do type-checking on C-style initializer
- * expressions of structs, but ast_struct_specifier should only be translated
- * to HIR if it is declaring the type of a structure.
- *
- * The ->is_declaration field is false for initializers of variables
- * declared separately from the struct's type definition.
- *
- * struct S { ... }; (is_declaration = true)
- * struct T { ... } t = { ... }; (is_declaration = true)
- * S s = { ... }; (is_declaration = false)
- */
- if (this->structure != NULL && this->structure->is_declaration)
- return this->structure->hir(instructions, state);
-
- return NULL;
-}
-
-
-/**
- * Process a structure or interface block tree into an array of structure fields
- *
- * After parsing, where there are some syntax differences, structures and
- * interface blocks are almost identical. They are similar enough that the
- * AST for each can be processed the same way into a set of
- * \c glsl_struct_field to describe the members.
- *
- * If we're processing an interface block, var_mode should be the type of the
- * interface block (ir_var_shader_in, ir_var_shader_out, ir_var_uniform or
- * ir_var_shader_storage). If we're processing a structure, var_mode should be
- * ir_var_auto.
- *
- * \return
- * The number of fields processed. A pointer to the array structure fields is
- * stored in \c *fields_ret.
- */
-static unsigned
-ast_process_struct_or_iface_block_members(exec_list *instructions,
- struct _mesa_glsl_parse_state *state,
- exec_list *declarations,
- glsl_struct_field **fields_ret,
- bool is_interface,
- enum glsl_matrix_layout matrix_layout,
- bool allow_reserved_names,
- ir_variable_mode var_mode,
- ast_type_qualifier *layout,
- unsigned block_stream,
- unsigned expl_location)
-{
- unsigned decl_count = 0;
-
- /* Make an initial pass over the list of fields to determine how
- * many there are. Each element in this list is an ast_declarator_list.
- * This means that we actually need to count the number of elements in the
- * 'declarations' list in each of the elements.
- */
- foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
- decl_count += decl_list->declarations.length();
- }
-
- /* Allocate storage for the fields and process the field
- * declarations. As the declarations are processed, try to also convert
- * the types to HIR. This ensures that structure definitions embedded in
- * other structure definitions or in interface blocks are processed.
- */
- glsl_struct_field *const fields = ralloc_array(state, glsl_struct_field,
- decl_count);
-
- bool first_member = true;
- bool first_member_has_explicit_location;
-
- unsigned i = 0;
- foreach_list_typed (ast_declarator_list, decl_list, link, declarations) {
- const char *type_name;
- YYLTYPE loc = decl_list->get_location();
-
- decl_list->type->specifier->hir(instructions, state);
-
- /* Section 10.9 of the GLSL ES 1.00 specification states that
- * embedded structure definitions have been removed from the language.
- */
- if (state->es_shader && decl_list->type->specifier->structure != NULL) {
- _mesa_glsl_error(&loc, state, "embedded structure definitions are "
- "not allowed in GLSL ES 1.00");
- }
-
- const glsl_type *decl_type =
- decl_list->type->glsl_type(& type_name, state);
-
- const struct ast_type_qualifier *const qual =
- &decl_list->type->qualifier;
-
- /* From section 4.3.9 of the GLSL 4.40 spec:
- *
- * "[In interface blocks] opaque types are not allowed."
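- *
- * e.g. (illustrative):
- *
- *    uniform Samplers { sampler2D tex; };   // error: opaque member
- *                                           // in an interface block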
- * - * It should be impossible for decl_type to be NULL here. Cases that - * might naturally lead to decl_type being NULL, especially for the - * is_interface case, will have resulted in compilation having - * already halted due to a syntax error. - */ - assert(decl_type); - - if (is_interface && decl_type->contains_opaque()) { - _mesa_glsl_error(&loc, state, - "uniform/buffer in non-default interface block contains " - "opaque variable"); - } - - if (decl_type->contains_atomic()) { - /* From section 4.1.7.3 of the GLSL 4.40 spec: - * - * "Members of structures cannot be declared as atomic counter - * types." - */ - _mesa_glsl_error(&loc, state, "atomic counter in structure, " - "shader storage block or uniform block"); - } - - if (decl_type->contains_image()) { - /* FINISHME: Same problem as with atomic counters. - * FINISHME: Request clarification from Khronos and add - * FINISHME: spec quotation here. - */ - _mesa_glsl_error(&loc, state, - "image in structure, shader storage block or " - "uniform block"); - } - - if (qual->flags.q.explicit_binding) { - _mesa_glsl_error(&loc, state, - "binding layout qualifier cannot be applied " - "to struct or interface block members"); - } - - if (is_interface) { - if (!first_member) { - if (!layout->flags.q.explicit_location && - ((first_member_has_explicit_location && - !qual->flags.q.explicit_location) || - (!first_member_has_explicit_location && - qual->flags.q.explicit_location))) { - _mesa_glsl_error(&loc, state, - "when block-level location layout qualifier " - "is not supplied either all members must " - "have a location layout qualifier or all " - "members must not have a location layout " - "qualifier"); - } - } else { - first_member = false; - first_member_has_explicit_location = - qual->flags.q.explicit_location; - } - } - - if (qual->flags.q.std140 || - qual->flags.q.std430 || - qual->flags.q.packed || - qual->flags.q.shared) { - _mesa_glsl_error(&loc, state, - "uniform/shader storage block layout qualifiers " - "std140, std430, packed, and shared can only be " - "applied to uniform/shader storage blocks, not " - "members"); - } - - if (qual->flags.q.constant) { - _mesa_glsl_error(&loc, state, - "const storage qualifier cannot be applied " - "to struct or interface block members"); - } - - /* From Section 4.4.2.3 (Geometry Outputs) of the GLSL 4.50 spec: - * - * "A block member may be declared with a stream identifier, but - * the specified stream must match the stream associated with the - * containing block." 
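- *
- * e.g. in a geometry shader (illustrative):
- *
- *    layout(stream = 1) out B {
- *       layout(stream = 1) vec4 a;   // OK: matches the block
- *       layout(stream = 2) vec4 b;   // error: 2 vs 1
- *    };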
- */
- if (qual->flags.q.explicit_stream) {
- unsigned qual_stream;
- if (process_qualifier_constant(state, &loc, "stream",
- qual->stream, &qual_stream) &&
- qual_stream != block_stream) {
- _mesa_glsl_error(&loc, state, "stream layout qualifier on "
- "interface block member does not match "
- "the interface block (%u vs %u)", qual_stream,
- block_stream);
- }
- }
-
- if (qual->flags.q.uniform && qual->has_interpolation()) {
- _mesa_glsl_error(&loc, state,
- "interpolation qualifiers cannot be used "
- "with uniform interface blocks");
- }
-
- if ((qual->flags.q.uniform || !is_interface) &&
- qual->has_auxiliary_storage()) {
- _mesa_glsl_error(&loc, state,
- "auxiliary storage qualifiers cannot be used "
- "in uniform blocks or structures.");
- }
-
- if (qual->flags.q.row_major || qual->flags.q.column_major) {
- if (!qual->flags.q.uniform && !qual->flags.q.buffer) {
- _mesa_glsl_error(&loc, state,
- "row_major and column_major can only be "
- "applied to interface blocks");
- } else
- validate_matrix_layout_for_type(state, &loc, decl_type, NULL);
- }
-
- if (qual->flags.q.read_only && qual->flags.q.write_only) {
- _mesa_glsl_error(&loc, state, "buffer variable can't be both "
- "readonly and writeonly.");
- }
-
- foreach_list_typed (ast_declaration, decl, link,
- &decl_list->declarations) {
- YYLTYPE loc = decl->get_location();
-
- if (!allow_reserved_names)
- validate_identifier(decl->identifier, loc, state);
-
- const struct glsl_type *field_type =
- process_array_type(&loc, decl_type, decl->array_specifier, state);
- validate_array_dimensions(field_type, state, &loc);
- fields[i].type = field_type;
- fields[i].name = decl->identifier;
- fields[i].interpolation =
- interpret_interpolation_qualifier(qual, var_mode, state, &loc);
- fields[i].centroid = qual->flags.q.centroid ? 1 : 0;
- fields[i].sample = qual->flags.q.sample ? 1 : 0;
- fields[i].patch = qual->flags.q.patch ? 1 : 0;
- fields[i].precision = qual->precision;
-
- if (qual->flags.q.explicit_location) {
- unsigned qual_location;
- if (process_qualifier_constant(state, &loc, "location",
- qual->location, &qual_location)) {
- fields[i].location = VARYING_SLOT_VAR0 + qual_location;
- expl_location = fields[i].location +
- fields[i].type->count_attribute_slots(false);
- }
- } else {
- if (layout && layout->flags.q.explicit_location) {
- fields[i].location = expl_location;
- expl_location += fields[i].type->count_attribute_slots(false);
- } else {
- fields[i].location = -1;
- }
- }
-
- /* Propagate row- / column-major information down the fields of the
- * structure or interface block. Structures need this data because
- * the structure may contain a structure that contains ... a matrix
- * that needs the proper layout.
- */
- if (field_type->without_array()->is_matrix()
- || field_type->without_array()->is_record()) {
- /* If no layout is specified for the field, inherit the layout
- * from the block.
- */
- fields[i].matrix_layout = matrix_layout;
-
- if (qual->flags.q.row_major)
- fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR;
- else if (qual->flags.q.column_major)
- fields[i].matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR;
-
- /* If we're processing an interface block, the matrix layout must
- * be decided by this point.
- */
- assert(!is_interface
- || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR
- || fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR);
- }
-
- /* Image qualifiers are allowed on buffer variables, which can only
- * be defined inside shader storage buffer objects
- */
- if (layout && var_mode == ir_var_shader_storage) {
- /* For readonly and writeonly qualifiers the field definition,
- * if set, overwrites the layout qualifier.
- */
- if (qual->flags.q.read_only) {
- fields[i].image_read_only = true;
- fields[i].image_write_only = false;
- } else if (qual->flags.q.write_only) {
- fields[i].image_read_only = false;
- fields[i].image_write_only = true;
- } else {
- fields[i].image_read_only = layout->flags.q.read_only;
- fields[i].image_write_only = layout->flags.q.write_only;
- }
-
- /* For other qualifiers, we set the flag if either the layout
- * qualifier or the field qualifier is set
- */
- fields[i].image_coherent = qual->flags.q.coherent ||
- layout->flags.q.coherent;
- fields[i].image_volatile = qual->flags.q._volatile ||
- layout->flags.q._volatile;
- fields[i].image_restrict = qual->flags.q.restrict_flag ||
- layout->flags.q.restrict_flag;
- }
-
- i++;
- }
- }
-
- assert(i == decl_count);
-
- *fields_ret = fields;
- return decl_count;
-}
-
-
-ir_rvalue *
-ast_struct_specifier::hir(exec_list *instructions,
- struct _mesa_glsl_parse_state *state)
-{
- YYLTYPE loc = this->get_location();
-
- /* Section 4.1.8 (Structures) of the GLSL 1.10 spec says:
- *
- * "Anonymous structures are not supported; so embedded structures must
- * have a declarator. A name given to an embedded struct is scoped at
- * the same level as the struct it is embedded in."
- *
- * The same section of the GLSL 1.20 spec says:
- *
- * "Anonymous structures are not supported. Embedded structures are not
- * supported.
- *
- * struct S { float f; };
- * struct T {
- * S; // Error: anonymous structures disallowed
- * struct { ... }; // Error: embedded structures disallowed
- * S s; // Okay: nested structures with name are allowed
- * };"
- *
- * The GLSL ES 1.00 and 3.00 specs have similar language and examples. So,
- * we allow embedded structures in 1.10 only.
- */ - if (state->language_version != 110 && state->struct_specifier_depth != 0) - _mesa_glsl_error(&loc, state, - "embedded structure declarations are not allowed"); - - state->struct_specifier_depth++; - - unsigned expl_location = 0; - if (layout && layout->flags.q.explicit_location) { - if (!process_qualifier_constant(state, &loc, "location", - layout->location, &expl_location)) { - return NULL; - } else { - expl_location = VARYING_SLOT_VAR0 + expl_location; - } - } - - glsl_struct_field *fields; - unsigned decl_count = - ast_process_struct_or_iface_block_members(instructions, - state, - &this->declarations, - &fields, - false, - GLSL_MATRIX_LAYOUT_INHERITED, - false /* allow_reserved_names */, - ir_var_auto, - layout, - 0, /* for interface only */ - expl_location); - - validate_identifier(this->name, loc, state); - - const glsl_type *t = - glsl_type::get_record_instance(fields, decl_count, this->name); - - if (!state->symbols->add_type(name, t)) { - _mesa_glsl_error(& loc, state, "struct `%s' previously defined", name); - } else { - const glsl_type **s = reralloc(state, state->user_structures, - const glsl_type *, - state->num_user_structures + 1); - if (s != NULL) { - s[state->num_user_structures] = t; - state->user_structures = s; - state->num_user_structures++; - } - } - - state->struct_specifier_depth--; - - /* Structure type definitions do not have r-values. - */ - return NULL; -} - - -/** - * Visitor class which detects whether a given interface block has been used. - */ -class interface_block_usage_visitor : public ir_hierarchical_visitor -{ -public: - interface_block_usage_visitor(ir_variable_mode mode, const glsl_type *block) - : mode(mode), block(block), found(false) - { - } - - virtual ir_visitor_status visit(ir_dereference_variable *ir) - { - if (ir->var->data.mode == mode && ir->var->get_interface_type() == block) { - found = true; - return visit_stop; - } - return visit_continue; - } - - bool usage_found() const - { - return this->found; - } - -private: - ir_variable_mode mode; - const glsl_type *block; - bool found; -}; - -static bool -is_unsized_array_last_element(ir_variable *v) -{ - const glsl_type *interface_type = v->get_interface_type(); - int length = interface_type->length; - - assert(v->type->is_unsized_array()); - - /* Check if it is the last element of the interface */ - if (strcmp(interface_type->fields.structure[length-1].name, v->name) == 0) - return true; - return false; -} - -ir_rvalue * -ast_interface_block::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - YYLTYPE loc = this->get_location(); - - /* Interface blocks must be declared at global scope */ - if (state->current_function != NULL) { - _mesa_glsl_error(&loc, state, - "Interface block `%s' must be declared " - "at global scope", - this->block_name); - } - - if (!this->layout.flags.q.buffer && - this->layout.flags.q.std430) { - _mesa_glsl_error(&loc, state, - "std430 storage block layout qualifier is supported " - "only for shader storage blocks"); - } - - /* The ast_interface_block has a list of ast_declarator_lists. We - * need to turn those into ir_variables with an association - * with this uniform block. - */ - enum glsl_interface_packing packing; - if (this->layout.flags.q.shared) { - packing = GLSL_INTERFACE_PACKING_SHARED; - } else if (this->layout.flags.q.packed) { - packing = GLSL_INTERFACE_PACKING_PACKED; - } else if (this->layout.flags.q.std430) { - packing = GLSL_INTERFACE_PACKING_STD430; - } else { - /* The default layout is std140. 
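- *
- * i.e. (illustrative block declarations):
- *
- *    layout(shared) uniform A { vec4 u; };   // shared
- *    layout(std430) buffer B { vec4 s; };    // std430 (buffer blocks
- *                                            // only, checked above)
- *    uniform C { vec4 v; };                  // defaults to std140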
- */
- packing = GLSL_INTERFACE_PACKING_STD140;
- }
-
- ir_variable_mode var_mode;
- const char *iface_type_name;
- if (this->layout.flags.q.in) {
- var_mode = ir_var_shader_in;
- iface_type_name = "in";
- } else if (this->layout.flags.q.out) {
- var_mode = ir_var_shader_out;
- iface_type_name = "out";
- } else if (this->layout.flags.q.uniform) {
- var_mode = ir_var_uniform;
- iface_type_name = "uniform";
- } else if (this->layout.flags.q.buffer) {
- var_mode = ir_var_shader_storage;
- iface_type_name = "buffer";
- } else {
- var_mode = ir_var_auto;
- iface_type_name = "UNKNOWN";
- assert(!"interface block layout qualifier not found!");
- }
-
- enum glsl_matrix_layout matrix_layout = GLSL_MATRIX_LAYOUT_INHERITED;
- if (this->layout.flags.q.row_major)
- matrix_layout = GLSL_MATRIX_LAYOUT_ROW_MAJOR;
- else if (this->layout.flags.q.column_major)
- matrix_layout = GLSL_MATRIX_LAYOUT_COLUMN_MAJOR;
-
- bool redeclaring_per_vertex = strcmp(this->block_name, "gl_PerVertex") == 0;
- exec_list declared_variables;
- glsl_struct_field *fields;
-
- /* Treat an interface block as one level of nesting, so that embedded struct
- * specifiers will be disallowed.
- */
- state->struct_specifier_depth++;
-
- /* For blocks that accept memory qualifiers (i.e. shader storage), verify
- * that we don't have incompatible qualifiers
- */
- if (this->layout.flags.q.read_only && this->layout.flags.q.write_only) {
- _mesa_glsl_error(&loc, state,
- "Interface block sets both readonly and writeonly");
- }
-
- unsigned qual_stream;
- if (!process_qualifier_constant(state, &loc, "stream", this->layout.stream,
- &qual_stream) ||
- !validate_stream_qualifier(&loc, state, qual_stream)) {
- /* If the stream qualifier is invalid it doesn't make sense to continue
- * on and try to compare stream layouts on member variables against it
- * so just return early.
- */
- return NULL;
- }
-
- unsigned expl_location = 0;
- if (layout.flags.q.explicit_location) {
- if (!process_qualifier_constant(state, &loc, "location",
- layout.location, &expl_location)) {
- return NULL;
- } else {
- expl_location = VARYING_SLOT_VAR0 + expl_location;
- }
- }
-
- unsigned int num_variables =
- ast_process_struct_or_iface_block_members(&declared_variables,
- state,
- &this->declarations,
- &fields,
- true,
- matrix_layout,
- redeclaring_per_vertex,
- var_mode,
- &this->layout,
- qual_stream,
- expl_location);
-
- state->struct_specifier_depth--;
-
- if (!redeclaring_per_vertex) {
- validate_identifier(this->block_name, loc, state);
-
- /* From section 4.3.9 ("Interface Blocks") of the GLSL 4.50 spec:
- *
- * "Block names have no other use within a shader beyond interface
- * matching; it is a compile-time error to use a block name at global
- * scope for anything other than as a block name."
- */
- ir_variable *var = state->symbols->get_variable(this->block_name);
- if (var && !var->type->is_interface()) {
- _mesa_glsl_error(&loc, state, "Block name `%s' is "
- "already used in the scope.",
- this->block_name);
- }
- }
-
- const glsl_type *earlier_per_vertex = NULL;
- if (redeclaring_per_vertex) {
- /* Find the previous declaration of gl_PerVertex. If we're redeclaring
- * the named interface block gl_in, we can find it by looking at the
- * previous declaration of gl_in. Otherwise we can find it by looking
- * at the previous declaration of any of the built-in outputs,
- * e.g. gl_Position.
- *
- * Also check that the instance name and array-ness of the redeclaration
- * are correct.
- */ - switch (var_mode) { - case ir_var_shader_in: - if (ir_variable *earlier_gl_in = - state->symbols->get_variable("gl_in")) { - earlier_per_vertex = earlier_gl_in->get_interface_type(); - } else { - _mesa_glsl_error(&loc, state, - "redeclaration of gl_PerVertex input not allowed " - "in the %s shader", - _mesa_shader_stage_to_string(state->stage)); - } - if (this->instance_name == NULL || - strcmp(this->instance_name, "gl_in") != 0 || this->array_specifier == NULL || - !this->array_specifier->is_single_dimension()) { - _mesa_glsl_error(&loc, state, - "gl_PerVertex input must be redeclared as " - "gl_in[]"); - } - break; - case ir_var_shader_out: - if (ir_variable *earlier_gl_Position = - state->symbols->get_variable("gl_Position")) { - earlier_per_vertex = earlier_gl_Position->get_interface_type(); - } else if (ir_variable *earlier_gl_out = - state->symbols->get_variable("gl_out")) { - earlier_per_vertex = earlier_gl_out->get_interface_type(); - } else { - _mesa_glsl_error(&loc, state, - "redeclaration of gl_PerVertex output not " - "allowed in the %s shader", - _mesa_shader_stage_to_string(state->stage)); - } - if (state->stage == MESA_SHADER_TESS_CTRL) { - if (this->instance_name == NULL || - strcmp(this->instance_name, "gl_out") != 0 || this->array_specifier == NULL) { - _mesa_glsl_error(&loc, state, - "gl_PerVertex output must be redeclared as " - "gl_out[]"); - } - } else { - if (this->instance_name != NULL) { - _mesa_glsl_error(&loc, state, - "gl_PerVertex output may not be redeclared with " - "an instance name"); - } - } - break; - default: - _mesa_glsl_error(&loc, state, - "gl_PerVertex must be declared as an input or an " - "output"); - break; - } - - if (earlier_per_vertex == NULL) { - /* An error has already been reported. Bail out to avoid null - * dereferences later in this function. - */ - return NULL; - } - - /* Copy locations from the old gl_PerVertex interface block. */ - for (unsigned i = 0; i < num_variables; i++) { - int j = earlier_per_vertex->field_index(fields[i].name); - if (j == -1) { - _mesa_glsl_error(&loc, state, - "redeclaration of gl_PerVertex must be a subset " - "of the built-in members of gl_PerVertex"); - } else { - fields[i].location = - earlier_per_vertex->fields.structure[j].location; - fields[i].interpolation = - earlier_per_vertex->fields.structure[j].interpolation; - fields[i].centroid = - earlier_per_vertex->fields.structure[j].centroid; - fields[i].sample = - earlier_per_vertex->fields.structure[j].sample; - fields[i].patch = - earlier_per_vertex->fields.structure[j].patch; - fields[i].precision = - earlier_per_vertex->fields.structure[j].precision; - } - } - - /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10 - * spec: - * - * If a built-in interface block is redeclared, it must appear in - * the shader before any use of any member included in the built-in - * declaration, or a compilation error will result. - * - * This appears to be a clarification to the behaviour established for - * gl_PerVertex by GLSL 1.50, therefore we implement this behaviour - * regardless of GLSL version. 
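- *
- * e.g. in a geometry shader (illustrative):
- *
- *    in gl_PerVertex {
- *       vec4 gl_Position;
- *    } gl_in[];                   // must appear before any use
- *
- *    // ... gl_in[0].gl_Position may be used from here on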
- */ - interface_block_usage_visitor v(var_mode, earlier_per_vertex); - v.run(instructions); - if (v.usage_found()) { - _mesa_glsl_error(&loc, state, - "redeclaration of a built-in interface block must " - "appear before any use of any member of the " - "interface block"); - } - } - - const glsl_type *block_type = - glsl_type::get_interface_instance(fields, - num_variables, - packing, - this->block_name); - - if (!state->symbols->add_interface(block_type->name, block_type, var_mode)) { - YYLTYPE loc = this->get_location(); - _mesa_glsl_error(&loc, state, "interface block `%s' with type `%s' " - "already taken in the current scope", - this->block_name, iface_type_name); - } - - /* Since interface blocks cannot contain statements, it should be - * impossible for the block to generate any instructions. - */ - assert(declared_variables.is_empty()); - - /* From section 4.3.4 (Inputs) of the GLSL 1.50 spec: - * - * Geometry shader input variables get the per-vertex values written - * out by vertex shader output variables of the same names. Since a - * geometry shader operates on a set of vertices, each input varying - * variable (or input block, see interface blocks below) needs to be - * declared as an array. - */ - if (state->stage == MESA_SHADER_GEOMETRY && this->array_specifier == NULL && - var_mode == ir_var_shader_in) { - _mesa_glsl_error(&loc, state, "geometry shader inputs must be arrays"); - } else if ((state->stage == MESA_SHADER_TESS_CTRL || - state->stage == MESA_SHADER_TESS_EVAL) && - this->array_specifier == NULL && - var_mode == ir_var_shader_in) { - _mesa_glsl_error(&loc, state, "per-vertex tessellation shader inputs must be arrays"); - } else if (state->stage == MESA_SHADER_TESS_CTRL && - this->array_specifier == NULL && - var_mode == ir_var_shader_out) { - _mesa_glsl_error(&loc, state, "tessellation control shader outputs must be arrays"); - } - - - /* Page 39 (page 45 of the PDF) of section 4.3.7 in the GLSL ES 3.00 spec - * says: - * - * "If an instance name (instance-name) is used, then it puts all the - * members inside a scope within its own name space, accessed with the - * field selector ( . ) operator (analogously to structures)." - */ - if (this->instance_name) { - if (redeclaring_per_vertex) { - /* When a built-in in an unnamed interface block is redeclared, - * get_variable_being_redeclared() calls - * check_builtin_array_max_size() to make sure that built-in array - * variables aren't redeclared to illegal sizes. But we're looking - * at a redeclaration of a named built-in interface block. So we - * have to manually call check_builtin_array_max_size() for all parts - * of the interface that are arrays. - */ - for (unsigned i = 0; i < num_variables; i++) { - if (fields[i].type->is_array()) { - const unsigned size = fields[i].type->array_size(); - check_builtin_array_max_size(fields[i].name, size, loc, state); - } - } - } else { - validate_identifier(this->instance_name, loc, state); - } - - ir_variable *var; - - if (this->array_specifier != NULL) { - const glsl_type *block_array_type = - process_array_type(&loc, block_type, this->array_specifier, state); - - /* Section 4.3.7 (Interface Blocks) of the GLSL 1.50 spec says: - * - * For uniform blocks declared an array, each individual array - * element corresponds to a separate buffer object backing one - * instance of the block. As the array size indicates the number - * of buffer objects needed, uniform block array declarations - * must specify an array size. 
- * - * And a few paragraphs later: - * - * Geometry shader input blocks must be declared as arrays and - * follow the array declaration and linking rules for all - * geometry shader inputs. All other input and output block - * arrays must specify an array size. - * - * The same applies to tessellation shaders. - * - * The upshot of this is that the only circumstance where an - * interface array size *doesn't* need to be specified is on a - * geometry shader input, tessellation control shader input, - * tessellation control shader output, and tessellation evaluation - * shader input. - */ - if (block_array_type->is_unsized_array()) { - bool allow_inputs = state->stage == MESA_SHADER_GEOMETRY || - state->stage == MESA_SHADER_TESS_CTRL || - state->stage == MESA_SHADER_TESS_EVAL; - bool allow_outputs = state->stage == MESA_SHADER_TESS_CTRL; - - if (this->layout.flags.q.in) { - if (!allow_inputs) - _mesa_glsl_error(&loc, state, - "unsized input block arrays not allowed in " - "%s shader", - _mesa_shader_stage_to_string(state->stage)); - } else if (this->layout.flags.q.out) { - if (!allow_outputs) - _mesa_glsl_error(&loc, state, - "unsized output block arrays not allowed in " - "%s shader", - _mesa_shader_stage_to_string(state->stage)); - } else { - /* by elimination, this is a uniform block array */ - _mesa_glsl_error(&loc, state, - "unsized uniform block arrays not allowed in " - "%s shader", - _mesa_shader_stage_to_string(state->stage)); - } - } - - /* From section 4.3.9 (Interface Blocks) of the GLSL ES 3.10 spec: - * - * * Arrays of arrays of blocks are not allowed - */ - if (state->es_shader && block_array_type->is_array() && - block_array_type->fields.array->is_array()) { - _mesa_glsl_error(&loc, state, - "arrays of arrays interface blocks are " - "not allowed"); - } - - var = new(state) ir_variable(block_array_type, - this->instance_name, - var_mode); - } else { - var = new(state) ir_variable(block_type, - this->instance_name, - var_mode); - } - - var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED - ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout; - - if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) - var->data.read_only = true; - - if (state->stage == MESA_SHADER_GEOMETRY && var_mode == ir_var_shader_in) - handle_geometry_shader_input_decl(state, loc, var); - else if ((state->stage == MESA_SHADER_TESS_CTRL || - state->stage == MESA_SHADER_TESS_EVAL) && var_mode == ir_var_shader_in) - handle_tess_shader_input_decl(state, loc, var); - else if (state->stage == MESA_SHADER_TESS_CTRL && var_mode == ir_var_shader_out) - handle_tess_ctrl_shader_output_decl(state, loc, var); - - for (unsigned i = 0; i < num_variables; i++) { - if (fields[i].type->is_unsized_array()) { - if (var_mode == ir_var_shader_storage) { - if (i != (num_variables - 1)) { - _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " - "only last member of a shader storage block " - "can be defined as unsized array", - fields[i].name); - } - } else { - /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays": - * - * "If an array is declared as the last member of a shader storage - * block and the size is not specified at compile-time, it is - * sized at run-time. In all other cases, arrays are sized only - * at compile-time." 
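A sketch of the rule just quoted (GLSL ES 3.10; block and member names are illustrative):

   layout(std430, binding = 0) buffer Particles {
      uint count;
      vec4 position[];   // OK: run-time sized, last member of an SSBO
   };

   uniform Weights {
      float w[];         // error in GLSL ES: only the last member of a
   };                    // shader storage block may be unsized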
- */ - if (state->es_shader) { - _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " - "only last member of a shader storage block " - "can be defined as unsized array", - fields[i].name); - } - } - } - } - - if (ir_variable *earlier = - state->symbols->get_variable(this->instance_name)) { - if (!redeclaring_per_vertex) { - _mesa_glsl_error(&loc, state, "`%s' redeclared", - this->instance_name); - } - earlier->data.how_declared = ir_var_declared_normally; - earlier->type = var->type; - earlier->reinit_interface_type(block_type); - delete var; - } else { - if (this->layout.flags.q.explicit_binding) { - apply_explicit_binding(state, &loc, var, var->type, - &this->layout); - } - - var->data.stream = qual_stream; - if (layout.flags.q.explicit_location) { - var->data.location = expl_location; - var->data.explicit_location = true; - } - - state->symbols->add_variable(var); - instructions->push_tail(var); - } - } else { - /* In order to have an array size, the block must also be declared with - * an instance name. - */ - assert(this->array_specifier == NULL); - - for (unsigned i = 0; i < num_variables; i++) { - ir_variable *var = - new(state) ir_variable(fields[i].type, - ralloc_strdup(state, fields[i].name), - var_mode); - var->data.interpolation = fields[i].interpolation; - var->data.centroid = fields[i].centroid; - var->data.sample = fields[i].sample; - var->data.patch = fields[i].patch; - var->data.stream = qual_stream; - var->data.location = fields[i].location; - if (fields[i].location != -1) - var->data.explicit_location = true; - var->init_interface_type(block_type); - - if (var_mode == ir_var_shader_in || var_mode == ir_var_uniform) - var->data.read_only = true; - - /* Precision qualifiers do not have any meaning in Desktop GLSL */ - if (state->es_shader) { - var->data.precision = - select_gles_precision(fields[i].precision, fields[i].type, - state, &loc); - } - - if (fields[i].matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED) { - var->data.matrix_layout = matrix_layout == GLSL_MATRIX_LAYOUT_INHERITED - ? GLSL_MATRIX_LAYOUT_COLUMN_MAJOR : matrix_layout; - } else { - var->data.matrix_layout = fields[i].matrix_layout; - } - - if (var->data.mode == ir_var_shader_storage) { - var->data.image_read_only = fields[i].image_read_only; - var->data.image_write_only = fields[i].image_write_only; - var->data.image_coherent = fields[i].image_coherent; - var->data.image_volatile = fields[i].image_volatile; - var->data.image_restrict = fields[i].image_restrict; - } - - /* Examine var name here since var may get deleted in the next call */ - bool var_is_gl_id = is_gl_identifier(var->name); - - if (redeclaring_per_vertex) { - ir_variable *earlier = - get_variable_being_redeclared(var, loc, state, - true /* allow_all_redeclarations */); - if (!var_is_gl_id || earlier == NULL) { - _mesa_glsl_error(&loc, state, - "redeclaration of gl_PerVertex can only " - "include built-in variables"); - } else if (earlier->data.how_declared == ir_var_declared_normally) { - _mesa_glsl_error(&loc, state, - "`%s' has already been redeclared", - earlier->name); - } else { - earlier->data.how_declared = ir_var_declared_in_block; - earlier->reinit_interface_type(block_type); - } - continue; - } - - if (state->symbols->get_variable(var->name) != NULL) - _mesa_glsl_error(&loc, state, "`%s' redeclared", var->name); - - /* Propagate the "binding" keyword into this UBO/SSBO's fields. - * The UBO declaration itself doesn't get an ir_variable unless it - * has an instance name. This is ugly. 
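The case being handled is, for instance (block and member names are illustrative):

   layout(binding = 2) uniform PerFrame {   // no instance name, so the
      mat4 mvp;                             // block itself gets no
      vec4 light_dir;                       // ir_variable; binding = 2 is
   };                                       // applied to each member instead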
- */ - if (this->layout.flags.q.explicit_binding) { - apply_explicit_binding(state, &loc, var, - var->get_interface_type(), &this->layout); - } - - if (var->type->is_unsized_array()) { - if (var->is_in_shader_storage_block()) { - if (!is_unsized_array_last_element(var)) { - _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " - "only last member of a shader storage block " - "can be defined as unsized array", - var->name); - } - var->data.from_ssbo_unsized_array = true; - } else { - /* From GLSL ES 3.10 spec, section 4.1.9 "Arrays": - * - * "If an array is declared as the last member of a shader storage - * block and the size is not specified at compile-time, it is - * sized at run-time. In all other cases, arrays are sized only - * at compile-time." - */ - if (state->es_shader) { - _mesa_glsl_error(&loc, state, "unsized array `%s' definition: " - "only last member of a shader storage block " - "can be defined as unsized array", - var->name); - } - } - } - - state->symbols->add_variable(var); - instructions->push_tail(var); - } - - if (redeclaring_per_vertex && block_type != earlier_per_vertex) { - /* From section 7.1 ("Built-in Language Variables") of the GLSL 4.10 spec: - * - * It is also a compilation error ... to redeclare a built-in - * block and then use a member from that built-in block that was - * not included in the redeclaration. - * - * This appears to be a clarification to the behaviour established - * for gl_PerVertex by GLSL 1.50, therefore we implement this - * behaviour regardless of GLSL version. - * - * To prevent the shader from using a member that was not included in - * the redeclaration, we disable any ir_variables that are still - * associated with the old declaration of gl_PerVertex (since we've - * already updated all of the variables contained in the new - * gl_PerVertex to point to it). - * - * As a side effect this will prevent - * validate_intrastage_interface_blocks() from getting confused and - * thinking there are conflicting definitions of gl_PerVertex in the - * shader. - */ - foreach_in_list_safe(ir_instruction, node, instructions) { - ir_variable *const var = node->as_variable(); - if (var != NULL && - var->get_interface_type() == earlier_per_vertex && - var->data.mode == var_mode) { - if (var->data.how_declared == ir_var_declared_normally) { - _mesa_glsl_error(&loc, state, - "redeclaration of gl_PerVertex cannot " - "follow a redeclaration of `%s'", - var->name); - } - state->symbols->disable_variable(var->name); - var->remove(); - } - } - } - } - - return NULL; -} - - -ir_rvalue * -ast_tcs_output_layout::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - YYLTYPE loc = this->get_location(); - - unsigned num_vertices; - if (!state->out_qualifier->vertices-> - process_qualifier_constant(state, "vertices", &num_vertices, - false)) { - /* return here to stop cascading incorrect error messages */ - return NULL; - } - - /* If any shader outputs occurred before this declaration and specified an - * array size, make sure the size they specified is consistent with the - * primitive type. 
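A sketch of the declarations involved (tessellation control shader; `tc_color' is an illustrative name):

   out vec4 tc_color[];        // declared earlier, implicitly sized
   layout(vertices = 3) out;   // sizes tc_color to 3; an earlier access to
                               // tc_color[3] or beyond would now be an error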
- */ - if (state->tcs_output_size != 0 && state->tcs_output_size != num_vertices) { - _mesa_glsl_error(&loc, state, - "this tessellation control shader output layout " - "specifies %u vertices, but a previous output " - "is declared with size %u", - num_vertices, state->tcs_output_size); - return NULL; - } - - state->tcs_output_vertices_specified = true; - - /* If any shader outputs occurred before this declaration and did not - * specify an array size, their size is determined now. - */ - foreach_in_list (ir_instruction, node, instructions) { - ir_variable *var = node->as_variable(); - if (var == NULL || var->data.mode != ir_var_shader_out) - continue; - - /* Note: Not all tessellation control shader output are arrays. */ - if (!var->type->is_unsized_array() || var->data.patch) - continue; - - if (var->data.max_array_access >= num_vertices) { - _mesa_glsl_error(&loc, state, - "this tessellation control shader output layout " - "specifies %u vertices, but an access to element " - "%u of output `%s' already exists", num_vertices, - var->data.max_array_access, var->name); - } else { - var->type = glsl_type::get_array_instance(var->type->fields.array, - num_vertices); - } - } - - return NULL; -} - - -ir_rvalue * -ast_gs_input_layout::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - YYLTYPE loc = this->get_location(); - - /* If any geometry input layout declaration preceded this one, make sure it - * was consistent with this one. - */ - if (state->gs_input_prim_type_specified && - state->in_qualifier->prim_type != this->prim_type) { - _mesa_glsl_error(&loc, state, - "geometry shader input layout does not match" - " previous declaration"); - return NULL; - } - - /* If any shader inputs occurred before this declaration and specified an - * array size, make sure the size they specified is consistent with the - * primitive type. - */ - unsigned num_vertices = vertices_per_prim(this->prim_type); - if (state->gs_input_size != 0 && state->gs_input_size != num_vertices) { - _mesa_glsl_error(&loc, state, - "this geometry shader input layout implies %u vertices" - " per primitive, but a previous input is declared" - " with size %u", num_vertices, state->gs_input_size); - return NULL; - } - - state->gs_input_prim_type_specified = true; - - /* If any shader inputs occurred before this declaration and did not - * specify an array size, their size is determined now. - */ - foreach_in_list(ir_instruction, node, instructions) { - ir_variable *var = node->as_variable(); - if (var == NULL || var->data.mode != ir_var_shader_in) - continue; - - /* Note: gl_PrimitiveIDIn has mode ir_var_shader_in, but it's not an - * array; skip it. - */ - - if (var->type->is_unsized_array()) { - if (var->data.max_array_access >= num_vertices) { - _mesa_glsl_error(&loc, state, - "this geometry shader input layout implies %u" - " vertices, but an access to element %u of input" - " `%s' already exists", num_vertices, - var->data.max_array_access, var->name); - } else { - var->type = glsl_type::get_array_instance(var->type->fields.array, - num_vertices); - } - } - } - - return NULL; -} - - -ir_rvalue * -ast_cs_input_layout::hir(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - YYLTYPE loc = this->get_location(); - - /* From the ARB_compute_shader specification: - * - * If the local size of the shader in any dimension is greater - * than the maximum size supported by the implementation for that - * dimension, a compile-time error results. 
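For example (compute shader; the actual limits vary per implementation):

   layout(local_size_x = 1024, local_size_y = 2) in;
   // compile-time error if 1024 exceeds MAX_COMPUTE_WORK_GROUP_SIZE for x,
   // or if 1024 * 2 * 1 exceeds MAX_COMPUTE_WORK_GROUP_INVOCATIONS; the
   // unspecified local_size_z defaults to 1, and gl_WorkGroupSize becomes
   // uvec3(1024u, 2u, 1u)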
- * - * It is not clear from the spec how the error should be reported if - * the total size of the work group exceeds - * MAX_COMPUTE_WORK_GROUP_INVOCATIONS, but it seems reasonable to - * report it at compile time as well. - */ - GLuint64 total_invocations = 1; - unsigned qual_local_size[3]; - for (int i = 0; i < 3; i++) { - - char *local_size_str = ralloc_asprintf(NULL, "invalid local_size_%c", - 'x' + i); - /* Infer a local_size of 1 for unspecified dimensions */ - if (this->local_size[i] == NULL) { - qual_local_size[i] = 1; - } else if (!this->local_size[i]-> - process_qualifier_constant(state, local_size_str, - &qual_local_size[i], false)) { - ralloc_free(local_size_str); - return NULL; - } - ralloc_free(local_size_str); - - if (qual_local_size[i] > state->ctx->Const.MaxComputeWorkGroupSize[i]) { - _mesa_glsl_error(&loc, state, - "local_size_%c exceeds MAX_COMPUTE_WORK_GROUP_SIZE" - " (%d)", 'x' + i, - state->ctx->Const.MaxComputeWorkGroupSize[i]); - break; - } - total_invocations *= qual_local_size[i]; - if (total_invocations > - state->ctx->Const.MaxComputeWorkGroupInvocations) { - _mesa_glsl_error(&loc, state, - "product of local_sizes exceeds " - "MAX_COMPUTE_WORK_GROUP_INVOCATIONS (%d)", - state->ctx->Const.MaxComputeWorkGroupInvocations); - break; - } - } - - /* If any compute input layout declaration preceded this one, make sure it - * was consistent with this one. - */ - if (state->cs_input_local_size_specified) { - for (int i = 0; i < 3; i++) { - if (state->cs_input_local_size[i] != qual_local_size[i]) { - _mesa_glsl_error(&loc, state, - "compute shader input layout does not match" - " previous declaration"); - return NULL; - } - } - } - - state->cs_input_local_size_specified = true; - for (int i = 0; i < 3; i++) - state->cs_input_local_size[i] = qual_local_size[i]; - - /* We may now declare the built-in constant gl_WorkGroupSize (see - * builtin_variable_generator::generate_constants() for why we didn't - * declare it earlier). - */ - ir_variable *var = new(state->symbols) - ir_variable(glsl_type::uvec3_type, "gl_WorkGroupSize", ir_var_auto); - var->data.how_declared = ir_var_declared_implicitly; - var->data.read_only = true; - instructions->push_tail(var); - state->symbols->add_variable(var); - ir_constant_data data; - memset(&data, 0, sizeof(data)); - for (int i = 0; i < 3; i++) - data.u[i] = qual_local_size[i]; - var->constant_value = new(var) ir_constant(glsl_type::uvec3_type, &data); - var->constant_initializer = - new(var) ir_constant(glsl_type::uvec3_type, &data); - var->data.has_initializer = true; - - return NULL; -} - - -static void -detect_conflicting_assignments(struct _mesa_glsl_parse_state *state, - exec_list *instructions) -{ - bool gl_FragColor_assigned = false; - bool gl_FragData_assigned = false; - bool gl_FragSecondaryColor_assigned = false; - bool gl_FragSecondaryData_assigned = false; - bool user_defined_fs_output_assigned = false; - ir_variable *user_defined_fs_output = NULL; - - /* It would be nice to have proper location information. 
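The conflicts detected below look like this sketch (GLSL 1.30 fragment shader; `my_color' is an illustrative name):

   #version 130
   out vec4 my_color;
   void main()
   {
      gl_FragColor = vec4(1.0);   // error: a shader may statically write
      my_color = vec4(0.0);       // gl_FragColor, gl_FragData, or
   }                              // user-defined outputs, but not more
                                  // than one of them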
*/ - YYLTYPE loc; - memset(&loc, 0, sizeof(loc)); - - foreach_in_list(ir_instruction, node, instructions) { - ir_variable *var = node->as_variable(); - - if (!var || !var->data.assigned) - continue; - - if (strcmp(var->name, "gl_FragColor") == 0) - gl_FragColor_assigned = true; - else if (strcmp(var->name, "gl_FragData") == 0) - gl_FragData_assigned = true; - else if (strcmp(var->name, "gl_SecondaryFragColorEXT") == 0) - gl_FragSecondaryColor_assigned = true; - else if (strcmp(var->name, "gl_SecondaryFragDataEXT") == 0) - gl_FragSecondaryData_assigned = true; - else if (!is_gl_identifier(var->name)) { - if (state->stage == MESA_SHADER_FRAGMENT && - var->data.mode == ir_var_shader_out) { - user_defined_fs_output_assigned = true; - user_defined_fs_output = var; - } - } - } - - /* From the GLSL 1.30 spec: - * - * "If a shader statically assigns a value to gl_FragColor, it - * may not assign a value to any element of gl_FragData. If a - * shader statically writes a value to any element of - * gl_FragData, it may not assign a value to - * gl_FragColor. That is, a shader may assign values to either - * gl_FragColor or gl_FragData, but not both. Multiple shaders - * linked together must also consistently write just one of - * these variables. Similarly, if user declared output - * variables are in use (statically assigned to), then the - * built-in variables gl_FragColor and gl_FragData may not be - * assigned to. These incorrect usages all generate compile - * time errors." - */ - if (gl_FragColor_assigned && gl_FragData_assigned) { - _mesa_glsl_error(&loc, state, "fragment shader writes to both " - "`gl_FragColor' and `gl_FragData'"); - } else if (gl_FragColor_assigned && user_defined_fs_output_assigned) { - _mesa_glsl_error(&loc, state, "fragment shader writes to both " - "`gl_FragColor' and `%s'", - user_defined_fs_output->name); - } else if (gl_FragSecondaryColor_assigned && gl_FragSecondaryData_assigned) { - _mesa_glsl_error(&loc, state, "fragment shader writes to both " - "`gl_FragSecondaryColorEXT' and" - " `gl_FragSecondaryDataEXT'"); - } else if (gl_FragColor_assigned && gl_FragSecondaryData_assigned) { - _mesa_glsl_error(&loc, state, "fragment shader writes to both " - "`gl_FragColor' and" - " `gl_FragSecondaryDataEXT'"); - } else if (gl_FragData_assigned && gl_FragSecondaryColor_assigned) { - _mesa_glsl_error(&loc, state, "fragment shader writes to both " - "`gl_FragData' and" - " `gl_FragSecondaryColorEXT'"); - } else if (gl_FragData_assigned && user_defined_fs_output_assigned) { - _mesa_glsl_error(&loc, state, "fragment shader writes to both " - "`gl_FragData' and `%s'", - user_defined_fs_output->name); - } - - if ((gl_FragSecondaryColor_assigned || gl_FragSecondaryData_assigned) && - !state->EXT_blend_func_extended_enable) { - _mesa_glsl_error(&loc, state, - "Dual source blending requires EXT_blend_func_extended"); - } -} - - -static void -remove_per_vertex_blocks(exec_list *instructions, - _mesa_glsl_parse_state *state, ir_variable_mode mode) -{ - /* Find the gl_PerVertex interface block of the appropriate (in/out) mode, - * if it exists in this shader type. 

- */ - const glsl_type *per_vertex = NULL; - switch (mode) { - case ir_var_shader_in: - if (ir_variable *gl_in = state->symbols->get_variable("gl_in")) - per_vertex = gl_in->get_interface_type(); - break; - case ir_var_shader_out: - if (ir_variable *gl_Position = - state->symbols->get_variable("gl_Position")) { - per_vertex = gl_Position->get_interface_type(); - } - break; - default: - assert(!"Unexpected mode"); - break; - } - - /* If we didn't find a built-in gl_PerVertex interface block, then we don't - * need to do anything. - */ - if (per_vertex == NULL) - return; - - /* If the interface block is used by the shader, then we don't need to do - * anything. - */ - interface_block_usage_visitor v(mode, per_vertex); - v.run(instructions); - if (v.usage_found()) - return; - - /* Remove any ir_variable declarations that refer to the interface block - * we're removing. - */ - foreach_in_list_safe(ir_instruction, node, instructions) { - ir_variable *const var = node->as_variable(); - if (var != NULL && var->get_interface_type() == per_vertex && - var->data.mode == mode) { - state->symbols->disable_variable(var->name); - var->remove(); - } - } -} diff --git a/src/glsl/ast_type.cpp b/src/glsl/ast_type.cpp deleted file mode 100644 index e0e331152dd..00000000000 --- a/src/glsl/ast_type.cpp +++ /dev/null @@ -1,548 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "ast.h" - -void -ast_type_specifier::print(void) const -{ - if (structure) { - structure->print(); - } else { - printf("%s ", type_name); - } - - if (array_specifier) { - array_specifier->print(); - } -} - -bool -ast_fully_specified_type::has_qualifiers(_mesa_glsl_parse_state *state) const -{ - /* 'subroutine' isn't a real qualifier.
*/ - ast_type_qualifier subroutine_only; - subroutine_only.flags.i = 0; - subroutine_only.flags.q.subroutine = 1; - subroutine_only.flags.q.subroutine_def = 1; - if (state->has_explicit_uniform_location()) { - subroutine_only.flags.q.explicit_index = 1; - } - return (this->qualifier.flags.i & ~subroutine_only.flags.i) != 0; -} - -bool ast_type_qualifier::has_interpolation() const -{ - return this->flags.q.smooth - || this->flags.q.flat - || this->flags.q.noperspective; -} - -bool -ast_type_qualifier::has_layout() const -{ - return this->flags.q.origin_upper_left - || this->flags.q.pixel_center_integer - || this->flags.q.depth_any - || this->flags.q.depth_greater - || this->flags.q.depth_less - || this->flags.q.depth_unchanged - || this->flags.q.std140 - || this->flags.q.std430 - || this->flags.q.shared - || this->flags.q.column_major - || this->flags.q.row_major - || this->flags.q.packed - || this->flags.q.explicit_location - || this->flags.q.explicit_image_format - || this->flags.q.explicit_index - || this->flags.q.explicit_binding - || this->flags.q.explicit_offset - || this->flags.q.explicit_stream; -} - -bool -ast_type_qualifier::has_storage() const -{ - return this->flags.q.constant - || this->flags.q.attribute - || this->flags.q.varying - || this->flags.q.in - || this->flags.q.out - || this->flags.q.uniform - || this->flags.q.buffer - || this->flags.q.shared_storage; -} - -bool -ast_type_qualifier::has_auxiliary_storage() const -{ - return this->flags.q.centroid - || this->flags.q.sample - || this->flags.q.patch; -} - -const char* -ast_type_qualifier::interpolation_string() const -{ - if (this->flags.q.smooth) - return "smooth"; - else if (this->flags.q.flat) - return "flat"; - else if (this->flags.q.noperspective) - return "noperspective"; - else - return NULL; -} - -/** - * This function merges both duplicate identifiers within a single layout and - * multiple layout qualifiers on a single variable declaration. The - * is_single_layout_merge param is used to differentiate between the two. - */ -bool -ast_type_qualifier::merge_qualifier(YYLTYPE *loc, - _mesa_glsl_parse_state *state, - const ast_type_qualifier &q, - bool is_single_layout_merge) -{ - ast_type_qualifier ubo_mat_mask; - ubo_mat_mask.flags.i = 0; - ubo_mat_mask.flags.q.row_major = 1; - ubo_mat_mask.flags.q.column_major = 1; - - ast_type_qualifier ubo_layout_mask; - ubo_layout_mask.flags.i = 0; - ubo_layout_mask.flags.q.std140 = 1; - ubo_layout_mask.flags.q.packed = 1; - ubo_layout_mask.flags.q.shared = 1; - ubo_layout_mask.flags.q.std430 = 1; - - ast_type_qualifier ubo_binding_mask; - ubo_binding_mask.flags.i = 0; - ubo_binding_mask.flags.q.explicit_binding = 1; - ubo_binding_mask.flags.q.explicit_offset = 1; - - ast_type_qualifier stream_layout_mask; - stream_layout_mask.flags.i = 0; - stream_layout_mask.flags.q.stream = 1; - - /* Uniform block layout qualifiers get to overwrite each - * other (rightmost having priority), while all other - * qualifiers currently don't allow duplicates. - */ - ast_type_qualifier allowed_duplicates_mask; - allowed_duplicates_mask.flags.i = - ubo_mat_mask.flags.i | - ubo_layout_mask.flags.i | - ubo_binding_mask.flags.i; - - /* Geometry shaders can have several layout qualifiers - * assigning different stream values.
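For instance (geometry shader with ARB_gpu_shader5; output names are illustrative):

   #version 150
   #extension GL_ARB_gpu_shader5 : enable
   layout(points) in;
   layout(points, max_vertices = 4) out;
   layout(stream = 0) out vec4 color0;   // the stream qualifier may appear
   layout(stream = 1) out vec4 color1;   // on several declarations with
   layout(stream = 2) out vec4 color2;   // different values, so it is exempt
                                         // from the duplicate check below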
- */ - if (state->stage == MESA_SHADER_GEOMETRY) - allowed_duplicates_mask.flags.i |= - stream_layout_mask.flags.i; - - if (is_single_layout_merge && !state->has_enhanced_layouts() && - (this->flags.i & q.flags.i & ~allowed_duplicates_mask.flags.i) != 0) { - _mesa_glsl_error(loc, state, - "duplicate layout qualifiers used"); - return false; - } - - if (q.flags.q.prim_type) { - if (this->flags.q.prim_type && this->prim_type != q.prim_type) { - _mesa_glsl_error(loc, state, - "conflicting primitive type qualifiers used"); - return false; - } - this->prim_type = q.prim_type; - } - - if (q.flags.q.max_vertices) { - if (this->max_vertices) { - this->max_vertices->merge_qualifier(q.max_vertices); - } else { - this->max_vertices = q.max_vertices; - } - } - - if (q.flags.q.subroutine_def) { - if (this->flags.q.subroutine_def) { - _mesa_glsl_error(loc, state, - "conflicting subroutine qualifiers used"); - } else { - this->subroutine_list = q.subroutine_list; - } - } - - if (q.flags.q.invocations) { - if (this->invocations) { - this->invocations->merge_qualifier(q.invocations); - } else { - this->invocations = q.invocations; - } - } - - if (state->stage == MESA_SHADER_GEOMETRY && - state->has_explicit_attrib_stream()) { - if (!this->flags.q.explicit_stream) { - if (q.flags.q.stream) { - this->flags.q.stream = 1; - this->stream = q.stream; - } else if (!this->flags.q.stream && this->flags.q.out) { - /* Assign default global stream value */ - this->flags.q.stream = 1; - this->stream = state->out_qualifier->stream; - } - } - } - - if (q.flags.q.vertices) { - if (this->vertices) { - this->vertices->merge_qualifier(q.vertices); - } else { - this->vertices = q.vertices; - } - } - - if (q.flags.q.vertex_spacing) { - if (this->flags.q.vertex_spacing && this->vertex_spacing != q.vertex_spacing) { - _mesa_glsl_error(loc, state, - "conflicting vertex spacing used"); - return false; - } - this->vertex_spacing = q.vertex_spacing; - } - - if (q.flags.q.ordering) { - if (this->flags.q.ordering && this->ordering != q.ordering) { - _mesa_glsl_error(loc, state, - "conflicting ordering used"); - return false; - } - this->ordering = q.ordering; - } - - if (q.flags.q.point_mode) { - if (this->flags.q.point_mode && this->point_mode != q.point_mode) { - _mesa_glsl_error(loc, state, - "conflicting point mode used"); - return false; - } - this->point_mode = q.point_mode; - } - - if ((q.flags.i & ubo_mat_mask.flags.i) != 0) - this->flags.i &= ~ubo_mat_mask.flags.i; - if ((q.flags.i & ubo_layout_mask.flags.i) != 0) - this->flags.i &= ~ubo_layout_mask.flags.i; - - for (int i = 0; i < 3; i++) { - if (q.flags.q.local_size & (1 << i)) { - if (this->local_size[i]) { - this->local_size[i]->merge_qualifier(q.local_size[i]); - } else { - this->local_size[i] = q.local_size[i]; - } - } - } - - this->flags.i |= q.flags.i; - - if (q.flags.q.explicit_location) - this->location = q.location; - - if (q.flags.q.explicit_index) - this->index = q.index; - - if (q.flags.q.explicit_binding) - this->binding = q.binding; - - if (q.flags.q.explicit_offset) - this->offset = q.offset; - - if (q.precision != ast_precision_none) - this->precision = q.precision; - - if (q.flags.q.explicit_image_format) { - this->image_format = q.image_format; - this->image_base_type = q.image_base_type; - } - - return true; -} - -bool -ast_type_qualifier::merge_out_qualifier(YYLTYPE *loc, - _mesa_glsl_parse_state *state, - const ast_type_qualifier &q, - ast_node* &node, bool create_node) -{ - void *mem_ctx = state; - const bool r = this->merge_qualifier(loc, state, q, 
false); - - if (state->stage == MESA_SHADER_GEOMETRY) { - if (q.flags.q.prim_type) { - /* Make sure this is a valid output primitive type. */ - switch (q.prim_type) { - case GL_POINTS: - case GL_LINE_STRIP: - case GL_TRIANGLE_STRIP: - break; - default: - _mesa_glsl_error(loc, state, "invalid geometry shader output " - "primitive type"); - break; - } - } - - /* Allow future assignments of global out's stream id value */ - this->flags.q.explicit_stream = 0; - } else if (state->stage == MESA_SHADER_TESS_CTRL) { - if (create_node) { - node = new(mem_ctx) ast_tcs_output_layout(*loc); - } - } else { - _mesa_glsl_error(loc, state, "out layout qualifiers only valid in " - "tessellation control or geometry shaders"); - } - - return r; -} - -bool -ast_type_qualifier::merge_in_qualifier(YYLTYPE *loc, - _mesa_glsl_parse_state *state, - const ast_type_qualifier &q, - ast_node* &node, bool create_node) -{ - void *mem_ctx = state; - bool create_gs_ast = false; - bool create_cs_ast = false; - ast_type_qualifier valid_in_mask; - valid_in_mask.flags.i = 0; - - switch (state->stage) { - case MESA_SHADER_TESS_EVAL: - if (q.flags.q.prim_type) { - /* Make sure this is a valid input primitive type. */ - switch (q.prim_type) { - case GL_TRIANGLES: - case GL_QUADS: - case GL_ISOLINES: - break; - default: - _mesa_glsl_error(loc, state, - "invalid tessellation evaluation " - "shader input primitive type"); - break; - } - } - - valid_in_mask.flags.q.prim_type = 1; - valid_in_mask.flags.q.vertex_spacing = 1; - valid_in_mask.flags.q.ordering = 1; - valid_in_mask.flags.q.point_mode = 1; - break; - case MESA_SHADER_GEOMETRY: - if (q.flags.q.prim_type) { - /* Make sure this is a valid input primitive type. */ - switch (q.prim_type) { - case GL_POINTS: - case GL_LINES: - case GL_LINES_ADJACENCY: - case GL_TRIANGLES: - case GL_TRIANGLES_ADJACENCY: - break; - default: - _mesa_glsl_error(loc, state, - "invalid geometry shader input primitive type"); - break; - } - } - - create_gs_ast |= - q.flags.q.prim_type && - !state->in_qualifier->flags.q.prim_type; - - valid_in_mask.flags.q.prim_type = 1; - valid_in_mask.flags.q.invocations = 1; - break; - case MESA_SHADER_FRAGMENT: - valid_in_mask.flags.q.early_fragment_tests = 1; - break; - case MESA_SHADER_COMPUTE: - create_cs_ast |= - q.flags.q.local_size != 0 && - state->in_qualifier->flags.q.local_size == 0; - - valid_in_mask.flags.q.local_size = 7; - break; - default: - _mesa_glsl_error(loc, state, - "input layout qualifiers only valid in " - "geometry, fragment and compute shaders"); - break; - } - - /* Generate an error when invalid input layout qualifiers are used. */ - if ((q.flags.i & ~valid_in_mask.flags.i) != 0) { - _mesa_glsl_error(loc, state, - "invalid input layout qualifiers used"); - return false; - } - - /* Input layout qualifiers can be specified multiple - * times in separate declarations, as long as they match. - */ - if (this->flags.q.prim_type) { - if (q.flags.q.prim_type && - this->prim_type != q.prim_type) { - _mesa_glsl_error(loc, state, - "conflicting input primitive %s specified", - state->stage == MESA_SHADER_GEOMETRY ?
- "type" : "mode"); - } - } else if (q.flags.q.prim_type) { - state->in_qualifier->flags.q.prim_type = 1; - state->in_qualifier->prim_type = q.prim_type; - } - - if (q.flags.q.invocations) { - this->flags.q.invocations = 1; - if (this->invocations) { - this->invocations->merge_qualifier(q.invocations); - } else { - this->invocations = q.invocations; - } - } - - if (q.flags.q.early_fragment_tests) { - state->fs_early_fragment_tests = true; - } - - if (this->flags.q.vertex_spacing) { - if (q.flags.q.vertex_spacing && - this->vertex_spacing != q.vertex_spacing) { - _mesa_glsl_error(loc, state, - "conflicting vertex spacing specified"); - } - } else if (q.flags.q.vertex_spacing) { - this->flags.q.vertex_spacing = 1; - this->vertex_spacing = q.vertex_spacing; - } - - if (this->flags.q.ordering) { - if (q.flags.q.ordering && - this->ordering != q.ordering) { - _mesa_glsl_error(loc, state, - "conflicting ordering specified"); - } - } else if (q.flags.q.ordering) { - this->flags.q.ordering = 1; - this->ordering = q.ordering; - } - - if (this->flags.q.point_mode) { - if (q.flags.q.point_mode && - this->point_mode != q.point_mode) { - _mesa_glsl_error(loc, state, - "conflicting point mode specified"); - } - } else if (q.flags.q.point_mode) { - this->flags.q.point_mode = 1; - this->point_mode = q.point_mode; - } - - if (create_node) { - if (create_gs_ast) { - node = new(mem_ctx) ast_gs_input_layout(*loc, q.prim_type); - } else if (create_cs_ast) { - node = new(mem_ctx) ast_cs_input_layout(*loc, q.local_size); - } - } - - return true; -} - -bool -ast_layout_expression::process_qualifier_constant(struct _mesa_glsl_parse_state *state, - const char *qual_indentifier, - unsigned *value, - bool can_be_zero) -{ - int min_value = 0; - bool first_pass = true; - *value = 0; - - if (!can_be_zero) - min_value = 1; - - for (exec_node *node = layout_const_expressions.head; - !node->is_tail_sentinel(); node = node->next) { - - exec_list dummy_instructions; - ast_node *const_expression = exec_node_data(ast_node, node, link); - - ir_rvalue *const ir = const_expression->hir(&dummy_instructions, state); - - ir_constant *const const_int = ir->constant_expression_value(); - if (const_int == NULL || !const_int->type->is_integer()) { - YYLTYPE loc = const_expression->get_location(); - _mesa_glsl_error(&loc, state, "%s must be an integral constant " - "expression", qual_indentifier); - return false; - } - - if (const_int->value.i[0] < min_value) { - YYLTYPE loc = const_expression->get_location(); - _mesa_glsl_error(&loc, state, "%s layout qualifier is invalid " - "(%d < %d)", qual_indentifier, - const_int->value.i[0], min_value); - return false; - } - - if (!first_pass && *value != const_int->value.u[0]) { - YYLTYPE loc = const_expression->get_location(); - _mesa_glsl_error(&loc, state, "%s layout qualifier does not " - "match previous declaration (%d vs %d)", - qual_indentifier, *value, const_int->value.i[0]); - return false; - } else { - first_pass = false; - *value = const_int->value.u[0]; - } - - /* If the location is const (and we've verified that - * it is) then no instructions should have been emitted - * when we converted it to HIR. If they were emitted, - * then either the location isn't const after all, or - * we are emitting unnecessary instructions. 
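The declarations being validated look like this sketch (geometry shader):

   layout(points) in;
   layout(max_vertices = 6) out;
   layout(max_vertices = 6) out;   // OK: an integral constant expression
                                   // matching the previous declaration
   // layout(max_vertices = 4) out;   would be rejected: it does not match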
- */ - assert(dummy_instructions.is_empty()); - } - - return true; -} diff --git a/src/glsl/blob.c b/src/glsl/blob.c deleted file mode 100644 index dd4341be961..00000000000 --- a/src/glsl/blob.c +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include - -#include "main/macros.h" -#include "util/ralloc.h" -#include "blob.h" - -#define BLOB_INITIAL_SIZE 4096 - -/* Ensure that \blob will be able to fit an additional object of size - * \additional. The growing (if any) will occur by doubling the existing - * allocation. - */ -static bool -grow_to_fit(struct blob *blob, size_t additional) -{ - size_t to_allocate; - uint8_t *new_data; - - if (blob->size + additional <= blob->allocated) - return true; - - if (blob->allocated == 0) - to_allocate = BLOB_INITIAL_SIZE; - else - to_allocate = blob->allocated * 2; - - to_allocate = MAX2(to_allocate, blob->allocated + additional); - - new_data = reralloc_size(blob, blob->data, to_allocate); - if (new_data == NULL) - return false; - - blob->data = new_data; - blob->allocated = to_allocate; - - return true; -} - -/* Align the blob->size so that reading or writing a value at (blob->data + - * blob->size) will result in an access aligned to a granularity of \alignment - * bytes. - * - * \return True unless allocation fails - */ -static bool -align_blob(struct blob *blob, size_t alignment) -{ - const size_t new_size = ALIGN(blob->size, alignment); - - if (! grow_to_fit (blob, new_size - blob->size)) - return false; - - blob->size = new_size; - - return true; -} - -static void -align_blob_reader(struct blob_reader *blob, size_t alignment) -{ - blob->current = blob->data + ALIGN(blob->current - blob->data, alignment); -} - -struct blob * -blob_create(void *mem_ctx) -{ - struct blob *blob; - - blob = ralloc(mem_ctx, struct blob); - if (blob == NULL) - return NULL; - - blob->data = NULL; - blob->allocated = 0; - blob->size = 0; - - return blob; -} - -bool -blob_overwrite_bytes(struct blob *blob, - size_t offset, - const void *bytes, - size_t to_write) -{ - /* Detect an attempt to overwrite data out of bounds. */ - if (offset < 0 || blob->size - offset < to_write) - return false; - - memcpy(blob->data + offset, bytes, to_write); - - return true; -} - -bool -blob_write_bytes(struct blob *blob, const void *bytes, size_t to_write) -{ - if (! 
grow_to_fit(blob, to_write)) - return false; - - memcpy(blob->data + blob->size, bytes, to_write); - blob->size += to_write; - - return true; -} - -uint8_t * -blob_reserve_bytes(struct blob *blob, size_t to_write) -{ - uint8_t *ret; - - if (! grow_to_fit (blob, to_write)) - return NULL; - - ret = blob->data + blob->size; - blob->size += to_write; - - return ret; -} - -bool -blob_write_uint32(struct blob *blob, uint32_t value) -{ - align_blob(blob, sizeof(value)); - - return blob_write_bytes(blob, &value, sizeof(value)); -} - -bool -blob_overwrite_uint32 (struct blob *blob, - size_t offset, - uint32_t value) -{ - return blob_overwrite_bytes(blob, offset, &value, sizeof(value)); -} - -bool -blob_write_uint64(struct blob *blob, uint64_t value) -{ - align_blob(blob, sizeof(value)); - - return blob_write_bytes(blob, &value, sizeof(value)); -} - -bool -blob_write_intptr(struct blob *blob, intptr_t value) -{ - align_blob(blob, sizeof(value)); - - return blob_write_bytes(blob, &value, sizeof(value)); -} - -bool -blob_write_string(struct blob *blob, const char *str) -{ - return blob_write_bytes(blob, str, strlen(str) + 1); -} - -void -blob_reader_init(struct blob_reader *blob, uint8_t *data, size_t size) -{ - blob->data = data; - blob->end = data + size; - blob->current = data; - blob->overrun = false; -} - -/* Check that an object of size \size can be read from this blob. - * - * If not, set blob->overrun to indicate that we attempted to read too far. - */ -static bool -ensure_can_read(struct blob_reader *blob, size_t size) -{ - if (blob->current < blob->end && blob->end - blob->current >= size) - return true; - - blob->overrun = true; - - return false; -} - -void * -blob_read_bytes(struct blob_reader *blob, size_t size) -{ - void *ret; - - if (! ensure_can_read (blob, size)) - return NULL; - - ret = blob->current; - - blob->current += size; - - return ret; -} - -void -blob_copy_bytes(struct blob_reader *blob, uint8_t *dest, size_t size) -{ - uint8_t *bytes; - - bytes = blob_read_bytes(blob, size); - if (bytes == NULL) - return; - - memcpy(dest, bytes, size); -} - -/* These next three read functions have identical form. If we add any beyond - * these first three we should probably switch to generating these with a - * preprocessor macro. -*/ -uint32_t -blob_read_uint32(struct blob_reader *blob) -{ - uint32_t ret; - int size = sizeof(ret); - - align_blob_reader(blob, size); - - if (! ensure_can_read(blob, size)) - return 0; - - ret = *((uint32_t*) blob->current); - - blob->current += size; - - return ret; -} - -uint64_t -blob_read_uint64(struct blob_reader *blob) -{ - uint64_t ret; - int size = sizeof(ret); - - align_blob_reader(blob, size); - - if (! ensure_can_read(blob, size)) - return 0; - - ret = *((uint64_t*) blob->current); - - blob->current += size; - - return ret; -} - -intptr_t -blob_read_intptr(struct blob_reader *blob) -{ - intptr_t ret; - int size = sizeof(ret); - - align_blob_reader(blob, size); - - if (! ensure_can_read(blob, size)) - return 0; - - ret = *((intptr_t *) blob->current); - - blob->current += size; - - return ret; -} - -char * -blob_read_string(struct blob_reader *blob) -{ - int size; - char *ret; - uint8_t *nul; - - /* If we're already at the end, then this is an overrun. */ - if (blob->current >= blob->end) { - blob->overrun = true; - return NULL; - } - - /* Similarly, if there is no zero byte in the data remaining in this blob, - * we also consider that an overrun. 
- */ - nul = memchr(blob->current, 0, blob->end - blob->current); - - if (nul == NULL) { - blob->overrun = true; - return NULL; - } - - size = nul - blob->current + 1; - - assert(ensure_can_read(blob, size)); - - ret = (char *) blob->current; - - blob->current += size; - - return ret; -} diff --git a/src/glsl/blob.h b/src/glsl/blob.h deleted file mode 100644 index ec903ec140f..00000000000 --- a/src/glsl/blob.h +++ /dev/null @@ -1,289 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#pragma once -#ifndef BLOB_H -#define BLOB_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include - -/* The blob functions implement a simple, low-level API for serializing and - * deserializing. - * - * All objects written to a blob will be serialized directly, (without any - * additional meta-data to describe the data written). Therefore, it is the - * caller's responsibility to ensure that any data can be read later, (either - * by knowing exactly what data is expected, or by writing to the blob - * sufficient meta-data to describe what has been written). - * - * A blob is efficient in that it dynamically grows by doubling in size, so - * allocation costs are logarithmic. - */ - -struct blob { - /* The data actually written to the blob. */ - uint8_t *data; - - /** Number of bytes that have been allocated for \c data. */ - size_t allocated; - - /** The number of bytes that have actual data written to them. */ - size_t size; -}; - -/* When done reading, the caller can ensure that everything was consumed by - * checking the following: - * - * 1. blob->current should be equal to blob->end, (if not, too little was - * read). - * - * 2. blob->overrun should be false, (otherwise, too much was read). - */ -struct blob_reader { - uint8_t *data; - uint8_t *end; - uint8_t *current; - bool overrun; -}; - -/** - * Create a new, empty blob, belonging to \mem_ctx. - * - * \return The new blob, (or NULL in case of allocation failure). - */ -struct blob * -blob_create (void *mem_ctx); - -/** - * Add some unstructured, fixed-size data to a blob. - * - * \return True unless allocation failed. - */ -bool -blob_write_bytes (struct blob *blob, const void *bytes, size_t to_write); - -/** - * Reserve space in \blob for a number of bytes. - * - * Space will be allocated within the blob for these bytes, but the bytes will - * be left uninitialized.
The caller is expected to use the return value to - * write directly (and immediately) to these bytes. - * - * \note The return value is valid immediately upon return, but can be - * invalidated by any other call to a blob function. So the caller should call - * blob_reserve_bytes immediately before writing through the returned pointer. - * - * This function is intended to be used when interfacing with an existing API - * that is not aware of the blob API, (so that blob_write_bytes cannot be - * called). - * - * \return A pointer to space allocated within \blob to which \to_write bytes - * can be written, (or NULL in case of any allocation error). - */ -uint8_t * -blob_reserve_bytes (struct blob *blob, size_t to_write); - -/** - * Overwrite some data previously written to the blob. - * - * Writes data to an existing portion of the blob at an offset of \offset. - * This data range must have previously been written to the blob by one of the - * blob_write_* calls. - * - * For example usage, see blob_overwrite_uint32 - * - * \return True unless the requested offset or offset+to_write lie outside - * the current blob's size. - */ -bool -blob_overwrite_bytes (struct blob *blob, - size_t offset, - const void *bytes, - size_t to_write); - -/** - * Add a uint32_t to a blob. - * - * \note This function will only write to a uint32_t-aligned offset from the - * beginning of the blob's data, so some padding bytes may be added to the - * blob if this write follows some unaligned write (such as - * blob_write_string). - * - * \return True unless allocation failed. - */ -bool -blob_write_uint32 (struct blob *blob, uint32_t value); - -/** - * Overwrite a uint32_t previously written to the blob. - * - * Writes a uint32_t value to an existing portion of the blob at an offset of - * \offset. This data range must have previously been written to the blob by - * one of the blob_write_* calls. - * - * - * The expected usage is something like the following pattern: - * - * size_t offset; - * - * offset = blob->size; - * blob_write_uint32 (blob, 0); // placeholder - * ... various blob write calls, writing N items ... - * blob_overwrite_uint32 (blob, offset, N); - * - * \return True unless the requested position or position+to_write lie outside - * the current blob's size. - */ -bool -blob_overwrite_uint32 (struct blob *blob, - size_t offset, - uint32_t value); - -/** - * Add a uint64_t to a blob. - * - * \note This function will only write to a uint64_t-aligned offset from the - * beginning of the blob's data, so some padding bytes may be added to the - * blob if this write follows some unaligned write (such as - * blob_write_string). - * - * \return True unless allocation failed. - */ -bool -blob_write_uint64 (struct blob *blob, uint64_t value); - -/** - * Add an intptr_t to a blob. - * - * \note This function will only write to an intptr_t-aligned offset from the - * beginning of the blob's data, so some padding bytes may be added to the - * blob if this write follows some unaligned write (such as - * blob_write_string). - * - * \return True unless allocation failed. - */ -bool -blob_write_intptr (struct blob *blob, intptr_t value); - -/** - * Add a NULL-terminated string to a blob, (including the NULL terminator). - * - * \return True unless allocation failed. - */ -bool -blob_write_string (struct blob *blob, const char *str); - -/** - * Start reading a blob, (initializing the contents of \blob for reading).
- * - * After this call, the caller can use the various blob_read_* functions to - * read elements from the data array. - * - * For all of the blob_read_* functions, if there is insufficient data - * remaining, the functions will do nothing, (perhaps returning default values - * such as 0). The caller can detect this by noting that the blob_reader's - * current value is unchanged before and after the call. - */ -void -blob_reader_init (struct blob_reader *blob, uint8_t *data, size_t size); - -/** - * Read some unstructured, fixed-size data from the current location, (and - * update the current location to just past this data). - * - * \note The memory returned belongs to the data underlying the blob reader. The - * caller must copy the data in order to use it after the lifetime of the data - * underlying the blob reader. - * - * \return The bytes read (see note above about memory lifetime). - */ -void * -blob_read_bytes (struct blob_reader *blob, size_t size); - -/** - * Read some unstructured, fixed-size data from the current location, copying - * it to \dest (and update the current location to just past this data) - */ -void -blob_copy_bytes (struct blob_reader *blob, uint8_t *dest, size_t size); - -/** - * Read a uint32_t from the current location, (and update the current location - * to just past this uint32_t). - * - * \note This function will only read from a uint32_t-aligned offset from the - * beginning of the blob's data, so some padding bytes may be skipped. - * - * \return The uint32_t read - */ -uint32_t -blob_read_uint32 (struct blob_reader *blob); - -/** - * Read a uint64_t from the current location, (and update the current location - * to just past this uint64_t). - * - * \note This function will only read from a uint64_t-aligned offset from the - * beginning of the blob's data, so some padding bytes may be skipped. - * - * \return The uint64_t read - */ -uint64_t -blob_read_uint64 (struct blob_reader *blob); - -/** - * Read an intptr_t value from the current location, (and update the - * current location to just past this intptr_t). - * - * \note This function will only read from an intptr_t-aligned offset from the - * beginning of the blob's data, so some padding bytes may be skipped. - * - * \return The intptr_t read - */ -intptr_t -blob_read_intptr (struct blob_reader *blob); - -/** - * Read a NULL-terminated string from the current location, (and update the - * current location to just past this string). - * - * \note The memory returned belongs to the data underlying the blob reader. The - * caller must copy the string in order to use the string after the lifetime - * of the data underlying the blob reader. - * - * \return The string read (see note above about memory lifetime). However, if - * there is no NULL byte remaining within the blob, this function returns - * NULL. 
- */ -char * -blob_read_string (struct blob_reader *blob); - -#ifdef __cplusplus -} -#endif - -#endif /* BLOB_H */ diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp deleted file mode 100644 index 95e86df1cdd..00000000000 --- a/src/glsl/builtin_functions.cpp +++ /dev/null @@ -1,5502 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file builtin_functions.cpp - * - * Support for GLSL built-in functions. - * - * This file is split into several main components: - * - * 1. Availability predicates - * - * A series of small functions that check whether the current shader - * supports the version/extensions required to expose a built-in. - * - * 2. Core builtin_builder class functionality - * - * 3. Lists of built-in functions - * - * The builtin_builder::create_builtins() function contains lists of all - * built-in function signatures, where they're available, what types they - * take, and so on. - * - * 4. Implementations of built-in function signatures - * - * A series of functions which create ir_function_signatures and emit IR - * via ir_builder to implement them. - * - * 5. External API - * - * A few functions the rest of the compiler can use to interact with the - * built-in function module. For example, searching for a built-in by - * name and parameters. 
- */ - -#include -#include -#include "main/core.h" /* for struct gl_shader */ -#include "main/shaderobj.h" -#include "ir_builder.h" -#include "glsl_parser_extras.h" -#include "program/prog_instruction.h" -#include - -#define M_PIf ((float) M_PI) -#define M_PI_2f ((float) M_PI_2) -#define M_PI_4f ((float) M_PI_4) - -using namespace ir_builder; - -/** - * Availability predicates: - * @{ - */ -static bool -always_available(const _mesa_glsl_parse_state *) -{ - return true; -} - -static bool -compatibility_vs_only(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_VERTEX && - state->language_version <= 130 && - !state->es_shader; -} - -static bool -fs_only(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_FRAGMENT; -} - -static bool -gs_only(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_GEOMETRY; -} - -static bool -v110(const _mesa_glsl_parse_state *state) -{ - return !state->es_shader; -} - -static bool -v110_fs_only(const _mesa_glsl_parse_state *state) -{ - return !state->es_shader && state->stage == MESA_SHADER_FRAGMENT; -} - -static bool -v120(const _mesa_glsl_parse_state *state) -{ - return state->is_version(120, 300); -} - -static bool -v130(const _mesa_glsl_parse_state *state) -{ - return state->is_version(130, 300); -} - -static bool -v130_fs_only(const _mesa_glsl_parse_state *state) -{ - return state->is_version(130, 300) && - state->stage == MESA_SHADER_FRAGMENT; -} - -static bool -v140(const _mesa_glsl_parse_state *state) -{ - return state->is_version(140, 0); -} - -static bool -v140_or_es3(const _mesa_glsl_parse_state *state) -{ - return state->is_version(140, 300); -} - -static bool -v400_fs_only(const _mesa_glsl_parse_state *state) -{ - return state->is_version(400, 0) && - state->stage == MESA_SHADER_FRAGMENT; -} - -static bool -es31(const _mesa_glsl_parse_state *state) -{ - return state->is_version(0, 310); -} - -static bool -texture_rectangle(const _mesa_glsl_parse_state *state) -{ - return state->ARB_texture_rectangle_enable; -} - -static bool -texture_external(const _mesa_glsl_parse_state *state) -{ - return state->OES_EGL_image_external_enable; -} - -/** True if texturing functions with explicit LOD are allowed. */ -static bool -lod_exists_in_stage(const _mesa_glsl_parse_state *state) -{ - /* Texturing functions with "Lod" in their name exist: - * - In the vertex shader stage (for all languages) - * - In any stage for GLSL 1.30+ or GLSL ES 3.00 - * - In any stage for desktop GLSL with ARB_shader_texture_lod enabled. - * - * Since ARB_shader_texture_lod can only be enabled on desktop GLSL, we - * don't need to explicitly check state->es_shader. 
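A sketch of the rule (desktop GLSL 1.20 vertex shader; `tex' is an illustrative name):

   #version 120
   uniform sampler2D tex;
   void main()
   {
      // fine in a vertex shader at any version; in a 1.20 fragment shader
      // this call would additionally require ARB_shader_texture_lod
      gl_Position = texture2DLod(tex, vec2(0.5), 2.0);
   }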
- */ - return state->stage == MESA_SHADER_VERTEX || - state->is_version(130, 300) || - state->ARB_shader_texture_lod_enable; -} - -static bool -v110_lod(const _mesa_glsl_parse_state *state) -{ - return !state->es_shader && lod_exists_in_stage(state); -} - -static bool -shader_texture_lod(const _mesa_glsl_parse_state *state) -{ - return state->ARB_shader_texture_lod_enable; -} - -static bool -shader_texture_lod_and_rect(const _mesa_glsl_parse_state *state) -{ - return state->ARB_shader_texture_lod_enable && - state->ARB_texture_rectangle_enable; -} - -static bool -shader_bit_encoding(const _mesa_glsl_parse_state *state) -{ - return state->is_version(330, 300) || - state->ARB_shader_bit_encoding_enable || - state->ARB_gpu_shader5_enable; -} - -static bool -shader_integer_mix(const _mesa_glsl_parse_state *state) -{ - return state->is_version(450, 310) || - (v130(state) && state->EXT_shader_integer_mix_enable); -} - -static bool -shader_packing_or_es3(const _mesa_glsl_parse_state *state) -{ - return state->ARB_shading_language_packing_enable || - state->is_version(420, 300); -} - -static bool -shader_packing_or_es3_or_gpu_shader5(const _mesa_glsl_parse_state *state) -{ - return state->ARB_shading_language_packing_enable || - state->ARB_gpu_shader5_enable || - state->is_version(400, 300); -} - -static bool -gpu_shader5(const _mesa_glsl_parse_state *state) -{ - return state->is_version(400, 0) || state->ARB_gpu_shader5_enable; -} - -static bool -gpu_shader5_or_es31(const _mesa_glsl_parse_state *state) -{ - return state->is_version(400, 310) || state->ARB_gpu_shader5_enable; -} - -static bool -shader_packing_or_es31_or_gpu_shader5(const _mesa_glsl_parse_state *state) -{ - return state->ARB_shading_language_packing_enable || - state->ARB_gpu_shader5_enable || - state->is_version(400, 310); -} - -static bool -fs_gpu_shader5(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_FRAGMENT && - (state->is_version(400, 0) || state->ARB_gpu_shader5_enable); -} - - -static bool -texture_array_lod(const _mesa_glsl_parse_state *state) -{ - return lod_exists_in_stage(state) && - state->EXT_texture_array_enable; -} - -static bool -fs_texture_array(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_FRAGMENT && - state->EXT_texture_array_enable; -} - -static bool -texture_array(const _mesa_glsl_parse_state *state) -{ - return state->EXT_texture_array_enable; -} - -static bool -texture_multisample(const _mesa_glsl_parse_state *state) -{ - return state->is_version(150, 310) || - state->ARB_texture_multisample_enable; -} - -static bool -texture_multisample_array(const _mesa_glsl_parse_state *state) -{ - return state->is_version(150, 320) || - state->ARB_texture_multisample_enable || - state->OES_texture_storage_multisample_2d_array_enable; -} - -static bool -texture_samples_identical(const _mesa_glsl_parse_state *state) -{ - return texture_multisample(state) && - state->EXT_shader_samples_identical_enable; -} - -static bool -texture_samples_identical_array(const _mesa_glsl_parse_state *state) -{ - return texture_multisample_array(state) && - state->EXT_shader_samples_identical_enable; -} - -static bool -fs_texture_cube_map_array(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_FRAGMENT && - (state->is_version(400, 0) || - state->ARB_texture_cube_map_array_enable); -} - -static bool -texture_cube_map_array(const _mesa_glsl_parse_state *state) -{ - return state->is_version(400, 0) || - state->ARB_texture_cube_map_array_enable; -} - -static 
bool -texture_query_levels(const _mesa_glsl_parse_state *state) -{ - return state->is_version(430, 0) || - state->ARB_texture_query_levels_enable; -} - -static bool -texture_query_lod(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_FRAGMENT && - state->ARB_texture_query_lod_enable; -} - -static bool -texture_gather(const _mesa_glsl_parse_state *state) -{ - return state->is_version(400, 0) || - state->ARB_texture_gather_enable || - state->ARB_gpu_shader5_enable; -} - -static bool -texture_gather_or_es31(const _mesa_glsl_parse_state *state) -{ - return state->is_version(400, 310) || - state->ARB_texture_gather_enable || - state->ARB_gpu_shader5_enable; -} - -/* Only ARB_texture_gather but not GLSL 4.0 or ARB_gpu_shader5. - * used for relaxation of const offset requirements. - */ -static bool -texture_gather_only_or_es31(const _mesa_glsl_parse_state *state) -{ - return !state->is_version(400, 0) && - !state->ARB_gpu_shader5_enable && - (state->ARB_texture_gather_enable || - state->is_version(0, 310)); -} - -/* Desktop GL or OES_standard_derivatives + fragment shader only */ -static bool -fs_oes_derivatives(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_FRAGMENT && - (state->is_version(110, 300) || - state->OES_standard_derivatives_enable); -} - -static bool -fs_derivative_control(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_FRAGMENT && - (state->is_version(450, 0) || - state->ARB_derivative_control_enable); -} - -static bool -tex1d_lod(const _mesa_glsl_parse_state *state) -{ - return !state->es_shader && lod_exists_in_stage(state); -} - -/** True if sampler3D exists */ -static bool -tex3d(const _mesa_glsl_parse_state *state) -{ - /* sampler3D exists in all desktop GLSL versions, GLSL ES 1.00 with the - * OES_texture_3D extension, and in GLSL ES 3.00. 
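New availability predicates follow the same shape: combine a stage check with a version or extension flag on the parse state. A hypothetical example (not in this file) that would gate a built-in to fragment shaders with OES_EGL_image_external enabled:

    /* Hypothetical predicate, for illustration only. */
    static bool
    fs_texture_external(const _mesa_glsl_parse_state *state)
    {
       return state->stage == MESA_SHADER_FRAGMENT &&
              state->OES_EGL_image_external_enable;
    }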
- */ - return !state->es_shader || - state->OES_texture_3D_enable || - state->language_version >= 300; -} - -static bool -fs_tex3d(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_FRAGMENT && - (!state->es_shader || state->OES_texture_3D_enable); -} - -static bool -tex3d_lod(const _mesa_glsl_parse_state *state) -{ - return tex3d(state) && lod_exists_in_stage(state); -} - -static bool -shader_atomic_counters(const _mesa_glsl_parse_state *state) -{ - return state->has_atomic_counters(); -} - -static bool -shader_clock(const _mesa_glsl_parse_state *state) -{ - return state->ARB_shader_clock_enable; -} - -static bool -shader_storage_buffer_object(const _mesa_glsl_parse_state *state) -{ - return state->has_shader_storage_buffer_objects(); -} - -static bool -shader_trinary_minmax(const _mesa_glsl_parse_state *state) -{ - return state->AMD_shader_trinary_minmax_enable; -} - -static bool -shader_image_load_store(const _mesa_glsl_parse_state *state) -{ - return (state->is_version(420, 310) || - state->ARB_shader_image_load_store_enable); -} - -static bool -shader_image_atomic(const _mesa_glsl_parse_state *state) -{ - return (state->is_version(420, 0) || - state->ARB_shader_image_load_store_enable); -} - -static bool -shader_image_size(const _mesa_glsl_parse_state *state) -{ - return state->is_version(430, 310) || - state->ARB_shader_image_size_enable; -} - -static bool -shader_samples(const _mesa_glsl_parse_state *state) -{ - return state->is_version(450, 0) || - state->ARB_shader_texture_image_samples_enable; -} - -static bool -gs_streams(const _mesa_glsl_parse_state *state) -{ - return gpu_shader5(state) && gs_only(state); -} - -static bool -fp64(const _mesa_glsl_parse_state *state) -{ - return state->has_double(); -} - -static bool -compute_shader(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_COMPUTE; -} - -static bool -buffer_atomics_supported(const _mesa_glsl_parse_state *state) -{ - return compute_shader(state) || shader_storage_buffer_object(state); -} - -static bool -barrier_supported(const _mesa_glsl_parse_state *state) -{ - return compute_shader(state) || - state->stage == MESA_SHADER_TESS_CTRL; -} - -/** @} */ - -/******************************************************************************/ - -namespace { - -/** - * builtin_builder: A singleton object representing the core of the built-in - * function module. - * - * It generates IR for every built-in function signature, and organizes them - * into functions. - */ -class builtin_builder { -public: - builtin_builder(); - ~builtin_builder(); - - void initialize(); - void release(); - ir_function_signature *find(_mesa_glsl_parse_state *state, - const char *name, exec_list *actual_parameters); - - /** - * A shader to hold all the built-in signatures; created by this module. - * - * This includes signatures for every built-in, regardless of version or - * enabled extensions. The availability predicate associated with each - * signature allows matching_signature() to filter out the irrelevant ones. - */ - gl_shader *shader; - -private: - void *mem_ctx; - - /** Global variables used by built-in functions. */ - ir_variable *gl_ModelViewProjectionMatrix; - ir_variable *gl_Vertex; - - void create_shader(); - void create_intrinsics(); - void create_builtins(); - - /** - * IR builder helpers: - * - * These convenience functions assist in emitting IR, but don't necessarily - * fit in ir_builder itself. Many of them rely on having a mem_ctx class - * member available. 
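A sketch of how these helpers combine inside a signature generator, in the style of the trig/arithmetic generators declared below; new_sig() and in_var() are declared in this class, while the ir_factory construction and the constant are illustrative assumptions:

    /* Illustrative body for a degrees()-like generator:
     * return x * (180 / pi). */
    ir_variable *x = in_var(type, "x");
    ir_function_signature *sig = new_sig(type, always_available, 1, x);
    ir_factory body(&sig->body, mem_ctx);  /* assumed constructor shape */
    body.emit(ret(mul(x, imm(57.29578f))));
    return sig;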
- */ - ir_variable *in_var(const glsl_type *type, const char *name); - ir_variable *out_var(const glsl_type *type, const char *name); - ir_constant *imm(float f, unsigned vector_elements=1); - ir_constant *imm(bool b, unsigned vector_elements=1); - ir_constant *imm(int i, unsigned vector_elements=1); - ir_constant *imm(unsigned u, unsigned vector_elements=1); - ir_constant *imm(double d, unsigned vector_elements=1); - ir_constant *imm(const glsl_type *type, const ir_constant_data &); - ir_dereference_variable *var_ref(ir_variable *var); - ir_dereference_array *array_ref(ir_variable *var, int i); - ir_swizzle *matrix_elt(ir_variable *var, int col, int row); - - ir_expression *asin_expr(ir_variable *x); - void do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x); - - /** - * Call function \param f with parameters specified as the linked - * list \param params of \c ir_variable objects. \param ret should - * point to the ir_variable that will hold the function return - * value, or be \c NULL if the function has void return type. - */ - ir_call *call(ir_function *f, ir_variable *ret, exec_list params); - - /** Create a new function and add the given signatures. */ - void add_function(const char *name, ...); - - typedef ir_function_signature *(builtin_builder::*image_prototype_ctr)(const glsl_type *image_type, - unsigned num_arguments, - unsigned flags); - - enum image_function_flags { - IMAGE_FUNCTION_EMIT_STUB = (1 << 0), - IMAGE_FUNCTION_RETURNS_VOID = (1 << 1), - IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE = (1 << 2), - IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE = (1 << 3), - IMAGE_FUNCTION_READ_ONLY = (1 << 4), - IMAGE_FUNCTION_WRITE_ONLY = (1 << 5), - IMAGE_FUNCTION_AVAIL_ATOMIC = (1 << 6), - IMAGE_FUNCTION_MS_ONLY = (1 << 7), - }; - - /** - * Create a new image built-in function for all known image types. - * \p flags is a bitfield of \c image_function_flags flags. - */ - void add_image_function(const char *name, - const char *intrinsic_name, - image_prototype_ctr prototype, - unsigned num_arguments, - unsigned flags); - - /** - * Create new functions for all known image built-ins and types. - * If \p glsl is \c true, use the GLSL built-in names and emit code - * to call into the actual compiler intrinsic. If \p glsl is - * false, emit a function prototype with no body for each image - * intrinsic name. 
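These flags are OR-ed together per built-in. A hedged sketch of how a store-style image function would plausibly be registered (argument count and flag set illustrative, not quoted from create_intrinsics()):

    /* imageStore-like registration: GLSL-visible stub, void return,
     * writeonly-qualified images allowed. */
    add_image_function("imageStore", "__intrinsic_image_store",
                       &builtin_builder::_image_prototype, 3,
                       IMAGE_FUNCTION_EMIT_STUB |
                       IMAGE_FUNCTION_RETURNS_VOID |
                       IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE |
                       IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE |
                       IMAGE_FUNCTION_WRITE_ONLY);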
- */ - void add_image_functions(bool glsl); - - ir_function_signature *new_sig(const glsl_type *return_type, - builtin_available_predicate avail, - int num_params, ...); - - /** - * Function signature generators: - * @{ - */ - ir_function_signature *unop(builtin_available_predicate avail, - ir_expression_operation opcode, - const glsl_type *return_type, - const glsl_type *param_type); - ir_function_signature *binop(builtin_available_predicate avail, - ir_expression_operation opcode, - const glsl_type *return_type, - const glsl_type *param0_type, - const glsl_type *param1_type); - -#define B0(X) ir_function_signature *_##X(); -#define B1(X) ir_function_signature *_##X(const glsl_type *); -#define B2(X) ir_function_signature *_##X(const glsl_type *, const glsl_type *); -#define B3(X) ir_function_signature *_##X(const glsl_type *, const glsl_type *, const glsl_type *); -#define BA1(X) ir_function_signature *_##X(builtin_available_predicate, const glsl_type *); -#define BA2(X) ir_function_signature *_##X(builtin_available_predicate, const glsl_type *, const glsl_type *); - B1(radians) - B1(degrees) - B1(sin) - B1(cos) - B1(tan) - B1(asin) - B1(acos) - B1(atan2) - B1(atan) - B1(sinh) - B1(cosh) - B1(tanh) - B1(asinh) - B1(acosh) - B1(atanh) - B1(pow) - B1(exp) - B1(log) - B1(exp2) - B1(log2) - BA1(sqrt) - BA1(inversesqrt) - BA1(abs) - BA1(sign) - BA1(floor) - BA1(trunc) - BA1(round) - BA1(roundEven) - BA1(ceil) - BA1(fract) - B2(mod) - BA1(modf) - BA2(min) - BA2(max) - BA2(clamp) - BA2(mix_lrp) - ir_function_signature *_mix_sel(builtin_available_predicate avail, - const glsl_type *val_type, - const glsl_type *blend_type); - BA2(step) - BA2(smoothstep) - BA1(isnan) - BA1(isinf) - B1(floatBitsToInt) - B1(floatBitsToUint) - B1(intBitsToFloat) - B1(uintBitsToFloat) - ir_function_signature *_packUnorm2x16(builtin_available_predicate avail); - ir_function_signature *_packSnorm2x16(builtin_available_predicate avail); - ir_function_signature *_packUnorm4x8(builtin_available_predicate avail); - ir_function_signature *_packSnorm4x8(builtin_available_predicate avail); - ir_function_signature *_unpackUnorm2x16(builtin_available_predicate avail); - ir_function_signature *_unpackSnorm2x16(builtin_available_predicate avail); - ir_function_signature *_unpackUnorm4x8(builtin_available_predicate avail); - ir_function_signature *_unpackSnorm4x8(builtin_available_predicate avail); - ir_function_signature *_packHalf2x16(builtin_available_predicate avail); - ir_function_signature *_unpackHalf2x16(builtin_available_predicate avail); - ir_function_signature *_packDouble2x32(builtin_available_predicate avail); - ir_function_signature *_unpackDouble2x32(builtin_available_predicate avail); - - BA1(length) - BA1(distance); - BA1(dot); - BA1(cross); - BA1(normalize); - B0(ftransform); - BA1(faceforward); - BA1(reflect); - BA1(refract); - BA1(matrixCompMult); - BA1(outerProduct); - BA1(determinant_mat2); - BA1(determinant_mat3); - BA1(determinant_mat4); - BA1(inverse_mat2); - BA1(inverse_mat3); - BA1(inverse_mat4); - BA1(transpose); - BA1(lessThan); - BA1(lessThanEqual); - BA1(greaterThan); - BA1(greaterThanEqual); - BA1(equal); - BA1(notEqual); - B1(any); - B1(all); - B1(not); - BA2(textureSize); - B1(textureSamples); - -/** Flags to _texture() */ -#define TEX_PROJECT 1 -#define TEX_OFFSET 2 -#define TEX_COMPONENT 4 -#define TEX_OFFSET_NONCONST 8 -#define TEX_OFFSET_ARRAY 16 - - ir_function_signature *_texture(ir_texture_opcode opcode, - builtin_available_predicate avail, - const glsl_type *return_type, - const glsl_type 
*sampler_type, - const glsl_type *coord_type, - int flags = 0); - B0(textureCubeArrayShadow); - ir_function_signature *_texelFetch(builtin_available_predicate avail, - const glsl_type *return_type, - const glsl_type *sampler_type, - const glsl_type *coord_type, - const glsl_type *offset_type = NULL); - - B0(EmitVertex) - B0(EndPrimitive) - ir_function_signature *_EmitStreamVertex(builtin_available_predicate avail, - const glsl_type *stream_type); - ir_function_signature *_EndStreamPrimitive(builtin_available_predicate avail, - const glsl_type *stream_type); - B0(barrier) - - BA2(textureQueryLod); - B1(textureQueryLevels); - BA2(textureSamplesIdentical); - B1(dFdx); - B1(dFdy); - B1(fwidth); - B1(dFdxCoarse); - B1(dFdyCoarse); - B1(fwidthCoarse); - B1(dFdxFine); - B1(dFdyFine); - B1(fwidthFine); - B1(noise1); - B1(noise2); - B1(noise3); - B1(noise4); - - B1(bitfieldExtract) - B1(bitfieldInsert) - B1(bitfieldReverse) - B1(bitCount) - B1(findLSB) - B1(findMSB) - BA1(fma) - B2(ldexp) - B2(frexp) - B2(dfrexp) - B1(uaddCarry) - B1(usubBorrow) - B1(mulExtended) - B1(interpolateAtCentroid) - B1(interpolateAtOffset) - B1(interpolateAtSample) - - ir_function_signature *_atomic_counter_intrinsic(builtin_available_predicate avail); - ir_function_signature *_atomic_counter_op(const char *intrinsic, - builtin_available_predicate avail); - - ir_function_signature *_atomic_intrinsic2(builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_op2(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_intrinsic3(builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_atomic_op3(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type); - - B1(min3) - B1(max3) - B1(mid3) - - ir_function_signature *_image_prototype(const glsl_type *image_type, - unsigned num_arguments, - unsigned flags); - ir_function_signature *_image_size_prototype(const glsl_type *image_type, - unsigned num_arguments, - unsigned flags); - ir_function_signature *_image_samples_prototype(const glsl_type *image_type, - unsigned num_arguments, - unsigned flags); - ir_function_signature *_image(image_prototype_ctr prototype, - const glsl_type *image_type, - const char *intrinsic_name, - unsigned num_arguments, - unsigned flags); - - ir_function_signature *_memory_barrier_intrinsic( - builtin_available_predicate avail); - ir_function_signature *_memory_barrier(const char *intrinsic_name, - builtin_available_predicate avail); - - ir_function_signature *_shader_clock_intrinsic(builtin_available_predicate avail, - const glsl_type *type); - ir_function_signature *_shader_clock(builtin_available_predicate avail, - const glsl_type *type); - -#undef B0 -#undef B1 -#undef B2 -#undef B3 -#undef BA1 -#undef BA2 - /** @} */ -}; - -} /* anonymous namespace */ - -/** - * Core builtin_builder functionality: - * @{ - */ -builtin_builder::builtin_builder() - : shader(NULL), - gl_ModelViewProjectionMatrix(NULL), - gl_Vertex(NULL) -{ - mem_ctx = NULL; -} - -builtin_builder::~builtin_builder() -{ - ralloc_free(mem_ctx); -} - -ir_function_signature * -builtin_builder::find(_mesa_glsl_parse_state *state, - const char *name, exec_list *actual_parameters) -{ - /* The shader currently being compiled requested a built-in function; - * it needs to link against builtin_builder::shader in order to get them. 
- * - * Even if we don't find a matching signature, we still need to do this so - * that the "no matching signature" error will list potential candidates - * from the available built-ins. - */ - state->uses_builtin_functions = true; - - ir_function *f = shader->symbols->get_function(name); - if (f == NULL) - return NULL; - - ir_function_signature *sig = - f->matching_signature(state, actual_parameters, true); - if (sig == NULL) - return NULL; - - return sig; -} - -void -builtin_builder::initialize() -{ - /* If already initialized, don't do it again. */ - if (mem_ctx != NULL) - return; - - mem_ctx = ralloc_context(NULL); - create_shader(); - create_intrinsics(); - create_builtins(); -} - -void -builtin_builder::release() -{ - ralloc_free(mem_ctx); - mem_ctx = NULL; - - ralloc_free(shader); - shader = NULL; -} - -void -builtin_builder::create_shader() -{ - /* The target doesn't actually matter. There's no target for generic - * GLSL utility code that could be linked against any stage, so just - * arbitrarily pick GL_VERTEX_SHADER. - */ - shader = _mesa_new_shader(NULL, 0, GL_VERTEX_SHADER); - shader->symbols = new(mem_ctx) glsl_symbol_table; - - gl_ModelViewProjectionMatrix = - new(mem_ctx) ir_variable(glsl_type::mat4_type, - "gl_ModelViewProjectionMatrix", - ir_var_uniform); - - shader->symbols->add_variable(gl_ModelViewProjectionMatrix); - - gl_Vertex = in_var(glsl_type::vec4_type, "gl_Vertex"); - shader->symbols->add_variable(gl_Vertex); -} - -/** @} */ - -/** - * Create ir_function and ir_function_signature objects for each - * intrinsic. - */ -void -builtin_builder::create_intrinsics() -{ - add_function("__intrinsic_atomic_read", - _atomic_counter_intrinsic(shader_atomic_counters), - NULL); - add_function("__intrinsic_atomic_increment", - _atomic_counter_intrinsic(shader_atomic_counters), - NULL); - add_function("__intrinsic_atomic_predecrement", - _atomic_counter_intrinsic(shader_atomic_counters), - NULL); - - add_function("__intrinsic_atomic_add", - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::uint_type), - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("__intrinsic_atomic_min", - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::uint_type), - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("__intrinsic_atomic_max", - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::uint_type), - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("__intrinsic_atomic_and", - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::uint_type), - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("__intrinsic_atomic_or", - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::uint_type), - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("__intrinsic_atomic_xor", - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::uint_type), - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("__intrinsic_atomic_exchange", - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::uint_type), - _atomic_intrinsic2(buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("__intrinsic_atomic_comp_swap", - _atomic_intrinsic3(buffer_atomics_supported, - glsl_type::uint_type), - _atomic_intrinsic3(buffer_atomics_supported, - glsl_type::int_type), - NULL); - - add_image_functions(false); - - 
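Every add_function() call here follows the same varargs convention: one function name, any number of ir_function_signature overloads, then a NULL terminator. The GLSL-visible wrappers added later in create_builtins() pair with these intrinsics; a sketch consistent with the _atomic_op2() declaration above (the exact call in create_builtins() may differ):

    /* GLSL-visible atomicAdd() wrapping __intrinsic_atomic_add, one
     * overload per scalar type, NULL-terminated. */
    add_function("atomicAdd",
                 _atomic_op2("__intrinsic_atomic_add",
                             buffer_atomics_supported,
                             glsl_type::uint_type),
                 _atomic_op2("__intrinsic_atomic_add",
                             buffer_atomics_supported,
                             glsl_type::int_type),
                 NULL);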
add_function("__intrinsic_memory_barrier", - _memory_barrier_intrinsic(shader_image_load_store), - NULL); - add_function("__intrinsic_group_memory_barrier", - _memory_barrier_intrinsic(compute_shader), - NULL); - add_function("__intrinsic_memory_barrier_atomic_counter", - _memory_barrier_intrinsic(compute_shader), - NULL); - add_function("__intrinsic_memory_barrier_buffer", - _memory_barrier_intrinsic(compute_shader), - NULL); - add_function("__intrinsic_memory_barrier_image", - _memory_barrier_intrinsic(compute_shader), - NULL); - add_function("__intrinsic_memory_barrier_shared", - _memory_barrier_intrinsic(compute_shader), - NULL); - - add_function("__intrinsic_shader_clock", - _shader_clock_intrinsic(shader_clock, - glsl_type::uvec2_type), - NULL); -} - -/** - * Create ir_function and ir_function_signature objects for each built-in. - * - * Contains a list of every available built-in. - */ -void -builtin_builder::create_builtins() -{ -#define F(NAME) \ - add_function(#NAME, \ - _##NAME(glsl_type::float_type), \ - _##NAME(glsl_type::vec2_type), \ - _##NAME(glsl_type::vec3_type), \ - _##NAME(glsl_type::vec4_type), \ - NULL); - -#define FD(NAME) \ - add_function(#NAME, \ - _##NAME(always_available, glsl_type::float_type), \ - _##NAME(always_available, glsl_type::vec2_type), \ - _##NAME(always_available, glsl_type::vec3_type), \ - _##NAME(always_available, glsl_type::vec4_type), \ - _##NAME(fp64, glsl_type::double_type), \ - _##NAME(fp64, glsl_type::dvec2_type), \ - _##NAME(fp64, glsl_type::dvec3_type), \ - _##NAME(fp64, glsl_type::dvec4_type), \ - NULL); - -#define FD130(NAME) \ - add_function(#NAME, \ - _##NAME(v130, glsl_type::float_type), \ - _##NAME(v130, glsl_type::vec2_type), \ - _##NAME(v130, glsl_type::vec3_type), \ - _##NAME(v130, glsl_type::vec4_type), \ - _##NAME(fp64, glsl_type::double_type), \ - _##NAME(fp64, glsl_type::dvec2_type), \ - _##NAME(fp64, glsl_type::dvec3_type), \ - _##NAME(fp64, glsl_type::dvec4_type), \ - NULL); - -#define FDGS5(NAME) \ - add_function(#NAME, \ - _##NAME(gpu_shader5, glsl_type::float_type), \ - _##NAME(gpu_shader5, glsl_type::vec2_type), \ - _##NAME(gpu_shader5, glsl_type::vec3_type), \ - _##NAME(gpu_shader5, glsl_type::vec4_type), \ - _##NAME(fp64, glsl_type::double_type), \ - _##NAME(fp64, glsl_type::dvec2_type), \ - _##NAME(fp64, glsl_type::dvec3_type), \ - _##NAME(fp64, glsl_type::dvec4_type), \ - NULL); - -#define FI(NAME) \ - add_function(#NAME, \ - _##NAME(glsl_type::float_type), \ - _##NAME(glsl_type::vec2_type), \ - _##NAME(glsl_type::vec3_type), \ - _##NAME(glsl_type::vec4_type), \ - _##NAME(glsl_type::int_type), \ - _##NAME(glsl_type::ivec2_type), \ - _##NAME(glsl_type::ivec3_type), \ - _##NAME(glsl_type::ivec4_type), \ - NULL); - -#define FID(NAME) \ - add_function(#NAME, \ - _##NAME(always_available, glsl_type::float_type), \ - _##NAME(always_available, glsl_type::vec2_type), \ - _##NAME(always_available, glsl_type::vec3_type), \ - _##NAME(always_available, glsl_type::vec4_type), \ - _##NAME(always_available, glsl_type::int_type), \ - _##NAME(always_available, glsl_type::ivec2_type), \ - _##NAME(always_available, glsl_type::ivec3_type), \ - _##NAME(always_available, glsl_type::ivec4_type), \ - _##NAME(fp64, glsl_type::double_type), \ - _##NAME(fp64, glsl_type::dvec2_type), \ - _##NAME(fp64, glsl_type::dvec3_type), \ - _##NAME(fp64, glsl_type::dvec4_type), \ - NULL); - -#define FIUD(NAME) \ - add_function(#NAME, \ - _##NAME(always_available, glsl_type::float_type), \ - _##NAME(always_available, glsl_type::vec2_type), \ - 
_##NAME(always_available, glsl_type::vec3_type), \ - _##NAME(always_available, glsl_type::vec4_type), \ - \ - _##NAME(always_available, glsl_type::int_type), \ - _##NAME(always_available, glsl_type::ivec2_type), \ - _##NAME(always_available, glsl_type::ivec3_type), \ - _##NAME(always_available, glsl_type::ivec4_type), \ - \ - _##NAME(v130, glsl_type::uint_type), \ - _##NAME(v130, glsl_type::uvec2_type), \ - _##NAME(v130, glsl_type::uvec3_type), \ - _##NAME(v130, glsl_type::uvec4_type), \ - _##NAME(fp64, glsl_type::double_type), \ - _##NAME(fp64, glsl_type::dvec2_type), \ - _##NAME(fp64, glsl_type::dvec3_type), \ - _##NAME(fp64, glsl_type::dvec4_type), \ - NULL); - -#define IU(NAME) \ - add_function(#NAME, \ - _##NAME(glsl_type::int_type), \ - _##NAME(glsl_type::ivec2_type), \ - _##NAME(glsl_type::ivec3_type), \ - _##NAME(glsl_type::ivec4_type), \ - \ - _##NAME(glsl_type::uint_type), \ - _##NAME(glsl_type::uvec2_type), \ - _##NAME(glsl_type::uvec3_type), \ - _##NAME(glsl_type::uvec4_type), \ - NULL); - -#define FIUBD(NAME) \ - add_function(#NAME, \ - _##NAME(always_available, glsl_type::float_type), \ - _##NAME(always_available, glsl_type::vec2_type), \ - _##NAME(always_available, glsl_type::vec3_type), \ - _##NAME(always_available, glsl_type::vec4_type), \ - \ - _##NAME(always_available, glsl_type::int_type), \ - _##NAME(always_available, glsl_type::ivec2_type), \ - _##NAME(always_available, glsl_type::ivec3_type), \ - _##NAME(always_available, glsl_type::ivec4_type), \ - \ - _##NAME(v130, glsl_type::uint_type), \ - _##NAME(v130, glsl_type::uvec2_type), \ - _##NAME(v130, glsl_type::uvec3_type), \ - _##NAME(v130, glsl_type::uvec4_type), \ - \ - _##NAME(always_available, glsl_type::bool_type), \ - _##NAME(always_available, glsl_type::bvec2_type), \ - _##NAME(always_available, glsl_type::bvec3_type), \ - _##NAME(always_available, glsl_type::bvec4_type), \ - \ - _##NAME(fp64, glsl_type::double_type), \ - _##NAME(fp64, glsl_type::dvec2_type), \ - _##NAME(fp64, glsl_type::dvec3_type), \ - _##NAME(fp64, glsl_type::dvec4_type), \ - NULL); - -#define FIUD2_MIXED(NAME) \ - add_function(#NAME, \ - _##NAME(always_available, glsl_type::float_type, glsl_type::float_type), \ - _##NAME(always_available, glsl_type::vec2_type, glsl_type::float_type), \ - _##NAME(always_available, glsl_type::vec3_type, glsl_type::float_type), \ - _##NAME(always_available, glsl_type::vec4_type, glsl_type::float_type), \ - \ - _##NAME(always_available, glsl_type::vec2_type, glsl_type::vec2_type), \ - _##NAME(always_available, glsl_type::vec3_type, glsl_type::vec3_type), \ - _##NAME(always_available, glsl_type::vec4_type, glsl_type::vec4_type), \ - \ - _##NAME(always_available, glsl_type::int_type, glsl_type::int_type), \ - _##NAME(always_available, glsl_type::ivec2_type, glsl_type::int_type), \ - _##NAME(always_available, glsl_type::ivec3_type, glsl_type::int_type), \ - _##NAME(always_available, glsl_type::ivec4_type, glsl_type::int_type), \ - \ - _##NAME(always_available, glsl_type::ivec2_type, glsl_type::ivec2_type), \ - _##NAME(always_available, glsl_type::ivec3_type, glsl_type::ivec3_type), \ - _##NAME(always_available, glsl_type::ivec4_type, glsl_type::ivec4_type), \ - \ - _##NAME(v130, glsl_type::uint_type, glsl_type::uint_type), \ - _##NAME(v130, glsl_type::uvec2_type, glsl_type::uint_type), \ - _##NAME(v130, glsl_type::uvec3_type, glsl_type::uint_type), \ - _##NAME(v130, glsl_type::uvec4_type, glsl_type::uint_type), \ - \ - _##NAME(v130, glsl_type::uvec2_type, glsl_type::uvec2_type), \ - _##NAME(v130, 
glsl_type::uvec3_type, glsl_type::uvec3_type), \ - _##NAME(v130, glsl_type::uvec4_type, glsl_type::uvec4_type), \ - \ - _##NAME(fp64, glsl_type::double_type, glsl_type::double_type), \ - _##NAME(fp64, glsl_type::dvec2_type, glsl_type::double_type), \ - _##NAME(fp64, glsl_type::dvec3_type, glsl_type::double_type), \ - _##NAME(fp64, glsl_type::dvec4_type, glsl_type::double_type), \ - _##NAME(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), \ - _##NAME(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), \ - _##NAME(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), \ - NULL); - - F(radians) - F(degrees) - F(sin) - F(cos) - F(tan) - F(asin) - F(acos) - - add_function("atan", - _atan(glsl_type::float_type), - _atan(glsl_type::vec2_type), - _atan(glsl_type::vec3_type), - _atan(glsl_type::vec4_type), - _atan2(glsl_type::float_type), - _atan2(glsl_type::vec2_type), - _atan2(glsl_type::vec3_type), - _atan2(glsl_type::vec4_type), - NULL); - - F(sinh) - F(cosh) - F(tanh) - F(asinh) - F(acosh) - F(atanh) - F(pow) - F(exp) - F(log) - F(exp2) - F(log2) - FD(sqrt) - FD(inversesqrt) - FID(abs) - FID(sign) - FD(floor) - FD(trunc) - FD(round) - FD(roundEven) - FD(ceil) - FD(fract) - - add_function("mod", - _mod(glsl_type::float_type, glsl_type::float_type), - _mod(glsl_type::vec2_type, glsl_type::float_type), - _mod(glsl_type::vec3_type, glsl_type::float_type), - _mod(glsl_type::vec4_type, glsl_type::float_type), - - _mod(glsl_type::vec2_type, glsl_type::vec2_type), - _mod(glsl_type::vec3_type, glsl_type::vec3_type), - _mod(glsl_type::vec4_type, glsl_type::vec4_type), - - _mod(glsl_type::double_type, glsl_type::double_type), - _mod(glsl_type::dvec2_type, glsl_type::double_type), - _mod(glsl_type::dvec3_type, glsl_type::double_type), - _mod(glsl_type::dvec4_type, glsl_type::double_type), - - _mod(glsl_type::dvec2_type, glsl_type::dvec2_type), - _mod(glsl_type::dvec3_type, glsl_type::dvec3_type), - _mod(glsl_type::dvec4_type, glsl_type::dvec4_type), - NULL); - - FD(modf) - - FIUD2_MIXED(min) - FIUD2_MIXED(max) - FIUD2_MIXED(clamp) - - add_function("mix", - _mix_lrp(always_available, glsl_type::float_type, glsl_type::float_type), - _mix_lrp(always_available, glsl_type::vec2_type, glsl_type::float_type), - _mix_lrp(always_available, glsl_type::vec3_type, glsl_type::float_type), - _mix_lrp(always_available, glsl_type::vec4_type, glsl_type::float_type), - - _mix_lrp(always_available, glsl_type::vec2_type, glsl_type::vec2_type), - _mix_lrp(always_available, glsl_type::vec3_type, glsl_type::vec3_type), - _mix_lrp(always_available, glsl_type::vec4_type, glsl_type::vec4_type), - - _mix_lrp(fp64, glsl_type::double_type, glsl_type::double_type), - _mix_lrp(fp64, glsl_type::dvec2_type, glsl_type::double_type), - _mix_lrp(fp64, glsl_type::dvec3_type, glsl_type::double_type), - _mix_lrp(fp64, glsl_type::dvec4_type, glsl_type::double_type), - - _mix_lrp(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), - _mix_lrp(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), - _mix_lrp(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), - - _mix_sel(v130, glsl_type::float_type, glsl_type::bool_type), - _mix_sel(v130, glsl_type::vec2_type, glsl_type::bvec2_type), - _mix_sel(v130, glsl_type::vec3_type, glsl_type::bvec3_type), - _mix_sel(v130, glsl_type::vec4_type, glsl_type::bvec4_type), - - _mix_sel(fp64, glsl_type::double_type, glsl_type::bool_type), - _mix_sel(fp64, glsl_type::dvec2_type, glsl_type::bvec2_type), - _mix_sel(fp64, glsl_type::dvec3_type, glsl_type::bvec3_type), - _mix_sel(fp64, glsl_type::dvec4_type, 
glsl_type::bvec4_type), - - _mix_sel(shader_integer_mix, glsl_type::int_type, glsl_type::bool_type), - _mix_sel(shader_integer_mix, glsl_type::ivec2_type, glsl_type::bvec2_type), - _mix_sel(shader_integer_mix, glsl_type::ivec3_type, glsl_type::bvec3_type), - _mix_sel(shader_integer_mix, glsl_type::ivec4_type, glsl_type::bvec4_type), - - _mix_sel(shader_integer_mix, glsl_type::uint_type, glsl_type::bool_type), - _mix_sel(shader_integer_mix, glsl_type::uvec2_type, glsl_type::bvec2_type), - _mix_sel(shader_integer_mix, glsl_type::uvec3_type, glsl_type::bvec3_type), - _mix_sel(shader_integer_mix, glsl_type::uvec4_type, glsl_type::bvec4_type), - - _mix_sel(shader_integer_mix, glsl_type::bool_type, glsl_type::bool_type), - _mix_sel(shader_integer_mix, glsl_type::bvec2_type, glsl_type::bvec2_type), - _mix_sel(shader_integer_mix, glsl_type::bvec3_type, glsl_type::bvec3_type), - _mix_sel(shader_integer_mix, glsl_type::bvec4_type, glsl_type::bvec4_type), - NULL); - - add_function("step", - _step(always_available, glsl_type::float_type, glsl_type::float_type), - _step(always_available, glsl_type::float_type, glsl_type::vec2_type), - _step(always_available, glsl_type::float_type, glsl_type::vec3_type), - _step(always_available, glsl_type::float_type, glsl_type::vec4_type), - - _step(always_available, glsl_type::vec2_type, glsl_type::vec2_type), - _step(always_available, glsl_type::vec3_type, glsl_type::vec3_type), - _step(always_available, glsl_type::vec4_type, glsl_type::vec4_type), - _step(fp64, glsl_type::double_type, glsl_type::double_type), - _step(fp64, glsl_type::double_type, glsl_type::dvec2_type), - _step(fp64, glsl_type::double_type, glsl_type::dvec3_type), - _step(fp64, glsl_type::double_type, glsl_type::dvec4_type), - - _step(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), - _step(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), - _step(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), - NULL); - - add_function("smoothstep", - _smoothstep(always_available, glsl_type::float_type, glsl_type::float_type), - _smoothstep(always_available, glsl_type::float_type, glsl_type::vec2_type), - _smoothstep(always_available, glsl_type::float_type, glsl_type::vec3_type), - _smoothstep(always_available, glsl_type::float_type, glsl_type::vec4_type), - - _smoothstep(always_available, glsl_type::vec2_type, glsl_type::vec2_type), - _smoothstep(always_available, glsl_type::vec3_type, glsl_type::vec3_type), - _smoothstep(always_available, glsl_type::vec4_type, glsl_type::vec4_type), - _smoothstep(fp64, glsl_type::double_type, glsl_type::double_type), - _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec2_type), - _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec3_type), - _smoothstep(fp64, glsl_type::double_type, glsl_type::dvec4_type), - - _smoothstep(fp64, glsl_type::dvec2_type, glsl_type::dvec2_type), - _smoothstep(fp64, glsl_type::dvec3_type, glsl_type::dvec3_type), - _smoothstep(fp64, glsl_type::dvec4_type, glsl_type::dvec4_type), - NULL); - - FD130(isnan) - FD130(isinf) - - F(floatBitsToInt) - F(floatBitsToUint) - add_function("intBitsToFloat", - _intBitsToFloat(glsl_type::int_type), - _intBitsToFloat(glsl_type::ivec2_type), - _intBitsToFloat(glsl_type::ivec3_type), - _intBitsToFloat(glsl_type::ivec4_type), - NULL); - add_function("uintBitsToFloat", - _uintBitsToFloat(glsl_type::uint_type), - _uintBitsToFloat(glsl_type::uvec2_type), - _uintBitsToFloat(glsl_type::uvec3_type), - _uintBitsToFloat(glsl_type::uvec4_type), - NULL); - - add_function("packUnorm2x16", 
_packUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); - add_function("packSnorm2x16", _packSnorm2x16(shader_packing_or_es3), NULL); - add_function("packUnorm4x8", _packUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); - add_function("packSnorm4x8", _packSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); - add_function("unpackUnorm2x16", _unpackUnorm2x16(shader_packing_or_es3_or_gpu_shader5), NULL); - add_function("unpackSnorm2x16", _unpackSnorm2x16(shader_packing_or_es3), NULL); - add_function("unpackUnorm4x8", _unpackUnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); - add_function("unpackSnorm4x8", _unpackSnorm4x8(shader_packing_or_es31_or_gpu_shader5), NULL); - add_function("packHalf2x16", _packHalf2x16(shader_packing_or_es3), NULL); - add_function("unpackHalf2x16", _unpackHalf2x16(shader_packing_or_es3), NULL); - add_function("packDouble2x32", _packDouble2x32(fp64), NULL); - add_function("unpackDouble2x32", _unpackDouble2x32(fp64), NULL); - - - FD(length) - FD(distance) - FD(dot) - - add_function("cross", _cross(always_available, glsl_type::vec3_type), - _cross(fp64, glsl_type::dvec3_type), NULL); - - FD(normalize) - add_function("ftransform", _ftransform(), NULL); - FD(faceforward) - FD(reflect) - FD(refract) - // ... - add_function("matrixCompMult", - _matrixCompMult(always_available, glsl_type::mat2_type), - _matrixCompMult(always_available, glsl_type::mat3_type), - _matrixCompMult(always_available, glsl_type::mat4_type), - _matrixCompMult(always_available, glsl_type::mat2x3_type), - _matrixCompMult(always_available, glsl_type::mat2x4_type), - _matrixCompMult(always_available, glsl_type::mat3x2_type), - _matrixCompMult(always_available, glsl_type::mat3x4_type), - _matrixCompMult(always_available, glsl_type::mat4x2_type), - _matrixCompMult(always_available, glsl_type::mat4x3_type), - _matrixCompMult(fp64, glsl_type::dmat2_type), - _matrixCompMult(fp64, glsl_type::dmat3_type), - _matrixCompMult(fp64, glsl_type::dmat4_type), - _matrixCompMult(fp64, glsl_type::dmat2x3_type), - _matrixCompMult(fp64, glsl_type::dmat2x4_type), - _matrixCompMult(fp64, glsl_type::dmat3x2_type), - _matrixCompMult(fp64, glsl_type::dmat3x4_type), - _matrixCompMult(fp64, glsl_type::dmat4x2_type), - _matrixCompMult(fp64, glsl_type::dmat4x3_type), - NULL); - add_function("outerProduct", - _outerProduct(v120, glsl_type::mat2_type), - _outerProduct(v120, glsl_type::mat3_type), - _outerProduct(v120, glsl_type::mat4_type), - _outerProduct(v120, glsl_type::mat2x3_type), - _outerProduct(v120, glsl_type::mat2x4_type), - _outerProduct(v120, glsl_type::mat3x2_type), - _outerProduct(v120, glsl_type::mat3x4_type), - _outerProduct(v120, glsl_type::mat4x2_type), - _outerProduct(v120, glsl_type::mat4x3_type), - _outerProduct(fp64, glsl_type::dmat2_type), - _outerProduct(fp64, glsl_type::dmat3_type), - _outerProduct(fp64, glsl_type::dmat4_type), - _outerProduct(fp64, glsl_type::dmat2x3_type), - _outerProduct(fp64, glsl_type::dmat2x4_type), - _outerProduct(fp64, glsl_type::dmat3x2_type), - _outerProduct(fp64, glsl_type::dmat3x4_type), - _outerProduct(fp64, glsl_type::dmat4x2_type), - _outerProduct(fp64, glsl_type::dmat4x3_type), - NULL); - add_function("determinant", - _determinant_mat2(v120, glsl_type::mat2_type), - _determinant_mat3(v120, glsl_type::mat3_type), - _determinant_mat4(v120, glsl_type::mat4_type), - _determinant_mat2(fp64, glsl_type::dmat2_type), - _determinant_mat3(fp64, glsl_type::dmat3_type), - _determinant_mat4(fp64, glsl_type::dmat4_type), - - NULL); - add_function("inverse", - 
_inverse_mat2(v140_or_es3, glsl_type::mat2_type), - _inverse_mat3(v140_or_es3, glsl_type::mat3_type), - _inverse_mat4(v140_or_es3, glsl_type::mat4_type), - _inverse_mat2(fp64, glsl_type::dmat2_type), - _inverse_mat3(fp64, glsl_type::dmat3_type), - _inverse_mat4(fp64, glsl_type::dmat4_type), - NULL); - add_function("transpose", - _transpose(v120, glsl_type::mat2_type), - _transpose(v120, glsl_type::mat3_type), - _transpose(v120, glsl_type::mat4_type), - _transpose(v120, glsl_type::mat2x3_type), - _transpose(v120, glsl_type::mat2x4_type), - _transpose(v120, glsl_type::mat3x2_type), - _transpose(v120, glsl_type::mat3x4_type), - _transpose(v120, glsl_type::mat4x2_type), - _transpose(v120, glsl_type::mat4x3_type), - _transpose(fp64, glsl_type::dmat2_type), - _transpose(fp64, glsl_type::dmat3_type), - _transpose(fp64, glsl_type::dmat4_type), - _transpose(fp64, glsl_type::dmat2x3_type), - _transpose(fp64, glsl_type::dmat2x4_type), - _transpose(fp64, glsl_type::dmat3x2_type), - _transpose(fp64, glsl_type::dmat3x4_type), - _transpose(fp64, glsl_type::dmat4x2_type), - _transpose(fp64, glsl_type::dmat4x3_type), - NULL); - FIUD(lessThan) - FIUD(lessThanEqual) - FIUD(greaterThan) - FIUD(greaterThanEqual) - FIUBD(notEqual) - FIUBD(equal) - - add_function("any", - _any(glsl_type::bvec2_type), - _any(glsl_type::bvec3_type), - _any(glsl_type::bvec4_type), - NULL); - - add_function("all", - _all(glsl_type::bvec2_type), - _all(glsl_type::bvec3_type), - _all(glsl_type::bvec4_type), - NULL); - - add_function("not", - _not(glsl_type::bvec2_type), - _not(glsl_type::bvec3_type), - _not(glsl_type::bvec4_type), - NULL); - - add_function("textureSize", - _textureSize(v130, glsl_type::int_type, glsl_type::sampler1D_type), - _textureSize(v130, glsl_type::int_type, glsl_type::isampler1D_type), - _textureSize(v130, glsl_type::int_type, glsl_type::usampler1D_type), - - _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2D_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::isampler2D_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::usampler2D_type), - - _textureSize(v130, glsl_type::ivec3_type, glsl_type::sampler3D_type), - _textureSize(v130, glsl_type::ivec3_type, glsl_type::isampler3D_type), - _textureSize(v130, glsl_type::ivec3_type, glsl_type::usampler3D_type), - - _textureSize(v130, glsl_type::ivec2_type, glsl_type::samplerCube_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::isamplerCube_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::usamplerCube_type), - - _textureSize(v130, glsl_type::int_type, glsl_type::sampler1DShadow_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2DShadow_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::samplerCubeShadow_type), - - _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler1DArray_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::isampler1DArray_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::usampler1DArray_type), - _textureSize(v130, glsl_type::ivec3_type, glsl_type::sampler2DArray_type), - _textureSize(v130, glsl_type::ivec3_type, glsl_type::isampler2DArray_type), - _textureSize(v130, glsl_type::ivec3_type, glsl_type::usampler2DArray_type), - - _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler1DArrayShadow_type), - _textureSize(v130, glsl_type::ivec3_type, glsl_type::sampler2DArrayShadow_type), - - _textureSize(texture_cube_map_array, glsl_type::ivec3_type, glsl_type::samplerCubeArray_type), - _textureSize(texture_cube_map_array, 
glsl_type::ivec3_type, glsl_type::isamplerCubeArray_type), - _textureSize(texture_cube_map_array, glsl_type::ivec3_type, glsl_type::usamplerCubeArray_type), - _textureSize(texture_cube_map_array, glsl_type::ivec3_type, glsl_type::samplerCubeArrayShadow_type), - - _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2DRect_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::isampler2DRect_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::usampler2DRect_type), - _textureSize(v130, glsl_type::ivec2_type, glsl_type::sampler2DRectShadow_type), - - _textureSize(v140, glsl_type::int_type, glsl_type::samplerBuffer_type), - _textureSize(v140, glsl_type::int_type, glsl_type::isamplerBuffer_type), - _textureSize(v140, glsl_type::int_type, glsl_type::usamplerBuffer_type), - _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::sampler2DMS_type), - _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::isampler2DMS_type), - _textureSize(texture_multisample, glsl_type::ivec2_type, glsl_type::usampler2DMS_type), - - _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::sampler2DMSArray_type), - _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::isampler2DMSArray_type), - _textureSize(texture_multisample_array, glsl_type::ivec3_type, glsl_type::usampler2DMSArray_type), - NULL); - - add_function("textureSamples", - _textureSamples(glsl_type::sampler2DMS_type), - _textureSamples(glsl_type::isampler2DMS_type), - _textureSamples(glsl_type::usampler2DMS_type), - - _textureSamples(glsl_type::sampler2DMSArray_type), - _textureSamples(glsl_type::isampler2DMSArray_type), - _textureSamples(glsl_type::usampler2DMSArray_type), - NULL); - - add_function("texture", - _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), - _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type), - _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type), - - _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), - _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), - _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), - - _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), - _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type), - _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type), - - _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), - _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), - _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), - - _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), - _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), - _texture(ir_tex, v130, glsl_type::float_type, glsl_type::samplerCubeShadow_type, glsl_type::vec4_type), - - _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), - _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type), - _texture(ir_tex, v130, glsl_type::uvec4_type, 
glsl_type::usampler1DArray_type, glsl_type::vec2_type), - - _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type), - _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type), - - _texture(ir_tex, texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type), - _texture(ir_tex, texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type), - _texture(ir_tex, texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type), - - _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type), - _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type), - /* samplerCubeArrayShadow is special; it has an extra parameter - * for the shadow comparitor since there is no vec5 type. - */ - _textureCubeArrayShadow(), - - _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type), - _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type), - _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type), - - _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type), - - _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type), - _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type), - _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type), - - _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type), - _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type), - _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type), - - _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type), - _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type), - _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type), - - _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type), - _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type), - _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type), - - _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type), - _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type), - _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::samplerCubeShadow_type, glsl_type::vec4_type), - - _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type), - _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type), - _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type), - - _texture(ir_txb, v130_fs_only, 
glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type),
-                _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type),
-                _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type),
-
-                _texture(ir_txb, fs_texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type),
-                _texture(ir_txb, fs_texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type),
-                _texture(ir_txb, fs_texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type),
-
-                _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("textureLod",
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type),
-
-                _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type),
-                _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type),
-
-                _texture(ir_txl, texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type),
-                _texture(ir_txl, texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type),
-                _texture(ir_txl, texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type),
-
-                _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("textureOffset",
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET),
-
-                _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_txb, v130_fs_only, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txb, v130_fs_only, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txb, v130_fs_only, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-                NULL);
-
-   add_function("textureProj",
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("texelFetch",
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::int_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::int_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::int_type),
-
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::ivec2_type),
-
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::ivec3_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::ivec3_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::ivec3_type),
-
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::ivec2_type),
-
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::ivec2_type),
-
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::ivec3_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::ivec3_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::ivec3_type),
-
-                _texelFetch(v140, glsl_type::vec4_type, glsl_type::samplerBuffer_type, glsl_type::int_type),
-                _texelFetch(v140, glsl_type::ivec4_type, glsl_type::isamplerBuffer_type, glsl_type::int_type),
-                _texelFetch(v140, glsl_type::uvec4_type, glsl_type::usamplerBuffer_type, glsl_type::int_type),
-
-                _texelFetch(texture_multisample, glsl_type::vec4_type, glsl_type::sampler2DMS_type, glsl_type::ivec2_type),
-                _texelFetch(texture_multisample, glsl_type::ivec4_type, glsl_type::isampler2DMS_type, glsl_type::ivec2_type),
-                _texelFetch(texture_multisample, glsl_type::uvec4_type, glsl_type::usampler2DMS_type, glsl_type::ivec2_type),
-
-                _texelFetch(texture_multisample_array, glsl_type::vec4_type, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type),
-                _texelFetch(texture_multisample_array, glsl_type::ivec4_type, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type),
-                _texelFetch(texture_multisample_array, glsl_type::uvec4_type, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type),
-                NULL);
-
-   add_function("texelFetchOffset",
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::int_type, glsl_type::int_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::int_type, glsl_type::int_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::int_type, glsl_type::int_type),
-
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::ivec2_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::ivec2_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::ivec2_type, glsl_type::ivec2_type),
-
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::ivec3_type, glsl_type::ivec3_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::ivec3_type, glsl_type::ivec3_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::ivec3_type, glsl_type::ivec3_type),
-
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::ivec2_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::ivec2_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::ivec2_type, glsl_type::ivec2_type),
-
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::ivec2_type, glsl_type::int_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::ivec2_type, glsl_type::int_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::ivec2_type, glsl_type::int_type),
-
-                _texelFetch(v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::ivec3_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::ivec3_type, glsl_type::ivec2_type),
-                _texelFetch(v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::ivec3_type, glsl_type::ivec2_type),
-
-                NULL);
-
-   add_function("textureProjOffset",
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_tex, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_tex, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txb, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txb, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                NULL);
-
-   add_function("textureLodOffset",
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-                NULL);
-
-   add_function("textureProjLod",
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("textureProjLodOffset",
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txl, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txl, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                NULL);
-
-   add_function("textureGrad",
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type),
-
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type),
-
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type),
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type),
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::samplerCubeShadow_type, glsl_type::vec4_type),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type),
-
-                _texture(ir_txd, texture_cube_map_array, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type),
-                _texture(ir_txd, texture_cube_map_array, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type),
-                _texture(ir_txd, texture_cube_map_array, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type),
-
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type),
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type),
-                NULL);
-
-   add_function("textureGradOffset",
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::float_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::float_type, TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1DArray_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type, TEX_OFFSET),
-                NULL);
-
-   add_function("textureProjGrad",
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("textureProjGradOffset",
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec2_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler1D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler3D_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                _texture(ir_txd, v130, glsl_type::float_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT | TEX_OFFSET),
-                NULL);
-
-   add_function("EmitVertex", _EmitVertex(), NULL);
-   add_function("EndPrimitive", _EndPrimitive(), NULL);
-   add_function("EmitStreamVertex",
-                _EmitStreamVertex(gs_streams, glsl_type::uint_type),
-                _EmitStreamVertex(gs_streams, glsl_type::int_type),
-                NULL);
-   add_function("EndStreamPrimitive",
-                _EndStreamPrimitive(gs_streams, glsl_type::uint_type),
-                _EndStreamPrimitive(gs_streams, glsl_type::int_type),
-                NULL);
-   add_function("barrier", _barrier(), NULL);
-
-   add_function("textureQueryLOD",
-                _textureQueryLod(texture_query_lod, glsl_type::sampler1D_type, glsl_type::float_type),
-                _textureQueryLod(texture_query_lod, glsl_type::isampler1D_type, glsl_type::float_type),
-                _textureQueryLod(texture_query_lod, glsl_type::usampler1D_type, glsl_type::float_type),
-
-                _textureQueryLod(texture_query_lod, glsl_type::sampler2D_type, glsl_type::vec2_type),
-                _textureQueryLod(texture_query_lod, glsl_type::isampler2D_type, glsl_type::vec2_type),
-                _textureQueryLod(texture_query_lod, glsl_type::usampler2D_type, glsl_type::vec2_type),
-
-                _textureQueryLod(texture_query_lod, glsl_type::sampler3D_type, glsl_type::vec3_type),
-                _textureQueryLod(texture_query_lod, glsl_type::isampler3D_type, glsl_type::vec3_type),
-                _textureQueryLod(texture_query_lod, glsl_type::usampler3D_type, glsl_type::vec3_type),
-
-                _textureQueryLod(texture_query_lod, glsl_type::samplerCube_type, glsl_type::vec3_type),
-                _textureQueryLod(texture_query_lod, glsl_type::isamplerCube_type, glsl_type::vec3_type),
-                _textureQueryLod(texture_query_lod, glsl_type::usamplerCube_type, glsl_type::vec3_type),
-
-                _textureQueryLod(texture_query_lod, glsl_type::sampler1DArray_type, glsl_type::float_type),
-                _textureQueryLod(texture_query_lod, glsl_type::isampler1DArray_type, glsl_type::float_type),
-                _textureQueryLod(texture_query_lod, glsl_type::usampler1DArray_type, glsl_type::float_type),
-
-                _textureQueryLod(texture_query_lod, glsl_type::sampler2DArray_type, glsl_type::vec2_type),
-                _textureQueryLod(texture_query_lod, glsl_type::isampler2DArray_type, glsl_type::vec2_type),
-                _textureQueryLod(texture_query_lod, glsl_type::usampler2DArray_type, glsl_type::vec2_type),
-
-                _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArray_type, glsl_type::vec3_type),
-                _textureQueryLod(texture_query_lod, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type),
-                _textureQueryLod(texture_query_lod, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type),
-
-                _textureQueryLod(texture_query_lod, glsl_type::sampler1DShadow_type, glsl_type::float_type),
-                _textureQueryLod(texture_query_lod, glsl_type::sampler2DShadow_type, glsl_type::vec2_type),
-                _textureQueryLod(texture_query_lod, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type),
-                _textureQueryLod(texture_query_lod, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type),
-                _textureQueryLod(texture_query_lod, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type),
-                _textureQueryLod(texture_query_lod, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("textureQueryLod",
-                _textureQueryLod(v400_fs_only, glsl_type::sampler1D_type, glsl_type::float_type),
-                _textureQueryLod(v400_fs_only, glsl_type::isampler1D_type, glsl_type::float_type),
-                _textureQueryLod(v400_fs_only, glsl_type::usampler1D_type, glsl_type::float_type),
-
-                _textureQueryLod(v400_fs_only, glsl_type::sampler2D_type, glsl_type::vec2_type),
-                _textureQueryLod(v400_fs_only, glsl_type::isampler2D_type, glsl_type::vec2_type),
-                _textureQueryLod(v400_fs_only, glsl_type::usampler2D_type, glsl_type::vec2_type),
-
-                _textureQueryLod(v400_fs_only, glsl_type::sampler3D_type, glsl_type::vec3_type),
-                _textureQueryLod(v400_fs_only, glsl_type::isampler3D_type, glsl_type::vec3_type),
-                _textureQueryLod(v400_fs_only, glsl_type::usampler3D_type, glsl_type::vec3_type),
-
-                _textureQueryLod(v400_fs_only, glsl_type::samplerCube_type, glsl_type::vec3_type),
-                _textureQueryLod(v400_fs_only, glsl_type::isamplerCube_type, glsl_type::vec3_type),
-                _textureQueryLod(v400_fs_only, glsl_type::usamplerCube_type, glsl_type::vec3_type),
-
-                _textureQueryLod(v400_fs_only, glsl_type::sampler1DArray_type, glsl_type::float_type),
-                _textureQueryLod(v400_fs_only, glsl_type::isampler1DArray_type, glsl_type::float_type),
-                _textureQueryLod(v400_fs_only, glsl_type::usampler1DArray_type, glsl_type::float_type),
-
-                _textureQueryLod(v400_fs_only, glsl_type::sampler2DArray_type, glsl_type::vec2_type),
-                _textureQueryLod(v400_fs_only, glsl_type::isampler2DArray_type, glsl_type::vec2_type),
-                _textureQueryLod(v400_fs_only, glsl_type::usampler2DArray_type, glsl_type::vec2_type),
-
-                _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArray_type, glsl_type::vec3_type),
-                _textureQueryLod(v400_fs_only, glsl_type::isamplerCubeArray_type, glsl_type::vec3_type),
-                _textureQueryLod(v400_fs_only, glsl_type::usamplerCubeArray_type, glsl_type::vec3_type),
-
-                _textureQueryLod(v400_fs_only, glsl_type::sampler1DShadow_type, glsl_type::float_type),
-                _textureQueryLod(v400_fs_only, glsl_type::sampler2DShadow_type, glsl_type::vec2_type),
-                _textureQueryLod(v400_fs_only, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type),
-                _textureQueryLod(v400_fs_only, glsl_type::sampler1DArrayShadow_type, glsl_type::float_type),
-                _textureQueryLod(v400_fs_only, glsl_type::sampler2DArrayShadow_type, glsl_type::vec2_type),
-                _textureQueryLod(v400_fs_only, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("textureQueryLevels",
-                _textureQueryLevels(glsl_type::sampler1D_type),
-                _textureQueryLevels(glsl_type::sampler2D_type),
-                _textureQueryLevels(glsl_type::sampler3D_type),
-                _textureQueryLevels(glsl_type::samplerCube_type),
-                _textureQueryLevels(glsl_type::sampler1DArray_type),
-                _textureQueryLevels(glsl_type::sampler2DArray_type),
-                _textureQueryLevels(glsl_type::samplerCubeArray_type),
-                _textureQueryLevels(glsl_type::sampler1DShadow_type),
-                _textureQueryLevels(glsl_type::sampler2DShadow_type),
-                _textureQueryLevels(glsl_type::samplerCubeShadow_type),
-                _textureQueryLevels(glsl_type::sampler1DArrayShadow_type),
-                _textureQueryLevels(glsl_type::sampler2DArrayShadow_type),
-                _textureQueryLevels(glsl_type::samplerCubeArrayShadow_type),
-
-                _textureQueryLevels(glsl_type::isampler1D_type),
-                _textureQueryLevels(glsl_type::isampler2D_type),
-                _textureQueryLevels(glsl_type::isampler3D_type),
-                _textureQueryLevels(glsl_type::isamplerCube_type),
-                _textureQueryLevels(glsl_type::isampler1DArray_type),
-                _textureQueryLevels(glsl_type::isampler2DArray_type),
-                _textureQueryLevels(glsl_type::isamplerCubeArray_type),
-
-                _textureQueryLevels(glsl_type::usampler1D_type),
-                _textureQueryLevels(glsl_type::usampler2D_type),
-                _textureQueryLevels(glsl_type::usampler3D_type),
-                _textureQueryLevels(glsl_type::usamplerCube_type),
-                _textureQueryLevels(glsl_type::usampler1DArray_type),
-                _textureQueryLevels(glsl_type::usampler2DArray_type),
-                _textureQueryLevels(glsl_type::usamplerCubeArray_type),
-
-                NULL);
-
-   add_function("textureSamplesIdenticalEXT",
-                _textureSamplesIdentical(texture_samples_identical, glsl_type::sampler2DMS_type, glsl_type::ivec2_type),
-                _textureSamplesIdentical(texture_samples_identical, glsl_type::isampler2DMS_type, glsl_type::ivec2_type),
-                _textureSamplesIdentical(texture_samples_identical, glsl_type::usampler2DMS_type, glsl_type::ivec2_type),
-
-                _textureSamplesIdentical(texture_samples_identical_array, glsl_type::sampler2DMSArray_type, glsl_type::ivec3_type),
-                _textureSamplesIdentical(texture_samples_identical_array, glsl_type::isampler2DMSArray_type, glsl_type::ivec3_type),
-                _textureSamplesIdentical(texture_samples_identical_array, glsl_type::usampler2DMSArray_type, glsl_type::ivec3_type),
-                NULL);
-
-   add_function("texture1D",
-                _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
-                _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
-                NULL);
-
-   add_function("texture1DArray",
-                _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type),
-                _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type),
-                NULL);
-
-   add_function("texture1DProj",
-                _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("texture1DLod",
-                _texture(ir_txl, tex1d_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
-                NULL);
-
-   add_function("texture1DArrayLod",
-                _texture(ir_txl, texture_array_lod, glsl_type::vec4_type, glsl_type::sampler1DArray_type, glsl_type::vec2_type),
-                NULL);
-
-   add_function("texture1DProjLod",
-                _texture(ir_txl, tex1d_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txl, tex1d_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("texture2D",
-                _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type),
-                _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type),
-                _texture(ir_tex, texture_external, glsl_type::vec4_type, glsl_type::samplerExternalOES_type, glsl_type::vec2_type),
-                NULL);
-
-   add_function("texture2DArray",
-                _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type),
-                _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("texture2DProj",
-                _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_tex, texture_external, glsl_type::vec4_type, glsl_type::samplerExternalOES_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_tex, texture_external, glsl_type::vec4_type, glsl_type::samplerExternalOES_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("texture2DLod",
-                _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type),
-                NULL);
-
-   add_function("texture2DArrayLod",
-                _texture(ir_txl, texture_array_lod, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("texture2DProjLod",
-                _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("texture3D",
-                _texture(ir_tex, tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type),
-                _texture(ir_txb, fs_tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("texture3DProj",
-                _texture(ir_tex, tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, fs_tex3d, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("texture3DLod",
-                _texture(ir_txl, tex3d_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("texture3DProjLod",
-                _texture(ir_txl, tex3d_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("textureCube",
-                _texture(ir_tex, always_available, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type),
-                _texture(ir_txb, fs_only, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("textureCubeLod",
-                _texture(ir_txl, lod_exists_in_stage, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("texture2DRect",
-                _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type),
-                NULL);
-
-   add_function("texture2DRectProj",
-                _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("shadow1D",
-                _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type),
-                _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow1DArray",
-                _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type),
-                _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow2D",
-                _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type),
-                _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow2DArray",
-                _texture(ir_tex, texture_array, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type),
-                _texture(ir_txb, fs_texture_array, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec4_type),
-                NULL);
-
-   add_function("shadow1DProj",
-                _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("shadow2DProj",
-                _texture(ir_tex, v110, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                _texture(ir_txb, v110_fs_only, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("shadow1DLod",
-                _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow2DLod",
-                _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow1DArrayLod",
-                _texture(ir_txl, texture_array_lod, glsl_type::vec4_type, glsl_type::sampler1DArrayShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow1DProjLod",
-                _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("shadow2DProjLod",
-                _texture(ir_txl, v110_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("shadow2DRect",
-                _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow2DRectProj",
-                _texture(ir_tex, texture_rectangle, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("texture1DGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::float_type),
-                NULL);
-
-   add_function("texture1DProjGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec2_type, TEX_PROJECT),
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1D_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("texture2DGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type),
-                NULL);
-
-   add_function("texture2DProjGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("texture3DGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("texture3DProjGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler3D_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("textureCubeGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow1DGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow1DProjGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler1DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("shadow2DGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow2DProjGradARB",
-                _texture(ir_txd, shader_texture_lod, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("texture2DRectGradARB",
-                _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type),
-                NULL);
-
-   add_function("texture2DRectProjGradARB",
-                _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec3_type, TEX_PROJECT),
-                _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("shadow2DRectGradARB",
-                _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec3_type),
-                NULL);
-
-   add_function("shadow2DRectProjGradARB",
-                _texture(ir_txd, shader_texture_lod_and_rect, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec4_type, TEX_PROJECT),
-                NULL);
-
-   add_function("textureGather",
-                _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type),
-                _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type),
-                _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type),
-
-                _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type),
-                _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type),
-                _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type),
-
-                _texture(ir_tg4, texture_gather_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type),
-                _texture(ir_tg4, texture_gather_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type),
-                _texture(ir_tg4, texture_gather_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type),
-
-                _texture(ir_tg4, texture_gather, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type),
-                _texture(ir_tg4, texture_gather, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type),
-                _texture(ir_tg4, texture_gather, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type),
-
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::samplerCube_type, glsl_type::vec3_type, TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::ivec4_type, glsl_type::isamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::uvec4_type, glsl_type::usamplerCube_type, glsl_type::vec3_type, TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usamplerCubeArray_type, glsl_type::vec4_type, TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type),
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type),
-                _texture(ir_tg4, gpu_shader5_or_es31, glsl_type::vec4_type, glsl_type::samplerCubeShadow_type, glsl_type::vec3_type),
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::samplerCubeArrayShadow_type, glsl_type::vec4_type),
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type),
-                NULL);
-
-   add_function("textureGatherOffset",
-                _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET),
-
-                _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-                _texture(ir_tg4, texture_gather_only_or_es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET),
-
-                _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT),
-                _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT),
-                _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET | TEX_COMPONENT),
-
-                _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT),
-                _texture(ir_tg4, es31, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT),
-                _texture(ir_tg4, es31, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET | TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST | TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST | TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST),
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_NONCONST),
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_NONCONST),
-
-                _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET),
-                _texture(ir_tg4, es31, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET),
-                NULL);
-
-   add_function("textureGatherOffsets",
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2D_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DArray_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY | TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::ivec4_type, glsl_type::isampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT),
-                _texture(ir_tg4, gpu_shader5, glsl_type::uvec4_type, glsl_type::usampler2DRect_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY | TEX_COMPONENT),
-
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY),
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DArrayShadow_type, glsl_type::vec3_type, TEX_OFFSET_ARRAY),
-                _texture(ir_tg4, gpu_shader5, glsl_type::vec4_type, glsl_type::sampler2DRectShadow_type, glsl_type::vec2_type, TEX_OFFSET_ARRAY),
-                NULL);
-
-   F(dFdx)
-   F(dFdy)
-   F(fwidth)
-   F(dFdxCoarse)
-   F(dFdyCoarse)
-   F(fwidthCoarse)
-   F(dFdxFine)
-   F(dFdyFine)
-   F(fwidthFine)
-   F(noise1)
-   F(noise2)
-   F(noise3)
-   F(noise4)
-
-   IU(bitfieldExtract)
-   IU(bitfieldInsert)
-   IU(bitfieldReverse)
-   IU(bitCount)
-   IU(findLSB)
-   IU(findMSB)
-   FDGS5(fma)
-
-   add_function("ldexp",
-                _ldexp(glsl_type::float_type, glsl_type::int_type),
-                _ldexp(glsl_type::vec2_type, glsl_type::ivec2_type),
-                _ldexp(glsl_type::vec3_type, glsl_type::ivec3_type),
-                _ldexp(glsl_type::vec4_type, glsl_type::ivec4_type),
-                _ldexp(glsl_type::double_type, glsl_type::int_type),
-                _ldexp(glsl_type::dvec2_type, glsl_type::ivec2_type),
-                _ldexp(glsl_type::dvec3_type, glsl_type::ivec3_type),
-                _ldexp(glsl_type::dvec4_type, glsl_type::ivec4_type),
-                NULL);
-
-   add_function("frexp",
-                _frexp(glsl_type::float_type, glsl_type::int_type),
-                _frexp(glsl_type::vec2_type, glsl_type::ivec2_type),
-                _frexp(glsl_type::vec3_type, glsl_type::ivec3_type),
-                _frexp(glsl_type::vec4_type, glsl_type::ivec4_type),
-                _dfrexp(glsl_type::double_type, glsl_type::int_type),
-                _dfrexp(glsl_type::dvec2_type, glsl_type::ivec2_type),
-                _dfrexp(glsl_type::dvec3_type, glsl_type::ivec3_type),
-                _dfrexp(glsl_type::dvec4_type, glsl_type::ivec4_type),
-                NULL);
-   add_function("uaddCarry",
-                _uaddCarry(glsl_type::uint_type),
-                _uaddCarry(glsl_type::uvec2_type),
-                _uaddCarry(glsl_type::uvec3_type),
-                _uaddCarry(glsl_type::uvec4_type),
-                NULL);
-   add_function("usubBorrow",
-                _usubBorrow(glsl_type::uint_type),
-                _usubBorrow(glsl_type::uvec2_type),
-                _usubBorrow(glsl_type::uvec3_type),
-                _usubBorrow(glsl_type::uvec4_type),
-                NULL);
-   add_function("imulExtended",
-                _mulExtended(glsl_type::int_type),
-                _mulExtended(glsl_type::ivec2_type),
-                _mulExtended(glsl_type::ivec3_type),
-                _mulExtended(glsl_type::ivec4_type),
-                NULL);
-   add_function("umulExtended",
-                _mulExtended(glsl_type::uint_type),
-                _mulExtended(glsl_type::uvec2_type),
-                _mulExtended(glsl_type::uvec3_type),
-                _mulExtended(glsl_type::uvec4_type),
-                NULL);
-   add_function("interpolateAtCentroid",
-                _interpolateAtCentroid(glsl_type::float_type),
-                _interpolateAtCentroid(glsl_type::vec2_type),
-                _interpolateAtCentroid(glsl_type::vec3_type),
-                _interpolateAtCentroid(glsl_type::vec4_type),
-                NULL);
-   add_function("interpolateAtOffset",
-                _interpolateAtOffset(glsl_type::float_type),
-                _interpolateAtOffset(glsl_type::vec2_type),
-                _interpolateAtOffset(glsl_type::vec3_type),
-                _interpolateAtOffset(glsl_type::vec4_type),
-                NULL);
-   add_function("interpolateAtSample",
-                _interpolateAtSample(glsl_type::float_type),
-                _interpolateAtSample(glsl_type::vec2_type),
-                _interpolateAtSample(glsl_type::vec3_type),
-
_interpolateAtSample(glsl_type::vec4_type), - NULL); - - add_function("atomicCounter", - _atomic_counter_op("__intrinsic_atomic_read", - shader_atomic_counters), - NULL); - add_function("atomicCounterIncrement", - _atomic_counter_op("__intrinsic_atomic_increment", - shader_atomic_counters), - NULL); - add_function("atomicCounterDecrement", - _atomic_counter_op("__intrinsic_atomic_predecrement", - shader_atomic_counters), - NULL); - - add_function("atomicAdd", - _atomic_op2("__intrinsic_atomic_add", - buffer_atomics_supported, - glsl_type::uint_type), - _atomic_op2("__intrinsic_atomic_add", - buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("atomicMin", - _atomic_op2("__intrinsic_atomic_min", - buffer_atomics_supported, - glsl_type::uint_type), - _atomic_op2("__intrinsic_atomic_min", - buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("atomicMax", - _atomic_op2("__intrinsic_atomic_max", - buffer_atomics_supported, - glsl_type::uint_type), - _atomic_op2("__intrinsic_atomic_max", - buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("atomicAnd", - _atomic_op2("__intrinsic_atomic_and", - buffer_atomics_supported, - glsl_type::uint_type), - _atomic_op2("__intrinsic_atomic_and", - buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("atomicOr", - _atomic_op2("__intrinsic_atomic_or", - buffer_atomics_supported, - glsl_type::uint_type), - _atomic_op2("__intrinsic_atomic_or", - buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("atomicXor", - _atomic_op2("__intrinsic_atomic_xor", - buffer_atomics_supported, - glsl_type::uint_type), - _atomic_op2("__intrinsic_atomic_xor", - buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("atomicExchange", - _atomic_op2("__intrinsic_atomic_exchange", - buffer_atomics_supported, - glsl_type::uint_type), - _atomic_op2("__intrinsic_atomic_exchange", - buffer_atomics_supported, - glsl_type::int_type), - NULL); - add_function("atomicCompSwap", - _atomic_op3("__intrinsic_atomic_comp_swap", - buffer_atomics_supported, - glsl_type::uint_type), - _atomic_op3("__intrinsic_atomic_comp_swap", - buffer_atomics_supported, - glsl_type::int_type), - NULL); - - add_function("min3", - _min3(glsl_type::float_type), - _min3(glsl_type::vec2_type), - _min3(glsl_type::vec3_type), - _min3(glsl_type::vec4_type), - - _min3(glsl_type::int_type), - _min3(glsl_type::ivec2_type), - _min3(glsl_type::ivec3_type), - _min3(glsl_type::ivec4_type), - - _min3(glsl_type::uint_type), - _min3(glsl_type::uvec2_type), - _min3(glsl_type::uvec3_type), - _min3(glsl_type::uvec4_type), - NULL); - - add_function("max3", - _max3(glsl_type::float_type), - _max3(glsl_type::vec2_type), - _max3(glsl_type::vec3_type), - _max3(glsl_type::vec4_type), - - _max3(glsl_type::int_type), - _max3(glsl_type::ivec2_type), - _max3(glsl_type::ivec3_type), - _max3(glsl_type::ivec4_type), - - _max3(glsl_type::uint_type), - _max3(glsl_type::uvec2_type), - _max3(glsl_type::uvec3_type), - _max3(glsl_type::uvec4_type), - NULL); - - add_function("mid3", - _mid3(glsl_type::float_type), - _mid3(glsl_type::vec2_type), - _mid3(glsl_type::vec3_type), - _mid3(glsl_type::vec4_type), - - _mid3(glsl_type::int_type), - _mid3(glsl_type::ivec2_type), - _mid3(glsl_type::ivec3_type), - _mid3(glsl_type::ivec4_type), - - _mid3(glsl_type::uint_type), - _mid3(glsl_type::uvec2_type), - _mid3(glsl_type::uvec3_type), - _mid3(glsl_type::uvec4_type), - NULL); - - add_image_functions(true); - - add_function("memoryBarrier", 
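
As a quick reference for the atomic pattern registered above: each GLSL atomic built-in maps onto one named intrinsic, with the uint and int overloads sharing a builder (_atomic_op2 for the two-operand forms, _atomic_op3 for atomicCompSwap). A hedged shader-side sketch (the SSBO member `counter` is illustrative):

   uint prev = atomicAdd(counter, 1u);            // lowered via __intrinsic_atomic_add
   uint old  = atomicCompSwap(counter, 0u, 5u);   // three-operand form, __intrinsic_atomic_comp_swap
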
- _memory_barrier("__intrinsic_memory_barrier", - shader_image_load_store), - NULL); - add_function("groupMemoryBarrier", - _memory_barrier("__intrinsic_group_memory_barrier", - compute_shader), - NULL); - add_function("memoryBarrierAtomicCounter", - _memory_barrier("__intrinsic_memory_barrier_atomic_counter", - compute_shader), - NULL); - add_function("memoryBarrierBuffer", - _memory_barrier("__intrinsic_memory_barrier_buffer", - compute_shader), - NULL); - add_function("memoryBarrierImage", - _memory_barrier("__intrinsic_memory_barrier_image", - compute_shader), - NULL); - add_function("memoryBarrierShared", - _memory_barrier("__intrinsic_memory_barrier_shared", - compute_shader), - NULL); - - add_function("clock2x32ARB", - _shader_clock(shader_clock, - glsl_type::uvec2_type), - NULL); - -#undef F -#undef FI -#undef FIUD -#undef FIUBD -#undef FIU2_MIXED -} - -void -builtin_builder::add_function(const char *name, ...) -{ - va_list ap; - - ir_function *f = new(mem_ctx) ir_function(name); - - va_start(ap, name); - while (true) { - ir_function_signature *sig = va_arg(ap, ir_function_signature *); - if (sig == NULL) - break; - - if (false) { - exec_list stuff; - stuff.push_tail(sig); - validate_ir_tree(&stuff); - } - - f->add_signature(sig); - } - va_end(ap); - - shader->symbols->add_function(f); -} - -void -builtin_builder::add_image_function(const char *name, - const char *intrinsic_name, - image_prototype_ctr prototype, - unsigned num_arguments, - unsigned flags) -{ - static const glsl_type *const types[] = { - glsl_type::image1D_type, - glsl_type::image2D_type, - glsl_type::image3D_type, - glsl_type::image2DRect_type, - glsl_type::imageCube_type, - glsl_type::imageBuffer_type, - glsl_type::image1DArray_type, - glsl_type::image2DArray_type, - glsl_type::imageCubeArray_type, - glsl_type::image2DMS_type, - glsl_type::image2DMSArray_type, - glsl_type::iimage1D_type, - glsl_type::iimage2D_type, - glsl_type::iimage3D_type, - glsl_type::iimage2DRect_type, - glsl_type::iimageCube_type, - glsl_type::iimageBuffer_type, - glsl_type::iimage1DArray_type, - glsl_type::iimage2DArray_type, - glsl_type::iimageCubeArray_type, - glsl_type::iimage2DMS_type, - glsl_type::iimage2DMSArray_type, - glsl_type::uimage1D_type, - glsl_type::uimage2D_type, - glsl_type::uimage3D_type, - glsl_type::uimage2DRect_type, - glsl_type::uimageCube_type, - glsl_type::uimageBuffer_type, - glsl_type::uimage1DArray_type, - glsl_type::uimage2DArray_type, - glsl_type::uimageCubeArray_type, - glsl_type::uimage2DMS_type, - glsl_type::uimage2DMSArray_type - }; - - ir_function *f = new(mem_ctx) ir_function(name); - - for (unsigned i = 0; i < ARRAY_SIZE(types); ++i) { - if ((types[i]->sampler_type != GLSL_TYPE_FLOAT || - (flags & IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE)) && - (types[i]->sampler_dimensionality == GLSL_SAMPLER_DIM_MS || - !(flags & IMAGE_FUNCTION_MS_ONLY))) - f->add_signature(_image(prototype, types[i], intrinsic_name, - num_arguments, flags)); - } - - shader->symbols->add_function(f); -} - -void -builtin_builder::add_image_functions(bool glsl) -{ - const unsigned flags = (glsl ? IMAGE_FUNCTION_EMIT_STUB : 0); - - add_image_function(glsl ? "imageLoad" : "__intrinsic_image_load", - "__intrinsic_image_load", - &builtin_builder::_image_prototype, 0, - (flags | IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | - IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | - IMAGE_FUNCTION_READ_ONLY)); - - add_image_function(glsl ? 
"imageStore" : "__intrinsic_image_store", - "__intrinsic_image_store", - &builtin_builder::_image_prototype, 1, - (flags | IMAGE_FUNCTION_RETURNS_VOID | - IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE | - IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | - IMAGE_FUNCTION_WRITE_ONLY)); - - const unsigned atom_flags = flags | IMAGE_FUNCTION_AVAIL_ATOMIC; - - add_image_function(glsl ? "imageAtomicAdd" : "__intrinsic_image_atomic_add", - "__intrinsic_image_atomic_add", - &builtin_builder::_image_prototype, 1, atom_flags); - - add_image_function(glsl ? "imageAtomicMin" : "__intrinsic_image_atomic_min", - "__intrinsic_image_atomic_min", - &builtin_builder::_image_prototype, 1, atom_flags); - - add_image_function(glsl ? "imageAtomicMax" : "__intrinsic_image_atomic_max", - "__intrinsic_image_atomic_max", - &builtin_builder::_image_prototype, 1, atom_flags); - - add_image_function(glsl ? "imageAtomicAnd" : "__intrinsic_image_atomic_and", - "__intrinsic_image_atomic_and", - &builtin_builder::_image_prototype, 1, atom_flags); - - add_image_function(glsl ? "imageAtomicOr" : "__intrinsic_image_atomic_or", - "__intrinsic_image_atomic_or", - &builtin_builder::_image_prototype, 1, atom_flags); - - add_image_function(glsl ? "imageAtomicXor" : "__intrinsic_image_atomic_xor", - "__intrinsic_image_atomic_xor", - &builtin_builder::_image_prototype, 1, atom_flags); - - add_image_function((glsl ? "imageAtomicExchange" : - "__intrinsic_image_atomic_exchange"), - "__intrinsic_image_atomic_exchange", - &builtin_builder::_image_prototype, 1, atom_flags); - - add_image_function((glsl ? "imageAtomicCompSwap" : - "__intrinsic_image_atomic_comp_swap"), - "__intrinsic_image_atomic_comp_swap", - &builtin_builder::_image_prototype, 2, atom_flags); - - add_image_function(glsl ? "imageSize" : "__intrinsic_image_size", - "__intrinsic_image_size", - &builtin_builder::_image_size_prototype, 1, - flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE); - - add_image_function(glsl ? "imageSamples" : "__intrinsic_image_samples", - "__intrinsic_image_samples", - &builtin_builder::_image_samples_prototype, 1, - flags | IMAGE_FUNCTION_SUPPORTS_FLOAT_DATA_TYPE | - IMAGE_FUNCTION_MS_ONLY); -} - -ir_variable * -builtin_builder::in_var(const glsl_type *type, const char *name) -{ - return new(mem_ctx) ir_variable(type, name, ir_var_function_in); -} - -ir_variable * -builtin_builder::out_var(const glsl_type *type, const char *name) -{ - return new(mem_ctx) ir_variable(type, name, ir_var_function_out); -} - -ir_constant * -builtin_builder::imm(bool b, unsigned vector_elements) -{ - return new(mem_ctx) ir_constant(b, vector_elements); -} - -ir_constant * -builtin_builder::imm(float f, unsigned vector_elements) -{ - return new(mem_ctx) ir_constant(f, vector_elements); -} - -ir_constant * -builtin_builder::imm(int i, unsigned vector_elements) -{ - return new(mem_ctx) ir_constant(i, vector_elements); -} - -ir_constant * -builtin_builder::imm(unsigned u, unsigned vector_elements) -{ - return new(mem_ctx) ir_constant(u, vector_elements); -} - -ir_constant * -builtin_builder::imm(double d, unsigned vector_elements) -{ - return new(mem_ctx) ir_constant(d, vector_elements); -} - -ir_constant * -builtin_builder::imm(const glsl_type *type, const ir_constant_data &data) -{ - return new(mem_ctx) ir_constant(type, &data); -} - -#define IMM_FP(type, val) (type->base_type == GLSL_TYPE_DOUBLE) ? 
imm(val) : imm((float)val) - -ir_dereference_variable * -builtin_builder::var_ref(ir_variable *var) -{ - return new(mem_ctx) ir_dereference_variable(var); -} - -ir_dereference_array * -builtin_builder::array_ref(ir_variable *var, int idx) -{ - return new(mem_ctx) ir_dereference_array(var, imm(idx)); -} - -/** Return an element of a matrix */ -ir_swizzle * -builtin_builder::matrix_elt(ir_variable *var, int column, int row) -{ - return swizzle(array_ref(var, column), row, 1); -} - -/** - * Implementations of built-in functions: - * @{ - */ -ir_function_signature * -builtin_builder::new_sig(const glsl_type *return_type, - builtin_available_predicate avail, - int num_params, - ...) -{ - va_list ap; - - ir_function_signature *sig = - new(mem_ctx) ir_function_signature(return_type, avail); - - exec_list plist; - va_start(ap, num_params); - for (int i = 0; i < num_params; i++) { - plist.push_tail(va_arg(ap, ir_variable *)); - } - va_end(ap); - - sig->replace_parameters(&plist); - return sig; -} - -#define MAKE_SIG(return_type, avail, ...) \ - ir_function_signature *sig = \ - new_sig(return_type, avail, __VA_ARGS__); \ - ir_factory body(&sig->body, mem_ctx); \ - sig->is_defined = true; - -#define MAKE_INTRINSIC(return_type, avail, ...) \ - ir_function_signature *sig = \ - new_sig(return_type, avail, __VA_ARGS__); \ - sig->is_intrinsic = true; - -ir_function_signature * -builtin_builder::unop(builtin_available_predicate avail, - ir_expression_operation opcode, - const glsl_type *return_type, - const glsl_type *param_type) -{ - ir_variable *x = in_var(param_type, "x"); - MAKE_SIG(return_type, avail, 1, x); - body.emit(ret(expr(opcode, x))); - return sig; -} - -#define UNOP(NAME, OPCODE, AVAIL) \ -ir_function_signature * \ -builtin_builder::_##NAME(const glsl_type *type) \ -{ \ - return unop(&AVAIL, OPCODE, type, type); \ -} - -#define UNOPA(NAME, OPCODE) \ -ir_function_signature * \ -builtin_builder::_##NAME(builtin_available_predicate avail, const glsl_type *type) \ -{ \ - return unop(avail, OPCODE, type, type); \ -} - -ir_function_signature * -builtin_builder::binop(builtin_available_predicate avail, - ir_expression_operation opcode, - const glsl_type *return_type, - const glsl_type *param0_type, - const glsl_type *param1_type) -{ - ir_variable *x = in_var(param0_type, "x"); - ir_variable *y = in_var(param1_type, "y"); - MAKE_SIG(return_type, avail, 2, x, y); - body.emit(ret(expr(opcode, x, y))); - return sig; -} - -#define BINOP(NAME, OPCODE, AVAIL) \ -ir_function_signature * \ -builtin_builder::_##NAME(const glsl_type *return_type, \ - const glsl_type *param0_type, \ - const glsl_type *param1_type) \ -{ \ - return binop(&AVAIL, OPCODE, return_type, param0_type, param1_type); \ -} - -/** - * Angle and Trigonometry Functions @{ - */ - -ir_function_signature * -builtin_builder::_radians(const glsl_type *type) -{ - ir_variable *degrees = in_var(type, "degrees"); - MAKE_SIG(type, always_available, 1, degrees); - body.emit(ret(mul(degrees, imm(0.0174532925f)))); - return sig; -} - -ir_function_signature * -builtin_builder::_degrees(const glsl_type *type) -{ - ir_variable *radians = in_var(type, "radians"); - MAKE_SIG(type, always_available, 1, radians); - body.emit(ret(mul(radians, imm(57.29578f)))); - return sig; -} - -UNOP(sin, ir_unop_sin, always_available) -UNOP(cos, ir_unop_cos, always_available) - -ir_function_signature * -builtin_builder::_tan(const glsl_type *type) -{ - ir_variable *theta = in_var(type, "theta"); - MAKE_SIG(type, always_available, 1, theta); - body.emit(ret(div(sin(theta), 
cos(theta)))); - return sig; -} - -ir_expression * -builtin_builder::asin_expr(ir_variable *x) -{ - return mul(sign(x), - sub(imm(M_PI_2f), - mul(sqrt(sub(imm(1.0f), abs(x))), - add(imm(M_PI_2f), - mul(abs(x), - add(imm(M_PI_4f - 1.0f), - mul(abs(x), - add(imm(0.086566724f), - mul(abs(x), imm(-0.03102955f)))))))))); -} - -ir_call * -builtin_builder::call(ir_function *f, ir_variable *ret, exec_list params) -{ - exec_list actual_params; - - foreach_in_list(ir_variable, var, ¶ms) { - actual_params.push_tail(var_ref(var)); - } - - ir_function_signature *sig = - f->exact_matching_signature(NULL, &actual_params); - if (!sig) - return NULL; - - ir_dereference_variable *deref = - (sig->return_type->is_void() ? NULL : var_ref(ret)); - - return new(mem_ctx) ir_call(sig, deref, &actual_params); -} - -ir_function_signature * -builtin_builder::_asin(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, always_available, 1, x); - - body.emit(ret(asin_expr(x))); - - return sig; -} - -ir_function_signature * -builtin_builder::_acos(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, always_available, 1, x); - - body.emit(ret(sub(imm(M_PI_2f), asin_expr(x)))); - - return sig; -} - -ir_function_signature * -builtin_builder::_atan2(const glsl_type *type) -{ - ir_variable *vec_y = in_var(type, "vec_y"); - ir_variable *vec_x = in_var(type, "vec_x"); - MAKE_SIG(type, always_available, 2, vec_y, vec_x); - - ir_variable *vec_result = body.make_temp(type, "vec_result"); - ir_variable *r = body.make_temp(glsl_type::float_type, "r"); - for (int i = 0; i < type->vector_elements; i++) { - ir_variable *y = body.make_temp(glsl_type::float_type, "y"); - ir_variable *x = body.make_temp(glsl_type::float_type, "x"); - body.emit(assign(y, swizzle(vec_y, i, 1))); - body.emit(assign(x, swizzle(vec_x, i, 1))); - - /* If |x| >= 1.0e-8 * |y|: */ - ir_if *outer_if = - new(mem_ctx) ir_if(greater(abs(x), mul(imm(1.0e-8f), abs(y)))); - - ir_factory outer_then(&outer_if->then_instructions, mem_ctx); - - /* Then...call atan(y/x) */ - do_atan(outer_then, glsl_type::float_type, r, div(y, x)); - - /* ...and fix it up: */ - ir_if *inner_if = new(mem_ctx) ir_if(less(x, imm(0.0f))); - inner_if->then_instructions.push_tail( - if_tree(gequal(y, imm(0.0f)), - assign(r, add(r, imm(M_PIf))), - assign(r, sub(r, imm(M_PIf))))); - outer_then.emit(inner_if); - - /* Else... 
*/ - outer_if->else_instructions.push_tail( - assign(r, mul(sign(y), imm(M_PI_2f)))); - - body.emit(outer_if); - - body.emit(assign(vec_result, r, 1 << i)); - } - body.emit(ret(vec_result)); - - return sig; -} - -void -builtin_builder::do_atan(ir_factory &body, const glsl_type *type, ir_variable *res, operand y_over_x) -{ - /* - * range-reduction, first step: - * - * / y_over_x if |y_over_x| <= 1.0; - * x = < - * \ 1.0 / y_over_x otherwise - */ - ir_variable *x = body.make_temp(type, "atan_x"); - body.emit(assign(x, div(min2(abs(y_over_x), - imm(1.0f)), - max2(abs(y_over_x), - imm(1.0f))))); - - /* - * approximate atan by evaluating polynomial: - * - * x * 0.9999793128310355 - x^3 * 0.3326756418091246 + - * x^5 * 0.1938924977115610 - x^7 * 0.1173503194786851 + - * x^9 * 0.0536813784310406 - x^11 * 0.0121323213173444 - */ - ir_variable *tmp = body.make_temp(type, "atan_tmp"); - body.emit(assign(tmp, mul(x, x))); - body.emit(assign(tmp, mul(add(mul(sub(mul(add(mul(sub(mul(add(mul(imm(-0.0121323213173444f), - tmp), - imm(0.0536813784310406f)), - tmp), - imm(0.1173503194786851f)), - tmp), - imm(0.1938924977115610f)), - tmp), - imm(0.3326756418091246f)), - tmp), - imm(0.9999793128310355f)), - x))); - - /* range-reduction fixup */ - body.emit(assign(tmp, add(tmp, - mul(b2f(greater(abs(y_over_x), - imm(1.0f, type->components()))), - add(mul(tmp, - imm(-2.0f)), - imm(M_PI_2f)))))); - - /* sign fixup */ - body.emit(assign(res, mul(tmp, sign(y_over_x)))); -} - -ir_function_signature * -builtin_builder::_atan(const glsl_type *type) -{ - ir_variable *y_over_x = in_var(type, "y_over_x"); - MAKE_SIG(type, always_available, 1, y_over_x); - - ir_variable *tmp = body.make_temp(type, "tmp"); - do_atan(body, type, tmp, y_over_x); - body.emit(ret(tmp)); - - return sig; -} - -ir_function_signature * -builtin_builder::_sinh(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, v130, 1, x); - - /* 0.5 * (e^x - e^(-x)) */ - body.emit(ret(mul(imm(0.5f), sub(exp(x), exp(neg(x)))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_cosh(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, v130, 1, x); - - /* 0.5 * (e^x + e^(-x)) */ - body.emit(ret(mul(imm(0.5f), add(exp(x), exp(neg(x)))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_tanh(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, v130, 1, x); - - /* (e^x - e^(-x)) / (e^x + e^(-x)) */ - body.emit(ret(div(sub(exp(x), exp(neg(x))), - add(exp(x), exp(neg(x)))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_asinh(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, v130, 1, x); - - body.emit(ret(mul(sign(x), log(add(abs(x), sqrt(add(mul(x, x), - imm(1.0f)))))))); - return sig; -} - -ir_function_signature * -builtin_builder::_acosh(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, v130, 1, x); - - body.emit(ret(log(add(x, sqrt(sub(mul(x, x), imm(1.0f))))))); - return sig; -} - -ir_function_signature * -builtin_builder::_atanh(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, v130, 1, x); - - body.emit(ret(mul(imm(0.5f), log(div(add(imm(1.0f), x), - sub(imm(1.0f), x)))))); - return sig; -} -/** @} */ - -/** - * Exponential Functions @{ - */ - -ir_function_signature * -builtin_builder::_pow(const glsl_type *type) -{ - return binop(always_available, ir_binop_pow, type, type, type); -} - -UNOP(exp, ir_unop_exp, always_available) 
-UNOP(log, ir_unop_log, always_available)
-UNOP(exp2, ir_unop_exp2, always_available)
-UNOP(log2, ir_unop_log2, always_available)
-UNOPA(sqrt, ir_unop_sqrt)
-UNOPA(inversesqrt, ir_unop_rsq)
-
-/** @} */
-
-UNOPA(abs, ir_unop_abs)
-UNOPA(sign, ir_unop_sign)
-UNOPA(floor, ir_unop_floor)
-UNOPA(trunc, ir_unop_trunc)
-UNOPA(round, ir_unop_round_even)
-UNOPA(roundEven, ir_unop_round_even)
-UNOPA(ceil, ir_unop_ceil)
-UNOPA(fract, ir_unop_fract)
-
-ir_function_signature *
-builtin_builder::_mod(const glsl_type *x_type, const glsl_type *y_type)
-{
-   return binop(always_available, ir_binop_mod, x_type, x_type, y_type);
-}
-
-ir_function_signature *
-builtin_builder::_modf(builtin_available_predicate avail, const glsl_type *type)
-{
-   ir_variable *x = in_var(type, "x");
-   ir_variable *i = out_var(type, "i");
-   MAKE_SIG(type, avail, 2, x, i);
-
-   ir_variable *t = body.make_temp(type, "t");
-   body.emit(assign(t, expr(ir_unop_trunc, x)));
-   body.emit(assign(i, t));
-   body.emit(ret(sub(x, t)));
-
-   return sig;
-}
-
-ir_function_signature *
-builtin_builder::_min(builtin_available_predicate avail,
-                      const glsl_type *x_type, const glsl_type *y_type)
-{
-   return binop(avail, ir_binop_min, x_type, x_type, y_type);
-}
-
-ir_function_signature *
-builtin_builder::_max(builtin_available_predicate avail,
-                      const glsl_type *x_type, const glsl_type *y_type)
-{
-   return binop(avail, ir_binop_max, x_type, x_type, y_type);
-}
-
-ir_function_signature *
-builtin_builder::_clamp(builtin_available_predicate avail,
-                        const glsl_type *val_type, const glsl_type *bound_type)
-{
-   ir_variable *x = in_var(val_type, "x");
-   ir_variable *minVal = in_var(bound_type, "minVal");
-   ir_variable *maxVal = in_var(bound_type, "maxVal");
-   MAKE_SIG(val_type, avail, 3, x, minVal, maxVal);
-
-   body.emit(ret(clamp(x, minVal, maxVal)));
-
-   return sig;
-}
-
-ir_function_signature *
-builtin_builder::_mix_lrp(builtin_available_predicate avail, const glsl_type *val_type, const glsl_type *blend_type)
-{
-   ir_variable *x = in_var(val_type, "x");
-   ir_variable *y = in_var(val_type, "y");
-   ir_variable *a = in_var(blend_type, "a");
-   MAKE_SIG(val_type, avail, 3, x, y, a);
-
-   body.emit(ret(lrp(x, y, a)));
-
-   return sig;
-}
-
-ir_function_signature *
-builtin_builder::_mix_sel(builtin_available_predicate avail,
-                          const glsl_type *val_type,
-                          const glsl_type *blend_type)
-{
-   ir_variable *x = in_var(val_type, "x");
-   ir_variable *y = in_var(val_type, "y");
-   ir_variable *a = in_var(blend_type, "a");
-   MAKE_SIG(val_type, avail, 3, x, y, a);
-
-   /* csel matches the ternary operator in that a selector of true chooses the
-    * first argument. This differs from mix(x, y, false) which chooses the
-    * second argument (to remain consistent with the interpolating version of
-    * mix() which takes a blend factor from 0.0 to 1.0 where 0.0 is only x).
-    *
-    * To handle the behavior mismatch, reverse the x and y arguments.
- */ - body.emit(ret(csel(a, y, x))); - - return sig; -} - -ir_function_signature * -builtin_builder::_step(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type) -{ - ir_variable *edge = in_var(edge_type, "edge"); - ir_variable *x = in_var(x_type, "x"); - MAKE_SIG(x_type, avail, 2, edge, x); - - ir_variable *t = body.make_temp(x_type, "t"); - if (x_type->vector_elements == 1) { - /* Both are floats */ - if (edge_type->base_type == GLSL_TYPE_DOUBLE) - body.emit(assign(t, f2d(b2f(gequal(x, edge))))); - else - body.emit(assign(t, b2f(gequal(x, edge)))); - } else if (edge_type->vector_elements == 1) { - /* x is a vector but edge is a float */ - for (int i = 0; i < x_type->vector_elements; i++) { - if (edge_type->base_type == GLSL_TYPE_DOUBLE) - body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), edge))), 1 << i)); - else - body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), edge)), 1 << i)); - } - } else { - /* Both are vectors */ - for (int i = 0; i < x_type->vector_elements; i++) { - if (edge_type->base_type == GLSL_TYPE_DOUBLE) - body.emit(assign(t, f2d(b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1)))), - 1 << i)); - else - body.emit(assign(t, b2f(gequal(swizzle(x, i, 1), swizzle(edge, i, 1))), - 1 << i)); - - } - } - body.emit(ret(t)); - - return sig; -} - -ir_function_signature * -builtin_builder::_smoothstep(builtin_available_predicate avail, const glsl_type *edge_type, const glsl_type *x_type) -{ - ir_variable *edge0 = in_var(edge_type, "edge0"); - ir_variable *edge1 = in_var(edge_type, "edge1"); - ir_variable *x = in_var(x_type, "x"); - MAKE_SIG(x_type, avail, 3, edge0, edge1, x); - - /* From the GLSL 1.10 specification: - * - * genType t; - * t = clamp((x - edge0) / (edge1 - edge0), 0, 1); - * return t * t * (3 - 2 * t); - */ - - ir_variable *t = body.make_temp(x_type, "t"); - body.emit(assign(t, clamp(div(sub(x, edge0), sub(edge1, edge0)), - IMM_FP(x_type, 0.0), IMM_FP(x_type, 1.0)))); - - body.emit(ret(mul(t, mul(t, sub(IMM_FP(x_type, 3.0), mul(IMM_FP(x_type, 2.0), t)))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_isnan(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); - - body.emit(ret(nequal(x, x))); - - return sig; -} - -ir_function_signature * -builtin_builder::_isinf(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::bvec(type->vector_elements), avail, 1, x); - - ir_constant_data infinities; - for (int i = 0; i < type->vector_elements; i++) { - switch (type->base_type) { - case GLSL_TYPE_FLOAT: - infinities.f[i] = INFINITY; - break; - case GLSL_TYPE_DOUBLE: - infinities.d[i] = INFINITY; - break; - default: - unreachable("unknown type"); - } - } - - body.emit(ret(equal(abs(x), imm(type, infinities)))); - - return sig; -} - -ir_function_signature * -builtin_builder::_floatBitsToInt(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::ivec(type->vector_elements), shader_bit_encoding, 1, x); - body.emit(ret(bitcast_f2i(x))); - return sig; -} - -ir_function_signature * -builtin_builder::_floatBitsToUint(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::uvec(type->vector_elements), shader_bit_encoding, 1, x); - body.emit(ret(bitcast_f2u(x))); - return sig; -} - -ir_function_signature * -builtin_builder::_intBitsToFloat(const glsl_type *type) -{ - ir_variable *x = in_var(type, 
"x"); - MAKE_SIG(glsl_type::vec(type->vector_elements), shader_bit_encoding, 1, x); - body.emit(ret(bitcast_i2f(x))); - return sig; -} - -ir_function_signature * -builtin_builder::_uintBitsToFloat(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(glsl_type::vec(type->vector_elements), shader_bit_encoding, 1, x); - body.emit(ret(bitcast_u2f(x))); - return sig; -} - -ir_function_signature * -builtin_builder::_packUnorm2x16(builtin_available_predicate avail) -{ - ir_variable *v = in_var(glsl_type::vec2_type, "v"); - MAKE_SIG(glsl_type::uint_type, avail, 1, v); - body.emit(ret(expr(ir_unop_pack_unorm_2x16, v))); - return sig; -} - -ir_function_signature * -builtin_builder::_packSnorm2x16(builtin_available_predicate avail) -{ - ir_variable *v = in_var(glsl_type::vec2_type, "v"); - MAKE_SIG(glsl_type::uint_type, avail, 1, v); - body.emit(ret(expr(ir_unop_pack_snorm_2x16, v))); - return sig; -} - -ir_function_signature * -builtin_builder::_packUnorm4x8(builtin_available_predicate avail) -{ - ir_variable *v = in_var(glsl_type::vec4_type, "v"); - MAKE_SIG(glsl_type::uint_type, avail, 1, v); - body.emit(ret(expr(ir_unop_pack_unorm_4x8, v))); - return sig; -} - -ir_function_signature * -builtin_builder::_packSnorm4x8(builtin_available_predicate avail) -{ - ir_variable *v = in_var(glsl_type::vec4_type, "v"); - MAKE_SIG(glsl_type::uint_type, avail, 1, v); - body.emit(ret(expr(ir_unop_pack_snorm_4x8, v))); - return sig; -} - -ir_function_signature * -builtin_builder::_unpackUnorm2x16(builtin_available_predicate avail) -{ - ir_variable *p = in_var(glsl_type::uint_type, "p"); - MAKE_SIG(glsl_type::vec2_type, avail, 1, p); - body.emit(ret(expr(ir_unop_unpack_unorm_2x16, p))); - return sig; -} - -ir_function_signature * -builtin_builder::_unpackSnorm2x16(builtin_available_predicate avail) -{ - ir_variable *p = in_var(glsl_type::uint_type, "p"); - MAKE_SIG(glsl_type::vec2_type, avail, 1, p); - body.emit(ret(expr(ir_unop_unpack_snorm_2x16, p))); - return sig; -} - - -ir_function_signature * -builtin_builder::_unpackUnorm4x8(builtin_available_predicate avail) -{ - ir_variable *p = in_var(glsl_type::uint_type, "p"); - MAKE_SIG(glsl_type::vec4_type, avail, 1, p); - body.emit(ret(expr(ir_unop_unpack_unorm_4x8, p))); - return sig; -} - -ir_function_signature * -builtin_builder::_unpackSnorm4x8(builtin_available_predicate avail) -{ - ir_variable *p = in_var(glsl_type::uint_type, "p"); - MAKE_SIG(glsl_type::vec4_type, avail, 1, p); - body.emit(ret(expr(ir_unop_unpack_snorm_4x8, p))); - return sig; -} - -ir_function_signature * -builtin_builder::_packHalf2x16(builtin_available_predicate avail) -{ - ir_variable *v = in_var(glsl_type::vec2_type, "v"); - MAKE_SIG(glsl_type::uint_type, avail, 1, v); - body.emit(ret(expr(ir_unop_pack_half_2x16, v))); - return sig; -} - -ir_function_signature * -builtin_builder::_unpackHalf2x16(builtin_available_predicate avail) -{ - ir_variable *p = in_var(glsl_type::uint_type, "p"); - MAKE_SIG(glsl_type::vec2_type, avail, 1, p); - body.emit(ret(expr(ir_unop_unpack_half_2x16, p))); - return sig; -} - -ir_function_signature * -builtin_builder::_packDouble2x32(builtin_available_predicate avail) -{ - ir_variable *v = in_var(glsl_type::uvec2_type, "v"); - MAKE_SIG(glsl_type::double_type, avail, 1, v); - body.emit(ret(expr(ir_unop_pack_double_2x32, v))); - return sig; -} - -ir_function_signature * -builtin_builder::_unpackDouble2x32(builtin_available_predicate avail) -{ - ir_variable *p = in_var(glsl_type::double_type, "p"); - MAKE_SIG(glsl_type::uvec2_type, avail, 1, 
p); - body.emit(ret(expr(ir_unop_unpack_double_2x32, p))); - return sig; -} - -ir_function_signature * -builtin_builder::_length(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type->get_base_type(), avail, 1, x); - - body.emit(ret(sqrt(dot(x, x)))); - - return sig; -} - -ir_function_signature * -builtin_builder::_distance(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *p0 = in_var(type, "p0"); - ir_variable *p1 = in_var(type, "p1"); - MAKE_SIG(type->get_base_type(), avail, 2, p0, p1); - - if (type->vector_elements == 1) { - body.emit(ret(abs(sub(p0, p1)))); - } else { - ir_variable *p = body.make_temp(type, "p"); - body.emit(assign(p, sub(p0, p1))); - body.emit(ret(sqrt(dot(p, p)))); - } - - return sig; -} - -ir_function_signature * -builtin_builder::_dot(builtin_available_predicate avail, const glsl_type *type) -{ - if (type->vector_elements == 1) - return binop(avail, ir_binop_mul, type, type, type); - - return binop(avail, ir_binop_dot, - type->get_base_type(), type, type); -} - -ir_function_signature * -builtin_builder::_cross(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *a = in_var(type, "a"); - ir_variable *b = in_var(type, "b"); - MAKE_SIG(type, avail, 2, a, b); - - int yzx = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_X, 0); - int zxy = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_X, SWIZZLE_Y, 0); - - body.emit(ret(sub(mul(swizzle(a, yzx, 3), swizzle(b, zxy, 3)), - mul(swizzle(a, zxy, 3), swizzle(b, yzx, 3))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_normalize(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - MAKE_SIG(type, avail, 1, x); - - if (type->vector_elements == 1) { - body.emit(ret(sign(x))); - } else { - body.emit(ret(mul(x, rsq(dot(x, x))))); - } - - return sig; -} - -ir_function_signature * -builtin_builder::_ftransform() -{ - MAKE_SIG(glsl_type::vec4_type, compatibility_vs_only, 0); - - body.emit(ret(new(mem_ctx) ir_expression(ir_binop_mul, - glsl_type::vec4_type, - var_ref(gl_ModelViewProjectionMatrix), - var_ref(gl_Vertex)))); - - /* FINISHME: Once the ir_expression() constructor handles type inference - * for matrix operations, we can simplify this to: - * - * body.emit(ret(mul(gl_ModelViewProjectionMatrix, gl_Vertex))); - */ - return sig; -} - -ir_function_signature * -builtin_builder::_faceforward(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *N = in_var(type, "N"); - ir_variable *I = in_var(type, "I"); - ir_variable *Nref = in_var(type, "Nref"); - MAKE_SIG(type, avail, 3, N, I, Nref); - - body.emit(if_tree(less(dot(Nref, I), IMM_FP(type, 0.0)), - ret(N), ret(neg(N)))); - - return sig; -} - -ir_function_signature * -builtin_builder::_reflect(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *I = in_var(type, "I"); - ir_variable *N = in_var(type, "N"); - MAKE_SIG(type, avail, 2, I, N); - - /* I - 2 * dot(N, I) * N */ - body.emit(ret(sub(I, mul(IMM_FP(type, 2.0), mul(dot(N, I), N))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_refract(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *I = in_var(type, "I"); - ir_variable *N = in_var(type, "N"); - ir_variable *eta = in_var(type->get_base_type(), "eta"); - MAKE_SIG(type, avail, 3, I, N, eta); - - ir_variable *n_dot_i = body.make_temp(type->get_base_type(), "n_dot_i"); - body.emit(assign(n_dot_i, dot(N, I))); - - /* From the GLSL 1.10 
specification: - * k = 1.0 - eta * eta * (1.0 - dot(N, I) * dot(N, I)) - * if (k < 0.0) - * return genType(0.0) - * else - * return eta * I - (eta * dot(N, I) + sqrt(k)) * N - */ - ir_variable *k = body.make_temp(type->get_base_type(), "k"); - body.emit(assign(k, sub(IMM_FP(type, 1.0), - mul(eta, mul(eta, sub(IMM_FP(type, 1.0), - mul(n_dot_i, n_dot_i))))))); - body.emit(if_tree(less(k, IMM_FP(type, 0.0)), - ret(ir_constant::zero(mem_ctx, type)), - ret(sub(mul(eta, I), - mul(add(mul(eta, n_dot_i), sqrt(k)), N))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_matrixCompMult(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - ir_variable *y = in_var(type, "y"); - MAKE_SIG(type, avail, 2, x, y); - - ir_variable *z = body.make_temp(type, "z"); - for (int i = 0; i < type->matrix_columns; i++) { - body.emit(assign(array_ref(z, i), mul(array_ref(x, i), array_ref(y, i)))); - } - body.emit(ret(z)); - - return sig; -} - -ir_function_signature * -builtin_builder::_outerProduct(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *c; - ir_variable *r; - - if (type->base_type == GLSL_TYPE_DOUBLE) { - r = in_var(glsl_type::dvec(type->matrix_columns), "r"); - c = in_var(glsl_type::dvec(type->vector_elements), "c"); - } else { - r = in_var(glsl_type::vec(type->matrix_columns), "r"); - c = in_var(glsl_type::vec(type->vector_elements), "c"); - } - MAKE_SIG(type, avail, 2, c, r); - - ir_variable *m = body.make_temp(type, "m"); - for (int i = 0; i < type->matrix_columns; i++) { - body.emit(assign(array_ref(m, i), mul(c, swizzle(r, i, 1)))); - } - body.emit(ret(m)); - - return sig; -} - -ir_function_signature * -builtin_builder::_transpose(builtin_available_predicate avail, const glsl_type *orig_type) -{ - const glsl_type *transpose_type = - glsl_type::get_instance(orig_type->base_type, - orig_type->matrix_columns, - orig_type->vector_elements); - - ir_variable *m = in_var(orig_type, "m"); - MAKE_SIG(transpose_type, avail, 1, m); - - ir_variable *t = body.make_temp(transpose_type, "t"); - for (int i = 0; i < orig_type->matrix_columns; i++) { - for (int j = 0; j < orig_type->vector_elements; j++) { - body.emit(assign(array_ref(t, j), - matrix_elt(m, i, j), - 1 << i)); - } - } - body.emit(ret(t)); - - return sig; -} - -ir_function_signature * -builtin_builder::_determinant_mat2(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *m = in_var(type, "m"); - MAKE_SIG(type->get_base_type(), avail, 1, m); - - body.emit(ret(sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)), - mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_determinant_mat3(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *m = in_var(type, "m"); - MAKE_SIG(type->get_base_type(), avail, 1, m); - - ir_expression *f1 = - sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), - mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 1))); - - ir_expression *f2 = - sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), - mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 0))); - - ir_expression *f3 = - sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), - mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 0))); - - body.emit(ret(add(sub(mul(matrix_elt(m, 0, 0), f1), - mul(matrix_elt(m, 0, 1), f2)), - mul(matrix_elt(m, 0, 2), f3)))); - - return sig; -} - -ir_function_signature * -builtin_builder::_determinant_mat4(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *m = 
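
The mat2/mat3 determinants above are the usual cofactor expansions, written with GLSL's column-major m[column][row] indexing (standard linear algebra, restated from the IR):

   det(mat2 m) = m[0][0]*m[1][1] - m[1][0]*m[0][1]
   det(mat3 m) = m[0][0]*(m[1][1]*m[2][2] - m[1][2]*m[2][1])
               - m[0][1]*(m[1][0]*m[2][2] - m[1][2]*m[2][0])
               + m[0][2]*(m[1][0]*m[2][1] - m[1][1]*m[2][0])

The mat4 variant below follows the same scheme, caching the nineteen 2x2 sub-determinants (SubFactor00..SubFactor18) so each is computed only once.
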
in_var(type, "m"); - const glsl_type *btype = type->get_base_type(); - MAKE_SIG(btype, avail, 1, m); - - ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00"); - ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01"); - ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02"); - ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03"); - ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04"); - ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05"); - ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06"); - ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07"); - ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08"); - ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09"); - ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10"); - ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11"); - ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12"); - ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13"); - ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14"); - ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15"); - ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16"); - ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17"); - ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18"); - - body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3))))); - body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3))))); - body.emit(assign(SubFactor02, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 2))))); - body.emit(assign(SubFactor03, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 3))))); - body.emit(assign(SubFactor04, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 2))))); - body.emit(assign(SubFactor05, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 1))))); - body.emit(assign(SubFactor06, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor07, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor08, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 2))))); - body.emit(assign(SubFactor09, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor10, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 2))))); - body.emit(assign(SubFactor11, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor12, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 1))))); - body.emit(assign(SubFactor13, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 2), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor14, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor15, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 2))))); - body.emit(assign(SubFactor16, 
sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); - body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); - - ir_variable *adj_0 = body.make_temp(btype == glsl_type::float_type ? glsl_type::vec4_type : glsl_type::dvec4_type, "adj_0"); - - body.emit(assign(adj_0, - add(sub(mul(matrix_elt(m, 1, 1), SubFactor00), - mul(matrix_elt(m, 1, 2), SubFactor01)), - mul(matrix_elt(m, 1, 3), SubFactor02)), - WRITEMASK_X)); - body.emit(assign(adj_0, neg( - add(sub(mul(matrix_elt(m, 1, 0), SubFactor00), - mul(matrix_elt(m, 1, 2), SubFactor03)), - mul(matrix_elt(m, 1, 3), SubFactor04))), - WRITEMASK_Y)); - body.emit(assign(adj_0, - add(sub(mul(matrix_elt(m, 1, 0), SubFactor01), - mul(matrix_elt(m, 1, 1), SubFactor03)), - mul(matrix_elt(m, 1, 3), SubFactor05)), - WRITEMASK_Z)); - body.emit(assign(adj_0, neg( - add(sub(mul(matrix_elt(m, 1, 0), SubFactor02), - mul(matrix_elt(m, 1, 1), SubFactor04)), - mul(matrix_elt(m, 1, 2), SubFactor05))), - WRITEMASK_W)); - - body.emit(ret(dot(array_ref(m, 0), adj_0))); - - return sig; -} - -ir_function_signature * -builtin_builder::_inverse_mat2(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *m = in_var(type, "m"); - MAKE_SIG(type, avail, 1, m); - - ir_variable *adj = body.make_temp(type, "adj"); - body.emit(assign(array_ref(adj, 0), matrix_elt(m, 1, 1), 1 << 0)); - body.emit(assign(array_ref(adj, 0), neg(matrix_elt(m, 0, 1)), 1 << 1)); - body.emit(assign(array_ref(adj, 1), neg(matrix_elt(m, 1, 0)), 1 << 0)); - body.emit(assign(array_ref(adj, 1), matrix_elt(m, 0, 0), 1 << 1)); - - ir_expression *det = - sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)), - mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))); - - body.emit(ret(div(adj, det))); - return sig; -} - -ir_function_signature * -builtin_builder::_inverse_mat3(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *m = in_var(type, "m"); - const glsl_type *btype = type->get_base_type(); - MAKE_SIG(type, avail, 1, m); - - ir_variable *f11_22_21_12 = body.make_temp(btype, "f11_22_21_12"); - ir_variable *f10_22_20_12 = body.make_temp(btype, "f10_22_20_12"); - ir_variable *f10_21_20_11 = body.make_temp(btype, "f10_21_20_11"); - - body.emit(assign(f11_22_21_12, - sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), - mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 2))))); - body.emit(assign(f10_22_20_12, - sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), - mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); - body.emit(assign(f10_21_20_11, - sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), - mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); - - ir_variable *adj = body.make_temp(type, "adj"); - body.emit(assign(array_ref(adj, 0), f11_22_21_12, WRITEMASK_X)); - body.emit(assign(array_ref(adj, 1), neg(f10_22_20_12), WRITEMASK_X)); - body.emit(assign(array_ref(adj, 2), f10_21_20_11, WRITEMASK_X)); - - body.emit(assign(array_ref(adj, 0), neg( - sub(mul(matrix_elt(m, 0, 1), matrix_elt(m, 2, 2)), - mul(matrix_elt(m, 2, 1), matrix_elt(m, 0, 2)))), - WRITEMASK_Y)); - body.emit(assign(array_ref(adj, 1), - sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 2, 2)), - mul(matrix_elt(m, 2, 0), matrix_elt(m, 0, 2))), - WRITEMASK_Y)); - body.emit(assign(array_ref(adj, 2), neg( - sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 2, 1)), - mul(matrix_elt(m, 2, 
0), matrix_elt(m, 0, 1)))), - WRITEMASK_Y)); - - body.emit(assign(array_ref(adj, 0), - sub(mul(matrix_elt(m, 0, 1), matrix_elt(m, 1, 2)), - mul(matrix_elt(m, 1, 1), matrix_elt(m, 0, 2))), - WRITEMASK_Z)); - body.emit(assign(array_ref(adj, 1), neg( - sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 2)), - mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 2)))), - WRITEMASK_Z)); - body.emit(assign(array_ref(adj, 2), - sub(mul(matrix_elt(m, 0, 0), matrix_elt(m, 1, 1)), - mul(matrix_elt(m, 1, 0), matrix_elt(m, 0, 1))), - WRITEMASK_Z)); - - ir_expression *det = - add(sub(mul(matrix_elt(m, 0, 0), f11_22_21_12), - mul(matrix_elt(m, 0, 1), f10_22_20_12)), - mul(matrix_elt(m, 0, 2), f10_21_20_11)); - - body.emit(ret(div(adj, det))); - - return sig; -} - -ir_function_signature * -builtin_builder::_inverse_mat4(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *m = in_var(type, "m"); - const glsl_type *btype = type->get_base_type(); - MAKE_SIG(type, avail, 1, m); - - ir_variable *SubFactor00 = body.make_temp(btype, "SubFactor00"); - ir_variable *SubFactor01 = body.make_temp(btype, "SubFactor01"); - ir_variable *SubFactor02 = body.make_temp(btype, "SubFactor02"); - ir_variable *SubFactor03 = body.make_temp(btype, "SubFactor03"); - ir_variable *SubFactor04 = body.make_temp(btype, "SubFactor04"); - ir_variable *SubFactor05 = body.make_temp(btype, "SubFactor05"); - ir_variable *SubFactor06 = body.make_temp(btype, "SubFactor06"); - ir_variable *SubFactor07 = body.make_temp(btype, "SubFactor07"); - ir_variable *SubFactor08 = body.make_temp(btype, "SubFactor08"); - ir_variable *SubFactor09 = body.make_temp(btype, "SubFactor09"); - ir_variable *SubFactor10 = body.make_temp(btype, "SubFactor10"); - ir_variable *SubFactor11 = body.make_temp(btype, "SubFactor11"); - ir_variable *SubFactor12 = body.make_temp(btype, "SubFactor12"); - ir_variable *SubFactor13 = body.make_temp(btype, "SubFactor13"); - ir_variable *SubFactor14 = body.make_temp(btype, "SubFactor14"); - ir_variable *SubFactor15 = body.make_temp(btype, "SubFactor15"); - ir_variable *SubFactor16 = body.make_temp(btype, "SubFactor16"); - ir_variable *SubFactor17 = body.make_temp(btype, "SubFactor17"); - ir_variable *SubFactor18 = body.make_temp(btype, "SubFactor18"); - - body.emit(assign(SubFactor00, sub(mul(matrix_elt(m, 2, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 2, 3))))); - body.emit(assign(SubFactor01, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 3))))); - body.emit(assign(SubFactor02, sub(mul(matrix_elt(m, 2, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 2, 2))))); - body.emit(assign(SubFactor03, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 3))))); - body.emit(assign(SubFactor04, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 2))))); - body.emit(assign(SubFactor05, sub(mul(matrix_elt(m, 2, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 2, 1))))); - body.emit(assign(SubFactor06, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 2), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor07, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor08, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 2))))); - body.emit(assign(SubFactor09, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 3)), 
mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor10, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 2)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 2))))); - body.emit(assign(SubFactor11, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 3, 3)), mul(matrix_elt(m, 3, 1), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor12, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 3, 1)), mul(matrix_elt(m, 3, 0), matrix_elt(m, 1, 1))))); - body.emit(assign(SubFactor13, sub(mul(matrix_elt(m, 1, 2), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 2), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor14, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor15, sub(mul(matrix_elt(m, 1, 1), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 1), matrix_elt(m, 1, 2))))); - body.emit(assign(SubFactor16, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 3)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 3))))); - body.emit(assign(SubFactor17, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 2)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 2))))); - body.emit(assign(SubFactor18, sub(mul(matrix_elt(m, 1, 0), matrix_elt(m, 2, 1)), mul(matrix_elt(m, 2, 0), matrix_elt(m, 1, 1))))); - - ir_variable *adj = body.make_temp(btype == glsl_type::float_type ? glsl_type::mat4_type : glsl_type::dmat4_type, "adj"); - body.emit(assign(array_ref(adj, 0), - add(sub(mul(matrix_elt(m, 1, 1), SubFactor00), - mul(matrix_elt(m, 1, 2), SubFactor01)), - mul(matrix_elt(m, 1, 3), SubFactor02)), - WRITEMASK_X)); - body.emit(assign(array_ref(adj, 1), neg( - add(sub(mul(matrix_elt(m, 1, 0), SubFactor00), - mul(matrix_elt(m, 1, 2), SubFactor03)), - mul(matrix_elt(m, 1, 3), SubFactor04))), - WRITEMASK_X)); - body.emit(assign(array_ref(adj, 2), - add(sub(mul(matrix_elt(m, 1, 0), SubFactor01), - mul(matrix_elt(m, 1, 1), SubFactor03)), - mul(matrix_elt(m, 1, 3), SubFactor05)), - WRITEMASK_X)); - body.emit(assign(array_ref(adj, 3), neg( - add(sub(mul(matrix_elt(m, 1, 0), SubFactor02), - mul(matrix_elt(m, 1, 1), SubFactor04)), - mul(matrix_elt(m, 1, 2), SubFactor05))), - WRITEMASK_X)); - - body.emit(assign(array_ref(adj, 0), neg( - add(sub(mul(matrix_elt(m, 0, 1), SubFactor00), - mul(matrix_elt(m, 0, 2), SubFactor01)), - mul(matrix_elt(m, 0, 3), SubFactor02))), - WRITEMASK_Y)); - body.emit(assign(array_ref(adj, 1), - add(sub(mul(matrix_elt(m, 0, 0), SubFactor00), - mul(matrix_elt(m, 0, 2), SubFactor03)), - mul(matrix_elt(m, 0, 3), SubFactor04)), - WRITEMASK_Y)); - body.emit(assign(array_ref(adj, 2), neg( - add(sub(mul(matrix_elt(m, 0, 0), SubFactor01), - mul(matrix_elt(m, 0, 1), SubFactor03)), - mul(matrix_elt(m, 0, 3), SubFactor05))), - WRITEMASK_Y)); - body.emit(assign(array_ref(adj, 3), - add(sub(mul(matrix_elt(m, 0, 0), SubFactor02), - mul(matrix_elt(m, 0, 1), SubFactor04)), - mul(matrix_elt(m, 0, 2), SubFactor05)), - WRITEMASK_Y)); - - body.emit(assign(array_ref(adj, 0), - add(sub(mul(matrix_elt(m, 0, 1), SubFactor06), - mul(matrix_elt(m, 0, 2), SubFactor07)), - mul(matrix_elt(m, 0, 3), SubFactor08)), - WRITEMASK_Z)); - body.emit(assign(array_ref(adj, 1), neg( - add(sub(mul(matrix_elt(m, 0, 0), SubFactor06), - mul(matrix_elt(m, 0, 2), SubFactor09)), - mul(matrix_elt(m, 0, 3), SubFactor10))), - WRITEMASK_Z)); - body.emit(assign(array_ref(adj, 2), - add(sub(mul(matrix_elt(m, 0, 0), SubFactor11), - mul(matrix_elt(m, 0, 1), SubFactor09)), - mul(matrix_elt(m, 0, 3), SubFactor12)), - WRITEMASK_Z)); - body.emit(assign(array_ref(adj, 3), neg( - add(sub(mul(matrix_elt(m, 0, 0), 
SubFactor08), - mul(matrix_elt(m, 0, 1), SubFactor10)), - mul(matrix_elt(m, 0, 2), SubFactor12))), - WRITEMASK_Z)); - - body.emit(assign(array_ref(adj, 0), neg( - add(sub(mul(matrix_elt(m, 0, 1), SubFactor13), - mul(matrix_elt(m, 0, 2), SubFactor14)), - mul(matrix_elt(m, 0, 3), SubFactor15))), - WRITEMASK_W)); - body.emit(assign(array_ref(adj, 1), - add(sub(mul(matrix_elt(m, 0, 0), SubFactor13), - mul(matrix_elt(m, 0, 2), SubFactor16)), - mul(matrix_elt(m, 0, 3), SubFactor17)), - WRITEMASK_W)); - body.emit(assign(array_ref(adj, 2), neg( - add(sub(mul(matrix_elt(m, 0, 0), SubFactor14), - mul(matrix_elt(m, 0, 1), SubFactor16)), - mul(matrix_elt(m, 0, 3), SubFactor18))), - WRITEMASK_W)); - body.emit(assign(array_ref(adj, 3), - add(sub(mul(matrix_elt(m, 0, 0), SubFactor15), - mul(matrix_elt(m, 0, 1), SubFactor17)), - mul(matrix_elt(m, 0, 2), SubFactor18)), - WRITEMASK_W)); - - ir_expression *det = - add(mul(matrix_elt(m, 0, 0), matrix_elt(adj, 0, 0)), - add(mul(matrix_elt(m, 0, 1), matrix_elt(adj, 1, 0)), - add(mul(matrix_elt(m, 0, 2), matrix_elt(adj, 2, 0)), - mul(matrix_elt(m, 0, 3), matrix_elt(adj, 3, 0))))); - - body.emit(ret(div(adj, det))); - - return sig; -} - - -ir_function_signature * -builtin_builder::_lessThan(builtin_available_predicate avail, - const glsl_type *type) -{ - return binop(avail, ir_binop_less, - glsl_type::bvec(type->vector_elements), type, type); -} - -ir_function_signature * -builtin_builder::_lessThanEqual(builtin_available_predicate avail, - const glsl_type *type) -{ - return binop(avail, ir_binop_lequal, - glsl_type::bvec(type->vector_elements), type, type); -} - -ir_function_signature * -builtin_builder::_greaterThan(builtin_available_predicate avail, - const glsl_type *type) -{ - return binop(avail, ir_binop_greater, - glsl_type::bvec(type->vector_elements), type, type); -} - -ir_function_signature * -builtin_builder::_greaterThanEqual(builtin_available_predicate avail, - const glsl_type *type) -{ - return binop(avail, ir_binop_gequal, - glsl_type::bvec(type->vector_elements), type, type); -} - -ir_function_signature * -builtin_builder::_equal(builtin_available_predicate avail, - const glsl_type *type) -{ - return binop(avail, ir_binop_equal, - glsl_type::bvec(type->vector_elements), type, type); -} - -ir_function_signature * -builtin_builder::_notEqual(builtin_available_predicate avail, - const glsl_type *type) -{ - return binop(avail, ir_binop_nequal, - glsl_type::bvec(type->vector_elements), type, type); -} - -ir_function_signature * -builtin_builder::_any(const glsl_type *type) -{ - ir_variable *v = in_var(type, "v"); - MAKE_SIG(glsl_type::bool_type, always_available, 1, v); - - const unsigned vec_elem = v->type->vector_elements; - body.emit(ret(expr(ir_binop_any_nequal, v, imm(false, vec_elem)))); - - return sig; -} - -ir_function_signature * -builtin_builder::_all(const glsl_type *type) -{ - ir_variable *v = in_var(type, "v"); - MAKE_SIG(glsl_type::bool_type, always_available, 1, v); - - const unsigned vec_elem = v->type->vector_elements; - body.emit(ret(expr(ir_binop_all_equal, v, imm(true, vec_elem)))); - - return sig; -} - -UNOP(not, ir_unop_logic_not, always_available) - -static bool -has_lod(const glsl_type *sampler_type) -{ - assert(sampler_type->is_sampler()); - - switch (sampler_type->sampler_dimensionality) { - case GLSL_SAMPLER_DIM_RECT: - case GLSL_SAMPLER_DIM_BUF: - case GLSL_SAMPLER_DIM_MS: - return false; - default: - return true; - } -} - -ir_function_signature * -builtin_builder::_textureSize(builtin_available_predicate avail, - const 
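For reference, the IR emitted by _inverse_mat3()/_inverse_mat4() above encodes the textbook adjugate-over-determinant inverse: the fNN_MM/SubFactorNN temporaries are 2x2 determinants, "adj" holds the transposed cofactors, and the determinant is a first-column cofactor expansion. A minimal C++ sketch of the same scheme for the 3x3 case, assuming column-major storage m[col][row] (matching matrix_elt(m, column, row)) and a non-singular input; the builders emit this fully unrolled, one masked assignment per component, rather than as loops:

static void inverse_mat3_sketch(const float m[3][3], float out[3][3])
{
   float adj[3][3];   /* adjugate: transposed cofactors, like "adj" above */

   for (int c = 0; c < 3; c++) {
      for (int r = 0; r < 3; r++) {
         const int c1 = (c + 1) % 3, c2 = (c + 2) % 3;
         const int r1 = (r + 1) % 3, r2 = (r + 2) % 3;
         /* Signed 2x2 cofactor determinant -- the role played by the
          * fNN_MM (mat3) and SubFactorNN (mat4) temporaries. */
         adj[c][r] = m[r1][c1] * m[r2][c2] - m[r2][c1] * m[r1][c2];
      }
   }

   /* First-column cofactor expansion, matching the det expression above:
    * det = m(0,0)*adj(0,0) + m(0,1)*adj(1,0) + m(0,2)*adj(2,0). */
   const float det = m[0][0] * adj[0][0] +
                     m[0][1] * adj[1][0] +
                     m[0][2] * adj[2][0];

   for (int c = 0; c < 3; c++)
      for (int r = 0; r < 3; r++)
         out[c][r] = adj[c][r] / det;   /* ret(div(adj, det)) */
}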
glsl_type *return_type, - const glsl_type *sampler_type) -{ - ir_variable *s = in_var(sampler_type, "sampler"); - /* The sampler always exists; add optional lod later. */ - MAKE_SIG(return_type, avail, 1, s); - - ir_texture *tex = new(mem_ctx) ir_texture(ir_txs); - tex->set_sampler(new(mem_ctx) ir_dereference_variable(s), return_type); - - if (has_lod(sampler_type)) { - ir_variable *lod = in_var(glsl_type::int_type, "lod"); - sig->parameters.push_tail(lod); - tex->lod_info.lod = var_ref(lod); - } else { - tex->lod_info.lod = imm(0u); - } - - body.emit(ret(tex)); - - return sig; -} - -ir_function_signature * -builtin_builder::_textureSamples(const glsl_type *sampler_type) -{ - ir_variable *s = in_var(sampler_type, "sampler"); - MAKE_SIG(glsl_type::int_type, shader_samples, 1, s); - - ir_texture *tex = new(mem_ctx) ir_texture(ir_texture_samples); - tex->set_sampler(new(mem_ctx) ir_dereference_variable(s), glsl_type::int_type); - body.emit(ret(tex)); - - return sig; -} - -ir_function_signature * -builtin_builder::_texture(ir_texture_opcode opcode, - builtin_available_predicate avail, - const glsl_type *return_type, - const glsl_type *sampler_type, - const glsl_type *coord_type, - int flags) -{ - ir_variable *s = in_var(sampler_type, "sampler"); - ir_variable *P = in_var(coord_type, "P"); - /* The sampler and coordinate always exist; add optional parameters later. */ - MAKE_SIG(return_type, avail, 2, s, P); - - ir_texture *tex = new(mem_ctx) ir_texture(opcode); - tex->set_sampler(var_ref(s), return_type); - - const int coord_size = sampler_type->coordinate_components(); - - if (coord_size == coord_type->vector_elements) { - tex->coordinate = var_ref(P); - } else { - /* The incoming coordinate also has the projector or shadow comparitor, - * so we need to swizzle those away. - */ - tex->coordinate = swizzle_for_size(P, coord_size); - } - - /* The projector is always in the last component. */ - if (flags & TEX_PROJECT) - tex->projector = swizzle(P, coord_type->vector_elements - 1, 1); - - if (sampler_type->sampler_shadow) { - if (opcode == ir_tg4) { - /* gather has refz as a separate parameter, immediately after the - * coordinate - */ - ir_variable *refz = in_var(glsl_type::float_type, "refz"); - sig->parameters.push_tail(refz); - tex->shadow_comparitor = var_ref(refz); - } else { - /* The shadow comparitor is normally in the Z component, but a few types - * have sufficiently large coordinates that it's in W. - */ - tex->shadow_comparitor = swizzle(P, MAX2(coord_size, SWIZZLE_Z), 1); - } - } - - if (opcode == ir_txl) { - ir_variable *lod = in_var(glsl_type::float_type, "lod"); - sig->parameters.push_tail(lod); - tex->lod_info.lod = var_ref(lod); - } else if (opcode == ir_txd) { - int grad_size = coord_size - (sampler_type->sampler_array ? 1 : 0); - ir_variable *dPdx = in_var(glsl_type::vec(grad_size), "dPdx"); - ir_variable *dPdy = in_var(glsl_type::vec(grad_size), "dPdy"); - sig->parameters.push_tail(dPdx); - sig->parameters.push_tail(dPdy); - tex->lod_info.grad.dPdx = var_ref(dPdx); - tex->lod_info.grad.dPdy = var_ref(dPdy); - } - - if (flags & (TEX_OFFSET | TEX_OFFSET_NONCONST)) { - int offset_size = coord_size - (sampler_type->sampler_array ? 1 : 0); - ir_variable *offset = - new(mem_ctx) ir_variable(glsl_type::ivec(offset_size), "offset", - (flags & TEX_OFFSET) ? 
ir_var_const_in : ir_var_function_in); - sig->parameters.push_tail(offset); - tex->offset = var_ref(offset); - } - - if (flags & TEX_OFFSET_ARRAY) { - ir_variable *offsets = - new(mem_ctx) ir_variable(glsl_type::get_array_instance(glsl_type::ivec2_type, 4), - "offsets", ir_var_const_in); - sig->parameters.push_tail(offsets); - tex->offset = var_ref(offsets); - } - - if (opcode == ir_tg4) { - if (flags & TEX_COMPONENT) { - ir_variable *component = - new(mem_ctx) ir_variable(glsl_type::int_type, "comp", ir_var_const_in); - sig->parameters.push_tail(component); - tex->lod_info.component = var_ref(component); - } - else { - tex->lod_info.component = imm(0); - } - } - - /* The "bias" parameter comes /after/ the "offset" parameter, which is - * inconsistent with both textureLodOffset and textureGradOffset. - */ - if (opcode == ir_txb) { - ir_variable *bias = in_var(glsl_type::float_type, "bias"); - sig->parameters.push_tail(bias); - tex->lod_info.bias = var_ref(bias); - } - - body.emit(ret(tex)); - - return sig; -} - -ir_function_signature * -builtin_builder::_textureCubeArrayShadow() -{ - ir_variable *s = in_var(glsl_type::samplerCubeArrayShadow_type, "sampler"); - ir_variable *P = in_var(glsl_type::vec4_type, "P"); - ir_variable *compare = in_var(glsl_type::float_type, "compare"); - MAKE_SIG(glsl_type::float_type, texture_cube_map_array, 3, s, P, compare); - - ir_texture *tex = new(mem_ctx) ir_texture(ir_tex); - tex->set_sampler(var_ref(s), glsl_type::float_type); - - tex->coordinate = var_ref(P); - tex->shadow_comparitor = var_ref(compare); - - body.emit(ret(tex)); - - return sig; -} - -ir_function_signature * -builtin_builder::_texelFetch(builtin_available_predicate avail, - const glsl_type *return_type, - const glsl_type *sampler_type, - const glsl_type *coord_type, - const glsl_type *offset_type) -{ - ir_variable *s = in_var(sampler_type, "sampler"); - ir_variable *P = in_var(coord_type, "P"); - /* The sampler and coordinate always exist; add optional parameters later. */ - MAKE_SIG(return_type, avail, 2, s, P); - - ir_texture *tex = new(mem_ctx) ir_texture(ir_txf); - tex->coordinate = var_ref(P); - tex->set_sampler(var_ref(s), return_type); - - if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { - ir_variable *sample = in_var(glsl_type::int_type, "sample"); - sig->parameters.push_tail(sample); - tex->lod_info.sample_index = var_ref(sample); - tex->op = ir_txf_ms; - } else if (has_lod(sampler_type)) { - ir_variable *lod = in_var(glsl_type::int_type, "lod"); - sig->parameters.push_tail(lod); - tex->lod_info.lod = var_ref(lod); - } else { - tex->lod_info.lod = imm(0u); - } - - if (offset_type != NULL) { - ir_variable *offset = - new(mem_ctx) ir_variable(offset_type, "offset", ir_var_const_in); - sig->parameters.push_tail(offset); - tex->offset = var_ref(offset); - } - - body.emit(ret(tex)); - - return sig; -} - -ir_function_signature * -builtin_builder::_EmitVertex() -{ - MAKE_SIG(glsl_type::void_type, gs_only, 0); - - ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1); - body.emit(new(mem_ctx) ir_emit_vertex(stream)); - - return sig; -} - -ir_function_signature * -builtin_builder::_EmitStreamVertex(builtin_available_predicate avail, - const glsl_type *stream_type) -{ - /* Section 8.12 (Geometry Shader Functions) of the GLSL 4.0 spec says: - * - * "Emit the current values of output variables to the current output - * primitive on stream stream. The argument to stream must be a constant - * integral expression." 
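To summarize the lod-source selection that _texelFetch() encodes above -- three mutually exclusive cases, one per sampler family (the names below are illustrative, not Mesa API):

enum texel_fetch_lod_source {
   LOD_FROM_SAMPLE_INDEX,   /* multisample: op becomes ir_txf_ms and an
                             * int "sample" parameter is appended        */
   LOD_FROM_PARAMETER,      /* ir_txf with an explicit int "lod" param   */
   LOD_IMPLICIT_ZERO        /* rect/buffer samplers have no lod: imm(0u) */
};

static texel_fetch_lod_source
classify_texel_fetch(bool is_multisample, bool sampler_has_lod)
{
   if (is_multisample)
      return LOD_FROM_SAMPLE_INDEX;
   if (sampler_has_lod)
      return LOD_FROM_PARAMETER;
   return LOD_IMPLICIT_ZERO;   /* GLSL_SAMPLER_DIM_RECT / _BUF; the MS
                                * case was already handled above        */
}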
- */ - ir_variable *stream = - new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in); - - MAKE_SIG(glsl_type::void_type, avail, 1, stream); - - body.emit(new(mem_ctx) ir_emit_vertex(var_ref(stream))); - - return sig; -} - -ir_function_signature * -builtin_builder::_EndPrimitive() -{ - MAKE_SIG(glsl_type::void_type, gs_only, 0); - - ir_rvalue *stream = new(mem_ctx) ir_constant(0, 1); - body.emit(new(mem_ctx) ir_end_primitive(stream)); - - return sig; -} - -ir_function_signature * -builtin_builder::_EndStreamPrimitive(builtin_available_predicate avail, - const glsl_type *stream_type) -{ - /* Section 8.12 (Geometry Shader Functions) of the GLSL 4.0 spec says: - * - * "Completes the current output primitive on stream stream and starts - * a new one. The argument to stream must be a constant integral - * expression." - */ - ir_variable *stream = - new(mem_ctx) ir_variable(stream_type, "stream", ir_var_const_in); - - MAKE_SIG(glsl_type::void_type, avail, 1, stream); - - body.emit(new(mem_ctx) ir_end_primitive(var_ref(stream))); - - return sig; -} - -ir_function_signature * -builtin_builder::_barrier() -{ - MAKE_SIG(glsl_type::void_type, barrier_supported, 0); - - body.emit(new(mem_ctx) ir_barrier()); - return sig; -} - -ir_function_signature * -builtin_builder::_textureQueryLod(builtin_available_predicate avail, - const glsl_type *sampler_type, - const glsl_type *coord_type) -{ - ir_variable *s = in_var(sampler_type, "sampler"); - ir_variable *coord = in_var(coord_type, "coord"); - /* The sampler and coordinate always exist; add optional parameters later. */ - MAKE_SIG(glsl_type::vec2_type, avail, 2, s, coord); - - ir_texture *tex = new(mem_ctx) ir_texture(ir_lod); - tex->coordinate = var_ref(coord); - tex->set_sampler(var_ref(s), glsl_type::vec2_type); - - body.emit(ret(tex)); - - return sig; -} - -ir_function_signature * -builtin_builder::_textureQueryLevels(const glsl_type *sampler_type) -{ - ir_variable *s = in_var(sampler_type, "sampler"); - const glsl_type *return_type = glsl_type::int_type; - MAKE_SIG(return_type, texture_query_levels, 1, s); - - ir_texture *tex = new(mem_ctx) ir_texture(ir_query_levels); - tex->set_sampler(var_ref(s), return_type); - - body.emit(ret(tex)); - - return sig; -} - -ir_function_signature * -builtin_builder::_textureSamplesIdentical(builtin_available_predicate avail, - const glsl_type *sampler_type, - const glsl_type *coord_type) -{ - ir_variable *s = in_var(sampler_type, "sampler"); - ir_variable *P = in_var(coord_type, "P"); - const glsl_type *return_type = glsl_type::bool_type; - MAKE_SIG(return_type, avail, 2, s, P); - - ir_texture *tex = new(mem_ctx) ir_texture(ir_samples_identical); - tex->coordinate = var_ref(P); - tex->set_sampler(var_ref(s), return_type); - - body.emit(ret(tex)); - - return sig; -} - -UNOP(dFdx, ir_unop_dFdx, fs_oes_derivatives) -UNOP(dFdxCoarse, ir_unop_dFdx_coarse, fs_derivative_control) -UNOP(dFdxFine, ir_unop_dFdx_fine, fs_derivative_control) -UNOP(dFdy, ir_unop_dFdy, fs_oes_derivatives) -UNOP(dFdyCoarse, ir_unop_dFdy_coarse, fs_derivative_control) -UNOP(dFdyFine, ir_unop_dFdy_fine, fs_derivative_control) - -ir_function_signature * -builtin_builder::_fwidth(const glsl_type *type) -{ - ir_variable *p = in_var(type, "p"); - MAKE_SIG(type, fs_oes_derivatives, 1, p); - - body.emit(ret(add(abs(expr(ir_unop_dFdx, p)), abs(expr(ir_unop_dFdy, p))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_fwidthCoarse(const glsl_type *type) -{ - ir_variable *p = in_var(type, "p"); - MAKE_SIG(type, 
fs_derivative_control, 1, p); - - body.emit(ret(add(abs(expr(ir_unop_dFdx_coarse, p)), - abs(expr(ir_unop_dFdy_coarse, p))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_fwidthFine(const glsl_type *type) -{ - ir_variable *p = in_var(type, "p"); - MAKE_SIG(type, fs_derivative_control, 1, p); - - body.emit(ret(add(abs(expr(ir_unop_dFdx_fine, p)), - abs(expr(ir_unop_dFdy_fine, p))))); - - return sig; -} - -ir_function_signature * -builtin_builder::_noise1(const glsl_type *type) -{ - return unop(v110, ir_unop_noise, glsl_type::float_type, type); -} - -ir_function_signature * -builtin_builder::_noise2(const glsl_type *type) -{ - ir_variable *p = in_var(type, "p"); - MAKE_SIG(glsl_type::vec2_type, v110, 1, p); - - ir_constant_data b_offset; - b_offset.f[0] = 601.0f; - b_offset.f[1] = 313.0f; - b_offset.f[2] = 29.0f; - b_offset.f[3] = 277.0f; - - ir_variable *a = body.make_temp(glsl_type::float_type, "a"); - ir_variable *b = body.make_temp(glsl_type::float_type, "b"); - ir_variable *t = body.make_temp(glsl_type::vec2_type, "t"); - body.emit(assign(a, expr(ir_unop_noise, p))); - body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, b_offset))))); - body.emit(assign(t, a, WRITEMASK_X)); - body.emit(assign(t, b, WRITEMASK_Y)); - body.emit(ret(t)); - - return sig; -} - -ir_function_signature * -builtin_builder::_noise3(const glsl_type *type) -{ - ir_variable *p = in_var(type, "p"); - MAKE_SIG(glsl_type::vec3_type, v110, 1, p); - - ir_constant_data b_offset; - b_offset.f[0] = 601.0f; - b_offset.f[1] = 313.0f; - b_offset.f[2] = 29.0f; - b_offset.f[3] = 277.0f; - - ir_constant_data c_offset; - c_offset.f[0] = 1559.0f; - c_offset.f[1] = 113.0f; - c_offset.f[2] = 1861.0f; - c_offset.f[3] = 797.0f; - - ir_variable *a = body.make_temp(glsl_type::float_type, "a"); - ir_variable *b = body.make_temp(glsl_type::float_type, "b"); - ir_variable *c = body.make_temp(glsl_type::float_type, "c"); - ir_variable *t = body.make_temp(glsl_type::vec3_type, "t"); - body.emit(assign(a, expr(ir_unop_noise, p))); - body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, b_offset))))); - body.emit(assign(c, expr(ir_unop_noise, add(p, imm(type, c_offset))))); - body.emit(assign(t, a, WRITEMASK_X)); - body.emit(assign(t, b, WRITEMASK_Y)); - body.emit(assign(t, c, WRITEMASK_Z)); - body.emit(ret(t)); - - return sig; -} - -ir_function_signature * -builtin_builder::_noise4(const glsl_type *type) -{ - ir_variable *p = in_var(type, "p"); - MAKE_SIG(glsl_type::vec4_type, v110, 1, p); - - ir_variable *_p = body.make_temp(type, "_p"); - - ir_constant_data p_offset; - p_offset.f[0] = 1559.0f; - p_offset.f[1] = 113.0f; - p_offset.f[2] = 1861.0f; - p_offset.f[3] = 797.0f; - - body.emit(assign(_p, add(p, imm(type, p_offset)))); - - ir_constant_data offset; - offset.f[0] = 601.0f; - offset.f[1] = 313.0f; - offset.f[2] = 29.0f; - offset.f[3] = 277.0f; - - ir_variable *a = body.make_temp(glsl_type::float_type, "a"); - ir_variable *b = body.make_temp(glsl_type::float_type, "b"); - ir_variable *c = body.make_temp(glsl_type::float_type, "c"); - ir_variable *d = body.make_temp(glsl_type::float_type, "d"); - ir_variable *t = body.make_temp(glsl_type::vec4_type, "t"); - body.emit(assign(a, expr(ir_unop_noise, p))); - body.emit(assign(b, expr(ir_unop_noise, add(p, imm(type, offset))))); - body.emit(assign(c, expr(ir_unop_noise, _p))); - body.emit(assign(d, expr(ir_unop_noise, add(_p, imm(type, offset))))); - body.emit(assign(t, a, WRITEMASK_X)); - body.emit(assign(t, b, WRITEMASK_Y)); - body.emit(assign(t, c, WRITEMASK_Z)); - 
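The noiseN() builders share one construction: N scalar noise taps at fixed decorrelated offsets, packed into a vector; noise4() derives its four tap points from just two constant offsets. A sketch of the vec4 overload, with noise1() as a placeholder for the scalar ir_unop_noise operation (illustrative, not Mesa API):

static float noise1(const float p[4])
{
   /* Placeholder for ir_unop_noise: any smooth pseudo-random scalar
    * function of p would do for the purposes of this sketch. */
   return 0.0f;
}

static void noise4_sketch(const float p[4], float out[4])
{
   const float offset[4]   = {  601.0f, 313.0f,   29.0f, 277.0f };
   const float p_offset[4] = { 1559.0f, 113.0f, 1861.0f, 797.0f };
   float p_b[4], p_c[4], p_d[4];

   for (int i = 0; i < 4; i++) {
      p_b[i] = p[i] + offset[i];       /* p + offset          */
      p_c[i] = p[i] + p_offset[i];     /* _p = p + p_offset   */
      p_d[i] = p_c[i] + offset[i];     /* _p + offset         */
   }

   out[0] = noise1(p);     /* a */
   out[1] = noise1(p_b);   /* b */
   out[2] = noise1(p_c);   /* c */
   out[3] = noise1(p_d);   /* d */
}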
body.emit(assign(t, d, WRITEMASK_W)); - body.emit(ret(t)); - - return sig; -} - -ir_function_signature * -builtin_builder::_bitfieldExtract(const glsl_type *type) -{ - bool is_uint = type->base_type == GLSL_TYPE_UINT; - ir_variable *value = in_var(type, "value"); - ir_variable *offset = in_var(glsl_type::int_type, "offset"); - ir_variable *bits = in_var(glsl_type::int_type, "bits"); - MAKE_SIG(type, gpu_shader5_or_es31, 3, value, offset, bits); - - operand cast_offset = is_uint ? i2u(offset) : operand(offset); - operand cast_bits = is_uint ? i2u(bits) : operand(bits); - - body.emit(ret(expr(ir_triop_bitfield_extract, value, - swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements), - swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements)))); - - return sig; -} - -ir_function_signature * -builtin_builder::_bitfieldInsert(const glsl_type *type) -{ - bool is_uint = type->base_type == GLSL_TYPE_UINT; - ir_variable *base = in_var(type, "base"); - ir_variable *insert = in_var(type, "insert"); - ir_variable *offset = in_var(glsl_type::int_type, "offset"); - ir_variable *bits = in_var(glsl_type::int_type, "bits"); - MAKE_SIG(type, gpu_shader5_or_es31, 4, base, insert, offset, bits); - - operand cast_offset = is_uint ? i2u(offset) : operand(offset); - operand cast_bits = is_uint ? i2u(bits) : operand(bits); - - body.emit(ret(bitfield_insert(base, insert, - swizzle(cast_offset, SWIZZLE_XXXX, type->vector_elements), - swizzle(cast_bits, SWIZZLE_XXXX, type->vector_elements)))); - - return sig; -} - -UNOP(bitfieldReverse, ir_unop_bitfield_reverse, gpu_shader5_or_es31) - -ir_function_signature * -builtin_builder::_bitCount(const glsl_type *type) -{ - return unop(gpu_shader5_or_es31, ir_unop_bit_count, - glsl_type::ivec(type->vector_elements), type); -} - -ir_function_signature * -builtin_builder::_findLSB(const glsl_type *type) -{ - return unop(gpu_shader5_or_es31, ir_unop_find_lsb, - glsl_type::ivec(type->vector_elements), type); -} - -ir_function_signature * -builtin_builder::_findMSB(const glsl_type *type) -{ - return unop(gpu_shader5_or_es31, ir_unop_find_msb, - glsl_type::ivec(type->vector_elements), type); -} - -ir_function_signature * -builtin_builder::_fma(builtin_available_predicate avail, const glsl_type *type) -{ - ir_variable *a = in_var(type, "a"); - ir_variable *b = in_var(type, "b"); - ir_variable *c = in_var(type, "c"); - MAKE_SIG(type, avail, 3, a, b, c); - - body.emit(ret(ir_builder::fma(a, b, c))); - - return sig; -} - -ir_function_signature * -builtin_builder::_ldexp(const glsl_type *x_type, const glsl_type *exp_type) -{ - return binop(x_type->base_type == GLSL_TYPE_DOUBLE ? 
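The two bitfield builders above defer the real work to the ir_triop_bitfield_extract and bitfield_insert() IR operations; the swizzles only splat the scalar offset/bits arguments across the vector. For the unsigned scalar case, bitfieldInsert() computes the following (a sketch of the GLSL-spec semantics, not the Mesa implementation; results are undefined when offset + bits > 32):

#include <stdint.h>

static uint32_t bitfield_insert_sketch(uint32_t base, uint32_t insert,
                                       int offset, int bits)
{
   if (bits == 0)
      return base;
   /* Guard bits == 32 separately: a 32-bit shift is undefined in C++. */
   const uint32_t field = (bits < 32) ? (1u << bits) - 1u : ~0u;
   const uint32_t mask = field << offset;
   return (base & ~mask) | ((insert << offset) & mask);
}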
fp64 : gpu_shader5_or_es31, - ir_binop_ldexp, x_type, x_type, exp_type); -} - -ir_function_signature * -builtin_builder::_dfrexp(const glsl_type *x_type, const glsl_type *exp_type) -{ - ir_variable *x = in_var(x_type, "x"); - ir_variable *exponent = out_var(exp_type, "exp"); - MAKE_SIG(x_type, fp64, 2, x, exponent); - - body.emit(assign(exponent, expr(ir_unop_frexp_exp, x))); - - body.emit(ret(expr(ir_unop_frexp_sig, x))); - return sig; -} - -ir_function_signature * -builtin_builder::_frexp(const glsl_type *x_type, const glsl_type *exp_type) -{ - ir_variable *x = in_var(x_type, "x"); - ir_variable *exponent = out_var(exp_type, "exp"); - MAKE_SIG(x_type, gpu_shader5_or_es31, 2, x, exponent); - - const unsigned vec_elem = x_type->vector_elements; - const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); - const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1); - - /* Single-precision floating-point values are stored as - * 1 sign bit; - * 8 exponent bits; - * 23 mantissa bits. - * - * An exponent shift of 23 will shift the mantissa out, leaving only the - * exponent and sign bit (which itself may be zero, if the absolute value - * was taken before the bitcast and shift). - */ - ir_constant *exponent_shift = imm(23); - ir_constant *exponent_bias = imm(-126, vec_elem); - - ir_constant *sign_mantissa_mask = imm(0x807fffffu, vec_elem); - - /* Exponent of floating-point values in the range [0.5, 1.0). */ - ir_constant *exponent_value = imm(0x3f000000u, vec_elem); - - ir_variable *is_not_zero = body.make_temp(bvec, "is_not_zero"); - body.emit(assign(is_not_zero, nequal(abs(x), imm(0.0f, vec_elem)))); - - /* Since abs(x) ensures that the sign bit is zero, we don't need to bitcast - * to unsigned integers to ensure that 1 bits aren't shifted in. - */ - body.emit(assign(exponent, rshift(bitcast_f2i(abs(x)), exponent_shift))); - body.emit(assign(exponent, add(exponent, csel(is_not_zero, exponent_bias, - imm(0, vec_elem))))); - - ir_variable *bits = body.make_temp(uvec, "bits"); - body.emit(assign(bits, bitcast_f2u(x))); - body.emit(assign(bits, bit_and(bits, sign_mantissa_mask))); - body.emit(assign(bits, bit_or(bits, csel(is_not_zero, exponent_value, - imm(0u, vec_elem))))); - body.emit(ret(bitcast_u2f(bits))); - - return sig; -} - -ir_function_signature * -builtin_builder::_uaddCarry(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - ir_variable *y = in_var(type, "y"); - ir_variable *carry = out_var(type, "carry"); - MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, carry); - - body.emit(assign(carry, ir_builder::carry(x, y))); - body.emit(ret(add(x, y))); - - return sig; -} - -ir_function_signature * -builtin_builder::_usubBorrow(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - ir_variable *y = in_var(type, "y"); - ir_variable *borrow = out_var(type, "borrow"); - MAKE_SIG(type, gpu_shader5_or_es31, 3, x, y, borrow); - - body.emit(assign(borrow, ir_builder::borrow(x, y))); - body.emit(ret(sub(x, y))); - - return sig; -} - -/** - * For both imulExtended() and umulExtended() built-ins. 
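The bit manipulation in _frexp() above is easier to follow in scalar form. A C++ sketch using the same constants (shift 23, bias -126, mask 0x807fffff, replacement exponent 0x3f000000); like the IR, it handles normal floats and zero but not denormals:

#include <stdint.h>
#include <string.h>

/* Decompose x into a significand in [0.5, 1.0) and an exponent such that
 * x == sig * 2^exp for normal x; exp == 0 and sig == 0 when x is zero. */
static float frexp_sketch(float x, int *exp_out)
{
   uint32_t bits;
   memcpy(&bits, &x, sizeof(bits));

   const bool is_not_zero = (x != 0.0f);

   /* Shifting the absolute value right by 23 drops the mantissa, leaving
    * just the biased exponent (zero when x is zero). */
   const uint32_t abs_bits = bits & 0x7fffffffu;
   *exp_out = (int)(abs_bits >> 23) + (is_not_zero ? -126 : 0);

   /* Keep sign and mantissa, substitute the exponent of 0.5f. */
   bits = (bits & 0x807fffffu) | (is_not_zero ? 0x3f000000u : 0u);

   float sig;
   memcpy(&sig, &bits, sizeof(sig));
   return sig;
}

For x = 1.0f this returns sig = 0.5f with exp = 1, i.e. 1.0 == 0.5 * 2^1.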
- */ -ir_function_signature * -builtin_builder::_mulExtended(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - ir_variable *y = in_var(type, "y"); - ir_variable *msb = out_var(type, "msb"); - ir_variable *lsb = out_var(type, "lsb"); - MAKE_SIG(glsl_type::void_type, gpu_shader5_or_es31, 4, x, y, msb, lsb); - - body.emit(assign(msb, imul_high(x, y))); - body.emit(assign(lsb, mul(x, y))); - - return sig; -} - -ir_function_signature * -builtin_builder::_interpolateAtCentroid(const glsl_type *type) -{ - ir_variable *interpolant = in_var(type, "interpolant"); - interpolant->data.must_be_shader_input = 1; - MAKE_SIG(type, fs_gpu_shader5, 1, interpolant); - - body.emit(ret(interpolate_at_centroid(interpolant))); - - return sig; -} - -ir_function_signature * -builtin_builder::_interpolateAtOffset(const glsl_type *type) -{ - ir_variable *interpolant = in_var(type, "interpolant"); - interpolant->data.must_be_shader_input = 1; - ir_variable *offset = in_var(glsl_type::vec2_type, "offset"); - MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, offset); - - body.emit(ret(interpolate_at_offset(interpolant, offset))); - - return sig; -} - -ir_function_signature * -builtin_builder::_interpolateAtSample(const glsl_type *type) -{ - ir_variable *interpolant = in_var(type, "interpolant"); - interpolant->data.must_be_shader_input = 1; - ir_variable *sample_num = in_var(glsl_type::int_type, "sample_num"); - MAKE_SIG(type, fs_gpu_shader5, 2, interpolant, sample_num); - - body.emit(ret(interpolate_at_sample(interpolant, sample_num))); - - return sig; -} - -ir_function_signature * -builtin_builder::_atomic_counter_intrinsic(builtin_available_predicate avail) -{ - ir_variable *counter = in_var(glsl_type::atomic_uint_type, "counter"); - MAKE_INTRINSIC(glsl_type::uint_type, avail, 1, counter); - return sig; -} - -ir_function_signature * -builtin_builder::_atomic_intrinsic2(builtin_available_predicate avail, - const glsl_type *type) -{ - ir_variable *atomic = in_var(type, "atomic"); - ir_variable *data = in_var(type, "data"); - MAKE_INTRINSIC(type, avail, 2, atomic, data); - return sig; -} - -ir_function_signature * -builtin_builder::_atomic_intrinsic3(builtin_available_predicate avail, - const glsl_type *type) -{ - ir_variable *atomic = in_var(type, "atomic"); - ir_variable *data1 = in_var(type, "data1"); - ir_variable *data2 = in_var(type, "data2"); - MAKE_INTRINSIC(type, avail, 3, atomic, data1, data2); - return sig; -} - -ir_function_signature * -builtin_builder::_atomic_counter_op(const char *intrinsic, - builtin_available_predicate avail) -{ - ir_variable *counter = in_var(glsl_type::atomic_uint_type, "atomic_counter"); - MAKE_SIG(glsl_type::uint_type, avail, 1, counter); - - ir_variable *retval = body.make_temp(glsl_type::uint_type, "atomic_retval"); - body.emit(call(shader->symbols->get_function(intrinsic), retval, - sig->parameters)); - body.emit(ret(retval)); - return sig; -} - -ir_function_signature * -builtin_builder::_atomic_op2(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type) -{ - ir_variable *atomic = in_var(type, "atomic_var"); - ir_variable *data = in_var(type, "atomic_data"); - MAKE_SIG(type, avail, 2, atomic, data); - - ir_variable *retval = body.make_temp(type, "atomic_retval"); - body.emit(call(shader->symbols->get_function(intrinsic), retval, - sig->parameters)); - body.emit(ret(retval)); - return sig; -} - -ir_function_signature * -builtin_builder::_atomic_op3(const char *intrinsic, - builtin_available_predicate avail, - const glsl_type *type) -{ - 
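Scalar restatements of the carry and widening-multiply semantics built above; these are sketches of the GLSL behavior the IR encodes (ir_builder::carry(), borrow() and imul_high() are the real operations). umulExtended() is shown; imulExtended() is the same high/low split applied to a signed 32x32 -> 64 product:

#include <stdint.h>

static uint32_t uaddCarry_sketch(uint32_t x, uint32_t y, uint32_t *carry)
{
   const uint32_t sum = x + y;        /* wraps modulo 2^32       */
   *carry = (sum < x) ? 1u : 0u;      /* ir_builder::carry(x, y) */
   return sum;
}

static void umulExtended_sketch(uint32_t x, uint32_t y,
                                uint32_t *msb, uint32_t *lsb)
{
   const uint64_t product = (uint64_t)x * y;
   *msb = (uint32_t)(product >> 32);  /* imul_high(x, y)         */
   *lsb = (uint32_t)product;          /* mul(x, y), low 32 bits  */
}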
ir_variable *atomic = in_var(type, "atomic_var"); - ir_variable *data1 = in_var(type, "atomic_data1"); - ir_variable *data2 = in_var(type, "atomic_data2"); - MAKE_SIG(type, avail, 3, atomic, data1, data2); - - ir_variable *retval = body.make_temp(type, "atomic_retval"); - body.emit(call(shader->symbols->get_function(intrinsic), retval, - sig->parameters)); - body.emit(ret(retval)); - return sig; -} - -ir_function_signature * -builtin_builder::_min3(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - ir_variable *y = in_var(type, "y"); - ir_variable *z = in_var(type, "z"); - MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z); - - ir_expression *min3 = min2(x, min2(y,z)); - body.emit(ret(min3)); - - return sig; -} - -ir_function_signature * -builtin_builder::_max3(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - ir_variable *y = in_var(type, "y"); - ir_variable *z = in_var(type, "z"); - MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z); - - ir_expression *max3 = max2(x, max2(y,z)); - body.emit(ret(max3)); - - return sig; -} - -ir_function_signature * -builtin_builder::_mid3(const glsl_type *type) -{ - ir_variable *x = in_var(type, "x"); - ir_variable *y = in_var(type, "y"); - ir_variable *z = in_var(type, "z"); - MAKE_SIG(type, shader_trinary_minmax, 3, x, y, z); - - ir_expression *mid3 = max2(min2(x, y), max2(min2(x, z), min2(y, z))); - body.emit(ret(mid3)); - - return sig; -} - -ir_function_signature * -builtin_builder::_image_prototype(const glsl_type *image_type, - unsigned num_arguments, - unsigned flags) -{ - const glsl_type *data_type = glsl_type::get_instance( - image_type->sampler_type, - (flags & IMAGE_FUNCTION_HAS_VECTOR_DATA_TYPE ? 4 : 1), - 1); - const glsl_type *ret_type = (flags & IMAGE_FUNCTION_RETURNS_VOID ? - glsl_type::void_type : data_type); - - /* Addressing arguments that are always present. */ - ir_variable *image = in_var(image_type, "image"); - ir_variable *coord = in_var( - glsl_type::ivec(image_type->coordinate_components()), "coord"); - - const builtin_available_predicate avail = - (flags & IMAGE_FUNCTION_AVAIL_ATOMIC ? shader_image_atomic : - shader_image_load_store); - ir_function_signature *sig = new_sig(ret_type, avail, 2, image, coord); - - /* Sample index for multisample images. */ - if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) - sig->parameters.push_tail(in_var(glsl_type::int_type, "sample")); - - /* Data arguments. */ - for (unsigned i = 0; i < num_arguments; ++i) { - char *arg_name = ralloc_asprintf(NULL, "arg%d", i); - sig->parameters.push_tail(in_var(data_type, arg_name)); - ralloc_free(arg_name); - } - - /* Set the maximal set of qualifiers allowed for this image - * built-in. Function calls with arguments having fewer - * qualifiers than present in the prototype are allowed by the - * spec, but not with more, i.e. this will make the compiler - * accept everything that needs to be accepted, and reject cases - * like loads from write-only or stores to read-only images. 
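A quick scalar check that the mid3() expression above really selects the median: each inner min2() discards the overall maximum from its pair, and the surrounding max2() calls then discard the overall minimum (a sketch, using floats for concreteness):

static float mid3_sketch(float x, float y, float z)
{
   const float xy = x < y ? x : y;        /* min2(x, y) */
   const float xz = x < z ? x : z;        /* min2(x, z) */
   const float yz = y < z ? y : z;        /* min2(y, z) */
   const float inner = xz > yz ? xz : yz; /* max2(min2(x,z), min2(y,z)) */
   return xy > inner ? xy : inner;        /* max2(...) == the median    */
}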
- */ - image->data.image_read_only = (flags & IMAGE_FUNCTION_READ_ONLY) != 0; - image->data.image_write_only = (flags & IMAGE_FUNCTION_WRITE_ONLY) != 0; - image->data.image_coherent = true; - image->data.image_volatile = true; - image->data.image_restrict = true; - - return sig; -} - -ir_function_signature * -builtin_builder::_image_size_prototype(const glsl_type *image_type, - unsigned /* num_arguments */, - unsigned /* flags */) -{ - const glsl_type *ret_type; - unsigned num_components = image_type->coordinate_components(); - - /* From the ARB_shader_image_size extension: - * "Cube images return the dimensions of one face." - */ - if (image_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && - !image_type->sampler_array) { - num_components = 2; - } - - /* FIXME: Add the highp precision qualifier for GLES 3.10 when it is - * supported by mesa. - */ - ret_type = glsl_type::get_instance(GLSL_TYPE_INT, num_components, 1); - - ir_variable *image = in_var(image_type, "image"); - ir_function_signature *sig = new_sig(ret_type, shader_image_size, 1, image); - - /* Set the maximal set of qualifiers allowed for this image - * built-in. Function calls with arguments having fewer - * qualifiers than present in the prototype are allowed by the - * spec, but not with more, i.e. this will make the compiler - * accept everything that needs to be accepted, and reject cases - * like loads from write-only or stores to read-only images. - */ - image->data.image_read_only = true; - image->data.image_write_only = true; - image->data.image_coherent = true; - image->data.image_volatile = true; - image->data.image_restrict = true; - - return sig; -} - -ir_function_signature * -builtin_builder::_image_samples_prototype(const glsl_type *image_type, - unsigned /* num_arguments */, - unsigned /* flags */) -{ - ir_variable *image = in_var(image_type, "image"); - ir_function_signature *sig = - new_sig(glsl_type::int_type, shader_samples, 1, image); - - /* Set the maximal set of qualifiers allowed for this image - * built-in. Function calls with arguments having fewer - * qualifiers than present in the prototype are allowed by the - * spec, but not with more, i.e. this will make the compiler - * accept everything that needs to be accepted, and reject cases - * like loads from write-only or stores to read-only images. 
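At the GLSL level, the maximal-qualifier prototypes produce exactly the acceptance behavior the comments above describe (an illustrative shader fragment, assuming ARB_shader_image_load_store):

/*
 *    layout(rgba32f) readonly  uniform image2D src;
 *    layout(rgba32f) writeonly uniform image2D dst;
 *
 *    vec4 v = imageLoad(src, p);   // accepted: the argument having fewer
 *                                  // qualifiers than the prototype is fine
 *    imageStore(dst, p, v);        // accepted
 *    imageStore(src, p, v);        // rejected: store to a readonly image
 */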
- */ - image->data.image_read_only = true; - image->data.image_write_only = true; - image->data.image_coherent = true; - image->data.image_volatile = true; - image->data.image_restrict = true; - - return sig; -} - -ir_function_signature * -builtin_builder::_image(image_prototype_ctr prototype, - const glsl_type *image_type, - const char *intrinsic_name, - unsigned num_arguments, - unsigned flags) -{ - ir_function_signature *sig = (this->*prototype)(image_type, - num_arguments, flags); - - if (flags & IMAGE_FUNCTION_EMIT_STUB) { - ir_factory body(&sig->body, mem_ctx); - ir_function *f = shader->symbols->get_function(intrinsic_name); - - if (flags & IMAGE_FUNCTION_RETURNS_VOID) { - body.emit(call(f, NULL, sig->parameters)); - } else { - ir_variable *ret_val = - body.make_temp(sig->return_type, "_ret_val"); - body.emit(call(f, ret_val, sig->parameters)); - body.emit(ret(ret_val)); - } - - sig->is_defined = true; - - } else { - sig->is_intrinsic = true; - } - - return sig; -} - -ir_function_signature * -builtin_builder::_memory_barrier_intrinsic(builtin_available_predicate avail) -{ - MAKE_INTRINSIC(glsl_type::void_type, avail, 0); - return sig; -} - -ir_function_signature * -builtin_builder::_memory_barrier(const char *intrinsic_name, - builtin_available_predicate avail) -{ - MAKE_SIG(glsl_type::void_type, avail, 0); - body.emit(call(shader->symbols->get_function(intrinsic_name), - NULL, sig->parameters)); - return sig; -} - -ir_function_signature * -builtin_builder::_shader_clock_intrinsic(builtin_available_predicate avail, - const glsl_type *type) -{ - MAKE_INTRINSIC(type, avail, 0); - return sig; -} - -ir_function_signature * -builtin_builder::_shader_clock(builtin_available_predicate avail, - const glsl_type *type) -{ - MAKE_SIG(type, avail, 0); - - ir_variable *retval = body.make_temp(type, "clock_retval"); - - body.emit(call(shader->symbols->get_function("__intrinsic_shader_clock"), - retval, sig->parameters)); - body.emit(ret(retval)); - return sig; -} - -/** @} */ - -/******************************************************************************/ - -/* The singleton instance of builtin_builder. */ -static builtin_builder builtins; -static mtx_t builtins_lock = _MTX_INITIALIZER_NP; - -/** - * External API (exposing the built-in module to the rest of the compiler): - * @{ - */ -void -_mesa_glsl_initialize_builtin_functions() -{ - mtx_lock(&builtins_lock); - builtins.initialize(); - mtx_unlock(&builtins_lock); -} - -void -_mesa_glsl_release_builtin_functions() -{ - mtx_lock(&builtins_lock); - builtins.release(); - mtx_unlock(&builtins_lock); -} - -ir_function_signature * -_mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state, - const char *name, exec_list *actual_parameters) -{ - ir_function_signature * s; - mtx_lock(&builtins_lock); - s = builtins.find(state, name, actual_parameters); - mtx_unlock(&builtins_lock); - return s; -} - -ir_function * -_mesa_glsl_find_builtin_function_by_name(const char *name) -{ - ir_function *f; - mtx_lock(&builtins_lock); - f = builtins.shader->symbols->get_function(name); - mtx_unlock(&builtins_lock); - return f; -} - -gl_shader * -_mesa_glsl_get_builtin_function_shader() -{ - return builtins.shader; -} - - -/** - * Get the function signature for main from a shader - */ -ir_function_signature * -_mesa_get_main_function_signature(gl_shader *sh) -{ - ir_function *const f = sh->symbols->get_function("main"); - if (f != NULL) { - exec_list void_parameters; - - /* Look for the 'void main()' signature and ensure that it's defined. 
- * This keeps the linker from accidentally picking a shader that just - * contains a prototype for main. - * - * We don't have to check for multiple definitions of main (in multiple - * shaders) because that would have already been caught above. - */ - ir_function_signature *sig = - f->matching_signature(NULL, &void_parameters, false); - if ((sig != NULL) && sig->is_defined) { - return sig; - } - } - - return NULL; -} - -/** @} */ diff --git a/src/glsl/builtin_types.cpp b/src/glsl/builtin_types.cpp deleted file mode 100644 index ee24bd5e411..00000000000 --- a/src/glsl/builtin_types.cpp +++ /dev/null @@ -1,394 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file builtin_types.cpp - * - * The glsl_type class has static members to represent all the built-in types - * (such as the glsl_type::_float_type flyweight) as well as convenience pointer - * accessors (such as glsl_type::float_type). Those global variables are - * declared and initialized in this file. - * - * This also contains _mesa_glsl_initialize_types(), a function which populates - * a symbol table with the available built-in types for a particular language - * version and set of enabled extensions. - */ - -#include "compiler/glsl_types.h" -#include "glsl_parser_extras.h" -#include "util/macros.h" - -/** - * Declarations of type flyweights (glsl_type::_foo_type) and - * convenience pointers (glsl_type::foo_type). - * @{ - */ -#define DECL_TYPE(NAME, ...) 
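To make the flyweight pattern concrete: for gl_DepthRangeParameters, the STRUCT_TYPE() macro defined just below expands to the static type definition plus its convenience pointer, roughly:

const glsl_type glsl_type::_struct_gl_DepthRangeParameters_type =
   glsl_type(gl_DepthRangeParameters_fields,
             ARRAY_SIZE(gl_DepthRangeParameters_fields),
             "gl_DepthRangeParameters");
const glsl_type *const glsl_type::struct_gl_DepthRangeParameters_type =
   &glsl_type::_struct_gl_DepthRangeParameters_type;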
- -#define STRUCT_TYPE(NAME) \ - const glsl_type glsl_type::_struct_##NAME##_type = \ - glsl_type(NAME##_fields, ARRAY_SIZE(NAME##_fields), #NAME); \ - const glsl_type *const glsl_type::struct_##NAME##_type = \ - &glsl_type::_struct_##NAME##_type; - -static const struct glsl_struct_field gl_DepthRangeParameters_fields[] = { - glsl_struct_field(glsl_type::float_type, "near"), - glsl_struct_field(glsl_type::float_type, "far"), - glsl_struct_field(glsl_type::float_type, "diff"), -}; - -static const struct glsl_struct_field gl_PointParameters_fields[] = { - glsl_struct_field(glsl_type::float_type, "size"), - glsl_struct_field(glsl_type::float_type, "sizeMin"), - glsl_struct_field(glsl_type::float_type, "sizeMax"), - glsl_struct_field(glsl_type::float_type, "fadeThresholdSize"), - glsl_struct_field(glsl_type::float_type, "distanceConstantAttenuation"), - glsl_struct_field(glsl_type::float_type, "distanceLinearAttenuation"), - glsl_struct_field(glsl_type::float_type, "distanceQuadraticAttenuation"), -}; - -static const struct glsl_struct_field gl_MaterialParameters_fields[] = { - glsl_struct_field(glsl_type::vec4_type, "emission"), - glsl_struct_field(glsl_type::vec4_type, "ambient"), - glsl_struct_field(glsl_type::vec4_type, "diffuse"), - glsl_struct_field(glsl_type::vec4_type, "specular"), - glsl_struct_field(glsl_type::float_type, "shininess"), -}; - -static const struct glsl_struct_field gl_LightSourceParameters_fields[] = { - glsl_struct_field(glsl_type::vec4_type, "ambient"), - glsl_struct_field(glsl_type::vec4_type, "diffuse"), - glsl_struct_field(glsl_type::vec4_type, "specular"), - glsl_struct_field(glsl_type::vec4_type, "position"), - glsl_struct_field(glsl_type::vec4_type, "halfVector"), - glsl_struct_field(glsl_type::vec3_type, "spotDirection"), - glsl_struct_field(glsl_type::float_type, "spotExponent"), - glsl_struct_field(glsl_type::float_type, "spotCutoff"), - glsl_struct_field(glsl_type::float_type, "spotCosCutoff"), - glsl_struct_field(glsl_type::float_type, "constantAttenuation"), - glsl_struct_field(glsl_type::float_type, "linearAttenuation"), - glsl_struct_field(glsl_type::float_type, "quadraticAttenuation"), -}; - -static const struct glsl_struct_field gl_LightModelParameters_fields[] = { - glsl_struct_field(glsl_type::vec4_type, "ambient"), -}; - -static const struct glsl_struct_field gl_LightModelProducts_fields[] = { - glsl_struct_field(glsl_type::vec4_type, "sceneColor"), -}; - -static const struct glsl_struct_field gl_LightProducts_fields[] = { - glsl_struct_field(glsl_type::vec4_type, "ambient"), - glsl_struct_field(glsl_type::vec4_type, "diffuse"), - glsl_struct_field(glsl_type::vec4_type, "specular"), -}; - -static const struct glsl_struct_field gl_FogParameters_fields[] = { - glsl_struct_field(glsl_type::vec4_type, "color"), - glsl_struct_field(glsl_type::float_type, "density"), - glsl_struct_field(glsl_type::float_type, "start"), - glsl_struct_field(glsl_type::float_type, "end"), - glsl_struct_field(glsl_type::float_type, "scale"), -}; - -#include "compiler/builtin_type_macros.h" -/** @} */ - -/** - * Code to populate a symbol table with the built-in types available in a - * particular shading language version. The table below tags every - * type with the GLSL/GLSL ES versions where it was introduced. 
- * - * @{ - */ -#define T(TYPE, MIN_GL, MIN_ES) \ - { glsl_type::TYPE##_type, MIN_GL, MIN_ES }, - -static const struct builtin_type_versions { - const glsl_type *const type; - int min_gl; - int min_es; -} builtin_type_versions[] = { - T(void, 110, 100) - T(bool, 110, 100) - T(bvec2, 110, 100) - T(bvec3, 110, 100) - T(bvec4, 110, 100) - T(int, 110, 100) - T(ivec2, 110, 100) - T(ivec3, 110, 100) - T(ivec4, 110, 100) - T(uint, 130, 300) - T(uvec2, 130, 300) - T(uvec3, 130, 300) - T(uvec4, 130, 300) - T(float, 110, 100) - T(vec2, 110, 100) - T(vec3, 110, 100) - T(vec4, 110, 100) - T(mat2, 110, 100) - T(mat3, 110, 100) - T(mat4, 110, 100) - T(mat2x3, 120, 300) - T(mat2x4, 120, 300) - T(mat3x2, 120, 300) - T(mat3x4, 120, 300) - T(mat4x2, 120, 300) - T(mat4x3, 120, 300) - - T(double, 400, 999) - T(dvec2, 400, 999) - T(dvec3, 400, 999) - T(dvec4, 400, 999) - T(dmat2, 400, 999) - T(dmat3, 400, 999) - T(dmat4, 400, 999) - T(dmat2x3, 400, 999) - T(dmat2x4, 400, 999) - T(dmat3x2, 400, 999) - T(dmat3x4, 400, 999) - T(dmat4x2, 400, 999) - T(dmat4x3, 400, 999) - - T(sampler1D, 110, 999) - T(sampler2D, 110, 100) - T(sampler3D, 110, 300) - T(samplerCube, 110, 100) - T(sampler1DArray, 130, 999) - T(sampler2DArray, 130, 300) - T(samplerCubeArray, 400, 999) - T(sampler2DRect, 140, 999) - T(samplerBuffer, 140, 999) - T(sampler2DMS, 150, 310) - T(sampler2DMSArray, 150, 999) - - T(isampler1D, 130, 999) - T(isampler2D, 130, 300) - T(isampler3D, 130, 300) - T(isamplerCube, 130, 300) - T(isampler1DArray, 130, 999) - T(isampler2DArray, 130, 300) - T(isamplerCubeArray, 400, 999) - T(isampler2DRect, 140, 999) - T(isamplerBuffer, 140, 999) - T(isampler2DMS, 150, 310) - T(isampler2DMSArray, 150, 999) - - T(usampler1D, 130, 999) - T(usampler2D, 130, 300) - T(usampler3D, 130, 300) - T(usamplerCube, 130, 300) - T(usampler1DArray, 130, 999) - T(usampler2DArray, 130, 300) - T(usamplerCubeArray, 400, 999) - T(usampler2DRect, 140, 999) - T(usamplerBuffer, 140, 999) - T(usampler2DMS, 150, 310) - T(usampler2DMSArray, 150, 999) - - T(sampler1DShadow, 110, 999) - T(sampler2DShadow, 110, 300) - T(samplerCubeShadow, 130, 300) - T(sampler1DArrayShadow, 130, 999) - T(sampler2DArrayShadow, 130, 300) - T(samplerCubeArrayShadow, 400, 999) - T(sampler2DRectShadow, 140, 999) - - T(struct_gl_DepthRangeParameters, 110, 100) - - T(image1D, 420, 999) - T(image2D, 420, 310) - T(image3D, 420, 310) - T(image2DRect, 420, 999) - T(imageCube, 420, 310) - T(imageBuffer, 420, 999) - T(image1DArray, 420, 999) - T(image2DArray, 420, 310) - T(imageCubeArray, 420, 999) - T(image2DMS, 420, 999) - T(image2DMSArray, 420, 999) - T(iimage1D, 420, 999) - T(iimage2D, 420, 310) - T(iimage3D, 420, 310) - T(iimage2DRect, 420, 999) - T(iimageCube, 420, 310) - T(iimageBuffer, 420, 999) - T(iimage1DArray, 420, 999) - T(iimage2DArray, 420, 310) - T(iimageCubeArray, 420, 999) - T(iimage2DMS, 420, 999) - T(iimage2DMSArray, 420, 999) - T(uimage1D, 420, 999) - T(uimage2D, 420, 310) - T(uimage3D, 420, 310) - T(uimage2DRect, 420, 999) - T(uimageCube, 420, 310) - T(uimageBuffer, 420, 999) - T(uimage1DArray, 420, 999) - T(uimage2DArray, 420, 310) - T(uimageCubeArray, 420, 999) - T(uimage2DMS, 420, 999) - T(uimage2DMSArray, 420, 999) - - T(atomic_uint, 420, 310) -}; - -static const glsl_type *const deprecated_types[] = { - glsl_type::struct_gl_PointParameters_type, - glsl_type::struct_gl_MaterialParameters_type, - glsl_type::struct_gl_LightSourceParameters_type, - glsl_type::struct_gl_LightModelParameters_type, - glsl_type::struct_gl_LightModelProducts_type, - 
glsl_type::struct_gl_LightProducts_type, - glsl_type::struct_gl_FogParameters_type, -}; - -static inline void -add_type(glsl_symbol_table *symbols, const glsl_type *const type) -{ - symbols->add_type(type->name, type); -} - -/** - * Populate the symbol table with available built-in types. - */ -void -_mesa_glsl_initialize_types(struct _mesa_glsl_parse_state *state) -{ - struct glsl_symbol_table *symbols = state->symbols; - - for (unsigned i = 0; i < ARRAY_SIZE(builtin_type_versions); i++) { - const struct builtin_type_versions *const t = &builtin_type_versions[i]; - if (state->is_version(t->min_gl, t->min_es)) { - add_type(symbols, t->type); - } - } - - /* Add deprecated structure types. While these were deprecated in 1.30, - * they're still present. We've removed them in 1.40+ (OpenGL 3.1+). - */ - if (!state->es_shader && state->language_version < 140) { - for (unsigned i = 0; i < ARRAY_SIZE(deprecated_types); i++) { - add_type(symbols, deprecated_types[i]); - } - } - - /* Add types for enabled extensions. They may have already been added - * by the version-based loop, but attempting to add them a second time - * is harmless. - */ - if (state->ARB_texture_cube_map_array_enable) { - add_type(symbols, glsl_type::samplerCubeArray_type); - add_type(symbols, glsl_type::samplerCubeArrayShadow_type); - add_type(symbols, glsl_type::isamplerCubeArray_type); - add_type(symbols, glsl_type::usamplerCubeArray_type); - } - - if (state->ARB_texture_multisample_enable || - state->OES_texture_storage_multisample_2d_array_enable) { - add_type(symbols, glsl_type::sampler2DMS_type); - add_type(symbols, glsl_type::isampler2DMS_type); - add_type(symbols, glsl_type::usampler2DMS_type); - add_type(symbols, glsl_type::sampler2DMSArray_type); - add_type(symbols, glsl_type::isampler2DMSArray_type); - add_type(symbols, glsl_type::usampler2DMSArray_type); - } - - if (state->ARB_texture_rectangle_enable) { - add_type(symbols, glsl_type::sampler2DRect_type); - add_type(symbols, glsl_type::sampler2DRectShadow_type); - } - - if (state->EXT_texture_array_enable) { - add_type(symbols, glsl_type::sampler1DArray_type); - add_type(symbols, glsl_type::sampler2DArray_type); - add_type(symbols, glsl_type::sampler1DArrayShadow_type); - add_type(symbols, glsl_type::sampler2DArrayShadow_type); - } - - if (state->OES_EGL_image_external_enable) { - add_type(symbols, glsl_type::samplerExternalOES_type); - } - - if (state->OES_texture_3D_enable) { - add_type(symbols, glsl_type::sampler3D_type); - } - - if (state->ARB_shader_image_load_store_enable) { - add_type(symbols, glsl_type::image1D_type); - add_type(symbols, glsl_type::image2D_type); - add_type(symbols, glsl_type::image3D_type); - add_type(symbols, glsl_type::image2DRect_type); - add_type(symbols, glsl_type::imageCube_type); - add_type(symbols, glsl_type::imageBuffer_type); - add_type(symbols, glsl_type::image1DArray_type); - add_type(symbols, glsl_type::image2DArray_type); - add_type(symbols, glsl_type::imageCubeArray_type); - add_type(symbols, glsl_type::image2DMS_type); - add_type(symbols, glsl_type::image2DMSArray_type); - add_type(symbols, glsl_type::iimage1D_type); - add_type(symbols, glsl_type::iimage2D_type); - add_type(symbols, glsl_type::iimage3D_type); - add_type(symbols, glsl_type::iimage2DRect_type); - add_type(symbols, glsl_type::iimageCube_type); - add_type(symbols, glsl_type::iimageBuffer_type); - add_type(symbols, glsl_type::iimage1DArray_type); - add_type(symbols, glsl_type::iimage2DArray_type); - add_type(symbols, glsl_type::iimageCubeArray_type); - 
add_type(symbols, glsl_type::iimage2DMS_type); - add_type(symbols, glsl_type::iimage2DMSArray_type); - add_type(symbols, glsl_type::uimage1D_type); - add_type(symbols, glsl_type::uimage2D_type); - add_type(symbols, glsl_type::uimage3D_type); - add_type(symbols, glsl_type::uimage2DRect_type); - add_type(symbols, glsl_type::uimageCube_type); - add_type(symbols, glsl_type::uimageBuffer_type); - add_type(symbols, glsl_type::uimage1DArray_type); - add_type(symbols, glsl_type::uimage2DArray_type); - add_type(symbols, glsl_type::uimageCubeArray_type); - add_type(symbols, glsl_type::uimage2DMS_type); - add_type(symbols, glsl_type::uimage2DMSArray_type); - } - - if (state->has_atomic_counters()) { - add_type(symbols, glsl_type::atomic_uint_type); - } - - if (state->ARB_gpu_shader_fp64_enable) { - add_type(symbols, glsl_type::double_type); - add_type(symbols, glsl_type::dvec2_type); - add_type(symbols, glsl_type::dvec3_type); - add_type(symbols, glsl_type::dvec4_type); - add_type(symbols, glsl_type::dmat2_type); - add_type(symbols, glsl_type::dmat3_type); - add_type(symbols, glsl_type::dmat4_type); - add_type(symbols, glsl_type::dmat2x3_type); - add_type(symbols, glsl_type::dmat2x4_type); - add_type(symbols, glsl_type::dmat3x2_type); - add_type(symbols, glsl_type::dmat3x4_type); - add_type(symbols, glsl_type::dmat4x2_type); - add_type(symbols, glsl_type::dmat4x3_type); - } -} -/** @} */ diff --git a/src/glsl/builtin_variables.cpp b/src/glsl/builtin_variables.cpp deleted file mode 100644 index ccc04c00cea..00000000000 --- a/src/glsl/builtin_variables.cpp +++ /dev/null @@ -1,1394 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "ir.h" -#include "ir_builder.h" -#include "linker.h" -#include "glsl_parser_extras.h" -#include "glsl_symbol_table.h" -#include "main/core.h" -#include "main/uniforms.h" -#include "program/prog_statevars.h" -#include "program/prog_instruction.h" - -using namespace ir_builder; - -static const struct gl_builtin_uniform_element gl_NumSamples_elements[] = { - {NULL, {STATE_NUM_SAMPLES, 0, 0}, SWIZZLE_XXXX} -}; - -static const struct gl_builtin_uniform_element gl_DepthRange_elements[] = { - {"near", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_XXXX}, - {"far", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_YYYY}, - {"diff", {STATE_DEPTH_RANGE, 0, 0}, SWIZZLE_ZZZZ}, -}; - -static const struct gl_builtin_uniform_element gl_ClipPlane_elements[] = { - {NULL, {STATE_CLIPPLANE, 0, 0}, SWIZZLE_XYZW} -}; - -static const struct gl_builtin_uniform_element gl_Point_elements[] = { - {"size", {STATE_POINT_SIZE}, SWIZZLE_XXXX}, - {"sizeMin", {STATE_POINT_SIZE}, SWIZZLE_YYYY}, - {"sizeMax", {STATE_POINT_SIZE}, SWIZZLE_ZZZZ}, - {"fadeThresholdSize", {STATE_POINT_SIZE}, SWIZZLE_WWWW}, - {"distanceConstantAttenuation", {STATE_POINT_ATTENUATION}, SWIZZLE_XXXX}, - {"distanceLinearAttenuation", {STATE_POINT_ATTENUATION}, SWIZZLE_YYYY}, - {"distanceQuadraticAttenuation", {STATE_POINT_ATTENUATION}, SWIZZLE_ZZZZ}, -}; - -static const struct gl_builtin_uniform_element gl_FrontMaterial_elements[] = { - {"emission", {STATE_MATERIAL, 0, STATE_EMISSION}, SWIZZLE_XYZW}, - {"ambient", {STATE_MATERIAL, 0, STATE_AMBIENT}, SWIZZLE_XYZW}, - {"diffuse", {STATE_MATERIAL, 0, STATE_DIFFUSE}, SWIZZLE_XYZW}, - {"specular", {STATE_MATERIAL, 0, STATE_SPECULAR}, SWIZZLE_XYZW}, - {"shininess", {STATE_MATERIAL, 0, STATE_SHININESS}, SWIZZLE_XXXX}, -}; - -static const struct gl_builtin_uniform_element gl_BackMaterial_elements[] = { - {"emission", {STATE_MATERIAL, 1, STATE_EMISSION}, SWIZZLE_XYZW}, - {"ambient", {STATE_MATERIAL, 1, STATE_AMBIENT}, SWIZZLE_XYZW}, - {"diffuse", {STATE_MATERIAL, 1, STATE_DIFFUSE}, SWIZZLE_XYZW}, - {"specular", {STATE_MATERIAL, 1, STATE_SPECULAR}, SWIZZLE_XYZW}, - {"shininess", {STATE_MATERIAL, 1, STATE_SHININESS}, SWIZZLE_XXXX}, -}; - -static const struct gl_builtin_uniform_element gl_LightSource_elements[] = { - {"ambient", {STATE_LIGHT, 0, STATE_AMBIENT}, SWIZZLE_XYZW}, - {"diffuse", {STATE_LIGHT, 0, STATE_DIFFUSE}, SWIZZLE_XYZW}, - {"specular", {STATE_LIGHT, 0, STATE_SPECULAR}, SWIZZLE_XYZW}, - {"position", {STATE_LIGHT, 0, STATE_POSITION}, SWIZZLE_XYZW}, - {"halfVector", {STATE_LIGHT, 0, STATE_HALF_VECTOR}, SWIZZLE_XYZW}, - {"spotDirection", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, - MAKE_SWIZZLE4(SWIZZLE_X, - SWIZZLE_Y, - SWIZZLE_Z, - SWIZZLE_Z)}, - {"spotCosCutoff", {STATE_LIGHT, 0, STATE_SPOT_DIRECTION}, SWIZZLE_WWWW}, - {"spotCutoff", {STATE_LIGHT, 0, STATE_SPOT_CUTOFF}, SWIZZLE_XXXX}, - {"spotExponent", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_WWWW}, - {"constantAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_XXXX}, - {"linearAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_YYYY}, - {"quadraticAttenuation", {STATE_LIGHT, 0, STATE_ATTENUATION}, SWIZZLE_ZZZZ}, -}; - -static const struct gl_builtin_uniform_element gl_LightModel_elements[] = { - {"ambient", {STATE_LIGHTMODEL_AMBIENT, 0}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_FrontLightModelProduct_elements[] = { - {"sceneColor", {STATE_LIGHTMODEL_SCENECOLOR, 0}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_BackLightModelProduct_elements[] = { - {"sceneColor", 
{STATE_LIGHTMODEL_SCENECOLOR, 1}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_FrontLightProduct_elements[] = { - {"ambient", {STATE_LIGHTPROD, 0, 0, STATE_AMBIENT}, SWIZZLE_XYZW}, - {"diffuse", {STATE_LIGHTPROD, 0, 0, STATE_DIFFUSE}, SWIZZLE_XYZW}, - {"specular", {STATE_LIGHTPROD, 0, 0, STATE_SPECULAR}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_BackLightProduct_elements[] = { - {"ambient", {STATE_LIGHTPROD, 0, 1, STATE_AMBIENT}, SWIZZLE_XYZW}, - {"diffuse", {STATE_LIGHTPROD, 0, 1, STATE_DIFFUSE}, SWIZZLE_XYZW}, - {"specular", {STATE_LIGHTPROD, 0, 1, STATE_SPECULAR}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_TextureEnvColor_elements[] = { - {NULL, {STATE_TEXENV_COLOR, 0}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_EyePlaneS_elements[] = { - {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_S}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_EyePlaneT_elements[] = { - {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_T}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_EyePlaneR_elements[] = { - {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_R}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_EyePlaneQ_elements[] = { - {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_EYE_Q}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_ObjectPlaneS_elements[] = { - {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_S}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_ObjectPlaneT_elements[] = { - {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_T}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_ObjectPlaneR_elements[] = { - {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_R}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_ObjectPlaneQ_elements[] = { - {NULL, {STATE_TEXGEN, 0, STATE_TEXGEN_OBJECT_Q}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_Fog_elements[] = { - {"color", {STATE_FOG_COLOR}, SWIZZLE_XYZW}, - {"density", {STATE_FOG_PARAMS}, SWIZZLE_XXXX}, - {"start", {STATE_FOG_PARAMS}, SWIZZLE_YYYY}, - {"end", {STATE_FOG_PARAMS}, SWIZZLE_ZZZZ}, - {"scale", {STATE_FOG_PARAMS}, SWIZZLE_WWWW}, -}; - -static const struct gl_builtin_uniform_element gl_NormalScale_elements[] = { - {NULL, {STATE_NORMAL_SCALE}, SWIZZLE_XXXX}, -}; - -static const struct gl_builtin_uniform_element gl_FogParamsOptimizedMESA_elements[] = { - {NULL, {STATE_INTERNAL, STATE_FOG_PARAMS_OPTIMIZED}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_CurrentAttribVertMESA_elements[] = { - {NULL, {STATE_INTERNAL, STATE_CURRENT_ATTRIB, 0}, SWIZZLE_XYZW}, -}; - -static const struct gl_builtin_uniform_element gl_CurrentAttribFragMESA_elements[] = { - {NULL, {STATE_INTERNAL, STATE_CURRENT_ATTRIB_MAYBE_VP_CLAMPED, 0}, SWIZZLE_XYZW}, -}; - -#define MATRIX(name, statevar, modifier) \ - static const struct gl_builtin_uniform_element name ## _elements[] = { \ - { NULL, { statevar, 0, 0, 0, modifier}, SWIZZLE_XYZW }, \ - { NULL, { statevar, 0, 1, 1, modifier}, SWIZZLE_XYZW }, \ - { NULL, { statevar, 0, 2, 2, modifier}, SWIZZLE_XYZW }, \ - { NULL, { statevar, 0, 3, 3, modifier}, SWIZZLE_XYZW }, \ - } - -MATRIX(gl_ModelViewMatrix, - STATE_MODELVIEW_MATRIX, STATE_MATRIX_TRANSPOSE); -MATRIX(gl_ModelViewMatrixInverse, - STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVTRANS); -MATRIX(gl_ModelViewMatrixTranspose, - STATE_MODELVIEW_MATRIX, 0); 
-MATRIX(gl_ModelViewMatrixInverseTranspose, - STATE_MODELVIEW_MATRIX, STATE_MATRIX_INVERSE); - -MATRIX(gl_ProjectionMatrix, - STATE_PROJECTION_MATRIX, STATE_MATRIX_TRANSPOSE); -MATRIX(gl_ProjectionMatrixInverse, - STATE_PROJECTION_MATRIX, STATE_MATRIX_INVTRANS); -MATRIX(gl_ProjectionMatrixTranspose, - STATE_PROJECTION_MATRIX, 0); -MATRIX(gl_ProjectionMatrixInverseTranspose, - STATE_PROJECTION_MATRIX, STATE_MATRIX_INVERSE); - -MATRIX(gl_ModelViewProjectionMatrix, - STATE_MVP_MATRIX, STATE_MATRIX_TRANSPOSE); -MATRIX(gl_ModelViewProjectionMatrixInverse, - STATE_MVP_MATRIX, STATE_MATRIX_INVTRANS); -MATRIX(gl_ModelViewProjectionMatrixTranspose, - STATE_MVP_MATRIX, 0); -MATRIX(gl_ModelViewProjectionMatrixInverseTranspose, - STATE_MVP_MATRIX, STATE_MATRIX_INVERSE); - -MATRIX(gl_TextureMatrix, - STATE_TEXTURE_MATRIX, STATE_MATRIX_TRANSPOSE); -MATRIX(gl_TextureMatrixInverse, - STATE_TEXTURE_MATRIX, STATE_MATRIX_INVTRANS); -MATRIX(gl_TextureMatrixTranspose, - STATE_TEXTURE_MATRIX, 0); -MATRIX(gl_TextureMatrixInverseTranspose, - STATE_TEXTURE_MATRIX, STATE_MATRIX_INVERSE); - -static const struct gl_builtin_uniform_element gl_NormalMatrix_elements[] = { - { NULL, { STATE_MODELVIEW_MATRIX, 0, 0, 0, STATE_MATRIX_INVERSE}, - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) }, - { NULL, { STATE_MODELVIEW_MATRIX, 0, 1, 1, STATE_MATRIX_INVERSE}, - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) }, - { NULL, { STATE_MODELVIEW_MATRIX, 0, 2, 2, STATE_MATRIX_INVERSE}, - MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z) }, -}; - -#undef MATRIX - -#define STATEVAR(name) {#name, name ## _elements, ARRAY_SIZE(name ## _elements)} - -static const struct gl_builtin_uniform_desc _mesa_builtin_uniform_desc[] = { - STATEVAR(gl_NumSamples), - STATEVAR(gl_DepthRange), - STATEVAR(gl_ClipPlane), - STATEVAR(gl_Point), - STATEVAR(gl_FrontMaterial), - STATEVAR(gl_BackMaterial), - STATEVAR(gl_LightSource), - STATEVAR(gl_LightModel), - STATEVAR(gl_FrontLightModelProduct), - STATEVAR(gl_BackLightModelProduct), - STATEVAR(gl_FrontLightProduct), - STATEVAR(gl_BackLightProduct), - STATEVAR(gl_TextureEnvColor), - STATEVAR(gl_EyePlaneS), - STATEVAR(gl_EyePlaneT), - STATEVAR(gl_EyePlaneR), - STATEVAR(gl_EyePlaneQ), - STATEVAR(gl_ObjectPlaneS), - STATEVAR(gl_ObjectPlaneT), - STATEVAR(gl_ObjectPlaneR), - STATEVAR(gl_ObjectPlaneQ), - STATEVAR(gl_Fog), - - STATEVAR(gl_ModelViewMatrix), - STATEVAR(gl_ModelViewMatrixInverse), - STATEVAR(gl_ModelViewMatrixTranspose), - STATEVAR(gl_ModelViewMatrixInverseTranspose), - - STATEVAR(gl_ProjectionMatrix), - STATEVAR(gl_ProjectionMatrixInverse), - STATEVAR(gl_ProjectionMatrixTranspose), - STATEVAR(gl_ProjectionMatrixInverseTranspose), - - STATEVAR(gl_ModelViewProjectionMatrix), - STATEVAR(gl_ModelViewProjectionMatrixInverse), - STATEVAR(gl_ModelViewProjectionMatrixTranspose), - STATEVAR(gl_ModelViewProjectionMatrixInverseTranspose), - - STATEVAR(gl_TextureMatrix), - STATEVAR(gl_TextureMatrixInverse), - STATEVAR(gl_TextureMatrixTranspose), - STATEVAR(gl_TextureMatrixInverseTranspose), - - STATEVAR(gl_NormalMatrix), - STATEVAR(gl_NormalScale), - - STATEVAR(gl_FogParamsOptimizedMESA), - STATEVAR(gl_CurrentAttribVertMESA), - STATEVAR(gl_CurrentAttribFragMESA), - - {NULL, NULL, 0} -}; - - -namespace { - -/** - * Data structure that accumulates fields for the gl_PerVertex interface - * block. 
- */ -class per_vertex_accumulator -{ -public: - per_vertex_accumulator(); - void add_field(int slot, const glsl_type *type, const char *name); - const glsl_type *construct_interface_instance() const; - -private: - glsl_struct_field fields[10]; - unsigned num_fields; -}; - - -per_vertex_accumulator::per_vertex_accumulator() - : fields(), - num_fields(0) -{ -} - - -void -per_vertex_accumulator::add_field(int slot, const glsl_type *type, - const char *name) -{ - assert(this->num_fields < ARRAY_SIZE(this->fields)); - this->fields[this->num_fields].type = type; - this->fields[this->num_fields].name = name; - this->fields[this->num_fields].matrix_layout = GLSL_MATRIX_LAYOUT_INHERITED; - this->fields[this->num_fields].location = slot; - this->fields[this->num_fields].interpolation = INTERP_QUALIFIER_NONE; - this->fields[this->num_fields].centroid = 0; - this->fields[this->num_fields].sample = 0; - this->fields[this->num_fields].patch = 0; - this->fields[this->num_fields].precision = GLSL_PRECISION_NONE; - this->num_fields++; -} - - -const glsl_type * -per_vertex_accumulator::construct_interface_instance() const -{ - return glsl_type::get_interface_instance(this->fields, this->num_fields, - GLSL_INTERFACE_PACKING_STD140, - "gl_PerVertex"); -} - - -class builtin_variable_generator -{ -public: - builtin_variable_generator(exec_list *instructions, - struct _mesa_glsl_parse_state *state); - void generate_constants(); - void generate_uniforms(); - void generate_vs_special_vars(); - void generate_tcs_special_vars(); - void generate_tes_special_vars(); - void generate_gs_special_vars(); - void generate_fs_special_vars(); - void generate_cs_special_vars(); - void generate_varyings(); - -private: - const glsl_type *array(const glsl_type *base, unsigned elements) - { - return glsl_type::get_array_instance(base, elements); - } - - const glsl_type *type(const char *name) - { - return symtab->get_type(name); - } - - ir_variable *add_input(int slot, const glsl_type *type, const char *name) - { - return add_variable(name, type, ir_var_shader_in, slot); - } - - ir_variable *add_output(int slot, const glsl_type *type, const char *name) - { - return add_variable(name, type, ir_var_shader_out, slot); - } - - ir_variable *add_index_output(int slot, int index, const glsl_type *type, const char *name) - { - return add_index_variable(name, type, ir_var_shader_out, slot, index); - } - - ir_variable *add_system_value(int slot, const glsl_type *type, - const char *name) - { - return add_variable(name, type, ir_var_system_value, slot); - } - - ir_variable *add_variable(const char *name, const glsl_type *type, - enum ir_variable_mode mode, int slot); - ir_variable *add_index_variable(const char *name, const glsl_type *type, - enum ir_variable_mode mode, int slot, int index); - ir_variable *add_uniform(const glsl_type *type, const char *name); - ir_variable *add_const(const char *name, int value); - ir_variable *add_const_ivec3(const char *name, int x, int y, int z); - void add_varying(int slot, const glsl_type *type, const char *name); - - exec_list * const instructions; - struct _mesa_glsl_parse_state * const state; - glsl_symbol_table * const symtab; - - /** - * True if compatibility-profile-only variables should be included. (In - * desktop GL, these are always included when the GLSL version is 1.30 or - * below).
- */ - const bool compatibility; - - const glsl_type * const bool_t; - const glsl_type * const int_t; - const glsl_type * const uint_t; - const glsl_type * const float_t; - const glsl_type * const vec2_t; - const glsl_type * const vec3_t; - const glsl_type * const vec4_t; - const glsl_type * const uvec3_t; - const glsl_type * const mat3_t; - const glsl_type * const mat4_t; - - per_vertex_accumulator per_vertex_in; - per_vertex_accumulator per_vertex_out; -}; - - -builtin_variable_generator::builtin_variable_generator( - exec_list *instructions, struct _mesa_glsl_parse_state *state) - : instructions(instructions), state(state), symtab(state->symbols), - compatibility(!state->is_version(140, 100)), - bool_t(glsl_type::bool_type), int_t(glsl_type::int_type), - uint_t(glsl_type::uint_type), - float_t(glsl_type::float_type), vec2_t(glsl_type::vec2_type), - vec3_t(glsl_type::vec3_type), vec4_t(glsl_type::vec4_type), - uvec3_t(glsl_type::uvec3_type), - mat3_t(glsl_type::mat3_type), mat4_t(glsl_type::mat4_type) -{ -} - -ir_variable * -builtin_variable_generator::add_index_variable(const char *name, - const glsl_type *type, - enum ir_variable_mode mode, int slot, int index) -{ - ir_variable *var = new(symtab) ir_variable(type, name, mode); - var->data.how_declared = ir_var_declared_implicitly; - - switch (var->data.mode) { - case ir_var_auto: - case ir_var_shader_in: - case ir_var_uniform: - case ir_var_system_value: - var->data.read_only = true; - break; - case ir_var_shader_out: - case ir_var_shader_storage: - break; - default: - /* The only variables that are added using this function should be - * uniforms, shader storage, shader inputs, and shader outputs, constants - * (which use ir_var_auto), and system values. - */ - assert(0); - break; - } - - var->data.location = slot; - var->data.explicit_location = (slot >= 0); - var->data.explicit_index = 1; - var->data.index = index; - - /* Once the variable is created and initialized, add it to the symbol table - * and add the declaration to the IR stream. - */ - instructions->push_tail(var); - - symtab->add_variable(var); - return var; -} - -ir_variable * -builtin_variable_generator::add_variable(const char *name, - const glsl_type *type, - enum ir_variable_mode mode, int slot) -{ - ir_variable *var = new(symtab) ir_variable(type, name, mode); - var->data.how_declared = ir_var_declared_implicitly; - - switch (var->data.mode) { - case ir_var_auto: - case ir_var_shader_in: - case ir_var_uniform: - case ir_var_system_value: - var->data.read_only = true; - break; - case ir_var_shader_out: - case ir_var_shader_storage: - break; - default: - /* The only variables that are added using this function should be - * uniforms, shader storage, shader inputs, and shader outputs, constants - * (which use ir_var_auto), and system values. - */ - assert(0); - break; - } - - var->data.location = slot; - var->data.explicit_location = (slot >= 0); - var->data.explicit_index = 0; - - /* Once the variable is created and initialized, add it to the symbol table - * and add the declaration to the IR stream.
- */ - instructions->push_tail(var); - - symtab->add_variable(var); - return var; -} - - -ir_variable * -builtin_variable_generator::add_uniform(const glsl_type *type, - const char *name) -{ - ir_variable *const uni = add_variable(name, type, ir_var_uniform, -1); - - unsigned i; - for (i = 0; _mesa_builtin_uniform_desc[i].name != NULL; i++) { - if (strcmp(_mesa_builtin_uniform_desc[i].name, name) == 0) { - break; - } - } - - assert(_mesa_builtin_uniform_desc[i].name != NULL); - const struct gl_builtin_uniform_desc* const statevar = - &_mesa_builtin_uniform_desc[i]; - - const unsigned array_count = type->is_array() ? type->length : 1; - - ir_state_slot *slots = - uni->allocate_state_slots(array_count * statevar->num_elements); - - for (unsigned a = 0; a < array_count; a++) { - for (unsigned j = 0; j < statevar->num_elements; j++) { - const struct gl_builtin_uniform_element *element = - &statevar->elements[j]; - - memcpy(slots->tokens, element->tokens, sizeof(element->tokens)); - if (type->is_array()) { - if (strcmp(name, "gl_CurrentAttribVertMESA") == 0 || - strcmp(name, "gl_CurrentAttribFragMESA") == 0) { - slots->tokens[2] = a; - } else { - slots->tokens[1] = a; - } - } - - slots->swizzle = element->swizzle; - slots++; - } - } - - return uni; -} - - -ir_variable * -builtin_variable_generator::add_const(const char *name, int value) -{ - ir_variable *const var = add_variable(name, glsl_type::int_type, - ir_var_auto, -1); - var->constant_value = new(var) ir_constant(value); - var->constant_initializer = new(var) ir_constant(value); - var->data.has_initializer = true; - return var; -} - - -ir_variable * -builtin_variable_generator::add_const_ivec3(const char *name, int x, int y, - int z) -{ - ir_variable *const var = add_variable(name, glsl_type::ivec3_type, - ir_var_auto, -1); - ir_constant_data data; - memset(&data, 0, sizeof(data)); - data.i[0] = x; - data.i[1] = y; - data.i[2] = z; - var->constant_value = new(var) ir_constant(glsl_type::ivec3_type, &data); - var->constant_initializer = - new(var) ir_constant(glsl_type::ivec3_type, &data); - var->data.has_initializer = true; - return var; -} - - -void -builtin_variable_generator::generate_constants() -{ - add_const("gl_MaxVertexAttribs", state->Const.MaxVertexAttribs); - add_const("gl_MaxVertexTextureImageUnits", - state->Const.MaxVertexTextureImageUnits); - add_const("gl_MaxCombinedTextureImageUnits", - state->Const.MaxCombinedTextureImageUnits); - add_const("gl_MaxTextureImageUnits", state->Const.MaxTextureImageUnits); - add_const("gl_MaxDrawBuffers", state->Const.MaxDrawBuffers); - - /* Max uniforms/varyings: GLSL ES counts these in units of vectors; desktop - * GL counts them in units of "components" or "floats". - */ - if (state->es_shader) { - add_const("gl_MaxVertexUniformVectors", - state->Const.MaxVertexUniformComponents / 4); - add_const("gl_MaxFragmentUniformVectors", - state->Const.MaxFragmentUniformComponents / 4); - - /* In GLSL ES 3.00, gl_MaxVaryingVectors was split out to separate - * vertex and fragment shader constants. 
- */ - if (state->is_version(0, 300)) { - add_const("gl_MaxVertexOutputVectors", - state->ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4); - add_const("gl_MaxFragmentInputVectors", - state->ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents / 4); - } else { - add_const("gl_MaxVaryingVectors", - state->ctx->Const.MaxVarying); - } - - /* EXT_blend_func_extended brings a built-in constant - * for determining the number of dual source draw buffers - */ - if (state->EXT_blend_func_extended_enable) { - add_const("gl_MaxDualSourceDrawBuffersEXT", - state->Const.MaxDualSourceDrawBuffers); - } - } else { - add_const("gl_MaxVertexUniformComponents", - state->Const.MaxVertexUniformComponents); - - /* Note: gl_MaxVaryingFloats was deprecated in GLSL 1.30+, but not - * removed - */ - add_const("gl_MaxVaryingFloats", state->ctx->Const.MaxVarying * 4); - - add_const("gl_MaxFragmentUniformComponents", - state->Const.MaxFragmentUniformComponents); - } - - /* Texel offsets were introduced in ARB_shading_language_420pack (which - * requires desktop GLSL version 130), and adopted into desktop GLSL - * version 4.20 and GLSL ES version 3.00. - */ - if ((state->is_version(130, 0) && - state->ARB_shading_language_420pack_enable) || - state->is_version(420, 300)) { - add_const("gl_MinProgramTexelOffset", - state->Const.MinProgramTexelOffset); - add_const("gl_MaxProgramTexelOffset", - state->Const.MaxProgramTexelOffset); - } - - if (state->is_version(130, 0)) { - add_const("gl_MaxClipDistances", state->Const.MaxClipPlanes); - add_const("gl_MaxVaryingComponents", state->ctx->Const.MaxVarying * 4); - } - - if (state->has_geometry_shader()) { - add_const("gl_MaxVertexOutputComponents", - state->Const.MaxVertexOutputComponents); - add_const("gl_MaxGeometryInputComponents", - state->Const.MaxGeometryInputComponents); - add_const("gl_MaxGeometryOutputComponents", - state->Const.MaxGeometryOutputComponents); - add_const("gl_MaxFragmentInputComponents", - state->Const.MaxFragmentInputComponents); - add_const("gl_MaxGeometryTextureImageUnits", - state->Const.MaxGeometryTextureImageUnits); - add_const("gl_MaxGeometryOutputVertices", - state->Const.MaxGeometryOutputVertices); - add_const("gl_MaxGeometryTotalOutputComponents", - state->Const.MaxGeometryTotalOutputComponents); - add_const("gl_MaxGeometryUniformComponents", - state->Const.MaxGeometryUniformComponents); - - /* Note: the GLSL 1.50-4.40 specs require - * gl_MaxGeometryVaryingComponents to be present, and to be at least 64. - * But they do not define what it means (and there does not appear to be - * any corresponding constant in the GL specs). However, - * ARB_geometry_shader4 defines MAX_GEOMETRY_VARYING_COMPONENTS_ARB to - * be the maximum number of components available for use as geometry - * outputs. So we assume this is a synonym for - * gl_MaxGeometryOutputComponents. - */ - add_const("gl_MaxGeometryVaryingComponents", - state->Const.MaxGeometryOutputComponents); - } - - if (compatibility) { - /* Note: gl_MaxLights stopped being listed as an explicit constant in - * GLSL 1.30, however it continues to be referred to (as a minimum size - * for compatibility-mode uniforms) all the way up through GLSL 4.30, so - * this seems like it was probably an oversight. - */ - add_const("gl_MaxLights", state->Const.MaxLights); - - add_const("gl_MaxClipPlanes", state->Const.MaxClipPlanes); - - /* Note: gl_MaxTextureUnits wasn't made compatibility-only until GLSL - * 1.50, however this seems like it was probably an oversight.
- */ - add_const("gl_MaxTextureUnits", state->Const.MaxTextureUnits); - - /* Note: gl_MaxTextureCoords was left out of GLSL 1.40, but it was - * re-introduced in GLSL 1.50, so this seems like it was probably an - * oversight. - */ - add_const("gl_MaxTextureCoords", state->Const.MaxTextureCoords); - } - - if (state->has_atomic_counters()) { - add_const("gl_MaxVertexAtomicCounters", - state->Const.MaxVertexAtomicCounters); - add_const("gl_MaxFragmentAtomicCounters", - state->Const.MaxFragmentAtomicCounters); - add_const("gl_MaxCombinedAtomicCounters", - state->Const.MaxCombinedAtomicCounters); - add_const("gl_MaxAtomicCounterBindings", - state->Const.MaxAtomicBufferBindings); - - if (state->has_geometry_shader()) { - add_const("gl_MaxGeometryAtomicCounters", - state->Const.MaxGeometryAtomicCounters); - } - if (!state->es_shader) { - add_const("gl_MaxTessControlAtomicCounters", - state->Const.MaxTessControlAtomicCounters); - add_const("gl_MaxTessEvaluationAtomicCounters", - state->Const.MaxTessEvaluationAtomicCounters); - } - } - - if (state->is_version(420, 310)) { - add_const("gl_MaxVertexAtomicCounterBuffers", - state->Const.MaxVertexAtomicCounterBuffers); - add_const("gl_MaxFragmentAtomicCounterBuffers", - state->Const.MaxFragmentAtomicCounterBuffers); - add_const("gl_MaxCombinedAtomicCounterBuffers", - state->Const.MaxCombinedAtomicCounterBuffers); - add_const("gl_MaxAtomicCounterBufferSize", - state->Const.MaxAtomicCounterBufferSize); - - if (state->has_geometry_shader()) { - add_const("gl_MaxGeometryAtomicCounterBuffers", - state->Const.MaxGeometryAtomicCounterBuffers); - } - if (!state->es_shader) { - add_const("gl_MaxTessControlAtomicCounterBuffers", - state->Const.MaxTessControlAtomicCounterBuffers); - add_const("gl_MaxTessEvaluationAtomicCounterBuffers", - state->Const.MaxTessEvaluationAtomicCounterBuffers); - } - } - - if (state->is_version(430, 310) || state->ARB_compute_shader_enable) { - add_const("gl_MaxComputeAtomicCounterBuffers", MAX_COMPUTE_ATOMIC_COUNTER_BUFFERS); - add_const("gl_MaxComputeAtomicCounters", MAX_COMPUTE_ATOMIC_COUNTERS); - add_const("gl_MaxComputeImageUniforms", MAX_COMPUTE_IMAGE_UNIFORMS); - add_const("gl_MaxComputeTextureImageUnits", MAX_COMPUTE_TEXTURE_IMAGE_UNITS); - add_const("gl_MaxComputeUniformComponents", MAX_COMPUTE_UNIFORM_COMPONENTS); - - add_const_ivec3("gl_MaxComputeWorkGroupCount", - state->Const.MaxComputeWorkGroupCount[0], - state->Const.MaxComputeWorkGroupCount[1], - state->Const.MaxComputeWorkGroupCount[2]); - add_const_ivec3("gl_MaxComputeWorkGroupSize", - state->Const.MaxComputeWorkGroupSize[0], - state->Const.MaxComputeWorkGroupSize[1], - state->Const.MaxComputeWorkGroupSize[2]); - - /* From the GLSL 4.40 spec, section 7.1 (Built-In Language Variables): - * - * The built-in constant gl_WorkGroupSize is a compute-shader - * constant containing the local work-group size of the shader. The - * size of the work group in the X, Y, and Z dimensions is stored in - * the x, y, and z components. The constants values in - * gl_WorkGroupSize will match those specified in the required - * local_size_x, local_size_y, and local_size_z layout qualifiers - * for the current shader. This is a constant so that it can be - * used to size arrays of memory that can be shared within the local - * work group. It is a compile-time error to use gl_WorkGroupSize - * in a shader that does not declare a fixed local group size, or - * before that shader has declared a fixed local group size, using - * local_size_x, local_size_y, and local_size_z. 
- * - * To prevent the shader from trying to refer to gl_WorkGroupSize before - * the layout declaration, we don't define it here. Instead we define it - * in ast_cs_input_layout::hir(). - */ - } - - if (state->is_version(420, 310) || - state->ARB_shader_image_load_store_enable) { - add_const("gl_MaxImageUnits", - state->Const.MaxImageUnits); - add_const("gl_MaxVertexImageUniforms", - state->Const.MaxVertexImageUniforms); - add_const("gl_MaxFragmentImageUniforms", - state->Const.MaxFragmentImageUniforms); - add_const("gl_MaxCombinedImageUniforms", - state->Const.MaxCombinedImageUniforms); - - if (state->has_geometry_shader()) { - add_const("gl_MaxGeometryImageUniforms", - state->Const.MaxGeometryImageUniforms); - } - - if (!state->es_shader) { - add_const("gl_MaxCombinedImageUnitsAndFragmentOutputs", - state->Const.MaxCombinedShaderOutputResources); - add_const("gl_MaxImageSamples", - state->Const.MaxImageSamples); - } - - if (state->is_version(450, 310)) { - add_const("gl_MaxCombinedShaderOutputResources", - state->Const.MaxCombinedShaderOutputResources); - } - - if (state->is_version(400, 0) || - state->ARB_tessellation_shader_enable) { - add_const("gl_MaxTessControlImageUniforms", - state->Const.MaxTessControlImageUniforms); - add_const("gl_MaxTessEvaluationImageUniforms", - state->Const.MaxTessEvaluationImageUniforms); - } - } - - if (state->is_version(410, 0) || - state->ARB_viewport_array_enable) - add_const("gl_MaxViewports", state->Const.MaxViewports); - - if (state->is_version(400, 0) || - state->ARB_tessellation_shader_enable) { - add_const("gl_MaxPatchVertices", state->Const.MaxPatchVertices); - add_const("gl_MaxTessGenLevel", state->Const.MaxTessGenLevel); - add_const("gl_MaxTessControlInputComponents", state->Const.MaxTessControlInputComponents); - add_const("gl_MaxTessControlOutputComponents", state->Const.MaxTessControlOutputComponents); - add_const("gl_MaxTessControlTextureImageUnits", state->Const.MaxTessControlTextureImageUnits); - add_const("gl_MaxTessEvaluationInputComponents", state->Const.MaxTessEvaluationInputComponents); - add_const("gl_MaxTessEvaluationOutputComponents", state->Const.MaxTessEvaluationOutputComponents); - add_const("gl_MaxTessEvaluationTextureImageUnits", state->Const.MaxTessEvaluationTextureImageUnits); - add_const("gl_MaxTessPatchComponents", state->Const.MaxTessPatchComponents); - add_const("gl_MaxTessControlTotalOutputComponents", state->Const.MaxTessControlTotalOutputComponents); - add_const("gl_MaxTessControlUniformComponents", state->Const.MaxTessControlUniformComponents); - add_const("gl_MaxTessEvaluationUniformComponents", state->Const.MaxTessEvaluationUniformComponents); - } -} - - -/** - * Generate uniform variables (which exist in all types of shaders).
- */ -void -builtin_variable_generator::generate_uniforms() -{ - if (state->is_version(400, 0) || state->ARB_sample_shading_enable) - add_uniform(int_t, "gl_NumSamples"); - add_uniform(type("gl_DepthRangeParameters"), "gl_DepthRange"); - add_uniform(array(vec4_t, VERT_ATTRIB_MAX), "gl_CurrentAttribVertMESA"); - add_uniform(array(vec4_t, VARYING_SLOT_MAX), "gl_CurrentAttribFragMESA"); - - if (compatibility) { - add_uniform(mat4_t, "gl_ModelViewMatrix"); - add_uniform(mat4_t, "gl_ProjectionMatrix"); - add_uniform(mat4_t, "gl_ModelViewProjectionMatrix"); - add_uniform(mat3_t, "gl_NormalMatrix"); - add_uniform(mat4_t, "gl_ModelViewMatrixInverse"); - add_uniform(mat4_t, "gl_ProjectionMatrixInverse"); - add_uniform(mat4_t, "gl_ModelViewProjectionMatrixInverse"); - add_uniform(mat4_t, "gl_ModelViewMatrixTranspose"); - add_uniform(mat4_t, "gl_ProjectionMatrixTranspose"); - add_uniform(mat4_t, "gl_ModelViewProjectionMatrixTranspose"); - add_uniform(mat4_t, "gl_ModelViewMatrixInverseTranspose"); - add_uniform(mat4_t, "gl_ProjectionMatrixInverseTranspose"); - add_uniform(mat4_t, "gl_ModelViewProjectionMatrixInverseTranspose"); - add_uniform(float_t, "gl_NormalScale"); - add_uniform(type("gl_LightModelParameters"), "gl_LightModel"); - add_uniform(vec4_t, "gl_FogParamsOptimizedMESA"); - - const glsl_type *const mat4_array_type = - array(mat4_t, state->Const.MaxTextureCoords); - add_uniform(mat4_array_type, "gl_TextureMatrix"); - add_uniform(mat4_array_type, "gl_TextureMatrixInverse"); - add_uniform(mat4_array_type, "gl_TextureMatrixTranspose"); - add_uniform(mat4_array_type, "gl_TextureMatrixInverseTranspose"); - - add_uniform(array(vec4_t, state->Const.MaxClipPlanes), "gl_ClipPlane"); - add_uniform(type("gl_PointParameters"), "gl_Point"); - - const glsl_type *const material_parameters_type = - type("gl_MaterialParameters"); - add_uniform(material_parameters_type, "gl_FrontMaterial"); - add_uniform(material_parameters_type, "gl_BackMaterial"); - - add_uniform(array(type("gl_LightSourceParameters"), - state->Const.MaxLights), - "gl_LightSource"); - - const glsl_type *const light_model_products_type = - type("gl_LightModelProducts"); - add_uniform(light_model_products_type, "gl_FrontLightModelProduct"); - add_uniform(light_model_products_type, "gl_BackLightModelProduct"); - - const glsl_type *const light_products_type = - array(type("gl_LightProducts"), state->Const.MaxLights); - add_uniform(light_products_type, "gl_FrontLightProduct"); - add_uniform(light_products_type, "gl_BackLightProduct"); - - add_uniform(array(vec4_t, state->Const.MaxTextureUnits), - "gl_TextureEnvColor"); - - const glsl_type *const texcoords_vec4 = - array(vec4_t, state->Const.MaxTextureCoords); - add_uniform(texcoords_vec4, "gl_EyePlaneS"); - add_uniform(texcoords_vec4, "gl_EyePlaneT"); - add_uniform(texcoords_vec4, "gl_EyePlaneR"); - add_uniform(texcoords_vec4, "gl_EyePlaneQ"); - add_uniform(texcoords_vec4, "gl_ObjectPlaneS"); - add_uniform(texcoords_vec4, "gl_ObjectPlaneT"); - add_uniform(texcoords_vec4, "gl_ObjectPlaneR"); - add_uniform(texcoords_vec4, "gl_ObjectPlaneQ"); - - add_uniform(type("gl_FogParameters"), "gl_Fog"); - } -} - - -/** - * Generate variables which only exist in vertex shaders. 
- */ -void -builtin_variable_generator::generate_vs_special_vars() -{ - ir_variable *var; - - if (state->is_version(130, 300)) - add_system_value(SYSTEM_VALUE_VERTEX_ID, int_t, "gl_VertexID"); - if (state->ARB_draw_instanced_enable) - add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceIDARB"); - if (state->ARB_draw_instanced_enable || state->is_version(140, 300)) - add_system_value(SYSTEM_VALUE_INSTANCE_ID, int_t, "gl_InstanceID"); - if (state->ARB_shader_draw_parameters_enable) { - add_system_value(SYSTEM_VALUE_BASE_VERTEX, int_t, "gl_BaseVertexARB"); - add_system_value(SYSTEM_VALUE_BASE_INSTANCE, int_t, "gl_BaseInstanceARB"); - add_system_value(SYSTEM_VALUE_DRAW_ID, int_t, "gl_DrawIDARB"); - } - if (state->AMD_vertex_shader_layer_enable) { - var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); - var->data.interpolation = INTERP_QUALIFIER_FLAT; - } - if (state->AMD_vertex_shader_viewport_index_enable) { - var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); - var->data.interpolation = INTERP_QUALIFIER_FLAT; - } - if (compatibility) { - add_input(VERT_ATTRIB_POS, vec4_t, "gl_Vertex"); - add_input(VERT_ATTRIB_NORMAL, vec3_t, "gl_Normal"); - add_input(VERT_ATTRIB_COLOR0, vec4_t, "gl_Color"); - add_input(VERT_ATTRIB_COLOR1, vec4_t, "gl_SecondaryColor"); - add_input(VERT_ATTRIB_TEX0, vec4_t, "gl_MultiTexCoord0"); - add_input(VERT_ATTRIB_TEX1, vec4_t, "gl_MultiTexCoord1"); - add_input(VERT_ATTRIB_TEX2, vec4_t, "gl_MultiTexCoord2"); - add_input(VERT_ATTRIB_TEX3, vec4_t, "gl_MultiTexCoord3"); - add_input(VERT_ATTRIB_TEX4, vec4_t, "gl_MultiTexCoord4"); - add_input(VERT_ATTRIB_TEX5, vec4_t, "gl_MultiTexCoord5"); - add_input(VERT_ATTRIB_TEX6, vec4_t, "gl_MultiTexCoord6"); - add_input(VERT_ATTRIB_TEX7, vec4_t, "gl_MultiTexCoord7"); - add_input(VERT_ATTRIB_FOG, float_t, "gl_FogCoord"); - } -} - - -/** - * Generate variables which only exist in tessellation control shaders. - */ -void -builtin_variable_generator::generate_tcs_special_vars() -{ - add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); - add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn"); - add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID"); - - add_output(VARYING_SLOT_TESS_LEVEL_OUTER, array(float_t, 4), - "gl_TessLevelOuter")->data.patch = 1; - add_output(VARYING_SLOT_TESS_LEVEL_INNER, array(float_t, 2), - "gl_TessLevelInner")->data.patch = 1; -} - - -/** - * Generate variables which only exist in tessellation evaluation shaders. - */ -void -builtin_variable_generator::generate_tes_special_vars() -{ - add_system_value(SYSTEM_VALUE_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); - add_system_value(SYSTEM_VALUE_VERTICES_IN, int_t, "gl_PatchVerticesIn"); - add_system_value(SYSTEM_VALUE_TESS_COORD, vec3_t, "gl_TessCoord"); - add_system_value(SYSTEM_VALUE_TESS_LEVEL_OUTER, array(float_t, 4), - "gl_TessLevelOuter"); - add_system_value(SYSTEM_VALUE_TESS_LEVEL_INNER, array(float_t, 2), - "gl_TessLevelInner"); -} - - -/** - * Generate variables which only exist in geometry shaders. 
- */ -void -builtin_variable_generator::generate_gs_special_vars() -{ - ir_variable *var; - - var = add_output(VARYING_SLOT_LAYER, int_t, "gl_Layer"); - var->data.interpolation = INTERP_QUALIFIER_FLAT; - if (state->is_version(410, 0) || state->ARB_viewport_array_enable) { - var = add_output(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); - var->data.interpolation = INTERP_QUALIFIER_FLAT; - } - if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) - add_system_value(SYSTEM_VALUE_INVOCATION_ID, int_t, "gl_InvocationID"); - - /* Although gl_PrimitiveID appears in tessellation control and tessellation - * evaluation shaders, it has a different function there than it has in - * geometry shaders, so we treat it (and its counterpart gl_PrimitiveIDIn) - * as special geometry shader variables. - * - * Note that although the general convention of suffixing geometry shader - * input varyings with "In" was not adopted into GLSL 1.50, it is used in - * the specific case of gl_PrimitiveIDIn. So we don't need to treat - * gl_PrimitiveIDIn as an {ARB,EXT}_geometry_shader4-only variable. - */ - var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveIDIn"); - var->data.interpolation = INTERP_QUALIFIER_FLAT; - var = add_output(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); - var->data.interpolation = INTERP_QUALIFIER_FLAT; -} - - -/** - * Generate variables which only exist in fragment shaders. - */ -void -builtin_variable_generator::generate_fs_special_vars() -{ - ir_variable *var; - - if (this->state->ctx->Const.GLSLFragCoordIsSysVal) - add_system_value(SYSTEM_VALUE_FRAG_COORD, vec4_t, "gl_FragCoord"); - else - add_input(VARYING_SLOT_POS, vec4_t, "gl_FragCoord"); - - if (this->state->ctx->Const.GLSLFrontFacingIsSysVal) - add_system_value(SYSTEM_VALUE_FRONT_FACE, bool_t, "gl_FrontFacing"); - else - add_input(VARYING_SLOT_FACE, bool_t, "gl_FrontFacing"); - - if (state->is_version(120, 100)) - add_input(VARYING_SLOT_PNTC, vec2_t, "gl_PointCoord"); - - if (state->has_geometry_shader()) { - var = add_input(VARYING_SLOT_PRIMITIVE_ID, int_t, "gl_PrimitiveID"); - var->data.interpolation = INTERP_QUALIFIER_FLAT; - } - - /* gl_FragColor and gl_FragData were deprecated starting in desktop GLSL - * 1.30, and were relegated to the compatibility profile in GLSL 4.20. - * They were removed from GLSL ES 3.00. - */ - if (compatibility || !state->is_version(420, 300)) { - add_output(FRAG_RESULT_COLOR, vec4_t, "gl_FragColor"); - add_output(FRAG_RESULT_DATA0, - array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData"); - } - - if (state->es_shader && state->language_version == 100 && state->EXT_blend_func_extended_enable) { - /* We make an assumption here that there will only ever be one dual-source draw buffer. - * In case this assumption is ever proven to be false, make sure to assert here - * since we don't handle this case. - * In practice, this issue will never arise since no hardware will support it. - */ - assert(state->Const.MaxDualSourceDrawBuffers <= 1); - add_index_output(FRAG_RESULT_DATA0, 1, vec4_t, "gl_SecondaryFragColorEXT"); - add_index_output(FRAG_RESULT_DATA0, 1, - array(vec4_t, state->Const.MaxDualSourceDrawBuffers), - "gl_SecondaryFragDataEXT"); - } - - /* gl_FragDepth has always been in desktop GLSL, but did not appear in GLSL - * ES 1.00.
- */ - if (state->is_version(110, 300)) - add_output(FRAG_RESULT_DEPTH, float_t, "gl_FragDepth"); - - if (state->ARB_shader_stencil_export_enable) { - ir_variable *const var = - add_output(FRAG_RESULT_STENCIL, int_t, "gl_FragStencilRefARB"); - if (state->ARB_shader_stencil_export_warn) - var->enable_extension_warning("GL_ARB_shader_stencil_export"); - } - - if (state->AMD_shader_stencil_export_enable) { - ir_variable *const var = - add_output(FRAG_RESULT_STENCIL, int_t, "gl_FragStencilRefAMD"); - if (state->AMD_shader_stencil_export_warn) - var->enable_extension_warning("GL_AMD_shader_stencil_export"); - } - - if (state->is_version(400, 0) || state->ARB_sample_shading_enable) { - add_system_value(SYSTEM_VALUE_SAMPLE_ID, int_t, "gl_SampleID"); - add_system_value(SYSTEM_VALUE_SAMPLE_POS, vec2_t, "gl_SamplePosition"); - /* From the ARB_sample_shading specification: - * "The number of elements in the array is ceil(<s>/32), where - * <s> is the maximum number of color samples supported by the - * implementation." - * Since no drivers expose more than 32x MSAA, we can simply set - * the array size to 1 rather than computing it. - */ - add_output(FRAG_RESULT_SAMPLE_MASK, array(int_t, 1), "gl_SampleMask"); - } - - if (state->is_version(400, 0) || state->ARB_gpu_shader5_enable) { - add_system_value(SYSTEM_VALUE_SAMPLE_MASK_IN, array(int_t, 1), "gl_SampleMaskIn"); - } - - if (state->is_version(430, 0) || state->ARB_fragment_layer_viewport_enable) { - var = add_input(VARYING_SLOT_LAYER, int_t, "gl_Layer"); - var->data.interpolation = INTERP_QUALIFIER_FLAT; - var = add_input(VARYING_SLOT_VIEWPORT, int_t, "gl_ViewportIndex"); - var->data.interpolation = INTERP_QUALIFIER_FLAT; - } - - if (state->is_version(450, 310)/* || state->ARB_ES3_1_compatibility_enable*/) - add_system_value(SYSTEM_VALUE_HELPER_INVOCATION, bool_t, "gl_HelperInvocation"); -} - - -/** - * Generate variables which only exist in compute shaders. - */ -void -builtin_variable_generator::generate_cs_special_vars() -{ - add_system_value(SYSTEM_VALUE_LOCAL_INVOCATION_ID, uvec3_t, - "gl_LocalInvocationID"); - add_system_value(SYSTEM_VALUE_WORK_GROUP_ID, uvec3_t, "gl_WorkGroupID"); - add_system_value(SYSTEM_VALUE_NUM_WORK_GROUPS, uvec3_t, "gl_NumWorkGroups"); - add_variable("gl_GlobalInvocationID", uvec3_t, ir_var_auto, 0); - add_variable("gl_LocalInvocationIndex", uint_t, ir_var_auto, 0); -} - - -/** - * Add a single "varying" variable. The variable's type and direction (input - * or output) are adjusted as appropriate for the type of shader being - * compiled. - */ -void -builtin_variable_generator::add_varying(int slot, const glsl_type *type, - const char *name) -{ - switch (state->stage) { - case MESA_SHADER_TESS_CTRL: - case MESA_SHADER_TESS_EVAL: - case MESA_SHADER_GEOMETRY: - this->per_vertex_in.add_field(slot, type, name); - /* FALLTHROUGH */ - case MESA_SHADER_VERTEX: - this->per_vertex_out.add_field(slot, type, name); - break; - case MESA_SHADER_FRAGMENT: - add_input(slot, type, name); - break; - case MESA_SHADER_COMPUTE: - /* Compute shaders don't have varyings. */ - break; - } -} - - -/** - * Generate variables that are used to communicate data from one shader stage - * to the next ("varyings"). - */ -void -builtin_variable_generator::generate_varyings() -{ - /* gl_Position and gl_PointSize are not visible from fragment shaders.
*/ - if (state->stage != MESA_SHADER_FRAGMENT) { - add_varying(VARYING_SLOT_POS, vec4_t, "gl_Position"); - add_varying(VARYING_SLOT_PSIZ, float_t, "gl_PointSize"); - } - - if (state->is_version(130, 0)) { - add_varying(VARYING_SLOT_CLIP_DIST0, array(float_t, 0), - "gl_ClipDistance"); - } - - if (compatibility) { - add_varying(VARYING_SLOT_TEX0, array(vec4_t, 0), "gl_TexCoord"); - add_varying(VARYING_SLOT_FOGC, float_t, "gl_FogFragCoord"); - if (state->stage == MESA_SHADER_FRAGMENT) { - add_varying(VARYING_SLOT_COL0, vec4_t, "gl_Color"); - add_varying(VARYING_SLOT_COL1, vec4_t, "gl_SecondaryColor"); - } else { - add_varying(VARYING_SLOT_CLIP_VERTEX, vec4_t, "gl_ClipVertex"); - add_varying(VARYING_SLOT_COL0, vec4_t, "gl_FrontColor"); - add_varying(VARYING_SLOT_BFC0, vec4_t, "gl_BackColor"); - add_varying(VARYING_SLOT_COL1, vec4_t, "gl_FrontSecondaryColor"); - add_varying(VARYING_SLOT_BFC1, vec4_t, "gl_BackSecondaryColor"); - } - } - - /* Section 7.1 (Built-In Language Variables) of the GLSL 4.00 spec - * says: - * - * "In the tessellation control language, built-in variables are - * intrinsically declared as: - * - * in gl_PerVertex { - * vec4 gl_Position; - * float gl_PointSize; - * float gl_ClipDistance[]; - * } gl_in[gl_MaxPatchVertices];" - */ - if (state->stage == MESA_SHADER_TESS_CTRL || - state->stage == MESA_SHADER_TESS_EVAL) { - const glsl_type *per_vertex_in_type = - this->per_vertex_in.construct_interface_instance(); - add_variable("gl_in", array(per_vertex_in_type, state->Const.MaxPatchVertices), - ir_var_shader_in, -1); - } - if (state->stage == MESA_SHADER_GEOMETRY) { - const glsl_type *per_vertex_in_type = - this->per_vertex_in.construct_interface_instance(); - add_variable("gl_in", array(per_vertex_in_type, 0), - ir_var_shader_in, -1); - } - if (state->stage == MESA_SHADER_TESS_CTRL) { - const glsl_type *per_vertex_out_type = - this->per_vertex_out.construct_interface_instance(); - add_variable("gl_out", array(per_vertex_out_type, 0), - ir_var_shader_out, -1); - } - if (state->stage == MESA_SHADER_VERTEX || - state->stage == MESA_SHADER_TESS_EVAL || - state->stage == MESA_SHADER_GEOMETRY) { - const glsl_type *per_vertex_out_type = - this->per_vertex_out.construct_interface_instance(); - const glsl_struct_field *fields = per_vertex_out_type->fields.structure; - for (unsigned i = 0; i < per_vertex_out_type->length; i++) { - ir_variable *var = - add_variable(fields[i].name, fields[i].type, ir_var_shader_out, - fields[i].location); - var->data.interpolation = fields[i].interpolation; - var->data.centroid = fields[i].centroid; - var->data.sample = fields[i].sample; - var->data.patch = fields[i].patch; - var->data.precision = fields[i].precision; - var->init_interface_type(per_vertex_out_type); - } - } -} - - -}; /* Anonymous namespace */ - - -void -_mesa_glsl_initialize_variables(exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - builtin_variable_generator gen(instructions, state); - - gen.generate_constants(); - gen.generate_uniforms(); - - gen.generate_varyings(); - - switch (state->stage) { - case MESA_SHADER_VERTEX: - gen.generate_vs_special_vars(); - break; - case MESA_SHADER_TESS_CTRL: - gen.generate_tcs_special_vars(); - break; - case MESA_SHADER_TESS_EVAL: - gen.generate_tes_special_vars(); - break; - case MESA_SHADER_GEOMETRY: - gen.generate_gs_special_vars(); - break; - case MESA_SHADER_FRAGMENT: - gen.generate_fs_special_vars(); - break; - case MESA_SHADER_COMPUTE: - gen.generate_cs_special_vars(); - break; - } -} - - -/** - * Initialize compute 
shader variables with values that are derived from other - * compute shader variables. - */ -static void -initialize_cs_derived_variables(gl_shader *shader, - ir_function_signature *const main_sig) -{ - assert(shader->Stage == MESA_SHADER_COMPUTE); - - ir_variable *gl_GlobalInvocationID = - shader->symbols->get_variable("gl_GlobalInvocationID"); - assert(gl_GlobalInvocationID); - ir_variable *gl_WorkGroupID = - shader->symbols->get_variable("gl_WorkGroupID"); - assert(gl_WorkGroupID); - ir_variable *gl_WorkGroupSize = - shader->symbols->get_variable("gl_WorkGroupSize"); - if (gl_WorkGroupSize == NULL) { - void *const mem_ctx = ralloc_parent(shader->ir); - gl_WorkGroupSize = new(mem_ctx) ir_variable(glsl_type::uvec3_type, - "gl_WorkGroupSize", - ir_var_auto); - gl_WorkGroupSize->data.how_declared = ir_var_declared_implicitly; - gl_WorkGroupSize->data.read_only = true; - shader->ir->push_head(gl_WorkGroupSize); - } - ir_variable *gl_LocalInvocationID = - shader->symbols->get_variable("gl_LocalInvocationID"); - assert(gl_LocalInvocationID); - - /* gl_GlobalInvocationID = - * gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID - */ - ir_instruction *inst = - assign(gl_GlobalInvocationID, - add(mul(gl_WorkGroupID, gl_WorkGroupSize), - gl_LocalInvocationID)); - main_sig->body.push_head(inst); - - /* gl_LocalInvocationIndex = - * gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y + - * gl_LocalInvocationID.y * gl_WorkGroupSize.x + - * gl_LocalInvocationID.x; - */ - ir_expression *index_z = - mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)), - swizzle_y(gl_WorkGroupSize)); - ir_expression *index_y = - mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize)); - ir_expression *index_y_plus_z = add(index_y, index_z); - operand index_x(swizzle_x(gl_LocalInvocationID)); - ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x); - ir_variable *gl_LocalInvocationIndex = - shader->symbols->get_variable("gl_LocalInvocationIndex"); - assert(gl_LocalInvocationIndex); - inst = assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z); - main_sig->body.push_head(inst); -} - - -/** - * Initialize builtin variables with values based on other builtin variables. - * These are initialized in the main function. - */ -void -_mesa_glsl_initialize_derived_variables(gl_shader *shader) -{ - /* We only need to set CS variables currently. */ - if (shader->Stage != MESA_SHADER_COMPUTE) - return; - - ir_function_signature *const main_sig = - _mesa_get_main_function_signature(shader); - if (main_sig == NULL) - return; - - initialize_cs_derived_variables(shader, main_sig); -} diff --git a/src/glsl/glcpp/.gitignore b/src/glsl/glcpp/.gitignore deleted file mode 100644 index 24a7119caa4..00000000000 --- a/src/glsl/glcpp/.gitignore +++ /dev/null @@ -1,6 +0,0 @@ -glcpp -glcpp-lex.c -glcpp-parse.output -glcpp-parse.c -glcpp-parse.h -tests/*.out diff --git a/src/glsl/glcpp/README b/src/glsl/glcpp/README deleted file mode 100644 index 0637935e28b..00000000000 --- a/src/glsl/glcpp/README +++ /dev/null @@ -1,30 +0,0 @@ -glcpp -- GLSL "C" preprocessor - -This is a simple preprocessor designed to provide the preprocessing -needs of the GLSL language. The requirements for this preprocessor are -specified in the GLSL 1.30 specification available from: - -http://www.opengl.org/registry/doc/GLSLangSpec.Full.1.30.10.pdf - -This specification is not precise on some semantics, (for example, -#define and #if), defining these merely "as is standard for C++ -preprocessors".
To fill in these details, I've been using a draft of -the C99 standard as available from: - -http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1256.pdf - -Any downstream compiler accepting output from glcpp should be prepared -to encounter and deal with the following preprocessor macros: - - #line - #pragma - #extension - -All other macros will be handled according to the GLSL specification -and will not appear in the output. - -Known limitations ------------------ -A file that ends with a function-like macro name as the last -non-whitespace token will result in a parse error, (where it should be -passed through as is). \ No newline at end of file diff --git a/src/glsl/glcpp/glcpp-lex.l b/src/glsl/glcpp/glcpp-lex.l deleted file mode 100644 index fa9aa506912..00000000000 --- a/src/glsl/glcpp/glcpp-lex.l +++ /dev/null @@ -1,577 +0,0 @@ -%{ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include <stdio.h> -#include <string.h> -#include <ctype.h> - -#include "glcpp.h" -#include "glcpp-parse.h" - -/* Flex annoyingly generates some functions without making them - * static. Let's declare them here. */ -int glcpp_get_column (yyscan_t yyscanner); -void glcpp_set_column (int column_no , yyscan_t yyscanner); - -#ifdef _MSC_VER -#define YY_NO_UNISTD_H -#endif - -#define YY_NO_INPUT - -#define YY_USER_ACTION \ - do { \ - if (parser->has_new_line_number) \ - yylineno = parser->new_line_number; \ - if (parser->has_new_source_number) \ - yylloc->source = parser->new_source_number; \ - yylloc->first_column = yycolumn + 1; \ - yylloc->first_line = yylloc->last_line = yylineno; \ - yycolumn += yyleng; \ - yylloc->last_column = yycolumn + 1; \ - parser->has_new_line_number = 0; \ - parser->has_new_source_number = 0; \ - } while(0); - -#define YY_USER_INIT \ - do { \ - yylineno = 1; \ - yycolumn = 0; \ - yylloc->source = 0; \ - } while(0) - -/* It's ugly to have macros that have return statements inside of - * them, but flex-based lexer generation is all built around the - * return statement. - * - * To mitigate the ugliness, we defer as much of the logic as possible - * to an actual function, not a macro (see - * glcpp_lex_update_state_per_token) and we make the word RETURN - * prominent in all of the macros which may return. - * - * The most-commonly-used macro is RETURN_TOKEN which will perform all - * necessary state updates based on the provided token, then - * conditionally return the token.
It will not return a token if the - * parser is currently skipping tokens, (such as within #if - * 0...#else). - * - * The RETURN_TOKEN_NEVER_SKIP macro is a lower-level variant that - * makes the token returning unconditional. This is needed for things - * like #if and the tokens of its condition, (since these must be - * evaluated by the parser even when otherwise skipping). - * - * Finally, RETURN_STRING_TOKEN is a simple convenience wrapper on top - * of RETURN_TOKEN that performs a string copy of yytext before the - * return. - */ -#define RETURN_TOKEN_NEVER_SKIP(token) \ - do { \ - if (glcpp_lex_update_state_per_token (parser, token)) \ - return token; \ - } while (0) - -#define RETURN_TOKEN(token) \ - do { \ - if (! parser->skipping) { \ - RETURN_TOKEN_NEVER_SKIP(token); \ - } \ - } while(0) - -#define RETURN_STRING_TOKEN(token) \ - do { \ - if (! parser->skipping) { \ - yylval->str = ralloc_strdup (yyextra, yytext); \ - RETURN_TOKEN_NEVER_SKIP (token); \ - } \ - } while(0) - - -/* Update all state necessary for each token being returned. - * - * Here we'll be tracking newlines and spaces so that the lexer can - * alter its behavior as necessary, (for example, '#' has special - * significance if it is the first non-whitespace, non-comment token - * in a line, but does not otherwise). - * - * NOTE: If this function returns FALSE, then no token should be - * returned at all. This is used to suppress duplicate SPACE tokens. - */ -static int -glcpp_lex_update_state_per_token (glcpp_parser_t *parser, int token) -{ - /* After the first non-space token in a line, we won't - * allow any '#' to introduce a directive. */ - if (token == NEWLINE) { - parser->first_non_space_token_this_line = 1; - } else if (token != SPACE) { - parser->first_non_space_token_this_line = 0; - } - - /* Track newlines just to know whether a newline needs - * to be inserted if end-of-file comes early. */ - if (token == NEWLINE) { - parser->last_token_was_newline = 1; - } else { - parser->last_token_was_newline = 0; - } - - /* Track spaces to avoid emitting multiple SPACE - * tokens in a row. */ - if (token == SPACE) { - if (! parser->last_token_was_space) { - parser->last_token_was_space = 1; - return 1; - } else { - parser->last_token_was_space = 1; - return 0; - } - } else { - parser->last_token_was_space = 0; - return 1; - } -} - - -%} - -%option bison-bridge bison-locations reentrant noyywrap -%option extra-type="glcpp_parser_t *" -%option prefix="glcpp_" -%option stack -%option never-interactive -%option warn nodefault - - /* Note: When adding any start conditions to this list, you must also - * update the "Internal compiler error" catch-all rule near the end of - * this file. */ - -%x COMMENT DEFINE DONE HASH NEWLINE_CATCHUP UNREACHABLE - -SPACE [[:space:]] -NONSPACE [^[:space:]] -HSPACE [ \t] -HASH # -NEWLINE (\r\n|\n\r|\r|\n) -IDENTIFIER [_a-zA-Z][_a-zA-Z0-9]* -PP_NUMBER [.]?[0-9]([._a-zA-Z0-9]|[eEpP][-+])* -PUNCTUATION [][(){}.&*~!/%<>^|;,=+-] - -/* The OTHER class is simply a catch-all for things that the CPP -parser just doesn't care about. Since flex regular expressions that -match longer strings take priority over those matching shorter -strings, we have to be careful to avoid OTHER matching and hiding -something that CPP does care about. So we simply exclude all -characters that appear in any other expressions. */ - -OTHER [^][_#[:space:]#a-zA-Z0-9(){}.&*~!/%<>^|;,=+-] - -DIGITS [0-9][0-9]* -DECIMAL_INTEGER [1-9][0-9]*[uU]? -OCTAL_INTEGER 0[0-7]*[uU]? -HEXADECIMAL_INTEGER 0[xX][0-9a-fA-F]+[uU]?
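The SPACE branch of glcpp_lex_update_state_per_token() above is what keeps a run of whitespace from producing more than one SPACE token. A self-contained sketch of just that collapsing logic, with invented TOK_* codes standing in for the bison-generated token values:

    /* Sketch of the SPACE-collapsing rule: emit at most one SPACE token
     * per run of spaces. Token codes here are hypothetical. */
    #include <stdio.h>

    enum { TOK_SPACE, TOK_WORD, TOK_NEWLINE };

    struct lex_state { int last_token_was_space; };

    /* Returns 1 if the token should be emitted, 0 if it is suppressed. */
    static int update_state_per_token(struct lex_state *s, int token)
    {
       if (token == TOK_SPACE) {
          int emit = !s->last_token_was_space;
          s->last_token_was_space = 1;
          return emit;
       }
       s->last_token_was_space = 0;
       return 1;
    }

    int main(void)
    {
       struct lex_state s = { 0 };
       int stream[] = { TOK_WORD, TOK_SPACE, TOK_SPACE, TOK_SPACE, TOK_WORD };
       for (size_t i = 0; i < sizeof(stream) / sizeof(stream[0]); i++)
          if (update_state_per_token(&s, stream[i]))
             printf("emit token %d\n", stream[i]);   /* WORD, one SPACE, WORD */
       return 0;
    }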
- -%% - - glcpp_parser_t *parser = yyextra; - - /* When we lex a multi-line comment, we replace it (as - * specified) with a single space. But if the comment spanned - * multiple lines, then subsequent parsing stages will not - * count correct line numbers. To avoid this problem we keep - * track of all newlines that were commented out by a - * multi-line comment, and we emit a NEWLINE token for each at - * the next legal opportunity, (which is when the lexer would - * be emitting a NEWLINE token anyway). - */ - if (YY_START == NEWLINE_CATCHUP) { - if (parser->commented_newlines) - parser->commented_newlines--; - if (parser->commented_newlines == 0) - BEGIN INITIAL; - RETURN_TOKEN_NEVER_SKIP (NEWLINE); - } - - /* Set up the parser->skipping bit here before doing any lexing. - * - * This bit controls whether tokens are skipped, (as implemented by - * RETURN_TOKEN), such as between "#if 0" and "#endif". - * - * The parser maintains a skip_stack indicating whether we should be - * skipping, (and nested levels of #if/#ifdef/#ifndef/#endif) will - * push and pop items from the stack. - * - * Here are the rules for determining whether we are skipping: - * - * 1. If the skip stack is NULL, we are outside of all #if blocks - * and we are not skipping. - * - * 2. If the skip stack is non-NULL, the type of the top node in - * the stack determines whether to skip. A type of - * SKIP_NO_SKIP is used for blocks where we are emitting - * tokens, (such as between #if 1 and #endif, or after the - * #else of an #if 0, etc.). - * - * 3. The lexing_directive bit overrides the skip stack. This bit - * is set when we are actively lexing the expression for a - * pre-processor condition, (such as #if, #elif, or #else). In - * this case, even if otherwise skipping, we need to emit the - * tokens for this condition so that the parser can evaluate - * the expression. (For #else, there's no expression, but we - * emit tokens so the parser can generate a nice error message - * if there are any tokens here). - */ - if (parser->skip_stack && - parser->skip_stack->type != SKIP_NO_SKIP && - ! parser->lexing_directive) - { - parser->skipping = 1; - } else { - parser->skipping = 0; - } - - /* Single-line comments */ -"//"[^\r\n]* { -} - - /* Multi-line comments */ -"/*" { yy_push_state(COMMENT, yyscanner); } -<COMMENT>[^*\r\n]* -<COMMENT>[^*\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } -<COMMENT>"*"+[^*/\r\n]* -<COMMENT>"*"+[^*/\r\n]*{NEWLINE} { yylineno++; yycolumn = 0; parser->commented_newlines++; } -<COMMENT>"*"+"/" { - yy_pop_state(yyscanner); - /* In the <HASH> start condition, we don't want any SPACE token. */ - if (yyextra->space_tokens && YY_START != HASH) - RETURN_TOKEN (SPACE); -} - -{HASH} { - - /* If the '#' is the first non-whitespace, non-comment token on this - * line, then it introduces a directive, switch to the <HASH> start - * condition. - * - * Otherwise, this is just punctuation, so return the HASH_TOKEN - * token. */ - if (parser->first_non_space_token_this_line) { - BEGIN HASH; - } - - RETURN_TOKEN_NEVER_SKIP (HASH_TOKEN); -} - -<HASH>version{HSPACE}+ { - BEGIN INITIAL; - yyextra->space_tokens = 0; - RETURN_STRING_TOKEN (VERSION_TOKEN); -} - - /* Swallow empty #pragma directives, (to avoid confusing the - * downstream compiler). - * - * Note: We use a simple regular expression for the lookahead - * here. Specifically, we cannot use the complete {NEWLINE} expression - * since it uses alternation and we've found that there's a flex bug - * where using alternation in the lookahead portion of a pattern - * triggers a buffer overrun.
*/ -<HASH>pragma{HSPACE}*/[\r\n] { - BEGIN INITIAL; -} - - /* glcpp doesn't handle #extension, #version, or #pragma directives. - * Simply pass them through to the main compiler's lexer/parser. */ -<HASH>(extension|pragma)[^\r\n]* { - BEGIN INITIAL; - RETURN_STRING_TOKEN (PRAGMA); -} - -<HASH>line{HSPACE}+ { - BEGIN INITIAL; - RETURN_TOKEN (LINE); -} - -<HASH>{NEWLINE} { - BEGIN INITIAL; - RETURN_TOKEN_NEVER_SKIP (NEWLINE); -} - - /* For the pre-processor directives, we return these tokens - * even when we are otherwise skipping. */ -<HASH>ifdef { - BEGIN INITIAL; - yyextra->lexing_directive = 1; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (IFDEF); -} - -<HASH>ifndef { - BEGIN INITIAL; - yyextra->lexing_directive = 1; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (IFNDEF); -} - -<HASH>if/[^_a-zA-Z0-9] { - BEGIN INITIAL; - yyextra->lexing_directive = 1; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (IF); -} - -<HASH>elif/[^_a-zA-Z0-9] { - BEGIN INITIAL; - yyextra->lexing_directive = 1; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (ELIF); -} - -<HASH>else { - BEGIN INITIAL; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (ELSE); -} - -<HASH>endif { - BEGIN INITIAL; - yyextra->space_tokens = 0; - RETURN_TOKEN_NEVER_SKIP (ENDIF); -} - -<HASH>error[^\r\n]* { - BEGIN INITIAL; - RETURN_STRING_TOKEN (ERROR_TOKEN); -} - - /* After we see a "#define" we enter the <DEFINE> start state - * for the lexer. Within <DEFINE> we are looking for the first - * identifier and specifically checking whether the identifier - * is followed by a '(' or not, (to lex either a - * FUNC_IDENTIFIER or an OBJ_IDENTIFIER token). - * - * While in the <DEFINE> state we also need to explicitly - * handle a few other things that may appear before the - * identifier: - * - * * Comments, (handled above with the main support for - * comments). - * - * * Whitespace (simply ignored) - * - * * Anything else, (not an identifier, not a comment, - * and not whitespace). This will generate an error. - */ -<HASH>define{HSPACE}* { - if (! parser->skipping) { - BEGIN DEFINE; - yyextra->space_tokens = 0; - RETURN_TOKEN (DEFINE_TOKEN); - } -} - -<HASH>undef { - BEGIN INITIAL; - yyextra->space_tokens = 0; - RETURN_TOKEN (UNDEF); -} - -<HASH>{HSPACE}+ { - /* Nothing to do here. Importantly, don't leave the <HASH> - * start condition, since it's legal to have space between the - * '#' and the directive. */ -} - - /* This will catch any non-directive garbage after a HASH */ -<HASH>{NONSPACE} { - BEGIN INITIAL; - RETURN_TOKEN (GARBAGE); -} - - /* An identifier immediately followed by '(' */ -<DEFINE>{IDENTIFIER}/"(" { - BEGIN INITIAL; - RETURN_STRING_TOKEN (FUNC_IDENTIFIER); -} - - /* An identifier not immediately followed by '(' */ -<DEFINE>{IDENTIFIER} { - BEGIN INITIAL; - RETURN_STRING_TOKEN (OBJ_IDENTIFIER); -} - - /* Whitespace */ -<DEFINE>{HSPACE}+ { - /* Just ignore it. Nothing to do here. */ -} - - /* '/' not followed by '*', so not a comment. This is an error. */ -<DEFINE>[/][^*]{NONSPACE}* { - BEGIN INITIAL; - glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); - RETURN_STRING_TOKEN (INTEGER_STRING); -} - - /* A character that can't start an identifier, comment, or - * space. This is an error.
*/ -<DEFINE>[^_a-zA-Z/[:space:]]{NONSPACE}* { - BEGIN INITIAL; - glcpp_error(yylloc, yyextra, "#define followed by a non-identifier: %s", yytext); - RETURN_STRING_TOKEN (INTEGER_STRING); -} - -{DECIMAL_INTEGER} { - RETURN_STRING_TOKEN (INTEGER_STRING); -} - -{OCTAL_INTEGER} { - RETURN_STRING_TOKEN (INTEGER_STRING); -} - -{HEXADECIMAL_INTEGER} { - RETURN_STRING_TOKEN (INTEGER_STRING); -} - -"<<" { - RETURN_TOKEN (LEFT_SHIFT); -} - -">>" { - RETURN_TOKEN (RIGHT_SHIFT); -} - -"<=" { - RETURN_TOKEN (LESS_OR_EQUAL); -} - -">=" { - RETURN_TOKEN (GREATER_OR_EQUAL); -} - -"==" { - RETURN_TOKEN (EQUAL); -} - -"!=" { - RETURN_TOKEN (NOT_EQUAL); -} - -"&&" { - RETURN_TOKEN (AND); -} - -"||" { - RETURN_TOKEN (OR); -} - -"++" { - RETURN_TOKEN (PLUS_PLUS); -} - -"--" { - RETURN_TOKEN (MINUS_MINUS); -} - -"##" { - if (! parser->skipping) { - if (parser->is_gles) - glcpp_error(yylloc, yyextra, "Token pasting (##) is illegal in GLES"); - RETURN_TOKEN (PASTE); - } -} - -"defined" { - RETURN_TOKEN (DEFINED); -} - -{IDENTIFIER} { - RETURN_STRING_TOKEN (IDENTIFIER); -} - -{PP_NUMBER} { - RETURN_STRING_TOKEN (OTHER); -} - -{PUNCTUATION} { - RETURN_TOKEN (yytext[0]); -} - -{OTHER}+ { - RETURN_STRING_TOKEN (OTHER); -} - -{HSPACE} { - if (yyextra->space_tokens) { - RETURN_TOKEN (SPACE); - } -} - - /* We preserve all newlines, even between #if 0..#endif, so no - skipping. */ -<*>{NEWLINE} { - if (parser->commented_newlines) { - BEGIN NEWLINE_CATCHUP; - } else { - BEGIN INITIAL; - } - yyextra->space_tokens = 1; - yyextra->lexing_directive = 0; - yylineno++; - yycolumn = 0; - RETURN_TOKEN_NEVER_SKIP (NEWLINE); -} - -<<EOF>> { - if (YY_START == COMMENT) - glcpp_error(yylloc, yyextra, "Unterminated comment"); - BEGIN DONE; /* Don't keep matching this rule forever. */ - yyextra->lexing_directive = 0; - if (! parser->last_token_was_newline) - RETURN_TOKEN (NEWLINE); -} - - /* This is a catch-all to avoid the annoying default flex action which - * matches any character and prints it. If any input ever matches this - * rule, then we have made a mistake above and need to fix one or more - * of the preceding patterns to match that input. */ - -<*>. { - glcpp_error(yylloc, yyextra, "Internal compiler error: Unexpected character: %s", yytext); - - /* We don't actually use the UNREACHABLE start condition. We - only have this block here so that we can pretend to call some - generated functions, (to avoid "defined but not used" - warnings). */ - if (YY_START == UNREACHABLE) { - unput('.'); - yy_top_state(yyextra); - } -} - -%% - -void -glcpp_lex_set_source_string(glcpp_parser_t *parser, const char *shader) -{ - yy_scan_string(shader, parser->scanner); -} diff --git a/src/glsl/glcpp/glcpp-parse.y b/src/glsl/glcpp/glcpp-parse.y deleted file mode 100644 index ef1a6575aaa..00000000000 --- a/src/glsl/glcpp/glcpp-parse.y +++ /dev/null @@ -1,2557 +0,0 @@ -%{ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include -#include -#include - -#include "glcpp.h" -#include "main/core.h" /* for struct gl_extensions */ -#include "main/mtypes.h" /* for gl_api enum */ - -static void -yyerror (YYLTYPE *locp, glcpp_parser_t *parser, const char *error); - -static void -_define_object_macro (glcpp_parser_t *parser, - YYLTYPE *loc, - const char *macro, - token_list_t *replacements); - -static void -_define_function_macro (glcpp_parser_t *parser, - YYLTYPE *loc, - const char *macro, - string_list_t *parameters, - token_list_t *replacements); - -static string_list_t * -_string_list_create (void *ctx); - -static void -_string_list_append_item (string_list_t *list, const char *str); - -static int -_string_list_contains (string_list_t *list, const char *member, int *index); - -static const char * -_string_list_has_duplicate (string_list_t *list); - -static int -_string_list_length (string_list_t *list); - -static int -_string_list_equal (string_list_t *a, string_list_t *b); - -static argument_list_t * -_argument_list_create (void *ctx); - -static void -_argument_list_append (argument_list_t *list, token_list_t *argument); - -static int -_argument_list_length (argument_list_t *list); - -static token_list_t * -_argument_list_member_at (argument_list_t *list, int index); - -/* Note: This function ralloc_steal()s the str pointer. */ -static token_t * -_token_create_str (void *ctx, int type, char *str); - -static token_t * -_token_create_ival (void *ctx, int type, int ival); - -static token_list_t * -_token_list_create (void *ctx); - -static void -_token_list_append (token_list_t *list, token_t *token); - -static void -_token_list_append_list (token_list_t *list, token_list_t *tail); - -static int -_token_list_equal_ignoring_space (token_list_t *a, token_list_t *b); - -static void -_parser_active_list_push (glcpp_parser_t *parser, - const char *identifier, - token_node_t *marker); - -static void -_parser_active_list_pop (glcpp_parser_t *parser); - -static int -_parser_active_list_contains (glcpp_parser_t *parser, const char *identifier); - -typedef enum { - EXPANSION_MODE_IGNORE_DEFINED, - EXPANSION_MODE_EVALUATE_DEFINED -} expansion_mode_t; - -/* Expand list, and begin lexing from the result (after first - * prefixing a token of type 'head_token_type'). - */ -static void -_glcpp_parser_expand_and_lex_from (glcpp_parser_t *parser, - int head_token_type, - token_list_t *list, - expansion_mode_t mode); - -/* Perform macro expansion in-place on the given list. 
*/ -static void -_glcpp_parser_expand_token_list (glcpp_parser_t *parser, - token_list_t *list, - expansion_mode_t mode); - -static void -_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser, - token_list_t *list); - -static void -_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, YYLTYPE *loc, - int condition); - -static void -_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, YYLTYPE *loc, - const char *type, int condition); - -static void -_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser, YYLTYPE *loc); - -static void -_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t version, - const char *ident, bool explicitly_set); - -static int -glcpp_parser_lex (YYSTYPE *yylval, YYLTYPE *yylloc, glcpp_parser_t *parser); - -static void -glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list); - -static void -add_builtin_define(glcpp_parser_t *parser, const char *name, int value); - -%} - -%pure-parser -%error-verbose - -%locations -%initial-action { - @$.first_line = 1; - @$.first_column = 1; - @$.last_line = 1; - @$.last_column = 1; - @$.source = 0; -} - -%parse-param {glcpp_parser_t *parser} -%lex-param {glcpp_parser_t *parser} - -%expect 0 - - /* We use HASH_TOKEN, DEFINE_TOKEN and VERSION_TOKEN (as opposed to - * HASH, DEFINE, and VERSION) to avoid conflicts with other symbols, - * (such as the and start conditions in the lexer). */ -%token DEFINED ELIF_EXPANDED HASH_TOKEN DEFINE_TOKEN FUNC_IDENTIFIER OBJ_IDENTIFIER ELIF ELSE ENDIF ERROR_TOKEN IF IFDEF IFNDEF LINE PRAGMA UNDEF VERSION_TOKEN GARBAGE IDENTIFIER IF_EXPANDED INTEGER INTEGER_STRING LINE_EXPANDED NEWLINE OTHER PLACEHOLDER SPACE PLUS_PLUS MINUS_MINUS -%token PASTE -%type INTEGER operator SPACE integer_constant -%type expression -%type IDENTIFIER FUNC_IDENTIFIER OBJ_IDENTIFIER INTEGER_STRING OTHER ERROR_TOKEN PRAGMA -%type identifier_list -%type preprocessing_token -%type pp_tokens replacement_list text_line -%left OR -%left AND -%left '|' -%left '^' -%left '&' -%left EQUAL NOT_EQUAL -%left '<' '>' LESS_OR_EQUAL GREATER_OR_EQUAL -%left LEFT_SHIFT RIGHT_SHIFT -%left '+' '-' -%left '*' '/' '%' -%right UNARY - -%debug - -%% - -input: - /* empty */ -| input line -; - -line: - control_line -| SPACE control_line -| text_line { - _glcpp_parser_print_expanded_token_list (parser, $1); - ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n"); - ralloc_free ($1); - } -| expanded_line -; - -expanded_line: - IF_EXPANDED expression NEWLINE { - if (parser->is_gles && $2.undefined_macro) - glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); - _glcpp_parser_skip_stack_push_if (parser, & @1, $2.value); - } -| ELIF_EXPANDED expression NEWLINE { - if (parser->is_gles && $2.undefined_macro) - glcpp_error(& @1, parser, "undefined macro %s in expression (illegal in GLES)", $2.undefined_macro); - _glcpp_parser_skip_stack_change_if (parser, & @1, "elif", $2.value); - } -| LINE_EXPANDED integer_constant NEWLINE { - parser->has_new_line_number = 1; - parser->new_line_number = $2; - ralloc_asprintf_rewrite_tail (&parser->output, - &parser->output_length, - "#line %" PRIiMAX "\n", - $2); - } -| LINE_EXPANDED integer_constant integer_constant NEWLINE { - parser->has_new_line_number = 1; - parser->new_line_number = $2; - parser->has_new_source_number = 1; - parser->new_source_number = $3; - ralloc_asprintf_rewrite_tail (&parser->output, - &parser->output_length, - "#line %" PRIiMAX " %" PRIiMAX "\n", - $2, $3); - } -; - -define: - 
OBJ_IDENTIFIER replacement_list NEWLINE { - _define_object_macro (parser, & @1, $1, $2); - } -| FUNC_IDENTIFIER '(' ')' replacement_list NEWLINE { - _define_function_macro (parser, & @1, $1, NULL, $4); - } -| FUNC_IDENTIFIER '(' identifier_list ')' replacement_list NEWLINE { - _define_function_macro (parser, & @1, $1, $3, $5); - } -; - -control_line: - control_line_success { - ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "\n"); - } -| control_line_error -| HASH_TOKEN LINE { - glcpp_parser_resolve_implicit_version(parser); - } pp_tokens NEWLINE { - - if (parser->skip_stack == NULL || - parser->skip_stack->type == SKIP_NO_SKIP) - { - _glcpp_parser_expand_and_lex_from (parser, - LINE_EXPANDED, $4, - EXPANSION_MODE_IGNORE_DEFINED); - } - } -; - -control_line_success: - HASH_TOKEN DEFINE_TOKEN { - glcpp_parser_resolve_implicit_version(parser); - } define -| HASH_TOKEN UNDEF { - glcpp_parser_resolve_implicit_version(parser); - } IDENTIFIER NEWLINE { - macro_t *macro; - if (strcmp("__LINE__", $4) == 0 - || strcmp("__FILE__", $4) == 0 - || strcmp("__VERSION__", $4) == 0 - || strncmp("GL_", $4, 3) == 0) - glcpp_error(& @1, parser, "Built-in (pre-defined)" - " macro names cannot be undefined."); - - macro = hash_table_find (parser->defines, $4); - if (macro) { - hash_table_remove (parser->defines, $4); - ralloc_free (macro); - } - ralloc_free ($4); - } -| HASH_TOKEN IF { - glcpp_parser_resolve_implicit_version(parser); - } pp_tokens NEWLINE { - /* Be careful to only evaluate the 'if' expression if - * we are not skipping. When we are skipping, we - * simply push a new 0-valued 'if' onto the skip - * stack. - * - * This avoids generating diagnostics for invalid - * expressions that are being skipped. */ - if (parser->skip_stack == NULL || - parser->skip_stack->type == SKIP_NO_SKIP) - { - _glcpp_parser_expand_and_lex_from (parser, - IF_EXPANDED, $4, - EXPANSION_MODE_EVALUATE_DEFINED); - } - else - { - _glcpp_parser_skip_stack_push_if (parser, & @1, 0); - parser->skip_stack->type = SKIP_TO_ENDIF; - } - } -| HASH_TOKEN IF NEWLINE { - /* #if without an expression is only an error if we - * are not skipping */ - if (parser->skip_stack == NULL || - parser->skip_stack->type == SKIP_NO_SKIP) - { - glcpp_error(& @1, parser, "#if with no expression"); - } - _glcpp_parser_skip_stack_push_if (parser, & @1, 0); - } -| HASH_TOKEN IFDEF { - glcpp_parser_resolve_implicit_version(parser); - } IDENTIFIER junk NEWLINE { - macro_t *macro = hash_table_find (parser->defines, $4); - ralloc_free ($4); - _glcpp_parser_skip_stack_push_if (parser, & @1, macro != NULL); - } -| HASH_TOKEN IFNDEF { - glcpp_parser_resolve_implicit_version(parser); - } IDENTIFIER junk NEWLINE { - macro_t *macro = hash_table_find (parser->defines, $4); - ralloc_free ($4); - _glcpp_parser_skip_stack_push_if (parser, & @3, macro == NULL); - } -| HASH_TOKEN ELIF pp_tokens NEWLINE { - /* Be careful to only evaluate the 'elif' expression - * if we are not skipping. When we are skipping, we - * simply change to a 0-valued 'elif' on the skip - * stack. - * - * This avoids generating diagnostics for invalid - * expressions that are being skipped. 
*/ - if (parser->skip_stack && - parser->skip_stack->type == SKIP_TO_ELSE) - { - _glcpp_parser_expand_and_lex_from (parser, - ELIF_EXPANDED, $3, - EXPANSION_MODE_EVALUATE_DEFINED); - } - else if (parser->skip_stack && - parser->skip_stack->has_else) - { - glcpp_error(& @1, parser, "#elif after #else"); - } - else - { - _glcpp_parser_skip_stack_change_if (parser, & @1, - "elif", 0); - } - } -| HASH_TOKEN ELIF NEWLINE { - /* #elif without an expression is an error unless we - * are skipping. */ - if (parser->skip_stack && - parser->skip_stack->type == SKIP_TO_ELSE) - { - glcpp_error(& @1, parser, "#elif with no expression"); - } - else if (parser->skip_stack && - parser->skip_stack->has_else) - { - glcpp_error(& @1, parser, "#elif after #else"); - } - else - { - _glcpp_parser_skip_stack_change_if (parser, & @1, - "elif", 0); - glcpp_warning(& @1, parser, "ignoring illegal #elif without expression"); - } - } -| HASH_TOKEN ELSE { parser->lexing_directive = 1; } NEWLINE { - if (parser->skip_stack && - parser->skip_stack->has_else) - { - glcpp_error(& @1, parser, "multiple #else"); - } - else - { - _glcpp_parser_skip_stack_change_if (parser, & @1, "else", 1); - if (parser->skip_stack) - parser->skip_stack->has_else = true; - } - } -| HASH_TOKEN ENDIF { - _glcpp_parser_skip_stack_pop (parser, & @1); - } NEWLINE -| HASH_TOKEN VERSION_TOKEN integer_constant NEWLINE { - if (parser->version_resolved) { - glcpp_error(& @1, parser, "#version must appear on the first line"); - } - _glcpp_parser_handle_version_declaration(parser, $3, NULL, true); - } -| HASH_TOKEN VERSION_TOKEN integer_constant IDENTIFIER NEWLINE { - if (parser->version_resolved) { - glcpp_error(& @1, parser, "#version must appear on the first line"); - } - _glcpp_parser_handle_version_declaration(parser, $3, $4, true); - } -| HASH_TOKEN NEWLINE { - glcpp_parser_resolve_implicit_version(parser); - } -| HASH_TOKEN PRAGMA NEWLINE { - ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, "#%s", $2); - } -; - -control_line_error: - HASH_TOKEN ERROR_TOKEN NEWLINE { - glcpp_error(& @1, parser, "#%s", $2); - } -| HASH_TOKEN DEFINE_TOKEN NEWLINE { - glcpp_error (& @1, parser, "#define without macro name"); - } -| HASH_TOKEN GARBAGE pp_tokens NEWLINE { - glcpp_error (& @1, parser, "Illegal non-directive after #"); - } -; - -integer_constant: - INTEGER_STRING { - if (strlen ($1) >= 3 && strncmp ($1, "0x", 2) == 0) { - $$ = strtoll ($1 + 2, NULL, 16); - } else if ($1[0] == '0') { - $$ = strtoll ($1, NULL, 8); - } else { - $$ = strtoll ($1, NULL, 10); - } - } -| INTEGER { - $$ = $1; - } - -expression: - integer_constant { - $$.value = $1; - $$.undefined_macro = NULL; - } -| IDENTIFIER { - $$.value = 0; - if (parser->is_gles) - $$.undefined_macro = ralloc_strdup (parser, $1); - else - $$.undefined_macro = NULL; - } -| expression OR expression { - $$.value = $1.value || $3.value; - - /* Short-circuit: Only flag undefined from right side - * if left side evaluates to false. - */ - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else if (! $1.value) - $$.undefined_macro = $3.undefined_macro; - } -| expression AND expression { - $$.value = $1.value && $3.value; - - /* Short-circuit: Only flag undefined from right-side - * if left side evaluates to true. 
- */ - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else if ($1.value) - $$.undefined_macro = $3.undefined_macro; - } -| expression '|' expression { - $$.value = $1.value | $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression '^' expression { - $$.value = $1.value ^ $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression '&' expression { - $$.value = $1.value & $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression NOT_EQUAL expression { - $$.value = $1.value != $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression EQUAL expression { - $$.value = $1.value == $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression GREATER_OR_EQUAL expression { - $$.value = $1.value >= $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression LESS_OR_EQUAL expression { - $$.value = $1.value <= $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression '>' expression { - $$.value = $1.value > $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression '<' expression { - $$.value = $1.value < $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression RIGHT_SHIFT expression { - $$.value = $1.value >> $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression LEFT_SHIFT expression { - $$.value = $1.value << $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression '-' expression { - $$.value = $1.value - $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression '+' expression { - $$.value = $1.value + $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression '%' expression { - if ($3.value == 0) { - yyerror (& @1, parser, - "zero modulus in preprocessor directive"); - } else { - $$.value = $1.value % $3.value; - } - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression '/' expression { - if ($3.value == 0) { - yyerror (& @1, parser, - "division by 0 in preprocessor directive"); - } else { - $$.value = $1.value / $3.value; - } - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| expression '*' expression { - $$.value = $1.value * $3.value; - if ($1.undefined_macro) - $$.undefined_macro = $1.undefined_macro; - else - $$.undefined_macro = $3.undefined_macro; - } -| '!' expression %prec UNARY { - $$.value = ! 
$2.value; - $$.undefined_macro = $2.undefined_macro; - } -| '~' expression %prec UNARY { - $$.value = ~ $2.value; - $$.undefined_macro = $2.undefined_macro; - } -| '-' expression %prec UNARY { - $$.value = - $2.value; - $$.undefined_macro = $2.undefined_macro; - } -| '+' expression %prec UNARY { - $$.value = + $2.value; - $$.undefined_macro = $2.undefined_macro; - } -| '(' expression ')' { - $$ = $2; - } -; - -identifier_list: - IDENTIFIER { - $$ = _string_list_create (parser); - _string_list_append_item ($$, $1); - ralloc_steal ($$, $1); - } -| identifier_list ',' IDENTIFIER { - $$ = $1; - _string_list_append_item ($$, $3); - ralloc_steal ($$, $3); - } -; - -text_line: - NEWLINE { $$ = NULL; } -| pp_tokens NEWLINE -; - -replacement_list: - /* empty */ { $$ = NULL; } -| pp_tokens -; - -junk: - /* empty */ -| pp_tokens { - glcpp_error(&@1, parser, "extra tokens at end of directive"); - } -; - -pp_tokens: - preprocessing_token { - parser->space_tokens = 1; - $$ = _token_list_create (parser); - _token_list_append ($$, $1); - } -| pp_tokens preprocessing_token { - $$ = $1; - _token_list_append ($$, $2); - } -; - -preprocessing_token: - IDENTIFIER { - $$ = _token_create_str (parser, IDENTIFIER, $1); - $$->location = yylloc; - } -| INTEGER_STRING { - $$ = _token_create_str (parser, INTEGER_STRING, $1); - $$->location = yylloc; - } -| operator { - $$ = _token_create_ival (parser, $1, $1); - $$->location = yylloc; - } -| DEFINED { - $$ = _token_create_ival (parser, DEFINED, DEFINED); - $$->location = yylloc; - } -| OTHER { - $$ = _token_create_str (parser, OTHER, $1); - $$->location = yylloc; - } -| SPACE { - $$ = _token_create_ival (parser, SPACE, SPACE); - $$->location = yylloc; - } -; - -operator: - '[' { $$ = '['; } -| ']' { $$ = ']'; } -| '(' { $$ = '('; } -| ')' { $$ = ')'; } -| '{' { $$ = '{'; } -| '}' { $$ = '}'; } -| '.' { $$ = '.'; } -| '&' { $$ = '&'; } -| '*' { $$ = '*'; } -| '+' { $$ = '+'; } -| '-' { $$ = '-'; } -| '~' { $$ = '~'; } -| '!' { $$ = '!'; } -| '/' { $$ = '/'; } -| '%' { $$ = '%'; } -| LEFT_SHIFT { $$ = LEFT_SHIFT; } -| RIGHT_SHIFT { $$ = RIGHT_SHIFT; } -| '<' { $$ = '<'; } -| '>' { $$ = '>'; } -| LESS_OR_EQUAL { $$ = LESS_OR_EQUAL; } -| GREATER_OR_EQUAL { $$ = GREATER_OR_EQUAL; } -| EQUAL { $$ = EQUAL; } -| NOT_EQUAL { $$ = NOT_EQUAL; } -| '^' { $$ = '^'; } -| '|' { $$ = '|'; } -| AND { $$ = AND; } -| OR { $$ = OR; } -| ';' { $$ = ';'; } -| ',' { $$ = ','; } -| '=' { $$ = '='; } -| PASTE { $$ = PASTE; } -| PLUS_PLUS { $$ = PLUS_PLUS; } -| MINUS_MINUS { $$ = MINUS_MINUS; } -; - -%% - -string_list_t * -_string_list_create (void *ctx) -{ - string_list_t *list; - - list = ralloc (ctx, string_list_t); - list->head = NULL; - list->tail = NULL; - - return list; -} - -void -_string_list_append_item (string_list_t *list, const char *str) -{ - string_node_t *node; - - node = ralloc (list, string_node_t); - node->str = ralloc_strdup (node, str); - - node->next = NULL; - - if (list->head == NULL) { - list->head = node; - } else { - list->tail->next = node; - } - - list->tail = node; -} - -int -_string_list_contains (string_list_t *list, const char *member, int *index) -{ - string_node_t *node; - int i; - - if (list == NULL) - return 0; - - for (i = 0, node = list->head; node; i++, node = node->next) { - if (strcmp (node->str, member) == 0) { - if (index) - *index = i; - return 1; - } - } - - return 0; -} - -/* Return duplicate string in list (if any), NULL otherwise. 
*/ -const char * -_string_list_has_duplicate (string_list_t *list) -{ - string_node_t *node, *dup; - - if (list == NULL) - return NULL; - - for (node = list->head; node; node = node->next) { - for (dup = node->next; dup; dup = dup->next) { - if (strcmp (node->str, dup->str) == 0) - return node->str; - } - } - - return NULL; -} - -int -_string_list_length (string_list_t *list) -{ - int length = 0; - string_node_t *node; - - if (list == NULL) - return 0; - - for (node = list->head; node; node = node->next) - length++; - - return length; -} - -int -_string_list_equal (string_list_t *a, string_list_t *b) -{ - string_node_t *node_a, *node_b; - - if (a == NULL && b == NULL) - return 1; - - if (a == NULL || b == NULL) - return 0; - - for (node_a = a->head, node_b = b->head; - node_a && node_b; - node_a = node_a->next, node_b = node_b->next) - { - if (strcmp (node_a->str, node_b->str)) - return 0; - } - - /* Catch the case of lists being different lengths, (which - * would cause the loop above to terminate after the shorter - * list). */ - return node_a == node_b; -} - -argument_list_t * -_argument_list_create (void *ctx) -{ - argument_list_t *list; - - list = ralloc (ctx, argument_list_t); - list->head = NULL; - list->tail = NULL; - - return list; -} - -void -_argument_list_append (argument_list_t *list, token_list_t *argument) -{ - argument_node_t *node; - - node = ralloc (list, argument_node_t); - node->argument = argument; - - node->next = NULL; - - if (list->head == NULL) { - list->head = node; - } else { - list->tail->next = node; - } - - list->tail = node; -} - -int -_argument_list_length (argument_list_t *list) -{ - int length = 0; - argument_node_t *node; - - if (list == NULL) - return 0; - - for (node = list->head; node; node = node->next) - length++; - - return length; -} - -token_list_t * -_argument_list_member_at (argument_list_t *list, int index) -{ - argument_node_t *node; - int i; - - if (list == NULL) - return NULL; - - node = list->head; - for (i = 0; i < index; i++) { - node = node->next; - if (node == NULL) - break; - } - - if (node) - return node->argument; - - return NULL; -} - -/* Note: This function ralloc_steal()s the str pointer. 
*/ -token_t * -_token_create_str (void *ctx, int type, char *str) -{ - token_t *token; - - token = ralloc (ctx, token_t); - token->type = type; - token->value.str = str; - - ralloc_steal (token, str); - - return token; -} - -token_t * -_token_create_ival (void *ctx, int type, int ival) -{ - token_t *token; - - token = ralloc (ctx, token_t); - token->type = type; - token->value.ival = ival; - - return token; -} - -token_list_t * -_token_list_create (void *ctx) -{ - token_list_t *list; - - list = ralloc (ctx, token_list_t); - list->head = NULL; - list->tail = NULL; - list->non_space_tail = NULL; - - return list; -} - -void -_token_list_append (token_list_t *list, token_t *token) -{ - token_node_t *node; - - node = ralloc (list, token_node_t); - node->token = token; - node->next = NULL; - - if (list->head == NULL) { - list->head = node; - } else { - list->tail->next = node; - } - - list->tail = node; - if (token->type != SPACE) - list->non_space_tail = node; -} - -void -_token_list_append_list (token_list_t *list, token_list_t *tail) -{ - if (tail == NULL || tail->head == NULL) - return; - - if (list->head == NULL) { - list->head = tail->head; - } else { - list->tail->next = tail->head; - } - - list->tail = tail->tail; - list->non_space_tail = tail->non_space_tail; -} - -static token_list_t * -_token_list_copy (void *ctx, token_list_t *other) -{ - token_list_t *copy; - token_node_t *node; - - if (other == NULL) - return NULL; - - copy = _token_list_create (ctx); - for (node = other->head; node; node = node->next) { - token_t *new_token = ralloc (copy, token_t); - *new_token = *node->token; - _token_list_append (copy, new_token); - } - - return copy; -} - -static void -_token_list_trim_trailing_space (token_list_t *list) -{ - token_node_t *tail, *next; - - if (list->non_space_tail) { - tail = list->non_space_tail->next; - list->non_space_tail->next = NULL; - list->tail = list->non_space_tail; - - while (tail) { - next = tail->next; - ralloc_free (tail); - tail = next; - } - } -} - -static int -_token_list_is_empty_ignoring_space (token_list_t *l) -{ - token_node_t *n; - - if (l == NULL) - return 1; - - n = l->head; - while (n != NULL && n->token->type == SPACE) - n = n->next; - - return n == NULL; -} - -int -_token_list_equal_ignoring_space (token_list_t *a, token_list_t *b) -{ - token_node_t *node_a, *node_b; - - if (a == NULL || b == NULL) { - int a_empty = _token_list_is_empty_ignoring_space(a); - int b_empty = _token_list_is_empty_ignoring_space(b); - return a_empty == b_empty; - } - - node_a = a->head; - node_b = b->head; - - while (1) - { - if (node_a == NULL && node_b == NULL) - break; - - if (node_a == NULL || node_b == NULL) - return 0; - /* Make sure whitespace appears in the same places in both. - * It need not be exactly the same amount of whitespace, - * though. 
- */ - if (node_a->token->type == SPACE - && node_b->token->type == SPACE) { - while (node_a && node_a->token->type == SPACE) - node_a = node_a->next; - while (node_b && node_b->token->type == SPACE) - node_b = node_b->next; - continue; - } - - if (node_a->token->type != node_b->token->type) - return 0; - - switch (node_a->token->type) { - case INTEGER: - if (node_a->token->value.ival != - node_b->token->value.ival) - { - return 0; - } - break; - case IDENTIFIER: - case INTEGER_STRING: - case OTHER: - if (strcmp (node_a->token->value.str, - node_b->token->value.str)) - { - return 0; - } - break; - } - - node_a = node_a->next; - node_b = node_b->next; - } - - return 1; -} - -static void -_token_print (char **out, size_t *len, token_t *token) -{ - if (token->type < 256) { - ralloc_asprintf_rewrite_tail (out, len, "%c", token->type); - return; - } - - switch (token->type) { - case INTEGER: - ralloc_asprintf_rewrite_tail (out, len, "%" PRIiMAX, token->value.ival); - break; - case IDENTIFIER: - case INTEGER_STRING: - case OTHER: - ralloc_asprintf_rewrite_tail (out, len, "%s", token->value.str); - break; - case SPACE: - ralloc_asprintf_rewrite_tail (out, len, " "); - break; - case LEFT_SHIFT: - ralloc_asprintf_rewrite_tail (out, len, "<<"); - break; - case RIGHT_SHIFT: - ralloc_asprintf_rewrite_tail (out, len, ">>"); - break; - case LESS_OR_EQUAL: - ralloc_asprintf_rewrite_tail (out, len, "<="); - break; - case GREATER_OR_EQUAL: - ralloc_asprintf_rewrite_tail (out, len, ">="); - break; - case EQUAL: - ralloc_asprintf_rewrite_tail (out, len, "=="); - break; - case NOT_EQUAL: - ralloc_asprintf_rewrite_tail (out, len, "!="); - break; - case AND: - ralloc_asprintf_rewrite_tail (out, len, "&&"); - break; - case OR: - ralloc_asprintf_rewrite_tail (out, len, "||"); - break; - case PASTE: - ralloc_asprintf_rewrite_tail (out, len, "##"); - break; - case PLUS_PLUS: - ralloc_asprintf_rewrite_tail (out, len, "++"); - break; - case MINUS_MINUS: - ralloc_asprintf_rewrite_tail (out, len, "--"); - break; - case DEFINED: - ralloc_asprintf_rewrite_tail (out, len, "defined"); - break; - case PLACEHOLDER: - /* Nothing to print. */ - break; - default: - assert(!"Error: Don't know how to print token."); - - break; - } -} - -/* Return a new token (ralloc()ed off of 'token') formed by pasting - * 'token' and 'other'. Note that this function may return 'token' or - * 'other' directly rather than allocating anything new. - * - * Caution: Only very cursory error-checking is performed to see if - * the final result is a valid single token. */ -static token_t * -_token_paste (glcpp_parser_t *parser, token_t *token, token_t *other) -{ - token_t *combined = NULL; - - /* Pasting a placeholder onto anything makes no change. */ - if (other->type == PLACEHOLDER) - return token; - - /* When 'token' is a placeholder, just return 'other'. */ - if (token->type == PLACEHOLDER) - return other; - - /* A very few single-character punctuators can be combined - * with another to form a multi-character punctuator. 
 */
- switch (token->type) {
- case '<':
- if (other->type == '<')
- combined = _token_create_ival (token, LEFT_SHIFT, LEFT_SHIFT);
- else if (other->type == '=')
- combined = _token_create_ival (token, LESS_OR_EQUAL, LESS_OR_EQUAL);
- break;
- case '>':
- if (other->type == '>')
- combined = _token_create_ival (token, RIGHT_SHIFT, RIGHT_SHIFT);
- else if (other->type == '=')
- combined = _token_create_ival (token, GREATER_OR_EQUAL, GREATER_OR_EQUAL);
- break;
- case '=':
- if (other->type == '=')
- combined = _token_create_ival (token, EQUAL, EQUAL);
- break;
- case '!':
- if (other->type == '=')
- combined = _token_create_ival (token, NOT_EQUAL, NOT_EQUAL);
- break;
- case '&':
- if (other->type == '&')
- combined = _token_create_ival (token, AND, AND);
- break;
- case '|':
- if (other->type == '|')
- combined = _token_create_ival (token, OR, OR);
- break;
- }
-
- if (combined != NULL) {
- /* Inherit the location from the first token */
- combined->location = token->location;
- return combined;
- }
-
- /* Two string-valued (or integer) tokens can usually just be
- * mashed together. (We also handle a string followed by an
- * integer here as well.)
- *
- * There are some exceptions here. Notably, if the first token
- * is an integer (or a string representing an integer), then
- * the second token must also be an integer or must be a
- * string representing an integer that begins with a digit.
- */
- if ((token->type == IDENTIFIER || token->type == OTHER || token->type == INTEGER_STRING || token->type == INTEGER) &&
- (other->type == IDENTIFIER || other->type == OTHER || other->type == INTEGER_STRING || other->type == INTEGER))
- {
- char *str;
- int combined_type;
-
- /* Check that pasting onto an integer doesn't create a
- * non-integer, (that is, only digits can be
- * pasted). */
- if (token->type == INTEGER_STRING || token->type == INTEGER)
- {
- switch (other->type) {
- case INTEGER_STRING:
- if (other->value.str[0] < '0' ||
- other->value.str[0] > '9')
- goto FAIL;
- break;
- case INTEGER:
- if (other->value.ival < 0)
- goto FAIL;
- break;
- default:
- goto FAIL;
- }
- }
-
- if (token->type == INTEGER)
- str = ralloc_asprintf (token, "%" PRIiMAX,
- token->value.ival);
- else
- str = ralloc_strdup (token, token->value.str);
-
-
- if (other->type == INTEGER)
- ralloc_asprintf_append (&str, "%" PRIiMAX,
- other->value.ival);
- else
- ralloc_strcat (&str, other->value.str);
-
- /* New token is same type as original token, unless we
- * started with an integer, in which case we will be
- * creating an integer-string.
- */
- combined_type = token->type;
- if (combined_type == INTEGER)
- combined_type = INTEGER_STRING;
-
- combined = _token_create_str (token, combined_type, str);
- combined->location = token->location;
- return combined;
- }
-
- FAIL:
- glcpp_error (&token->location, parser, "");
- ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "Pasting \"");
- _token_print (&parser->info_log, &parser->info_log_length, token);
- ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" and \"");
- _token_print (&parser->info_log, &parser->info_log_length, other);
- ralloc_asprintf_rewrite_tail (&parser->info_log, &parser->info_log_length, "\" does not give a valid preprocessing token.\n");
-
- return token;
-}
-
-static void
-_token_list_print (glcpp_parser_t *parser, token_list_t *list)
-{
- token_node_t *node;
-
- if (list == NULL)
- return;
-
- for (node = list->head; node; node = node->next)
- _token_print (&parser->output, &parser->output_length, node->token);
-}
-
-void
-yyerror (YYLTYPE *locp, glcpp_parser_t *parser, const char *error)
-{
- glcpp_error(locp, parser, "%s", error);
-}
-
-static void add_builtin_define(glcpp_parser_t *parser,
- const char *name, int value)
-{
- token_t *tok;
- token_list_t *list;
-
- tok = _token_create_ival (parser, INTEGER, value);
-
- list = _token_list_create(parser);
- _token_list_append(list, tok);
- _define_object_macro(parser, NULL, name, list);
-}
-
-glcpp_parser_t *
-glcpp_parser_create (const struct gl_extensions *extensions, gl_api api)
-{
- glcpp_parser_t *parser;
-
- parser = ralloc (NULL, glcpp_parser_t);
-
- glcpp_lex_init_extra (parser, &parser->scanner);
- parser->defines = hash_table_ctor (32, hash_table_string_hash,
- hash_table_string_compare);
- parser->active = NULL;
- parser->lexing_directive = 0;
- parser->space_tokens = 1;
- parser->last_token_was_newline = 0;
- parser->last_token_was_space = 0;
- parser->first_non_space_token_this_line = 1;
- parser->newline_as_space = 0;
- parser->in_control_line = 0;
- parser->paren_count = 0;
- parser->commented_newlines = 0;
-
- parser->skip_stack = NULL;
- parser->skipping = 0;
-
- parser->lex_from_list = NULL;
- parser->lex_from_node = NULL;
-
- parser->output = ralloc_strdup(parser, "");
- parser->output_length = 0;
- parser->info_log = ralloc_strdup(parser, "");
- parser->info_log_length = 0;
- parser->error = 0;
-
- parser->extensions = extensions;
- parser->api = api;
- parser->version_resolved = false;
-
- parser->has_new_line_number = 0;
- parser->new_line_number = 1;
- parser->has_new_source_number = 0;
- parser->new_source_number = 0;
-
- return parser;
-}
-
-void
-glcpp_parser_destroy (glcpp_parser_t *parser)
-{
- glcpp_lex_destroy (parser->scanner);
- hash_table_dtor (parser->defines);
- ralloc_free (parser);
-}
-
-typedef enum function_status
-{
- FUNCTION_STATUS_SUCCESS,
- FUNCTION_NOT_A_FUNCTION,
- FUNCTION_UNBALANCED_PARENTHESES
-} function_status_t;
-
-/* Find a set of function-like macro arguments by looking for a
- * balanced set of parentheses.
- *
- * When called, 'node' should be the opening-parenthesis token, (or
- * perhaps preceding SPACE tokens). Upon successful return *last will
- * be the last consumed node, (corresponding to the closing right
- * parenthesis).
- *
- * Return values:
- *
- * FUNCTION_STATUS_SUCCESS:
- *
- * Successfully parsed a set of function arguments.
- *
- * FUNCTION_NOT_A_FUNCTION:
- *
- * Macro name not followed by a '('.
This is not an error, but - * simply that the macro name should be treated as a non-macro. - * - * FUNCTION_UNBALANCED_PARENTHESES - * - * Macro name is not followed by a balanced set of parentheses. - */ -static function_status_t -_arguments_parse (argument_list_t *arguments, - token_node_t *node, - token_node_t **last) -{ - token_list_t *argument; - int paren_count; - - node = node->next; - - /* Ignore whitespace before first parenthesis. */ - while (node && node->token->type == SPACE) - node = node->next; - - if (node == NULL || node->token->type != '(') - return FUNCTION_NOT_A_FUNCTION; - - node = node->next; - - argument = _token_list_create (arguments); - _argument_list_append (arguments, argument); - - for (paren_count = 1; node; node = node->next) { - if (node->token->type == '(') - { - paren_count++; - } - else if (node->token->type == ')') - { - paren_count--; - if (paren_count == 0) - break; - } - - if (node->token->type == ',' && - paren_count == 1) - { - _token_list_trim_trailing_space (argument); - argument = _token_list_create (arguments); - _argument_list_append (arguments, argument); - } - else { - if (argument->head == NULL) { - /* Don't treat initial whitespace as - * part of the argument. */ - if (node->token->type == SPACE) - continue; - } - _token_list_append (argument, node->token); - } - } - - if (paren_count) - return FUNCTION_UNBALANCED_PARENTHESES; - - *last = node; - - return FUNCTION_STATUS_SUCCESS; -} - -static token_list_t * -_token_list_create_with_one_ival (void *ctx, int type, int ival) -{ - token_list_t *list; - token_t *node; - - list = _token_list_create (ctx); - node = _token_create_ival (list, type, ival); - _token_list_append (list, node); - - return list; -} - -static token_list_t * -_token_list_create_with_one_space (void *ctx) -{ - return _token_list_create_with_one_ival (ctx, SPACE, SPACE); -} - -static token_list_t * -_token_list_create_with_one_integer (void *ctx, int ival) -{ - return _token_list_create_with_one_ival (ctx, INTEGER, ival); -} - -/* Evaluate a DEFINED token node (based on subsequent tokens in the list). - * - * Note: This function must only be called when "node" is a DEFINED token, - * (and will abort with an assertion failure otherwise). - * - * If "node" is followed, (ignoring any SPACE tokens), by an IDENTIFIER token - * (optionally preceded and followed by '(' and ')' tokens) then the following - * occurs: - * - * If the identifier is a defined macro, this function returns 1. - * - * If the identifier is not a defined macro, this function returns 0. - * - * In either case, *last will be updated to the last node in the list - * consumed by the evaluation, (either the token of the identifier or the - * token of the closing parenthesis). - * - * In all other cases, (such as "node is the final node of the list", or - * "missing closing parenthesis", etc.), this function generates a - * preprocessor error, returns -1 and *last will not be set. - */ -static int -_glcpp_parser_evaluate_defined (glcpp_parser_t *parser, - token_node_t *node, - token_node_t **last) -{ - token_node_t *argument, *defined = node; - - assert (node->token->type == DEFINED); - - node = node->next; - - /* Ignore whitespace after DEFINED token. */ - while (node && node->token->type == SPACE) - node = node->next; - - if (node == NULL) - goto FAIL; - - if (node->token->type == IDENTIFIER || node->token->type == OTHER) { - argument = node; - } else if (node->token->type == '(') { - node = node->next; - - /* Ignore whitespace after '(' token. 
*/ - while (node && node->token->type == SPACE) - node = node->next; - - if (node == NULL || (node->token->type != IDENTIFIER && - node->token->type != OTHER)) - { - goto FAIL; - } - - argument = node; - - node = node->next; - - /* Ignore whitespace after identifier, before ')' token. */ - while (node && node->token->type == SPACE) - node = node->next; - - if (node == NULL || node->token->type != ')') - goto FAIL; - } else { - goto FAIL; - } - - *last = node; - - return hash_table_find (parser->defines, - argument->token->value.str) ? 1 : 0; - -FAIL: - glcpp_error (&defined->token->location, parser, - "\"defined\" not followed by an identifier"); - return -1; -} - -/* Evaluate all DEFINED nodes in a given list, modifying the list in place. - */ -static void -_glcpp_parser_evaluate_defined_in_list (glcpp_parser_t *parser, - token_list_t *list) -{ - token_node_t *node, *node_prev, *replacement, *last = NULL; - int value; - - if (list == NULL) - return; - - node_prev = NULL; - node = list->head; - - while (node) { - - if (node->token->type != DEFINED) - goto NEXT; - - value = _glcpp_parser_evaluate_defined (parser, node, &last); - if (value == -1) - goto NEXT; - - replacement = ralloc (list, token_node_t); - replacement->token = _token_create_ival (list, INTEGER, value); - - /* Splice replacement node into list, replacing from "node" - * through "last". */ - if (node_prev) - node_prev->next = replacement; - else - list->head = replacement; - replacement->next = last->next; - if (last == list->tail) - list->tail = replacement; - - node = replacement; - - NEXT: - node_prev = node; - node = node->next; - } -} - -/* Perform macro expansion on 'list', placing the resulting tokens - * into a new list which is initialized with a first token of type - * 'head_token_type'. Then begin lexing from the resulting list, - * (return to the current lexing source when this list is exhausted). - * - * See the documentation of _glcpp_parser_expand_token_list for a description - * of the "mode" parameter. - */ -static void -_glcpp_parser_expand_and_lex_from (glcpp_parser_t *parser, - int head_token_type, - token_list_t *list, - expansion_mode_t mode) -{ - token_list_t *expanded; - token_t *token; - - expanded = _token_list_create (parser); - token = _token_create_ival (parser, head_token_type, head_token_type); - _token_list_append (expanded, token); - _glcpp_parser_expand_token_list (parser, list, mode); - _token_list_append_list (expanded, list); - glcpp_parser_lex_from (parser, expanded); -} - -static void -_glcpp_parser_apply_pastes (glcpp_parser_t *parser, token_list_t *list) -{ - token_node_t *node; - - node = list->head; - while (node) - { - token_node_t *next_non_space; - - /* Look ahead for a PASTE token, skipping space. */ - next_non_space = node->next; - while (next_non_space && next_non_space->token->type == SPACE) - next_non_space = next_non_space->next; - - if (next_non_space == NULL) - break; - - if (next_non_space->token->type != PASTE) { - node = next_non_space; - continue; - } - - /* Now find the next non-space token after the PASTE. 
*/ - next_non_space = next_non_space->next; - while (next_non_space && next_non_space->token->type == SPACE) - next_non_space = next_non_space->next; - - if (next_non_space == NULL) { - yyerror (&node->token->location, parser, "'##' cannot appear at either end of a macro expansion\n"); - return; - } - - node->token = _token_paste (parser, node->token, next_non_space->token); - node->next = next_non_space->next; - if (next_non_space == list->tail) - list->tail = node; - } - - list->non_space_tail = list->tail; -} - -/* This is a helper function that's essentially part of the - * implementation of _glcpp_parser_expand_node. It shouldn't be called - * except for by that function. - * - * Returns NULL if node is a simple token with no expansion, (that is, - * although 'node' corresponds to an identifier defined as a - * function-like macro, it is not followed with a parenthesized - * argument list). - * - * Compute the complete expansion of node (which is a function-like - * macro) and subsequent nodes which are arguments. - * - * Returns the token list that results from the expansion and sets - * *last to the last node in the list that was consumed by the - * expansion. Specifically, *last will be set as follows: as the - * token of the closing right parenthesis. - * - * See the documentation of _glcpp_parser_expand_token_list for a description - * of the "mode" parameter. - */ -static token_list_t * -_glcpp_parser_expand_function (glcpp_parser_t *parser, - token_node_t *node, - token_node_t **last, - expansion_mode_t mode) -{ - macro_t *macro; - const char *identifier; - argument_list_t *arguments; - function_status_t status; - token_list_t *substituted; - int parameter_index; - - identifier = node->token->value.str; - - macro = hash_table_find (parser->defines, identifier); - - assert (macro->is_function); - - arguments = _argument_list_create (parser); - status = _arguments_parse (arguments, node, last); - - switch (status) { - case FUNCTION_STATUS_SUCCESS: - break; - case FUNCTION_NOT_A_FUNCTION: - return NULL; - case FUNCTION_UNBALANCED_PARENTHESES: - glcpp_error (&node->token->location, parser, "Macro %s call has unbalanced parentheses\n", identifier); - return NULL; - } - - /* Replace a macro defined as empty with a SPACE token. */ - if (macro->replacements == NULL) { - ralloc_free (arguments); - return _token_list_create_with_one_space (parser); - } - - if (! ((_argument_list_length (arguments) == - _string_list_length (macro->parameters)) || - (_string_list_length (macro->parameters) == 0 && - _argument_list_length (arguments) == 1 && - arguments->head->argument->head == NULL))) - { - glcpp_error (&node->token->location, parser, - "Error: macro %s invoked with %d arguments (expected %d)\n", - identifier, - _argument_list_length (arguments), - _string_list_length (macro->parameters)); - return NULL; - } - - /* Perform argument substitution on the replacement list. */ - substituted = _token_list_create (arguments); - - for (node = macro->replacements->head; node; node = node->next) - { - if (node->token->type == IDENTIFIER && - _string_list_contains (macro->parameters, - node->token->value.str, - ¶meter_index)) - { - token_list_t *argument; - argument = _argument_list_member_at (arguments, - parameter_index); - /* Before substituting, we expand the argument - * tokens, or append a placeholder token for - * an empty argument. 
*/ - if (argument->head) { - token_list_t *expanded_argument; - expanded_argument = _token_list_copy (parser, - argument); - _glcpp_parser_expand_token_list (parser, - expanded_argument, - mode); - _token_list_append_list (substituted, - expanded_argument); - } else { - token_t *new_token; - - new_token = _token_create_ival (substituted, - PLACEHOLDER, - PLACEHOLDER); - _token_list_append (substituted, new_token); - } - } else { - _token_list_append (substituted, node->token); - } - } - - /* After argument substitution, and before further expansion - * below, implement token pasting. */ - - _token_list_trim_trailing_space (substituted); - - _glcpp_parser_apply_pastes (parser, substituted); - - return substituted; -} - -/* Compute the complete expansion of node, (and subsequent nodes after - * 'node' in the case that 'node' is a function-like macro and - * subsequent nodes are arguments). - * - * Returns NULL if node is a simple token with no expansion. - * - * Otherwise, returns the token list that results from the expansion - * and sets *last to the last node in the list that was consumed by - * the expansion. Specifically, *last will be set as follows: - * - * As 'node' in the case of object-like macro expansion. - * - * As the token of the closing right parenthesis in the case of - * function-like macro expansion. - * - * See the documentation of _glcpp_parser_expand_token_list for a description - * of the "mode" parameter. - */ -static token_list_t * -_glcpp_parser_expand_node (glcpp_parser_t *parser, - token_node_t *node, - token_node_t **last, - expansion_mode_t mode) -{ - token_t *token = node->token; - const char *identifier; - macro_t *macro; - - /* We only expand identifiers */ - if (token->type != IDENTIFIER) { - return NULL; - } - - *last = node; - identifier = token->value.str; - - /* Special handling for __LINE__ and __FILE__, (not through - * the hash table). */ - if (strcmp(identifier, "__LINE__") == 0) - return _token_list_create_with_one_integer (parser, node->token->location.first_line); - - if (strcmp(identifier, "__FILE__") == 0) - return _token_list_create_with_one_integer (parser, node->token->location.source); - - /* Look up this identifier in the hash table. */ - macro = hash_table_find (parser->defines, identifier); - - /* Not a macro, so no expansion needed. */ - if (macro == NULL) - return NULL; - - /* Finally, don't expand this macro if we're already actively - * expanding it, (to avoid infinite recursion). */ - if (_parser_active_list_contains (parser, identifier)) { - /* We change the token type here from IDENTIFIER to - * OTHER to prevent any future expansion of this - * unexpanded token. */ - char *str; - token_list_t *expansion; - token_t *final; - - str = ralloc_strdup (parser, token->value.str); - final = _token_create_str (parser, OTHER, str); - expansion = _token_list_create (parser); - _token_list_append (expansion, final); - return expansion; - } - - if (! macro->is_function) - { - token_list_t *replacement; - - /* Replace a macro defined as empty with a SPACE token. */ - if (macro->replacements == NULL) - return _token_list_create_with_one_space (parser); - - replacement = _token_list_copy (parser, macro->replacements); - _glcpp_parser_apply_pastes (parser, replacement); - return replacement; - } - - return _glcpp_parser_expand_function (parser, node, last, mode); -} - -/* Push a new identifier onto the parser's active list. - * - * Here, 'marker' is the token node that appears in the list after the - * expansion of 'identifier'. 
That is, when the list iterator begins
- * examining 'marker', then it is time to pop this node from the
- * active stack.
- */
-static void
-_parser_active_list_push (glcpp_parser_t *parser,
- const char *identifier,
- token_node_t *marker)
-{
- active_list_t *node;
-
- node = ralloc (parser->active, active_list_t);
- node->identifier = ralloc_strdup (node, identifier);
- node->marker = marker;
- node->next = parser->active;
-
- parser->active = node;
-}
-
-static void
-_parser_active_list_pop (glcpp_parser_t *parser)
-{
- active_list_t *node = parser->active;
-
- if (node == NULL) {
- parser->active = NULL;
- return;
- }
-
- node = parser->active->next;
- ralloc_free (parser->active);
-
- parser->active = node;
-}
-
-static int
-_parser_active_list_contains (glcpp_parser_t *parser, const char *identifier)
-{
- active_list_t *node;
-
- if (parser->active == NULL)
- return 0;
-
- for (node = parser->active; node; node = node->next)
- if (strcmp (node->identifier, identifier) == 0)
- return 1;
-
- return 0;
-}
-
-/* Walk over the token list replacing nodes with their expansion.
- * Whenever nodes are expanded the walking will walk over the new
- * nodes, continuing to expand as necessary. The results are placed in
- * 'list' itself.
- *
- * The "mode" argument controls the handling of any DEFINED tokens that
- * result from expansion as follows:
- *
- * EXPANSION_MODE_IGNORE_DEFINED: Any resulting DEFINED tokens will be
- * left in the final list, unevaluated. This is the correct mode
- * for expanding any list in any context other than a
- * preprocessor conditional, (#if or #elif).
- *
- * EXPANSION_MODE_EVALUATE_DEFINED: Any resulting DEFINED tokens will be
- * evaluated to 0 or 1 tokens depending on whether the following
- * token is the name of a defined macro. If the DEFINED token is
- * not followed by an (optionally parenthesized) identifier, then
- * an error will be generated. This is the correct mode for
- * expanding any list in the context of a preprocessor
- * conditional, (#if or #elif).
- */
-static void
-_glcpp_parser_expand_token_list (glcpp_parser_t *parser,
- token_list_t *list,
- expansion_mode_t mode)
-{
- token_node_t *node_prev;
- token_node_t *node, *last = NULL;
- token_list_t *expansion;
- active_list_t *active_initial = parser->active;
-
- if (list == NULL)
- return;
-
- _token_list_trim_trailing_space (list);
-
- node_prev = NULL;
- node = list->head;
-
- if (mode == EXPANSION_MODE_EVALUATE_DEFINED)
- _glcpp_parser_evaluate_defined_in_list (parser, list);
-
- while (node) {
-
- while (parser->active && parser->active->marker == node)
- _parser_active_list_pop (parser);
-
- expansion = _glcpp_parser_expand_node (parser, node, &last, mode);
- if (expansion) {
- token_node_t *n;
-
- if (mode == EXPANSION_MODE_EVALUATE_DEFINED) {
- _glcpp_parser_evaluate_defined_in_list (parser,
- expansion);
- }
-
- for (n = node; n != last->next; n = n->next)
- while (parser->active &&
- parser->active->marker == n)
- {
- _parser_active_list_pop (parser);
- }
-
- _parser_active_list_push (parser,
- node->token->value.str,
- last->next);
-
- /* Splice expansion into list, supporting a
- * simple deletion if the expansion is
- * empty.
- */
- if (expansion->head) {
- if (node_prev)
- node_prev->next = expansion->head;
- else
- list->head = expansion->head;
- expansion->tail->next = last->next;
- if (last == list->tail)
- list->tail = expansion->tail;
- } else {
- if (node_prev)
- node_prev->next = last->next;
- else
- list->head = last->next;
- if (last == list->tail)
- list->tail = NULL;
- }
- } else {
- node_prev = node;
- }
- node = node_prev ? node_prev->next : list->head;
- }
-
- /* Remove any lingering effects of this invocation on the
- * active list. That is, pop until the list looks like it did
- * at the beginning of this function. */
- while (parser->active && parser->active != active_initial)
- _parser_active_list_pop (parser);
-
- list->non_space_tail = list->tail;
-}
-
-void
-_glcpp_parser_print_expanded_token_list (glcpp_parser_t *parser,
- token_list_t *list)
-{
- if (list == NULL)
- return;
-
- _glcpp_parser_expand_token_list (parser, list, EXPANSION_MODE_IGNORE_DEFINED);
-
- _token_list_trim_trailing_space (list);
-
- _token_list_print (parser, list);
-}
-
-static void
-_check_for_reserved_macro_name (glcpp_parser_t *parser, YYLTYPE *loc,
- const char *identifier)
-{
- /* Section 3.3 (Preprocessor) of the GLSL 1.30 spec (and later) and
- * the GLSL ES spec (all versions) say:
- *
- * "All macro names containing two consecutive underscores ( __ )
- * are reserved for future use as predefined macro names. All
- * macro names prefixed with "GL_" ("GL" followed by a single
- * underscore) are also reserved."
- *
- * The intention is that names containing __ are reserved for internal
- * use by the implementation, and names prefixed with GL_ are reserved
- * for use by Khronos. Since every extension adds a name prefixed
- * with GL_ (i.e., the name of the extension), that should be an
- * error. Names simply containing __ are dangerous to use, but should
- * be allowed.
- *
- * A future version of the GLSL specification will clarify this.
- */
- if (strstr(identifier, "__")) {
- glcpp_warning(loc, parser,
- "Macro names containing \"__\" are reserved "
- "for use by the implementation.\n");
- }
- if (strncmp(identifier, "GL_", 3) == 0) {
- glcpp_error (loc, parser, "Macro names starting with \"GL_\" are reserved.\n");
- }
-}
-
-static int
-_macro_equal (macro_t *a, macro_t *b)
-{
- if (a->is_function != b->is_function)
- return 0;
-
- if (a->is_function) {
- if (! _string_list_equal (a->parameters, b->parameters))
- return 0;
- }
-
- return _token_list_equal_ignoring_space (a->replacements,
- b->replacements);
-}
-
-void
-_define_object_macro (glcpp_parser_t *parser,
- YYLTYPE *loc,
- const char *identifier,
- token_list_t *replacements)
-{
- macro_t *macro, *previous;
-
- /* We define pre-defined macros before we've started parsing the
- * actual file. So if there's no location defined yet, that's what
- * we're doing and we don't want to generate an error for using the
- * reserved names.
*/ - if (loc != NULL) - _check_for_reserved_macro_name(parser, loc, identifier); - - macro = ralloc (parser, macro_t); - - macro->is_function = 0; - macro->parameters = NULL; - macro->identifier = ralloc_strdup (macro, identifier); - macro->replacements = replacements; - ralloc_steal (macro, replacements); - - previous = hash_table_find (parser->defines, identifier); - if (previous) { - if (_macro_equal (macro, previous)) { - ralloc_free (macro); - return; - } - glcpp_error (loc, parser, "Redefinition of macro %s\n", - identifier); - } - - hash_table_insert (parser->defines, macro, identifier); -} - -void -_define_function_macro (glcpp_parser_t *parser, - YYLTYPE *loc, - const char *identifier, - string_list_t *parameters, - token_list_t *replacements) -{ - macro_t *macro, *previous; - const char *dup; - - _check_for_reserved_macro_name(parser, loc, identifier); - - /* Check for any duplicate parameter names. */ - if ((dup = _string_list_has_duplicate (parameters)) != NULL) { - glcpp_error (loc, parser, "Duplicate macro parameter \"%s\"", - dup); - } - - macro = ralloc (parser, macro_t); - ralloc_steal (macro, parameters); - ralloc_steal (macro, replacements); - - macro->is_function = 1; - macro->parameters = parameters; - macro->identifier = ralloc_strdup (macro, identifier); - macro->replacements = replacements; - previous = hash_table_find (parser->defines, identifier); - if (previous) { - if (_macro_equal (macro, previous)) { - ralloc_free (macro); - return; - } - glcpp_error (loc, parser, "Redefinition of macro %s\n", - identifier); - } - - hash_table_insert (parser->defines, macro, identifier); -} - -static int -glcpp_parser_lex (YYSTYPE *yylval, YYLTYPE *yylloc, glcpp_parser_t *parser) -{ - token_node_t *node; - int ret; - - if (parser->lex_from_list == NULL) { - ret = glcpp_lex (yylval, yylloc, parser->scanner); - - /* XXX: This ugly block of code exists for the sole - * purpose of converting a NEWLINE token into a SPACE - * token, but only in the case where we have seen a - * function-like macro name, but have not yet seen its - * closing parenthesis. - * - * There's perhaps a more compact way to do this with - * mid-rule actions in the grammar. - * - * I'm definitely not pleased with the complexity of - * this code here. 
- */ - if (parser->newline_as_space) - { - if (ret == '(') { - parser->paren_count++; - } else if (ret == ')') { - parser->paren_count--; - if (parser->paren_count == 0) - parser->newline_as_space = 0; - } else if (ret == NEWLINE) { - ret = SPACE; - } else if (ret != SPACE) { - if (parser->paren_count == 0) - parser->newline_as_space = 0; - } - } - else if (parser->in_control_line) - { - if (ret == NEWLINE) - parser->in_control_line = 0; - } - else if (ret == DEFINE_TOKEN || - ret == UNDEF || ret == IF || - ret == IFDEF || ret == IFNDEF || - ret == ELIF || ret == ELSE || - ret == ENDIF || ret == HASH_TOKEN) - { - parser->in_control_line = 1; - } - else if (ret == IDENTIFIER) - { - macro_t *macro; - macro = hash_table_find (parser->defines, - yylval->str); - if (macro && macro->is_function) { - parser->newline_as_space = 1; - parser->paren_count = 0; - } - } - - return ret; - } - - node = parser->lex_from_node; - - if (node == NULL) { - ralloc_free (parser->lex_from_list); - parser->lex_from_list = NULL; - return NEWLINE; - } - - *yylval = node->token->value; - ret = node->token->type; - - parser->lex_from_node = node->next; - - return ret; -} - -static void -glcpp_parser_lex_from (glcpp_parser_t *parser, token_list_t *list) -{ - token_node_t *node; - - assert (parser->lex_from_list == NULL); - - /* Copy list, eliminating any space tokens. */ - parser->lex_from_list = _token_list_create (parser); - - for (node = list->head; node; node = node->next) { - if (node->token->type == SPACE) - continue; - _token_list_append (parser->lex_from_list, node->token); - } - - ralloc_free (list); - - parser->lex_from_node = parser->lex_from_list->head; - - /* It's possible the list consisted of nothing but whitespace. */ - if (parser->lex_from_node == NULL) { - ralloc_free (parser->lex_from_list); - parser->lex_from_list = NULL; - } -} - -static void -_glcpp_parser_skip_stack_push_if (glcpp_parser_t *parser, YYLTYPE *loc, - int condition) -{ - skip_type_t current = SKIP_NO_SKIP; - skip_node_t *node; - - if (parser->skip_stack) - current = parser->skip_stack->type; - - node = ralloc (parser, skip_node_t); - node->loc = *loc; - - if (current == SKIP_NO_SKIP) { - if (condition) - node->type = SKIP_NO_SKIP; - else - node->type = SKIP_TO_ELSE; - } else { - node->type = SKIP_TO_ENDIF; - } - - node->has_else = false; - node->next = parser->skip_stack; - parser->skip_stack = node; -} - -static void -_glcpp_parser_skip_stack_change_if (glcpp_parser_t *parser, YYLTYPE *loc, - const char *type, int condition) -{ - if (parser->skip_stack == NULL) { - glcpp_error (loc, parser, "#%s without #if\n", type); - return; - } - - if (parser->skip_stack->type == SKIP_TO_ELSE) { - if (condition) - parser->skip_stack->type = SKIP_NO_SKIP; - } else { - parser->skip_stack->type = SKIP_TO_ENDIF; - } -} - -static void -_glcpp_parser_skip_stack_pop (glcpp_parser_t *parser, YYLTYPE *loc) -{ - skip_node_t *node; - - if (parser->skip_stack == NULL) { - glcpp_error (loc, parser, "#endif without #if\n"); - return; - } - - node = parser->skip_stack; - parser->skip_stack = node->next; - ralloc_free (node); -} - -static void -_glcpp_parser_handle_version_declaration(glcpp_parser_t *parser, intmax_t version, - const char *es_identifier, - bool explicitly_set) -{ - const struct gl_extensions *extensions = parser->extensions; - - if (parser->version_resolved) - return; - - parser->version_resolved = true; - - add_builtin_define (parser, "__VERSION__", version); - - parser->is_gles = (version == 100) || - (es_identifier && - 
(strcmp(es_identifier, "es") == 0)); - - /* Add pre-defined macros. */ - if (parser->is_gles) { - add_builtin_define(parser, "GL_ES", 1); - add_builtin_define(parser, "GL_EXT_separate_shader_objects", 1); - add_builtin_define(parser, "GL_EXT_draw_buffers", 1); - - if (extensions != NULL) { - if (extensions->OES_EGL_image_external) - add_builtin_define(parser, "GL_OES_EGL_image_external", 1); - if (extensions->OES_standard_derivatives) - add_builtin_define(parser, "GL_OES_standard_derivatives", 1); - if (extensions->ARB_texture_multisample) - add_builtin_define(parser, "GL_OES_texture_storage_multisample_2d_array", 1); - if (extensions->ARB_blend_func_extended) - add_builtin_define(parser, "GL_EXT_blend_func_extended", 1); - } - } else { - add_builtin_define(parser, "GL_ARB_draw_buffers", 1); - add_builtin_define(parser, "GL_ARB_enhanced_layouts", 1); - add_builtin_define(parser, "GL_ARB_separate_shader_objects", 1); - add_builtin_define(parser, "GL_ARB_texture_rectangle", 1); - add_builtin_define(parser, "GL_AMD_shader_trinary_minmax", 1); - - - if (extensions != NULL) { - if (extensions->EXT_texture_array) - add_builtin_define(parser, "GL_EXT_texture_array", 1); - - if (extensions->ARB_arrays_of_arrays) - add_builtin_define(parser, "GL_ARB_arrays_of_arrays", 1); - - if (extensions->ARB_fragment_coord_conventions) - add_builtin_define(parser, "GL_ARB_fragment_coord_conventions", - 1); - - if (extensions->ARB_fragment_layer_viewport) - add_builtin_define(parser, "GL_ARB_fragment_layer_viewport", 1); - - if (extensions->ARB_explicit_attrib_location) - add_builtin_define(parser, "GL_ARB_explicit_attrib_location", 1); - - if (extensions->ARB_explicit_uniform_location) - add_builtin_define(parser, "GL_ARB_explicit_uniform_location", 1); - - if (extensions->ARB_shader_texture_lod) - add_builtin_define(parser, "GL_ARB_shader_texture_lod", 1); - - if (extensions->ARB_draw_instanced) - add_builtin_define(parser, "GL_ARB_draw_instanced", 1); - - if (extensions->ARB_conservative_depth) { - add_builtin_define(parser, "GL_AMD_conservative_depth", 1); - add_builtin_define(parser, "GL_ARB_conservative_depth", 1); - } - - if (extensions->ARB_shader_bit_encoding) - add_builtin_define(parser, "GL_ARB_shader_bit_encoding", 1); - - if (extensions->ARB_shader_clock) - add_builtin_define(parser, "GL_ARB_shader_clock", 1); - - if (extensions->ARB_uniform_buffer_object) - add_builtin_define(parser, "GL_ARB_uniform_buffer_object", 1); - - if (extensions->ARB_texture_cube_map_array) - add_builtin_define(parser, "GL_ARB_texture_cube_map_array", 1); - - if (extensions->ARB_shading_language_packing) - add_builtin_define(parser, "GL_ARB_shading_language_packing", 1); - - if (extensions->ARB_texture_multisample) - add_builtin_define(parser, "GL_ARB_texture_multisample", 1); - - if (extensions->ARB_texture_query_levels) - add_builtin_define(parser, "GL_ARB_texture_query_levels", 1); - - if (extensions->ARB_texture_query_lod) - add_builtin_define(parser, "GL_ARB_texture_query_lod", 1); - - if (extensions->ARB_gpu_shader5) - add_builtin_define(parser, "GL_ARB_gpu_shader5", 1); - - if (extensions->ARB_gpu_shader_fp64) - add_builtin_define(parser, "GL_ARB_gpu_shader_fp64", 1); - - if (extensions->ARB_vertex_attrib_64bit) - add_builtin_define(parser, "GL_ARB_vertex_attrib_64bit", 1); - - if (extensions->AMD_vertex_shader_layer) - add_builtin_define(parser, "GL_AMD_vertex_shader_layer", 1); - - if (extensions->AMD_vertex_shader_viewport_index) - add_builtin_define(parser, "GL_AMD_vertex_shader_viewport_index", 1); - - if 
(extensions->ARB_shading_language_420pack) - add_builtin_define(parser, "GL_ARB_shading_language_420pack", 1); - - if (extensions->ARB_sample_shading) - add_builtin_define(parser, "GL_ARB_sample_shading", 1); - - if (extensions->ARB_texture_gather) - add_builtin_define(parser, "GL_ARB_texture_gather", 1); - - if (extensions->ARB_shader_atomic_counters) - add_builtin_define(parser, "GL_ARB_shader_atomic_counters", 1); - - if (extensions->ARB_viewport_array) - add_builtin_define(parser, "GL_ARB_viewport_array", 1); - - if (extensions->ARB_compute_shader) - add_builtin_define(parser, "GL_ARB_compute_shader", 1); - - if (extensions->ARB_shader_image_load_store) - add_builtin_define(parser, "GL_ARB_shader_image_load_store", 1); - - if (extensions->ARB_shader_image_size) - add_builtin_define(parser, "GL_ARB_shader_image_size", 1); - - if (extensions->ARB_shader_texture_image_samples) - add_builtin_define(parser, "GL_ARB_shader_texture_image_samples", 1); - - if (extensions->ARB_derivative_control) - add_builtin_define(parser, "GL_ARB_derivative_control", 1); - - if (extensions->ARB_shader_precision) - add_builtin_define(parser, "GL_ARB_shader_precision", 1); - - if (extensions->ARB_shader_storage_buffer_object) - add_builtin_define(parser, "GL_ARB_shader_storage_buffer_object", 1); - - if (extensions->ARB_tessellation_shader) - add_builtin_define(parser, "GL_ARB_tessellation_shader", 1); - - if (extensions->ARB_shader_subroutine) - add_builtin_define(parser, "GL_ARB_shader_subroutine", 1); - - if (extensions->ARB_shader_draw_parameters) - add_builtin_define(parser, "GL_ARB_shader_draw_parameters", 1); - } - } - - if (extensions != NULL) { - if (extensions->EXT_shader_integer_mix) - add_builtin_define(parser, "GL_EXT_shader_integer_mix", 1); - - if (extensions->EXT_shader_samples_identical) - add_builtin_define(parser, "GL_EXT_shader_samples_identical", 1); - } - - if (version >= 150) - add_builtin_define(parser, "GL_core_profile", 1); - - /* Currently, all ES2/ES3 implementations support highp in the - * fragment shader, so we always define this macro in ES2/ES3. - * If we ever get a driver that doesn't support highp, we'll - * need to add a flag to the gl_context and check that here. - */ - if (version >= 130 || parser->is_gles) - add_builtin_define (parser, "GL_FRAGMENT_PRECISION_HIGH", 1); - - if (explicitly_set) { - ralloc_asprintf_rewrite_tail (&parser->output, &parser->output_length, - "#version %" PRIiMAX "%s%s", version, - es_identifier ? " " : "", - es_identifier ? es_identifier : ""); - } -} - -/* GLSL version if no version is explicitly specified. */ -#define IMPLICIT_GLSL_VERSION 110 - -/* GLSL ES version if no version is explicitly specified. */ -#define IMPLICIT_GLSL_ES_VERSION 100 - -void -glcpp_parser_resolve_implicit_version(glcpp_parser_t *parser) -{ - int language_version = parser->api == API_OPENGLES2 ? 
- IMPLICIT_GLSL_ES_VERSION :
- IMPLICIT_GLSL_VERSION;
-
- _glcpp_parser_handle_version_declaration(parser, language_version,
- NULL, false);
-}
diff --git a/src/glsl/glcpp/glcpp.c b/src/glsl/glcpp/glcpp.c
deleted file mode 100644
index c62f4efec9d..00000000000
--- a/src/glsl/glcpp/glcpp.c
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <stdio.h>
-#include <string.h>
-#include <errno.h>
-#include <limits.h>
-#include <getopt.h>
-
-#include "glcpp.h"
-#include "main/mtypes.h"
-#include "main/shaderobj.h"
-#include "util/strtod.h"
-
-extern int glcpp_parser_debug;
-
-void
-_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr,
- struct gl_shader *sh)
-{
- (void) ctx;
- *ptr = sh;
-}
-
-/* Read from fp until EOF and return a string of everything read.
- */
-static char *
-load_text_fp (void *ctx, FILE *fp)
-{
-#define CHUNK 4096
- char *text = NULL;
- size_t text_size = 0;
- size_t total_read = 0;
- size_t bytes;
-
- while (1) {
- if (total_read + CHUNK + 1 > text_size) {
- text_size = text_size ? text_size * 2 : CHUNK + 1;
- text = reralloc_size (ctx, text, text_size);
- if (text == NULL) {
- fprintf (stderr, "Out of memory\n");
- return NULL;
- }
- }
- bytes = fread (text + total_read, 1, CHUNK, fp);
- total_read += bytes;
-
- if (bytes < CHUNK) {
- break;
- }
- }
-
- text[total_read] = '\0';
-
- return text;
-}
-
-static char *
-load_text_file(void *ctx, const char *filename)
-{
- char *text;
- FILE *fp;
-
- if (filename == NULL || strcmp (filename, "-") == 0)
- return load_text_fp (ctx, stdin);
-
- fp = fopen (filename, "r");
- if (fp == NULL) {
- fprintf (stderr, "Failed to open file %s: %s\n",
- filename, strerror (errno));
- return NULL;
- }
-
- text = load_text_fp (ctx, fp);
-
- fclose(fp);
-
- return text;
-}
-
-/* Initialize only those things that glcpp cares about.
- */
-static void
-init_fake_gl_context (struct gl_context *gl_ctx)
-{
- gl_ctx->API = API_OPENGL_COMPAT;
- gl_ctx->Const.DisableGLSLLineContinuations = false;
-}
-
-static void
-usage (void)
-{
- fprintf (stderr,
- "Usage: glcpp [OPTIONS] [--] [<filename>]\n"
- "\n"
- "Pre-process the given filename (stdin if no filename given).\n"
- "The following options are supported:\n"
- " --disable-line-continuations Do not interpret lines ending with a\n"
- " backslash ('\\') as a line continuation.\n");
-}
-
-enum {
- DISABLE_LINE_CONTINUATIONS_OPT = CHAR_MAX + 1
-};
-
-static const struct option
-long_options[] = {
- {"disable-line-continuations", no_argument, 0, DISABLE_LINE_CONTINUATIONS_OPT },
- {"debug", no_argument, 0, 'd'},
- {0, 0, 0, 0 }
-};
-
-int
-main (int argc, char *argv[])
-{
- char *filename = NULL;
- void *ctx = ralloc(NULL, void*);
- char *info_log = ralloc_strdup(ctx, "");
- const char *shader;
- int ret;
- struct gl_context gl_ctx;
- int c;
-
- init_fake_gl_context (&gl_ctx);
-
- while ((c = getopt_long(argc, argv, "d", long_options, NULL)) != -1) {
- switch (c) {
- case DISABLE_LINE_CONTINUATIONS_OPT:
- gl_ctx.Const.DisableGLSLLineContinuations = true;
- break;
- case 'd':
- glcpp_parser_debug = 1;
- break;
- default:
- usage ();
- exit (1);
- }
- }
-
- if (optind + 1 < argc) {
- printf ("Unexpected argument: %s\n", argv[optind+1]);
- usage ();
- exit (1);
- }
- if (optind < argc) {
- filename = argv[optind];
- }
-
- shader = load_text_file (ctx, filename);
- if (shader == NULL)
- return 1;
-
- _mesa_locale_init();
-
- ret = glcpp_preprocess(ctx, &shader, &info_log, NULL, &gl_ctx);
-
- printf("%s", shader);
- fprintf(stderr, "%s", info_log);
-
- ralloc_free(ctx);
-
- return ret;
-}
diff --git a/src/glsl/glcpp/glcpp.h b/src/glsl/glcpp/glcpp.h
deleted file mode 100644
index 70aa14b6ec0..00000000000
--- a/src/glsl/glcpp/glcpp.h
+++ /dev/null
@@ -1,251 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#ifndef GLCPP_H
-#define GLCPP_H
-
-#include <stdint.h>
-#include <stdbool.h>
-
-#include "main/mtypes.h"
-
-#include "util/ralloc.h"
-
-#include "program/hash_table.h"
-
-#define yyscan_t void*
-
-/* Some data types used for parser values.
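(Note: nearly every aggregate declared below -- string_list_t, token_list_t, argument_list_t -- is a singly-linked list that carries a tail pointer so appends stay O(1); token_list_t additionally tracks non_space_tail so trailing whitespace can be trimmed cheaply. A generic, self-contained sketch of that pattern, using plain malloc where glcpp uses ralloc:

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct node { const char *str; struct node *next; } node_t;
    typedef struct list { node_t *head, *tail; } list_t;

    /* Constant-time append thanks to the tail pointer. */
    static void list_append(list_t *list, const char *str)
    {
        node_t *node = malloc(sizeof *node);
        node->str = str;
        node->next = NULL;
        if (list->tail)
            list->tail->next = node;
        else
            list->head = node;
        list->tail = node;
    }

    int main(void)
    {
        list_t l = { NULL, NULL };
        list_append(&l, "one");
        list_append(&l, "two");
        for (node_t *n = l.head; n; n = n->next)
            printf("%s\n", n->str);   /* freeing omitted for brevity */
        return 0;
    }
)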
*/ - -typedef struct expression_value { - intmax_t value; - char *undefined_macro; -} expression_value_t; - - -typedef struct string_node { - const char *str; - struct string_node *next; -} string_node_t; - -typedef struct string_list { - string_node_t *head; - string_node_t *tail; -} string_list_t; - -typedef struct token token_t; -typedef struct token_list token_list_t; - -typedef union YYSTYPE -{ - intmax_t ival; - expression_value_t expression_value; - char *str; - string_list_t *string_list; - token_t *token; - token_list_t *token_list; -} YYSTYPE; - -# define YYSTYPE_IS_TRIVIAL 1 -# define YYSTYPE_IS_DECLARED 1 - -typedef struct YYLTYPE { - int first_line; - int first_column; - int last_line; - int last_column; - unsigned source; -} YYLTYPE; -# define YYLTYPE_IS_DECLARED 1 -# define YYLTYPE_IS_TRIVIAL 1 - -# define YYLLOC_DEFAULT(Current, Rhs, N) \ -do { \ - if (N) \ - { \ - (Current).first_line = YYRHSLOC(Rhs, 1).first_line; \ - (Current).first_column = YYRHSLOC(Rhs, 1).first_column; \ - (Current).last_line = YYRHSLOC(Rhs, N).last_line; \ - (Current).last_column = YYRHSLOC(Rhs, N).last_column; \ - } \ - else \ - { \ - (Current).first_line = (Current).last_line = \ - YYRHSLOC(Rhs, 0).last_line; \ - (Current).first_column = (Current).last_column = \ - YYRHSLOC(Rhs, 0).last_column; \ - } \ - (Current).source = 0; \ -} while (0) - -struct token { - int type; - YYSTYPE value; - YYLTYPE location; -}; - -typedef struct token_node { - token_t *token; - struct token_node *next; -} token_node_t; - -struct token_list { - token_node_t *head; - token_node_t *tail; - token_node_t *non_space_tail; -}; - -typedef struct argument_node { - token_list_t *argument; - struct argument_node *next; -} argument_node_t; - -typedef struct argument_list { - argument_node_t *head; - argument_node_t *tail; -} argument_list_t; - -typedef struct glcpp_parser glcpp_parser_t; - -typedef enum { - TOKEN_CLASS_IDENTIFIER, - TOKEN_CLASS_IDENTIFIER_FINALIZED, - TOKEN_CLASS_FUNC_MACRO, - TOKEN_CLASS_OBJ_MACRO -} token_class_t; - -token_class_t -glcpp_parser_classify_token (glcpp_parser_t *parser, - const char *identifier, - int *parameter_index); - -typedef struct { - int is_function; - string_list_t *parameters; - const char *identifier; - token_list_t *replacements; -} macro_t; - -typedef struct expansion_node { - macro_t *macro; - token_node_t *replacements; - struct expansion_node *next; -} expansion_node_t; - -typedef enum skip_type { - SKIP_NO_SKIP, - SKIP_TO_ELSE, - SKIP_TO_ENDIF -} skip_type_t; - -typedef struct skip_node { - skip_type_t type; - bool has_else; - YYLTYPE loc; /* location of the initial #if/#elif/... 
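(Note: skip_node_t above is one frame of the conditional-compilation stack manipulated by the _glcpp_parser_skip_stack_* helpers deleted from glcpp-parse.y earlier in this patch. A self-contained sketch of that logic, simplified to plain #if/#else/#endif with no locations or error paths, run against the nesting of test 048 below:

    #include <stdio.h>
    #include <stdlib.h>

    typedef enum { SKIP_NO_SKIP, SKIP_TO_ELSE, SKIP_TO_ENDIF } skip_type_t;
    typedef struct skip_node { skip_type_t type; struct skip_node *next; } skip_node_t;

    static skip_node_t *stack;

    static void push_if(int condition)   /* #if */
    {
        skip_node_t *node = malloc(sizeof *node);
        /* Inside an already-skipped region everything is skipped to
         * #endif; otherwise the condition selects the branch. */
        if (stack == NULL || stack->type == SKIP_NO_SKIP)
            node->type = condition ? SKIP_NO_SKIP : SKIP_TO_ELSE;
        else
            node->type = SKIP_TO_ENDIF;
        node->next = stack;
        stack = node;
    }

    static void change_else(void)        /* #else */
    {
        if (stack->type == SKIP_TO_ELSE)
            stack->type = SKIP_NO_SKIP;
        else
            stack->type = SKIP_TO_ENDIF;
    }

    static void pop_endif(void)          /* #endif */
    {
        skip_node_t *node = stack;
        stack = node->next;
        free(node);
    }

    static int skipping(void) { return stack && stack->type != SKIP_NO_SKIP; }

    int main(void)
    {
        push_if(0);                                   /* #if 0        */
        push_if(1);                                   /* #if 1 (dead) */
        printf("nested then: skip=%d\n", skipping()); /* 1 */
        change_else();                                /* #else        */
        printf("nested else: skip=%d\n", skipping()); /* 1 */
        pop_endif();                                  /* #endif       */
        pop_endif();                                  /* #endif       */
        printf("after: skip=%d\n", skipping());       /* 0 */
        return 0;
    }
)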
*/
- struct skip_node *next;
-} skip_node_t;
-
-typedef struct active_list {
- const char *identifier;
- token_node_t *marker;
- struct active_list *next;
-} active_list_t;
-
-struct glcpp_parser {
- yyscan_t scanner;
- struct hash_table *defines;
- active_list_t *active;
- int lexing_directive;
- int space_tokens;
- int last_token_was_newline;
- int last_token_was_space;
- int first_non_space_token_this_line;
- int newline_as_space;
- int in_control_line;
- int paren_count;
- int commented_newlines;
- skip_node_t *skip_stack;
- int skipping;
- token_list_t *lex_from_list;
- token_node_t *lex_from_node;
- char *output;
- char *info_log;
- size_t output_length;
- size_t info_log_length;
- int error;
- const struct gl_extensions *extensions;
- gl_api api;
- bool version_resolved;
- bool has_new_line_number;
- int new_line_number;
- bool has_new_source_number;
- int new_source_number;
- bool is_gles;
-};
-
-struct gl_extensions;
-
-glcpp_parser_t *
-glcpp_parser_create (const struct gl_extensions *extensions, gl_api api);
-
-int
-glcpp_parser_parse (glcpp_parser_t *parser);
-
-void
-glcpp_parser_destroy (glcpp_parser_t *parser);
-
-void
-glcpp_parser_resolve_implicit_version(glcpp_parser_t *parser);
-
-int
-glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log,
- const struct gl_extensions *extensions, struct gl_context *g_ctx);
-
-/* Functions for writing to the info log */
-
-void
-glcpp_error (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...);
-
-void
-glcpp_warning (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...);
-
-/* Generated by glcpp-lex.l to glcpp-lex.c */
-
-int
-glcpp_lex_init_extra (glcpp_parser_t *parser, yyscan_t* scanner);
-
-void
-glcpp_lex_set_source_string(glcpp_parser_t *parser, const char *shader);
-
-int
-glcpp_lex (YYSTYPE *lvalp, YYLTYPE *llocp, yyscan_t scanner);
-
-int
-glcpp_lex_destroy (yyscan_t scanner);
-
-/* Generated by glcpp-parse.y to glcpp-parse.c */
-
-int
-yyparse (glcpp_parser_t *parser);
-
-#endif
diff --git a/src/glsl/glcpp/pp.c b/src/glsl/glcpp/pp.c
deleted file mode 100644
index 160c6662ff6..00000000000
--- a/src/glsl/glcpp/pp.c
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <assert.h>
-#include <string.h>
-#include <stdarg.h>
-#include "glcpp.h"
-
-void
-glcpp_error (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...)
-{ - va_list ap; - - parser->error = 1; - ralloc_asprintf_rewrite_tail(&parser->info_log, - &parser->info_log_length, - "%u:%u(%u): " - "preprocessor error: ", - locp->source, - locp->first_line, - locp->first_column); - va_start(ap, fmt); - ralloc_vasprintf_rewrite_tail(&parser->info_log, - &parser->info_log_length, - fmt, ap); - va_end(ap); - ralloc_asprintf_rewrite_tail(&parser->info_log, - &parser->info_log_length, "\n"); -} - -void -glcpp_warning (YYLTYPE *locp, glcpp_parser_t *parser, const char *fmt, ...) -{ - va_list ap; - - ralloc_asprintf_rewrite_tail(&parser->info_log, - &parser->info_log_length, - "%u:%u(%u): " - "preprocessor warning: ", - locp->source, - locp->first_line, - locp->first_column); - va_start(ap, fmt); - ralloc_vasprintf_rewrite_tail(&parser->info_log, - &parser->info_log_length, - fmt, ap); - va_end(ap); - ralloc_asprintf_rewrite_tail(&parser->info_log, - &parser->info_log_length, "\n"); -} - -/* Given str, (that's expected to start with a newline terminator of some - * sort), return a pointer to the first character in str after the newline. - * - * A newline terminator can be any of the following sequences: - * - * "\r\n" - * "\n\r" - * "\n" - * "\r" - * - * And the longest such sequence will be skipped. - */ -static const char * -skip_newline (const char *str) -{ - const char *ret = str; - - if (ret == NULL) - return ret; - - if (*ret == '\0') - return ret; - - if (*ret == '\r') { - ret++; - if (*ret && *ret == '\n') - ret++; - } else if (*ret == '\n') { - ret++; - if (*ret && *ret == '\r') - ret++; - } - - return ret; -} - -/* Remove any line continuation characters in the shader, (whether in - * preprocessing directives or in GLSL code). - */ -static char * -remove_line_continuations(glcpp_parser_t *ctx, const char *shader) -{ - char *clean = ralloc_strdup(ctx, ""); - const char *backslash, *newline, *search_start; - const char *cr, *lf; - char newline_separator[3]; - int collapsed_newlines = 0; - - search_start = shader; - - /* Determine what flavor of newlines this shader is using. GLSL - * provides for 4 different possible ways to separate lines, (using - * one or two characters): - * - * "\n" (line-feed, like Linux, Unix, and new Mac OS) - * "\r" (carriage-return, like old Mac files) - * "\r\n" (carriage-return + line-feed, like DOS files) - * "\n\r" (line-feed + carriage-return, like nothing, really) - * - * This code explicitly supports a shader that uses a mixture of - * newline terminators and will properly handle line continuation - * backslashes followed by any of the above. - * - * But, since we must also insert additional newlines in the output - * (for any collapsed lines) we attempt to maintain consistency by - * examining the first encountered newline terminator, and using the - * same terminator for any newlines we insert. - */ - cr = strchr(search_start, '\r'); - lf = strchr(search_start, '\n'); - - newline_separator[0] = '\n'; - newline_separator[1] = '\0'; - newline_separator[2] = '\0'; - - if (cr == NULL) { - /* Nothing to do. */ - } else if (lf == NULL) { - newline_separator[0] = '\r'; - } else if (lf == cr + 1) { - newline_separator[0] = '\r'; - newline_separator[1] = '\n'; - } else if (cr == lf + 1) { - newline_separator[0] = '\n'; - newline_separator[1] = '\r'; - } - - while (true) { - backslash = strchr(search_start, '\\'); - - /* If we have previously collapsed any line-continuations, - * then we want to insert additional newlines at the next - * occurrence of a newline character to avoid changing any - * line numbers. 
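(Note: the splice-and-repay scheme described in the comment above is easy to miss: every collapsed backslash-newline is counted, and the same number of newlines is re-emitted at the end of the current logical line, so later __LINE__ values and error locations are unaffected. A minimal sketch restricted to plain '\n' terminators; the real code also handles "\r", "\r\n", "\n\r" and mixed files:

    #include <stdio.h>

    static void splice(const char *in, char *out)
    {
        int collapsed = 0;

        for (; *in; in++) {
            if (in[0] == '\\' && in[1] == '\n') {
                in++;            /* drop the backslash-newline pair */
                collapsed++;
            } else {
                *out++ = *in;
                if (*in == '\n')
                    /* repay the swallowed newlines here */
                    for (; collapsed > 0; collapsed--)
                        *out++ = '\n';
            }
        }
        *out = '\0';
    }

    int main(void)
    {
        char out[64];
        splice("#define FOO \\\n1\nFOO\n", out);
        /* prints "#define FOO 1", then a blank line, so "FOO"
         * is still on line 3 of the output */
        fputs(out, stdout);
        return 0;
    }
)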
- */ - if (collapsed_newlines) { - cr = strchr (search_start, '\r'); - lf = strchr (search_start, '\n'); - if (cr && lf) - newline = cr < lf ? cr : lf; - else if (cr) - newline = cr; - else - newline = lf; - if (newline && - (backslash == NULL || newline < backslash)) - { - ralloc_strncat(&clean, shader, - newline - shader + 1); - while (collapsed_newlines) { - ralloc_strcat(&clean, newline_separator); - collapsed_newlines--; - } - shader = skip_newline (newline); - search_start = shader; - } - } - - search_start = backslash + 1; - - if (backslash == NULL) - break; - - /* At each line continuation, (backslash followed by a - * newline), copy all preceding text to the output, then - * advance the shader pointer to the character after the - * newline. - */ - if (backslash[1] == '\r' || backslash[1] == '\n') - { - collapsed_newlines++; - ralloc_strncat(&clean, shader, backslash - shader); - shader = skip_newline (backslash + 1); - search_start = shader; - } - } - - ralloc_strcat(&clean, shader); - - return clean; -} - -int -glcpp_preprocess(void *ralloc_ctx, const char **shader, char **info_log, - const struct gl_extensions *extensions, struct gl_context *gl_ctx) -{ - int errors; - glcpp_parser_t *parser = glcpp_parser_create (extensions, gl_ctx->API); - - if (! gl_ctx->Const.DisableGLSLLineContinuations) - *shader = remove_line_continuations(parser, *shader); - - glcpp_lex_set_source_string (parser, *shader); - - glcpp_parser_parse (parser); - - if (parser->skip_stack) - glcpp_error (&parser->skip_stack->loc, parser, "Unterminated #if\n"); - - glcpp_parser_resolve_implicit_version(parser); - - ralloc_strcat(info_log, parser->info_log); - - ralloc_steal(ralloc_ctx, parser->output); - *shader = parser->output; - - errors = parser->error; - glcpp_parser_destroy (parser); - return errors; -} diff --git a/src/glsl/glcpp/tests/.gitignore b/src/glsl/glcpp/tests/.gitignore deleted file mode 100644 index 3802c850a3e..00000000000 --- a/src/glsl/glcpp/tests/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -subtest-cr/ -subtest-lf/ -subtest-cr-lf/ -subtest-lf-cr/ diff --git a/src/glsl/glcpp/tests/000-content-with-spaces.c b/src/glsl/glcpp/tests/000-content-with-spaces.c deleted file mode 100644 index 1f2320e6fc1..00000000000 --- a/src/glsl/glcpp/tests/000-content-with-spaces.c +++ /dev/null @@ -1 +0,0 @@ - this is four tokens with spaces diff --git a/src/glsl/glcpp/tests/000-content-with-spaces.c.expected b/src/glsl/glcpp/tests/000-content-with-spaces.c.expected deleted file mode 100644 index 00791910ed5..00000000000 --- a/src/glsl/glcpp/tests/000-content-with-spaces.c.expected +++ /dev/null @@ -1 +0,0 @@ - this is four tokens with spaces diff --git a/src/glsl/glcpp/tests/001-define.c b/src/glsl/glcpp/tests/001-define.c deleted file mode 100644 index cbf2fee0e75..00000000000 --- a/src/glsl/glcpp/tests/001-define.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo 1 -foo diff --git a/src/glsl/glcpp/tests/001-define.c.expected b/src/glsl/glcpp/tests/001-define.c.expected deleted file mode 100644 index a464d9da742..00000000000 --- a/src/glsl/glcpp/tests/001-define.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -1 diff --git a/src/glsl/glcpp/tests/002-define-chain.c b/src/glsl/glcpp/tests/002-define-chain.c deleted file mode 100644 index 87d75c68751..00000000000 --- a/src/glsl/glcpp/tests/002-define-chain.c +++ /dev/null @@ -1,3 +0,0 @@ -#define foo 1 -#define bar foo -bar diff --git a/src/glsl/glcpp/tests/002-define-chain.c.expected b/src/glsl/glcpp/tests/002-define-chain.c.expected deleted file mode 100644 index 
c6c9ee38a9e..00000000000 --- a/src/glsl/glcpp/tests/002-define-chain.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -1 diff --git a/src/glsl/glcpp/tests/003-define-chain-reverse.c b/src/glsl/glcpp/tests/003-define-chain-reverse.c deleted file mode 100644 index a18b724eca0..00000000000 --- a/src/glsl/glcpp/tests/003-define-chain-reverse.c +++ /dev/null @@ -1,3 +0,0 @@ -#define bar foo -#define foo 1 -bar diff --git a/src/glsl/glcpp/tests/003-define-chain-reverse.c.expected b/src/glsl/glcpp/tests/003-define-chain-reverse.c.expected deleted file mode 100644 index c6c9ee38a9e..00000000000 --- a/src/glsl/glcpp/tests/003-define-chain-reverse.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -1 diff --git a/src/glsl/glcpp/tests/004-define-recursive.c b/src/glsl/glcpp/tests/004-define-recursive.c deleted file mode 100644 index 2ac56ea3dcf..00000000000 --- a/src/glsl/glcpp/tests/004-define-recursive.c +++ /dev/null @@ -1,6 +0,0 @@ -#define foo bar -#define bar baz -#define baz foo -foo -bar -baz diff --git a/src/glsl/glcpp/tests/004-define-recursive.c.expected b/src/glsl/glcpp/tests/004-define-recursive.c.expected deleted file mode 100644 index 2d07687f8ca..00000000000 --- a/src/glsl/glcpp/tests/004-define-recursive.c.expected +++ /dev/null @@ -1,6 +0,0 @@ - - - -foo -bar -baz diff --git a/src/glsl/glcpp/tests/005-define-composite-chain.c b/src/glsl/glcpp/tests/005-define-composite-chain.c deleted file mode 100644 index f5521df968d..00000000000 --- a/src/glsl/glcpp/tests/005-define-composite-chain.c +++ /dev/null @@ -1,3 +0,0 @@ -#define foo 1 -#define bar a foo -bar diff --git a/src/glsl/glcpp/tests/005-define-composite-chain.c.expected b/src/glsl/glcpp/tests/005-define-composite-chain.c.expected deleted file mode 100644 index 892975c268c..00000000000 --- a/src/glsl/glcpp/tests/005-define-composite-chain.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -a 1 diff --git a/src/glsl/glcpp/tests/006-define-composite-chain-reverse.c b/src/glsl/glcpp/tests/006-define-composite-chain-reverse.c deleted file mode 100644 index 4bb91a1221a..00000000000 --- a/src/glsl/glcpp/tests/006-define-composite-chain-reverse.c +++ /dev/null @@ -1,3 +0,0 @@ -#define bar a foo -#define foo 1 -bar diff --git a/src/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected b/src/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected deleted file mode 100644 index 892975c268c..00000000000 --- a/src/glsl/glcpp/tests/006-define-composite-chain-reverse.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -a 1 diff --git a/src/glsl/glcpp/tests/007-define-composite-recursive.c b/src/glsl/glcpp/tests/007-define-composite-recursive.c deleted file mode 100644 index 5784565bdf3..00000000000 --- a/src/glsl/glcpp/tests/007-define-composite-recursive.c +++ /dev/null @@ -1,6 +0,0 @@ -#define foo a bar -#define bar b baz -#define baz c foo -foo -bar -baz diff --git a/src/glsl/glcpp/tests/007-define-composite-recursive.c.expected b/src/glsl/glcpp/tests/007-define-composite-recursive.c.expected deleted file mode 100644 index 0b0b477d9df..00000000000 --- a/src/glsl/glcpp/tests/007-define-composite-recursive.c.expected +++ /dev/null @@ -1,6 +0,0 @@ - - - -a b c foo -b c a bar -c a b baz diff --git a/src/glsl/glcpp/tests/008-define-empty.c b/src/glsl/glcpp/tests/008-define-empty.c deleted file mode 100644 index b1bd17ec215..00000000000 --- a/src/glsl/glcpp/tests/008-define-empty.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo -foo diff --git a/src/glsl/glcpp/tests/008-define-empty.c.expected b/src/glsl/glcpp/tests/008-define-empty.c.expected deleted file 
mode 100644 index d148bc8e800..00000000000 --- a/src/glsl/glcpp/tests/008-define-empty.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/src/glsl/glcpp/tests/009-undef.c b/src/glsl/glcpp/tests/009-undef.c deleted file mode 100644 index 3fc1fb44243..00000000000 --- a/src/glsl/glcpp/tests/009-undef.c +++ /dev/null @@ -1,4 +0,0 @@ -#define foo 1 -foo -#undef foo -foo diff --git a/src/glsl/glcpp/tests/009-undef.c.expected b/src/glsl/glcpp/tests/009-undef.c.expected deleted file mode 100644 index 9c0b35a4518..00000000000 --- a/src/glsl/glcpp/tests/009-undef.c.expected +++ /dev/null @@ -1,4 +0,0 @@ - -1 - -foo diff --git a/src/glsl/glcpp/tests/010-undef-re-define.c b/src/glsl/glcpp/tests/010-undef-re-define.c deleted file mode 100644 index 32ff73798b1..00000000000 --- a/src/glsl/glcpp/tests/010-undef-re-define.c +++ /dev/null @@ -1,6 +0,0 @@ -#define foo 1 -foo -#undef foo -foo -#define foo 2 -foo diff --git a/src/glsl/glcpp/tests/010-undef-re-define.c.expected b/src/glsl/glcpp/tests/010-undef-re-define.c.expected deleted file mode 100644 index 5970f49028e..00000000000 --- a/src/glsl/glcpp/tests/010-undef-re-define.c.expected +++ /dev/null @@ -1,6 +0,0 @@ - -1 - -foo - -2 diff --git a/src/glsl/glcpp/tests/011-define-func-empty.c b/src/glsl/glcpp/tests/011-define-func-empty.c deleted file mode 100644 index d9ce13c2284..00000000000 --- a/src/glsl/glcpp/tests/011-define-func-empty.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo() -foo() diff --git a/src/glsl/glcpp/tests/011-define-func-empty.c.expected b/src/glsl/glcpp/tests/011-define-func-empty.c.expected deleted file mode 100644 index d148bc8e800..00000000000 --- a/src/glsl/glcpp/tests/011-define-func-empty.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/src/glsl/glcpp/tests/012-define-func-no-args.c b/src/glsl/glcpp/tests/012-define-func-no-args.c deleted file mode 100644 index c2bb730b115..00000000000 --- a/src/glsl/glcpp/tests/012-define-func-no-args.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo() bar -foo() diff --git a/src/glsl/glcpp/tests/012-define-func-no-args.c.expected b/src/glsl/glcpp/tests/012-define-func-no-args.c.expected deleted file mode 100644 index 9f075f26004..00000000000 --- a/src/glsl/glcpp/tests/012-define-func-no-args.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -bar diff --git a/src/glsl/glcpp/tests/013-define-func-1-arg-unused.c b/src/glsl/glcpp/tests/013-define-func-1-arg-unused.c deleted file mode 100644 index f78fb8b118a..00000000000 --- a/src/glsl/glcpp/tests/013-define-func-1-arg-unused.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(x) 1 -foo(bar) diff --git a/src/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected b/src/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected deleted file mode 100644 index a464d9da742..00000000000 --- a/src/glsl/glcpp/tests/013-define-func-1-arg-unused.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -1 diff --git a/src/glsl/glcpp/tests/014-define-func-2-arg-unused.c b/src/glsl/glcpp/tests/014-define-func-2-arg-unused.c deleted file mode 100644 index 11feb2624b7..00000000000 --- a/src/glsl/glcpp/tests/014-define-func-2-arg-unused.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(x,y) 1 -foo(bar,baz) diff --git a/src/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected b/src/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected deleted file mode 100644 index a464d9da742..00000000000 --- a/src/glsl/glcpp/tests/014-define-func-2-arg-unused.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -1 diff --git a/src/glsl/glcpp/tests/015-define-object-with-parens.c 
b/src/glsl/glcpp/tests/015-define-object-with-parens.c deleted file mode 100644 index 558da9c617b..00000000000 --- a/src/glsl/glcpp/tests/015-define-object-with-parens.c +++ /dev/null @@ -1,4 +0,0 @@ -#define foo ()1 -foo() -#define bar ()2 -bar() diff --git a/src/glsl/glcpp/tests/015-define-object-with-parens.c.expected b/src/glsl/glcpp/tests/015-define-object-with-parens.c.expected deleted file mode 100644 index a70321a4c51..00000000000 --- a/src/glsl/glcpp/tests/015-define-object-with-parens.c.expected +++ /dev/null @@ -1,4 +0,0 @@ - -()1() - -()2() diff --git a/src/glsl/glcpp/tests/016-define-func-1-arg.c b/src/glsl/glcpp/tests/016-define-func-1-arg.c deleted file mode 100644 index a2e2404c7c1..00000000000 --- a/src/glsl/glcpp/tests/016-define-func-1-arg.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(x) ((x)+1) -foo(bar) diff --git a/src/glsl/glcpp/tests/016-define-func-1-arg.c.expected b/src/glsl/glcpp/tests/016-define-func-1-arg.c.expected deleted file mode 100644 index 6bfe04f7381..00000000000 --- a/src/glsl/glcpp/tests/016-define-func-1-arg.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -((bar)+1) diff --git a/src/glsl/glcpp/tests/017-define-func-2-args.c b/src/glsl/glcpp/tests/017-define-func-2-args.c deleted file mode 100644 index c7253835278..00000000000 --- a/src/glsl/glcpp/tests/017-define-func-2-args.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(x,y) ((x)*(y)) -foo(bar,baz) diff --git a/src/glsl/glcpp/tests/017-define-func-2-args.c.expected b/src/glsl/glcpp/tests/017-define-func-2-args.c.expected deleted file mode 100644 index f7a2b8c26cb..00000000000 --- a/src/glsl/glcpp/tests/017-define-func-2-args.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -((bar)*(baz)) diff --git a/src/glsl/glcpp/tests/018-define-func-macro-as-parameter.c b/src/glsl/glcpp/tests/018-define-func-macro-as-parameter.c deleted file mode 100644 index 668130b8f9b..00000000000 --- a/src/glsl/glcpp/tests/018-define-func-macro-as-parameter.c +++ /dev/null @@ -1,3 +0,0 @@ -#define x 0 -#define foo(x) x -foo(1) diff --git a/src/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected b/src/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected deleted file mode 100644 index c6c9ee38a9e..00000000000 --- a/src/glsl/glcpp/tests/018-define-func-macro-as-parameter.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -1 diff --git a/src/glsl/glcpp/tests/019-define-func-1-arg-multi.c b/src/glsl/glcpp/tests/019-define-func-1-arg-multi.c deleted file mode 100644 index c4e62b25508..00000000000 --- a/src/glsl/glcpp/tests/019-define-func-1-arg-multi.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(x) (x) -foo(this is more than one word) diff --git a/src/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected b/src/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected deleted file mode 100644 index 1e89b8cfd0c..00000000000 --- a/src/glsl/glcpp/tests/019-define-func-1-arg-multi.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -(this is more than one word) diff --git a/src/glsl/glcpp/tests/020-define-func-2-arg-multi.c b/src/glsl/glcpp/tests/020-define-func-2-arg-multi.c deleted file mode 100644 index 3049ad15465..00000000000 --- a/src/glsl/glcpp/tests/020-define-func-2-arg-multi.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(x,y) x,two fish,red fish,y -foo(one fish, blue fish) diff --git a/src/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected b/src/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected deleted file mode 100644 index 19f59f5ecb7..00000000000 --- a/src/glsl/glcpp/tests/020-define-func-2-arg-multi.c.expected +++ 
/dev/null @@ -1,2 +0,0 @@ - -one fish,two fish,red fish,blue fish diff --git a/src/glsl/glcpp/tests/021-define-func-compose.c b/src/glsl/glcpp/tests/021-define-func-compose.c deleted file mode 100644 index 21ddd0e65f9..00000000000 --- a/src/glsl/glcpp/tests/021-define-func-compose.c +++ /dev/null @@ -1,3 +0,0 @@ -#define bar(x) (1+(x)) -#define foo(y) (2*(y)) -foo(bar(3)) diff --git a/src/glsl/glcpp/tests/021-define-func-compose.c.expected b/src/glsl/glcpp/tests/021-define-func-compose.c.expected deleted file mode 100644 index 87f51f0baca..00000000000 --- a/src/glsl/glcpp/tests/021-define-func-compose.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -(2*((1+(3)))) diff --git a/src/glsl/glcpp/tests/022-define-func-arg-with-parens.c b/src/glsl/glcpp/tests/022-define-func-arg-with-parens.c deleted file mode 100644 index c20d73a4a28..00000000000 --- a/src/glsl/glcpp/tests/022-define-func-arg-with-parens.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(x) (x) -foo(argument(including parens)for the win) diff --git a/src/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected b/src/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected deleted file mode 100644 index 1dfc6698bb7..00000000000 --- a/src/glsl/glcpp/tests/022-define-func-arg-with-parens.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -(argument(including parens)for the win) diff --git a/src/glsl/glcpp/tests/023-define-extra-whitespace.c b/src/glsl/glcpp/tests/023-define-extra-whitespace.c deleted file mode 100644 index 7ebfed6516c..00000000000 --- a/src/glsl/glcpp/tests/023-define-extra-whitespace.c +++ /dev/null @@ -1,8 +0,0 @@ -#define noargs() 1 -# define onearg(foo) foo - # define twoargs( x , y ) x y - # define threeargs( a , b , c ) a b c -noargs ( ) -onearg ( 2 ) -twoargs ( 3 , 4 ) -threeargs ( 5 , 6 , 7 ) diff --git a/src/glsl/glcpp/tests/023-define-extra-whitespace.c.expected b/src/glsl/glcpp/tests/023-define-extra-whitespace.c.expected deleted file mode 100644 index 9c58275d0f9..00000000000 --- a/src/glsl/glcpp/tests/023-define-extra-whitespace.c.expected +++ /dev/null @@ -1,8 +0,0 @@ - - - - -1 -2 -3 4 -5 6 7 diff --git a/src/glsl/glcpp/tests/024-define-chain-to-self-recursion.c b/src/glsl/glcpp/tests/024-define-chain-to-self-recursion.c deleted file mode 100644 index e788adce30c..00000000000 --- a/src/glsl/glcpp/tests/024-define-chain-to-self-recursion.c +++ /dev/null @@ -1,3 +0,0 @@ -#define foo foo -#define bar foo -bar diff --git a/src/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected b/src/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected deleted file mode 100644 index 15600af546b..00000000000 --- a/src/glsl/glcpp/tests/024-define-chain-to-self-recursion.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -foo diff --git a/src/glsl/glcpp/tests/025-func-macro-as-non-macro.c b/src/glsl/glcpp/tests/025-func-macro-as-non-macro.c deleted file mode 100644 index b433671d1bf..00000000000 --- a/src/glsl/glcpp/tests/025-func-macro-as-non-macro.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(bar) bar -foo bar diff --git a/src/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected b/src/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected deleted file mode 100644 index 4a59f0520e3..00000000000 --- a/src/glsl/glcpp/tests/025-func-macro-as-non-macro.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -foo bar diff --git a/src/glsl/glcpp/tests/026-define-func-extra-newlines.c b/src/glsl/glcpp/tests/026-define-func-extra-newlines.c deleted file mode 100644 index 0d837405309..00000000000 --- 
a/src/glsl/glcpp/tests/026-define-func-extra-newlines.c +++ /dev/null @@ -1,6 +0,0 @@ -#define foo(a) bar - -foo -( -1 -) diff --git a/src/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected b/src/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected deleted file mode 100644 index 5e3c70f2cc5..00000000000 --- a/src/glsl/glcpp/tests/026-define-func-extra-newlines.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -bar diff --git a/src/glsl/glcpp/tests/027-define-chain-obj-to-func.c b/src/glsl/glcpp/tests/027-define-chain-obj-to-func.c deleted file mode 100644 index 5ccb52caba5..00000000000 --- a/src/glsl/glcpp/tests/027-define-chain-obj-to-func.c +++ /dev/null @@ -1,3 +0,0 @@ -#define failure() success -#define foo failure() -foo diff --git a/src/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected b/src/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected deleted file mode 100644 index 94c15f95059..00000000000 --- a/src/glsl/glcpp/tests/027-define-chain-obj-to-func.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -success diff --git a/src/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c b/src/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c deleted file mode 100644 index 44962a71876..00000000000 --- a/src/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c +++ /dev/null @@ -1,3 +0,0 @@ -#define success() failure -#define foo success -foo diff --git a/src/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected b/src/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected deleted file mode 100644 index 94c15f95059..00000000000 --- a/src/glsl/glcpp/tests/028-define-chain-obj-to-non-func.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -success diff --git a/src/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c b/src/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c deleted file mode 100644 index 261f7d28fc2..00000000000 --- a/src/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c +++ /dev/null @@ -1,3 +0,0 @@ -#define bar(failure) failure -#define foo bar(success) -foo diff --git a/src/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected b/src/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected deleted file mode 100644 index 94c15f95059..00000000000 --- a/src/glsl/glcpp/tests/029-define-chain-obj-to-func-with-args.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -success diff --git a/src/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c b/src/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c deleted file mode 100644 index e56fbefd62d..00000000000 --- a/src/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c +++ /dev/null @@ -1,4 +0,0 @@ -#define baz(failure) failure -#define bar(failure) failure -#define foo bar(baz(success)) -foo diff --git a/src/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected b/src/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected deleted file mode 100644 index bed826e7831..00000000000 --- a/src/glsl/glcpp/tests/030-define-chain-obj-to-func-compose.c.expected +++ /dev/null @@ -1,4 +0,0 @@ - - - -success diff --git a/src/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c b/src/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c deleted file mode 100644 index 3f4c8744dff..00000000000 --- a/src/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c +++ /dev/null @@ -1,4 +0,0 @@ -#define baz(failure) failure -#define bar(failure) failure -#define foo() bar(baz(success)) -foo() diff --git 
a/src/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected b/src/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected deleted file mode 100644 index bed826e7831..00000000000 --- a/src/glsl/glcpp/tests/031-define-chain-func-to-func-compose.c.expected +++ /dev/null @@ -1,4 +0,0 @@ - - - -success diff --git a/src/glsl/glcpp/tests/032-define-func-self-recurse.c b/src/glsl/glcpp/tests/032-define-func-self-recurse.c deleted file mode 100644 index b3ac70f499c..00000000000 --- a/src/glsl/glcpp/tests/032-define-func-self-recurse.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(a) foo(2*(a)) -foo(3) diff --git a/src/glsl/glcpp/tests/032-define-func-self-recurse.c.expected b/src/glsl/glcpp/tests/032-define-func-self-recurse.c.expected deleted file mode 100644 index 983f9417401..00000000000 --- a/src/glsl/glcpp/tests/032-define-func-self-recurse.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -foo(2*(3)) diff --git a/src/glsl/glcpp/tests/033-define-func-self-compose.c b/src/glsl/glcpp/tests/033-define-func-self-compose.c deleted file mode 100644 index f65e48286cf..00000000000 --- a/src/glsl/glcpp/tests/033-define-func-self-compose.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(a) foo(2*(a)) -foo(foo(3)) diff --git a/src/glsl/glcpp/tests/033-define-func-self-compose.c.expected b/src/glsl/glcpp/tests/033-define-func-self-compose.c.expected deleted file mode 100644 index 08183623643..00000000000 --- a/src/glsl/glcpp/tests/033-define-func-self-compose.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -foo(2*(foo(2*(3)))) diff --git a/src/glsl/glcpp/tests/034-define-func-self-compose-non-func.c b/src/glsl/glcpp/tests/034-define-func-self-compose-non-func.c deleted file mode 100644 index 209a5f7e07c..00000000000 --- a/src/glsl/glcpp/tests/034-define-func-self-compose-non-func.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(bar) bar -foo(foo) diff --git a/src/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected b/src/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected deleted file mode 100644 index 3f808fe665d..00000000000 --- a/src/glsl/glcpp/tests/034-define-func-self-compose-non-func.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -foo diff --git a/src/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c b/src/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c deleted file mode 100644 index c307fbe830f..00000000000 --- a/src/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(bar) bar -foo(1+foo) diff --git a/src/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected b/src/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected deleted file mode 100644 index 09dfdd64e9b..00000000000 --- a/src/glsl/glcpp/tests/035-define-func-self-compose-non-func-multi-token-argument.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -1+foo diff --git a/src/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c b/src/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c deleted file mode 100644 index b21ff336738..00000000000 --- a/src/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c +++ /dev/null @@ -1,3 +0,0 @@ -#define bar success -#define foo(x) x -foo(more bar) diff --git a/src/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected b/src/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected deleted file mode 100644 index 
580ed9599c5..00000000000 --- a/src/glsl/glcpp/tests/036-define-func-non-macro-multi-token-argument.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -more success diff --git a/src/glsl/glcpp/tests/037-finalize-unexpanded-macro.c b/src/glsl/glcpp/tests/037-finalize-unexpanded-macro.c deleted file mode 100644 index b3a2f37f1b9..00000000000 --- a/src/glsl/glcpp/tests/037-finalize-unexpanded-macro.c +++ /dev/null @@ -1,3 +0,0 @@ -#define expand(x) expand(x once) -#define foo(x) x -foo(expand(just)) diff --git a/src/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected b/src/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected deleted file mode 100644 index e804d7e4f9f..00000000000 --- a/src/glsl/glcpp/tests/037-finalize-unexpanded-macro.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -expand(just once) diff --git a/src/glsl/glcpp/tests/038-func-arg-with-commas.c b/src/glsl/glcpp/tests/038-func-arg-with-commas.c deleted file mode 100644 index 1407c7d6e3c..00000000000 --- a/src/glsl/glcpp/tests/038-func-arg-with-commas.c +++ /dev/null @@ -1,2 +0,0 @@ -#define foo(x) success -foo(argument (with,embedded , commas) -- tricky) diff --git a/src/glsl/glcpp/tests/038-func-arg-with-commas.c.expected b/src/glsl/glcpp/tests/038-func-arg-with-commas.c.expected deleted file mode 100644 index 6544adb3a25..00000000000 --- a/src/glsl/glcpp/tests/038-func-arg-with-commas.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -success diff --git a/src/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c b/src/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c deleted file mode 100644 index a7c053bb402..00000000000 --- a/src/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c +++ /dev/null @@ -1,24 +0,0 @@ -/* This works. */ -#define foo(a) (a) -#define bar two,words -foo(bar) - -/* So does this. */ -#define foo2(a,b) (a separate b) -#define foo2_wrap(a) foo2(a) -foo2_wrap(bar) - -/* But this generates an error. */ -#define foo_wrap(a) foo(a) -foo_wrap(bar) - -/* Adding parentheses to foo_wrap fixes it. 
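(Note on the test above: the walk-through below, which follows standard cpp expansion order and is not meant to be compiled, shows why foo_wrap(bar) fails while foo(bar) succeeds. Arguments are collected by counting commas and parentheses *before* any expansion, but each argument is macro-expanded before substitution, so the comma inside bar only becomes visible when a replacement list is rescanned:

    foo(bar)             /* collect: one argument, the single token `bar` */
      a := two,words     /* argument is macro-expanded first              */
      -> (two,words)     /* then substituted; parens already in place     */

    foo_wrap(bar)        /* same collection: one argument                 */
      a := two,words
      -> foo(two,words)  /* replacement list is rescanned...              */
                         /* ...foo now sees 2 arguments -> the error
                          * "invoked with 2 arguments (expected 1)"
                          * in the expected output below                  */
)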
*/ -#define foo_wrap_parens(a) foo((a)) -foo_wrap_parens(bar) - -/* As does adding parentheses to bar */ -#define bar_parens (two,words) -foo_wrap(bar_parens) -foo_wrap_parens(bar_parens) - - diff --git a/src/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected b/src/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected deleted file mode 100644 index 4cc795338b2..00000000000 --- a/src/glsl/glcpp/tests/039-func-arg-obj-macro-with-comma.c.expected +++ /dev/null @@ -1,26 +0,0 @@ -0:12(21): preprocessor error: Error: macro foo invoked with 2 arguments (expected 1) - - - - -(two,words) - - - - -(two separate words) - - - -foo(two,words) - - - -((two,words)) - - - -((two,words)) -(((two,words))) - - diff --git a/src/glsl/glcpp/tests/040-token-pasting.c b/src/glsl/glcpp/tests/040-token-pasting.c deleted file mode 100644 index caab3ba7368..00000000000 --- a/src/glsl/glcpp/tests/040-token-pasting.c +++ /dev/null @@ -1,2 +0,0 @@ -#define paste(a,b) a ## b -paste(one , token) diff --git a/src/glsl/glcpp/tests/040-token-pasting.c.expected b/src/glsl/glcpp/tests/040-token-pasting.c.expected deleted file mode 100644 index 48e836ec3fa..00000000000 --- a/src/glsl/glcpp/tests/040-token-pasting.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -onetoken diff --git a/src/glsl/glcpp/tests/041-if-0.c b/src/glsl/glcpp/tests/041-if-0.c deleted file mode 100644 index 2cab677d3e8..00000000000 --- a/src/glsl/glcpp/tests/041-if-0.c +++ /dev/null @@ -1,5 +0,0 @@ -success_1 -#if 0 -failure -#endif -success_2 diff --git a/src/glsl/glcpp/tests/041-if-0.c.expected b/src/glsl/glcpp/tests/041-if-0.c.expected deleted file mode 100644 index 8b506b32d55..00000000000 --- a/src/glsl/glcpp/tests/041-if-0.c.expected +++ /dev/null @@ -1,5 +0,0 @@ -success_1 - - - -success_2 diff --git a/src/glsl/glcpp/tests/042-if-1.c b/src/glsl/glcpp/tests/042-if-1.c deleted file mode 100644 index 874a25cf41b..00000000000 --- a/src/glsl/glcpp/tests/042-if-1.c +++ /dev/null @@ -1,5 +0,0 @@ -success_1 -#if 1 -success_2 -#endif -success_3 diff --git a/src/glsl/glcpp/tests/042-if-1.c.expected b/src/glsl/glcpp/tests/042-if-1.c.expected deleted file mode 100644 index a6ae9465a97..00000000000 --- a/src/glsl/glcpp/tests/042-if-1.c.expected +++ /dev/null @@ -1,5 +0,0 @@ -success_1 - -success_2 - -success_3 diff --git a/src/glsl/glcpp/tests/043-if-0-else.c b/src/glsl/glcpp/tests/043-if-0-else.c deleted file mode 100644 index 323351f9dbf..00000000000 --- a/src/glsl/glcpp/tests/043-if-0-else.c +++ /dev/null @@ -1,7 +0,0 @@ -success_1 -#if 0 -failure -#else -success_2 -#endif -success_3 diff --git a/src/glsl/glcpp/tests/043-if-0-else.c.expected b/src/glsl/glcpp/tests/043-if-0-else.c.expected deleted file mode 100644 index 3d7e6be96c8..00000000000 --- a/src/glsl/glcpp/tests/043-if-0-else.c.expected +++ /dev/null @@ -1,7 +0,0 @@ -success_1 - - - -success_2 - -success_3 diff --git a/src/glsl/glcpp/tests/044-if-1-else.c b/src/glsl/glcpp/tests/044-if-1-else.c deleted file mode 100644 index 28dfc25c6f0..00000000000 --- a/src/glsl/glcpp/tests/044-if-1-else.c +++ /dev/null @@ -1,7 +0,0 @@ -success_1 -#if 1 -success_2 -#else -failure -#endif -success_3 diff --git a/src/glsl/glcpp/tests/044-if-1-else.c.expected b/src/glsl/glcpp/tests/044-if-1-else.c.expected deleted file mode 100644 index 4a31e1cfa9e..00000000000 --- a/src/glsl/glcpp/tests/044-if-1-else.c.expected +++ /dev/null @@ -1,7 +0,0 @@ -success_1 - -success_2 - - - -success_3 diff --git a/src/glsl/glcpp/tests/045-if-0-elif.c b/src/glsl/glcpp/tests/045-if-0-elif.c deleted file mode 100644 index 
e50f686d461..00000000000 --- a/src/glsl/glcpp/tests/045-if-0-elif.c +++ /dev/null @@ -1,11 +0,0 @@ -success_1 -#if 0 -failure_1 -#elif 0 -failure_2 -#elif 1 -success_3 -#elif 1 -failure_3 -#endif -success_4 diff --git a/src/glsl/glcpp/tests/045-if-0-elif.c.expected b/src/glsl/glcpp/tests/045-if-0-elif.c.expected deleted file mode 100644 index a9bb1588e4f..00000000000 --- a/src/glsl/glcpp/tests/045-if-0-elif.c.expected +++ /dev/null @@ -1,11 +0,0 @@ -success_1 - - - - - -success_3 - - - -success_4 diff --git a/src/glsl/glcpp/tests/046-if-1-elsif.c b/src/glsl/glcpp/tests/046-if-1-elsif.c deleted file mode 100644 index 130515a01ea..00000000000 --- a/src/glsl/glcpp/tests/046-if-1-elsif.c +++ /dev/null @@ -1,11 +0,0 @@ -success_1 -#if 1 -success_2 -#elif 0 -failure_1 -#elif 1 -failure_2 -#elif 0 -failure_3 -#endif -success_3 diff --git a/src/glsl/glcpp/tests/046-if-1-elsif.c.expected b/src/glsl/glcpp/tests/046-if-1-elsif.c.expected deleted file mode 100644 index a4995713ca5..00000000000 --- a/src/glsl/glcpp/tests/046-if-1-elsif.c.expected +++ /dev/null @@ -1,11 +0,0 @@ -success_1 - -success_2 - - - - - - - -success_3 diff --git a/src/glsl/glcpp/tests/047-if-elif-else.c b/src/glsl/glcpp/tests/047-if-elif-else.c deleted file mode 100644 index e8f0838a9ed..00000000000 --- a/src/glsl/glcpp/tests/047-if-elif-else.c +++ /dev/null @@ -1,11 +0,0 @@ -success_1 -#if 0 -failure_1 -#elif 0 -failure_2 -#elif 0 -failure_3 -#else -success_2 -#endif -success_3 diff --git a/src/glsl/glcpp/tests/047-if-elif-else.c.expected b/src/glsl/glcpp/tests/047-if-elif-else.c.expected deleted file mode 100644 index 54d30861197..00000000000 --- a/src/glsl/glcpp/tests/047-if-elif-else.c.expected +++ /dev/null @@ -1,11 +0,0 @@ -success_1 - - - - - - - -success_2 - -success_3 diff --git a/src/glsl/glcpp/tests/048-if-nested.c b/src/glsl/glcpp/tests/048-if-nested.c deleted file mode 100644 index fc4679c3be4..00000000000 --- a/src/glsl/glcpp/tests/048-if-nested.c +++ /dev/null @@ -1,11 +0,0 @@ -success_1 -#if 0 -failure_1 -#if 1 -failure_2 -#else -failure_3 -#endif -failure_4 -#endif -success_2 diff --git a/src/glsl/glcpp/tests/048-if-nested.c.expected b/src/glsl/glcpp/tests/048-if-nested.c.expected deleted file mode 100644 index 8beb9c32c37..00000000000 --- a/src/glsl/glcpp/tests/048-if-nested.c.expected +++ /dev/null @@ -1,11 +0,0 @@ -success_1 - - - - - - - - - -success_2 diff --git a/src/glsl/glcpp/tests/049-if-expression-precedence.c b/src/glsl/glcpp/tests/049-if-expression-precedence.c deleted file mode 100644 index 833ea03882a..00000000000 --- a/src/glsl/glcpp/tests/049-if-expression-precedence.c +++ /dev/null @@ -1,5 +0,0 @@ -#if 1 + 2 * 3 + - (25 % 17 - + 1) -failure with operator precedence -#else -success -#endif diff --git a/src/glsl/glcpp/tests/049-if-expression-precedence.c.expected b/src/glsl/glcpp/tests/049-if-expression-precedence.c.expected deleted file mode 100644 index 729bdd15f80..00000000000 --- a/src/glsl/glcpp/tests/049-if-expression-precedence.c.expected +++ /dev/null @@ -1,5 +0,0 @@ - - - -success - diff --git a/src/glsl/glcpp/tests/050-if-defined.c b/src/glsl/glcpp/tests/050-if-defined.c deleted file mode 100644 index 34f0f95140e..00000000000 --- a/src/glsl/glcpp/tests/050-if-defined.c +++ /dev/null @@ -1,17 +0,0 @@ -#if defined foo -failure_1 -#else -success_1 -#endif -#define foo -#if defined foo -success_2 -#else -failure_2 -#endif -#undef foo -#if defined foo -failure_3 -#else -success_3 -#endif diff --git a/src/glsl/glcpp/tests/050-if-defined.c.expected 
b/src/glsl/glcpp/tests/050-if-defined.c.expected deleted file mode 100644 index 737eb8d9403..00000000000 --- a/src/glsl/glcpp/tests/050-if-defined.c.expected +++ /dev/null @@ -1,17 +0,0 @@ - - - -success_1 - - - -success_2 - - - - - - - -success_3 - diff --git a/src/glsl/glcpp/tests/051-if-relational.c b/src/glsl/glcpp/tests/051-if-relational.c deleted file mode 100644 index c3db488e0de..00000000000 --- a/src/glsl/glcpp/tests/051-if-relational.c +++ /dev/null @@ -1,35 +0,0 @@ -#if 3 < 2 -failure_1 -#else -success_1 -#endif - -#if 3 >= 2 -success_2 -#else -failure_2 -#endif - -#if 2 + 3 <= 5 -success_3 -#else -failure_3 -#endif - -#if 3 - 2 == 1 -success_3 -#else -failure_3 -#endif - -#if 1 > 3 -failure_4 -#else -success_4 -#endif - -#if 1 != 5 -success_5 -#else -failure_5 -#endif diff --git a/src/glsl/glcpp/tests/051-if-relational.c.expected b/src/glsl/glcpp/tests/051-if-relational.c.expected deleted file mode 100644 index 652fefdd43b..00000000000 --- a/src/glsl/glcpp/tests/051-if-relational.c.expected +++ /dev/null @@ -1,35 +0,0 @@ - - - -success_1 - - - -success_2 - - - - - -success_3 - - - - - -success_3 - - - - - - - -success_4 - - - -success_5 - - - diff --git a/src/glsl/glcpp/tests/052-if-bitwise.c b/src/glsl/glcpp/tests/052-if-bitwise.c deleted file mode 100644 index 2d8e45eb61e..00000000000 --- a/src/glsl/glcpp/tests/052-if-bitwise.c +++ /dev/null @@ -1,20 +0,0 @@ -#if (0xaaaaaaaa | 0x55555555) != 4294967295 -failure_1 -#else -success_1 -#endif -#if (0x12345678 ^ 0xfdecba98) == 4023971040 -success_2 -#else -failure_2 -#endif -#if (~ 0xdeadbeef) != -3735928560 -failure_3 -#else -success_3 -#endif -#if (0667 & 0733) == 403 -success_4 -#else -failure_4 -#endif diff --git a/src/glsl/glcpp/tests/052-if-bitwise.c.expected b/src/glsl/glcpp/tests/052-if-bitwise.c.expected deleted file mode 100644 index 44e52b206e5..00000000000 --- a/src/glsl/glcpp/tests/052-if-bitwise.c.expected +++ /dev/null @@ -1,20 +0,0 @@ - - - -success_1 - - -success_2 - - - - - - -success_3 - - -success_4 - - - diff --git a/src/glsl/glcpp/tests/053-if-divide-and-shift.c b/src/glsl/glcpp/tests/053-if-divide-and-shift.c deleted file mode 100644 index d24c54a88d1..00000000000 --- a/src/glsl/glcpp/tests/053-if-divide-and-shift.c +++ /dev/null @@ -1,15 +0,0 @@ -#if (15 / 2) != 7 -failure_1 -#else -success_1 -#endif -#if (1 << 12) == 4096 -success_2 -#else -failure_2 -#endif -#if (31762 >> 8) != 124 -failure_3 -#else -success_3 -#endif diff --git a/src/glsl/glcpp/tests/053-if-divide-and-shift.c.expected b/src/glsl/glcpp/tests/053-if-divide-and-shift.c.expected deleted file mode 100644 index 7e78e0454e0..00000000000 --- a/src/glsl/glcpp/tests/053-if-divide-and-shift.c.expected +++ /dev/null @@ -1,15 +0,0 @@ - - - -success_1 - - -success_2 - - - - - - -success_3 - diff --git a/src/glsl/glcpp/tests/054-if-with-macros.c b/src/glsl/glcpp/tests/054-if-with-macros.c deleted file mode 100644 index 3da79a0d96e..00000000000 --- a/src/glsl/glcpp/tests/054-if-with-macros.c +++ /dev/null @@ -1,34 +0,0 @@ -#define one 1 -#define two 2 -#define three 3 -#define five 5 -#if five < two -failure_1 -#else -success_1 -#endif -#if three >= two -success_2 -#else -failure_2 -#endif -#if two + three <= five -success_3 -#else -failure_3 -#endif -#if five - two == three -success_4 -#else -failure_4 -#endif -#if one > three -failure_5 -#else -success_5 -#endif -#if one != five -success_6 -#else -failure_6 -#endif diff --git a/src/glsl/glcpp/tests/054-if-with-macros.c.expected b/src/glsl/glcpp/tests/054-if-with-macros.c.expected deleted file mode 
100644 index 70f737c90a9..00000000000 --- a/src/glsl/glcpp/tests/054-if-with-macros.c.expected +++ /dev/null @@ -1,34 +0,0 @@ - - - - - - - -success_1 - - -success_2 - - - - -success_3 - - - - -success_4 - - - - - - -success_5 - - -success_6 - - - diff --git a/src/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c b/src/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c deleted file mode 100644 index 00f2c2346d6..00000000000 --- a/src/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c +++ /dev/null @@ -1,3 +0,0 @@ -#define failure() success -#define foo failure -foo() diff --git a/src/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected b/src/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected deleted file mode 100644 index 94c15f95059..00000000000 --- a/src/glsl/glcpp/tests/055-define-chain-obj-to-func-parens-in-text.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -success diff --git a/src/glsl/glcpp/tests/056-macro-argument-with-comma.c b/src/glsl/glcpp/tests/056-macro-argument-with-comma.c deleted file mode 100644 index 58701d1f25b..00000000000 --- a/src/glsl/glcpp/tests/056-macro-argument-with-comma.c +++ /dev/null @@ -1,4 +0,0 @@ -#define bar with,embedded,commas -#define function(x) success -#define foo function -foo(bar) diff --git a/src/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected b/src/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected deleted file mode 100644 index bed826e7831..00000000000 --- a/src/glsl/glcpp/tests/056-macro-argument-with-comma.c.expected +++ /dev/null @@ -1,4 +0,0 @@ - - - -success diff --git a/src/glsl/glcpp/tests/057-empty-arguments.c b/src/glsl/glcpp/tests/057-empty-arguments.c deleted file mode 100644 index 6140232865d..00000000000 --- a/src/glsl/glcpp/tests/057-empty-arguments.c +++ /dev/null @@ -1,6 +0,0 @@ -#define zero() success -zero() -#define one(x) success -one() -#define two(x,y) success -two(,) diff --git a/src/glsl/glcpp/tests/057-empty-arguments.c.expected b/src/glsl/glcpp/tests/057-empty-arguments.c.expected deleted file mode 100644 index 7d97e15e29d..00000000000 --- a/src/glsl/glcpp/tests/057-empty-arguments.c.expected +++ /dev/null @@ -1,6 +0,0 @@ - -success - -success - -success diff --git a/src/glsl/glcpp/tests/058-token-pasting-empty-arguments.c b/src/glsl/glcpp/tests/058-token-pasting-empty-arguments.c deleted file mode 100644 index 8ac260c76b6..00000000000 --- a/src/glsl/glcpp/tests/058-token-pasting-empty-arguments.c +++ /dev/null @@ -1,5 +0,0 @@ -#define paste(x,y) x ## y -paste(a,b) -paste(a,) -paste(,b) -paste(,) diff --git a/src/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected b/src/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected deleted file mode 100644 index e0967a1b951..00000000000 --- a/src/glsl/glcpp/tests/058-token-pasting-empty-arguments.c.expected +++ /dev/null @@ -1,5 +0,0 @@ - -ab -a -b - diff --git a/src/glsl/glcpp/tests/059-token-pasting-integer.c b/src/glsl/glcpp/tests/059-token-pasting-integer.c deleted file mode 100644 index 37b895a4237..00000000000 --- a/src/glsl/glcpp/tests/059-token-pasting-integer.c +++ /dev/null @@ -1,4 +0,0 @@ -#define paste(x,y) x ## y -paste(1,2) -paste(1,000) -paste(identifier,2) diff --git a/src/glsl/glcpp/tests/059-token-pasting-integer.c.expected b/src/glsl/glcpp/tests/059-token-pasting-integer.c.expected deleted file mode 100644 index f1288aa7cb7..00000000000 --- a/src/glsl/glcpp/tests/059-token-pasting-integer.c.expected +++ /dev/null @@ -1,4 +0,0 @@ - -12 -1000 
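(Tests 055-059 above pin down two expansion rules: an object-like macro whose replacement ends in the name of a function-like macro picks up that macro's argument list from the text following its use, and "##" must cope with empty and purely numeric arguments. A minimal sketch of the pasting cases, written as ordinary glcpp input rather than taken from any one test:

    #define paste(x,y) x ## y
    paste(1,000)   /* "1" ## "000" forms the single pp-number 1000 */
    paste(a,)      /* an empty argument pastes away, leaving just: a */

Feeding such a file to the standalone glcpp driver should reproduce the matching .expected output, give or take the blank lines the directives leave behind.)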
-identifier2 diff --git a/src/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c b/src/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c deleted file mode 100644 index ed80ea879ce..00000000000 --- a/src/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c +++ /dev/null @@ -1,3 +0,0 @@ -#define double(a) a*2 -#define foo double( -foo 5) diff --git a/src/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected b/src/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected deleted file mode 100644 index 3e5501aa6e8..00000000000 --- a/src/glsl/glcpp/tests/060-left-paren-in-macro-right-paren-in-text.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -5*2 diff --git a/src/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c b/src/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c deleted file mode 100644 index 6dbfd1f62d1..00000000000 --- a/src/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c +++ /dev/null @@ -1,5 +0,0 @@ -#define foo(x) success -#define bar foo -#define baz bar -#define joe baz -joe (failure) diff --git a/src/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected b/src/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected deleted file mode 100644 index 15eb64b97f1..00000000000 --- a/src/glsl/glcpp/tests/061-define-chain-obj-to-func-multi.c.expected +++ /dev/null @@ -1,5 +0,0 @@ - - - - -success diff --git a/src/glsl/glcpp/tests/062-if-0-skips-garbage.c b/src/glsl/glcpp/tests/062-if-0-skips-garbage.c deleted file mode 100644 index d9e439bb890..00000000000 --- a/src/glsl/glcpp/tests/062-if-0-skips-garbage.c +++ /dev/null @@ -1,5 +0,0 @@ -#define foo(a,b) -#if 0 -foo(bar) -foo( -#endif diff --git a/src/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected b/src/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected deleted file mode 100644 index 3f2ff2d6cc8..00000000000 --- a/src/glsl/glcpp/tests/062-if-0-skips-garbage.c.expected +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/src/glsl/glcpp/tests/063-comments.c b/src/glsl/glcpp/tests/063-comments.c deleted file mode 100644 index e641d2f0f9e..00000000000 --- a/src/glsl/glcpp/tests/063-comments.c +++ /dev/null @@ -1,20 +0,0 @@ -/* this is a comment */ -// so is this -// */ -f = g/**//h; -/*//*/l(); -m = n//**/o -+ p; -/* this -comment spans -multiple lines and -contains *** stars -and slashes / *** / -and other stuff. -****/ -more code here -/* Test that /* nested - comments */ -are not treated like comments. -/*/ this is a comment */ -/*/*/ diff --git a/src/glsl/glcpp/tests/063-comments.c.expected b/src/glsl/glcpp/tests/063-comments.c.expected deleted file mode 100644 index f6e10ce0377..00000000000 --- a/src/glsl/glcpp/tests/063-comments.c.expected +++ /dev/null @@ -1,20 +0,0 @@ - - - -f = g /h; - l(); -m = n -+ p; - - - - - - - -more code here - - -are not treated like comments. 
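(Test 063 above is the comment torture test: every comment, however nested-looking, is replaced by a single space before anything else happens. That single space is exactly why "f = g/**//h;" comes out as "f = g /h;" instead of starting a line comment, and why "/* nested /* comments */" terminate at the first "*/". The behavior in isolation, as plain glcpp input:

    f = g/**//h;   /* the comment becomes a space, so no false "//" forms */
    m = n//**/o    /* the "//" wins: the rest of the line is discarded    */

Both lines and their outputs appear verbatim in the test above.)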
- - diff --git a/src/glsl/glcpp/tests/064-version.c b/src/glsl/glcpp/tests/064-version.c deleted file mode 100644 index 21326481b87..00000000000 --- a/src/glsl/glcpp/tests/064-version.c +++ /dev/null @@ -1,2 +0,0 @@ -#version 130 -#define FOO diff --git a/src/glsl/glcpp/tests/064-version.c.expected b/src/glsl/glcpp/tests/064-version.c.expected deleted file mode 100644 index 4036b1ee374..00000000000 --- a/src/glsl/glcpp/tests/064-version.c.expected +++ /dev/null @@ -1,2 +0,0 @@ -#version 130 - diff --git a/src/glsl/glcpp/tests/065-if-defined-parens.c b/src/glsl/glcpp/tests/065-if-defined-parens.c deleted file mode 100644 index 48aa0f8c3ef..00000000000 --- a/src/glsl/glcpp/tests/065-if-defined-parens.c +++ /dev/null @@ -1,17 +0,0 @@ -#if defined(foo) -failure_1 -#else -success_1 -#endif -#define foo -#if defined ( foo ) -success_2 -#else -failure_2 -#endif -#undef foo -#if defined (foo) -failure_3 -#else -success_3 -#endif diff --git a/src/glsl/glcpp/tests/065-if-defined-parens.c.expected b/src/glsl/glcpp/tests/065-if-defined-parens.c.expected deleted file mode 100644 index 737eb8d9403..00000000000 --- a/src/glsl/glcpp/tests/065-if-defined-parens.c.expected +++ /dev/null @@ -1,17 +0,0 @@ - - - -success_1 - - - -success_2 - - - - - - - -success_3 - diff --git a/src/glsl/glcpp/tests/066-if-nospace-expression.c b/src/glsl/glcpp/tests/066-if-nospace-expression.c deleted file mode 100644 index 3b0b47349d0..00000000000 --- a/src/glsl/glcpp/tests/066-if-nospace-expression.c +++ /dev/null @@ -1,3 +0,0 @@ -#if(1) -success -#endif diff --git a/src/glsl/glcpp/tests/066-if-nospace-expression.c.expected b/src/glsl/glcpp/tests/066-if-nospace-expression.c.expected deleted file mode 100644 index 5a28fb3b66c..00000000000 --- a/src/glsl/glcpp/tests/066-if-nospace-expression.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - -success - diff --git a/src/glsl/glcpp/tests/067-nested-ifdef-ifndef.c b/src/glsl/glcpp/tests/067-nested-ifdef-ifndef.c deleted file mode 100644 index f46cce4e60a..00000000000 --- a/src/glsl/glcpp/tests/067-nested-ifdef-ifndef.c +++ /dev/null @@ -1,40 +0,0 @@ -#define D1 -#define D2 - -#define result success - -#ifdef U1 -#ifdef U2 -#undef result -#define result failure -#endif -#endif -result - -#ifndef D1 -#ifndef D2 -#undef result -#define result failure -#endif -#endif -result - -#undef result -#define result failure -#ifdef D1 -#ifdef D2 -#undef result -#define result success -#endif -#endif -result - -#undef result -#define result failure -#ifndef U1 -#ifndef U2 -#undef result -#define result success -#endif -#endif -result diff --git a/src/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected b/src/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected deleted file mode 100644 index 9a5ed2eb2d3..00000000000 --- a/src/glsl/glcpp/tests/067-nested-ifdef-ifndef.c.expected +++ /dev/null @@ -1,40 +0,0 @@ - - - - - - - - - - - -success - - - - - - - -success - - - - - - - - - -success - - - - - - - - - -success diff --git a/src/glsl/glcpp/tests/068-accidental-pasting.c b/src/glsl/glcpp/tests/068-accidental-pasting.c deleted file mode 100644 index 699ac5144e5..00000000000 --- a/src/glsl/glcpp/tests/068-accidental-pasting.c +++ /dev/null @@ -1,11 +0,0 @@ -#define empty -<empty< -<empty= ->empty> ->empty= -=empty= -!empty= -&empty& -|empty| -+empty+ --empty- diff --git a/src/glsl/glcpp/tests/068-accidental-pasting.c.expected b/src/glsl/glcpp/tests/068-accidental-pasting.c.expected deleted file mode 100644 index 27582cda5e8..00000000000 --- a/src/glsl/glcpp/tests/068-accidental-pasting.c.expected +++ /dev/null @@ -1,11 +0,0
@@ - -< < -< = -> > -> = -= = -! = -& & -| | -+ + -- - diff --git a/src/glsl/glcpp/tests/069-repeated-argument.c b/src/glsl/glcpp/tests/069-repeated-argument.c deleted file mode 100644 index 2b46ead294c..00000000000 --- a/src/glsl/glcpp/tests/069-repeated-argument.c +++ /dev/null @@ -1,2 +0,0 @@ -#define double(x) x x -double(1) diff --git a/src/glsl/glcpp/tests/069-repeated-argument.c.expected b/src/glsl/glcpp/tests/069-repeated-argument.c.expected deleted file mode 100644 index 8b4b095e488..00000000000 --- a/src/glsl/glcpp/tests/069-repeated-argument.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -1 1 diff --git a/src/glsl/glcpp/tests/070-undefined-macro-in-expression.c b/src/glsl/glcpp/tests/070-undefined-macro-in-expression.c deleted file mode 100644 index d15a4840b01..00000000000 --- a/src/glsl/glcpp/tests/070-undefined-macro-in-expression.c +++ /dev/null @@ -1,5 +0,0 @@ -#if UNDEFINED_MACRO -Failure -#else -Success -#endif diff --git a/src/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected b/src/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected deleted file mode 100644 index 44b93a434fa..00000000000 --- a/src/glsl/glcpp/tests/070-undefined-macro-in-expression.c.expected +++ /dev/null @@ -1,5 +0,0 @@ - - - -Success - diff --git a/src/glsl/glcpp/tests/071-punctuator.c b/src/glsl/glcpp/tests/071-punctuator.c deleted file mode 100644 index 959d6825988..00000000000 --- a/src/glsl/glcpp/tests/071-punctuator.c +++ /dev/null @@ -1 +0,0 @@ -a = b diff --git a/src/glsl/glcpp/tests/071-punctuator.c.expected b/src/glsl/glcpp/tests/071-punctuator.c.expected deleted file mode 100644 index 959d6825988..00000000000 --- a/src/glsl/glcpp/tests/071-punctuator.c.expected +++ /dev/null @@ -1 +0,0 @@ -a = b diff --git a/src/glsl/glcpp/tests/072-token-pasting-same-line.c b/src/glsl/glcpp/tests/072-token-pasting-same-line.c deleted file mode 100644 index e421e9d5e29..00000000000 --- a/src/glsl/glcpp/tests/072-token-pasting-same-line.c +++ /dev/null @@ -1,2 +0,0 @@ -#define paste(x) success_ ## x -paste(1) paste(2) paste(3) diff --git a/src/glsl/glcpp/tests/072-token-pasting-same-line.c.expected b/src/glsl/glcpp/tests/072-token-pasting-same-line.c.expected deleted file mode 100644 index 7b80af7e465..00000000000 --- a/src/glsl/glcpp/tests/072-token-pasting-same-line.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -success_1 success_2 success_3 diff --git a/src/glsl/glcpp/tests/073-if-in-ifdef.c b/src/glsl/glcpp/tests/073-if-in-ifdef.c deleted file mode 100644 index 61a48097ca3..00000000000 --- a/src/glsl/glcpp/tests/073-if-in-ifdef.c +++ /dev/null @@ -1,4 +0,0 @@ -#ifdef UNDEF -#if UNDEF > 1 -#endif -#endif diff --git a/src/glsl/glcpp/tests/073-if-in-ifdef.c.expected b/src/glsl/glcpp/tests/073-if-in-ifdef.c.expected deleted file mode 100644 index fd40910d9e7..00000000000 --- a/src/glsl/glcpp/tests/073-if-in-ifdef.c.expected +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/src/glsl/glcpp/tests/074-elif-undef.c b/src/glsl/glcpp/tests/074-elif-undef.c deleted file mode 100644 index 67aac8977e0..00000000000 --- a/src/glsl/glcpp/tests/074-elif-undef.c +++ /dev/null @@ -1,3 +0,0 @@ -#ifndef UNDEF -#elif UNDEF < 0 -#endif diff --git a/src/glsl/glcpp/tests/074-elif-undef.c.expected b/src/glsl/glcpp/tests/074-elif-undef.c.expected deleted file mode 100644 index b28b04f6431..00000000000 --- a/src/glsl/glcpp/tests/074-elif-undef.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/src/glsl/glcpp/tests/075-elif-elif-undef.c b/src/glsl/glcpp/tests/075-elif-elif-undef.c deleted file mode 100644 index 
264bc4f10ee..00000000000 --- a/src/glsl/glcpp/tests/075-elif-elif-undef.c +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef UNDEF -#elif UNDEF < 0 -#elif UNDEF == 3 -#endif diff --git a/src/glsl/glcpp/tests/075-elif-elif-undef.c.expected b/src/glsl/glcpp/tests/075-elif-elif-undef.c.expected deleted file mode 100644 index fd40910d9e7..00000000000 --- a/src/glsl/glcpp/tests/075-elif-elif-undef.c.expected +++ /dev/null @@ -1,4 +0,0 @@ - - - - diff --git a/src/glsl/glcpp/tests/076-elif-undef-nested.c b/src/glsl/glcpp/tests/076-elif-undef-nested.c deleted file mode 100644 index ebd550ed005..00000000000 --- a/src/glsl/glcpp/tests/076-elif-undef-nested.c +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef UNDEF -#if UNDEF == 4 -#elif UNDEF == 5 -#endif -#endif diff --git a/src/glsl/glcpp/tests/076-elif-undef-nested.c.expected b/src/glsl/glcpp/tests/076-elif-undef-nested.c.expected deleted file mode 100644 index 3f2ff2d6cc8..00000000000 --- a/src/glsl/glcpp/tests/076-elif-undef-nested.c.expected +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/src/glsl/glcpp/tests/077-else-without-if.c b/src/glsl/glcpp/tests/077-else-without-if.c deleted file mode 100644 index 81f00bfe278..00000000000 --- a/src/glsl/glcpp/tests/077-else-without-if.c +++ /dev/null @@ -1 +0,0 @@ -#else diff --git a/src/glsl/glcpp/tests/077-else-without-if.c.expected b/src/glsl/glcpp/tests/077-else-without-if.c.expected deleted file mode 100644 index 69f34047033..00000000000 --- a/src/glsl/glcpp/tests/077-else-without-if.c.expected +++ /dev/null @@ -1,3 +0,0 @@ -0:1(1): preprocessor error: #else without #if - - diff --git a/src/glsl/glcpp/tests/078-elif-without-if.c b/src/glsl/glcpp/tests/078-elif-without-if.c deleted file mode 100644 index 60466b3890a..00000000000 --- a/src/glsl/glcpp/tests/078-elif-without-if.c +++ /dev/null @@ -1 +0,0 @@ -#elif defined FOO diff --git a/src/glsl/glcpp/tests/078-elif-without-if.c.expected b/src/glsl/glcpp/tests/078-elif-without-if.c.expected deleted file mode 100644 index b8e40ecc09b..00000000000 --- a/src/glsl/glcpp/tests/078-elif-without-if.c.expected +++ /dev/null @@ -1,3 +0,0 @@ -0:1(1): preprocessor error: #elif without #if - - diff --git a/src/glsl/glcpp/tests/079-endif-without-if.c b/src/glsl/glcpp/tests/079-endif-without-if.c deleted file mode 100644 index 69331c3ca9d..00000000000 --- a/src/glsl/glcpp/tests/079-endif-without-if.c +++ /dev/null @@ -1 +0,0 @@ -#endif diff --git a/src/glsl/glcpp/tests/079-endif-without-if.c.expected b/src/glsl/glcpp/tests/079-endif-without-if.c.expected deleted file mode 100644 index 7ae579dd25e..00000000000 --- a/src/glsl/glcpp/tests/079-endif-without-if.c.expected +++ /dev/null @@ -1,3 +0,0 @@ -0:1(1): preprocessor error: #endif without #if - - diff --git a/src/glsl/glcpp/tests/080-if-without-expression.c b/src/glsl/glcpp/tests/080-if-without-expression.c deleted file mode 100644 index a27ba36a366..00000000000 --- a/src/glsl/glcpp/tests/080-if-without-expression.c +++ /dev/null @@ -1,4 +0,0 @@ -/* Error message for unskipped #if with no expression. 
*/ -#if -#endif - diff --git a/src/glsl/glcpp/tests/080-if-without-expression.c.expected b/src/glsl/glcpp/tests/080-if-without-expression.c.expected deleted file mode 100644 index 2e4cd7323c5..00000000000 --- a/src/glsl/glcpp/tests/080-if-without-expression.c.expected +++ /dev/null @@ -1,5 +0,0 @@ -0:2(1): preprocessor error: #if with no expression - - - - diff --git a/src/glsl/glcpp/tests/081-elif-without-expression.c b/src/glsl/glcpp/tests/081-elif-without-expression.c deleted file mode 100644 index 79c78663dd3..00000000000 --- a/src/glsl/glcpp/tests/081-elif-without-expression.c +++ /dev/null @@ -1,3 +0,0 @@ -#if 0 -#elif -#endif diff --git a/src/glsl/glcpp/tests/081-elif-without-expression.c.expected b/src/glsl/glcpp/tests/081-elif-without-expression.c.expected deleted file mode 100644 index b607b849068..00000000000 --- a/src/glsl/glcpp/tests/081-elif-without-expression.c.expected +++ /dev/null @@ -1,4 +0,0 @@ -0:2(1): preprocessor error: #elif with no expression - - - diff --git a/src/glsl/glcpp/tests/082-invalid-paste.c b/src/glsl/glcpp/tests/082-invalid-paste.c deleted file mode 100644 index 8b84d50c3a0..00000000000 --- a/src/glsl/glcpp/tests/082-invalid-paste.c +++ /dev/null @@ -1,7 +0,0 @@ -#define PASTE(x,y) x ## y -PASTE(<,>) -PASTE(0,abc) -PASTE(1,=) -PASTE(2,@) -PASTE(3,-4) -PASTE(4,+5.2) diff --git a/src/glsl/glcpp/tests/082-invalid-paste.c.expected b/src/glsl/glcpp/tests/082-invalid-paste.c.expected deleted file mode 100644 index b48a2d6d296..00000000000 --- a/src/glsl/glcpp/tests/082-invalid-paste.c.expected +++ /dev/null @@ -1,19 +0,0 @@ -0:2(7): preprocessor error: -Pasting "<" and ">" does not give a valid preprocessing token. -0:3(7): preprocessor error: -Pasting "0" and "abc" does not give a valid preprocessing token. -0:4(7): preprocessor error: -Pasting "1" and "=" does not give a valid preprocessing token. -0:5(7): preprocessor error: -Pasting "2" and "@" does not give a valid preprocessing token. -0:6(7): preprocessor error: -Pasting "3" and "-" does not give a valid preprocessing token. -0:7(7): preprocessor error: -Pasting "4" and "+" does not give a valid preprocessing token. 
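(The six diagnostics above follow from macro arguments being token sequences rather than strings: in PASTE(3,-4) the argument "-4" is already two preprocessing tokens, so the paste is attempted between "3" and "-" alone. Judging from the results that follow, when a paste is invalid glcpp reports the two offending tokens and keeps only the left-hand one, so the surviving right-hand tokens trail along unpasted; that is how PASTE(3,-4) becomes 34 and PASTE(4,+5.2) becomes 45.2 rather than 3-4 and 4+5.2. A hypothetical check of the same rule:

    #define PASTE(x,y) x ## y
    PASTE(3,-4)   /* "3" ## "-" is invalid; "-" is dropped and "4" survives: 34 */
)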
- -< -0 -1 -2 -34 -45.2 diff --git a/src/glsl/glcpp/tests/083-unterminated-if.c b/src/glsl/glcpp/tests/083-unterminated-if.c deleted file mode 100644 index 91806350927..00000000000 --- a/src/glsl/glcpp/tests/083-unterminated-if.c +++ /dev/null @@ -1,2 +0,0 @@ -#if 1 - diff --git a/src/glsl/glcpp/tests/083-unterminated-if.c.expected b/src/glsl/glcpp/tests/083-unterminated-if.c.expected deleted file mode 100644 index 4659ab6fe67..00000000000 --- a/src/glsl/glcpp/tests/083-unterminated-if.c.expected +++ /dev/null @@ -1,4 +0,0 @@ -0:1(6): preprocessor error: Unterminated #if - - - diff --git a/src/glsl/glcpp/tests/084-unbalanced-parentheses.c b/src/glsl/glcpp/tests/084-unbalanced-parentheses.c deleted file mode 100644 index 0789ba5e525..00000000000 --- a/src/glsl/glcpp/tests/084-unbalanced-parentheses.c +++ /dev/null @@ -1,2 +0,0 @@ -#define FUNC(x) (2*(x)) -FUNC(23 diff --git a/src/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected b/src/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected deleted file mode 100644 index af49a37369d..00000000000 --- a/src/glsl/glcpp/tests/084-unbalanced-parentheses.c.expected +++ /dev/null @@ -1,2 +0,0 @@ -0:2(8): preprocessor error: syntax error, unexpected $end - diff --git a/src/glsl/glcpp/tests/085-incorrect-argument-count.c b/src/glsl/glcpp/tests/085-incorrect-argument-count.c deleted file mode 100644 index 91bea600612..00000000000 --- a/src/glsl/glcpp/tests/085-incorrect-argument-count.c +++ /dev/null @@ -1,5 +0,0 @@ -#define MULT(x,y) ((x)*(y)) -MULT() -MULT(1) -MULT(1,2,3) - diff --git a/src/glsl/glcpp/tests/085-incorrect-argument-count.c.expected b/src/glsl/glcpp/tests/085-incorrect-argument-count.c.expected deleted file mode 100644 index d23845bfd49..00000000000 --- a/src/glsl/glcpp/tests/085-incorrect-argument-count.c.expected +++ /dev/null @@ -1,11 +0,0 @@ -0:2(1): preprocessor error: Error: macro MULT invoked with 1 arguments (expected 2) - -0:3(1): preprocessor error: Error: macro MULT invoked with 1 arguments (expected 2) - -0:4(1): preprocessor error: Error: macro MULT invoked with 3 arguments (expected 2) - - -MULT() -MULT(1) -MULT(1,2,3) - diff --git a/src/glsl/glcpp/tests/086-reserved-macro-names.c b/src/glsl/glcpp/tests/086-reserved-macro-names.c deleted file mode 100644 index a6b7201f95d..00000000000 --- a/src/glsl/glcpp/tests/086-reserved-macro-names.c +++ /dev/null @@ -1,3 +0,0 @@ -#define __BAD reserved -#define GL_ALSO_BAD() also reserved -#define THIS__TOO__IS__BAD reserved diff --git a/src/glsl/glcpp/tests/086-reserved-macro-names.c.expected b/src/glsl/glcpp/tests/086-reserved-macro-names.c.expected deleted file mode 100644 index 38b089daec3..00000000000 --- a/src/glsl/glcpp/tests/086-reserved-macro-names.c.expected +++ /dev/null @@ -1,9 +0,0 @@ -0:1(9): preprocessor warning: Macro names containing "__" are reserved for use by the implementation. - -0:2(9): preprocessor error: Macro names starting with "GL_" are reserved. - -0:3(9): preprocessor warning: Macro names containing "__" are reserved for use by the implementation. 
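(Test 086 encodes the GLSL reservation rules precisely: a macro name containing "__" anywhere, not merely as a prefix, draws a warning, while a name starting with "GL_" is a hard error. With hypothetical names:

    #define __BAD 1               /* warning: "__" is reserved           */
    #define THIS__TOO__IS__BAD 1  /* warning: "__" anywhere is reserved  */
    #define GL_ANYTHING 1         /* error: the "GL_" prefix is reserved */
)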
- - - - diff --git a/src/glsl/glcpp/tests/087-if-comments.c b/src/glsl/glcpp/tests/087-if-comments.c deleted file mode 100644 index ce8dc43057f..00000000000 --- a/src/glsl/glcpp/tests/087-if-comments.c +++ /dev/null @@ -1,5 +0,0 @@ -#if (1 == 0) // dangerous comment -fail -#else -win -#endif diff --git a/src/glsl/glcpp/tests/087-if-comments.c.expected b/src/glsl/glcpp/tests/087-if-comments.c.expected deleted file mode 100644 index 2783a9c14d5..00000000000 --- a/src/glsl/glcpp/tests/087-if-comments.c.expected +++ /dev/null @@ -1,5 +0,0 @@ - - - -win - diff --git a/src/glsl/glcpp/tests/088-redefine-macro-legitimate.c b/src/glsl/glcpp/tests/088-redefine-macro-legitimate.c deleted file mode 100644 index 422c6546414..00000000000 --- a/src/glsl/glcpp/tests/088-redefine-macro-legitimate.c +++ /dev/null @@ -1,5 +0,0 @@ -#define abc 123 -#define abc 123 - -#define foo(x) ( x ) + 23 -#define foo(x) ( x ) + 23 diff --git a/src/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected b/src/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected deleted file mode 100644 index 3f2ff2d6cc8..00000000000 --- a/src/glsl/glcpp/tests/088-redefine-macro-legitimate.c.expected +++ /dev/null @@ -1,5 +0,0 @@ - - - - - diff --git a/src/glsl/glcpp/tests/089-redefine-macro-error.c b/src/glsl/glcpp/tests/089-redefine-macro-error.c deleted file mode 100644 index b3d1391e160..00000000000 --- a/src/glsl/glcpp/tests/089-redefine-macro-error.c +++ /dev/null @@ -1,17 +0,0 @@ -#define x y -#define x z - -#define abc 123 -#define abc() 123 - -#define foo() bar -#define foo(x) bar - -#define bar() baz -#define bar baz - -#define biff(a,b) a+b -#define biff(a,b,c) a+b - -#define oper(a,b) a+b -#define oper(a,b) a*b diff --git a/src/glsl/glcpp/tests/089-redefine-macro-error.c.expected b/src/glsl/glcpp/tests/089-redefine-macro-error.c.expected deleted file mode 100644 index a945161497f..00000000000 --- a/src/glsl/glcpp/tests/089-redefine-macro-error.c.expected +++ /dev/null @@ -1,29 +0,0 @@ -0:2(9): preprocessor error: Redefinition of macro x - -0:5(9): preprocessor error: Redefinition of macro abc - -0:8(9): preprocessor error: Redefinition of macro foo - -0:11(9): preprocessor error: Redefinition of macro bar - -0:14(9): preprocessor error: Redefinition of macro biff - -0:17(9): preprocessor error: Redefinition of macro oper - - - - - - - - - - - - - - - - - - diff --git a/src/glsl/glcpp/tests/090-hash-error.c b/src/glsl/glcpp/tests/090-hash-error.c deleted file mode 100644 index d19bb7faed8..00000000000 --- a/src/glsl/glcpp/tests/090-hash-error.c +++ /dev/null @@ -1 +0,0 @@ -#error human error diff --git a/src/glsl/glcpp/tests/090-hash-error.c.expected b/src/glsl/glcpp/tests/090-hash-error.c.expected deleted file mode 100644 index 876a6ea9cc5..00000000000 --- a/src/glsl/glcpp/tests/090-hash-error.c.expected +++ /dev/null @@ -1 +0,0 @@ -0:1(1): preprocessor error: #error human error diff --git a/src/glsl/glcpp/tests/091-hash-line.c b/src/glsl/glcpp/tests/091-hash-line.c deleted file mode 100644 index 26d70382a89..00000000000 --- a/src/glsl/glcpp/tests/091-hash-line.c +++ /dev/null @@ -1,14 +0,0 @@ -#line 0 -#error line 0 error -#line 25 -#error line 25 error -#line 0 1 -#error source 1, line 0 error -#line 30 2 -#error source 2, line 30 error -#line 45 2 /* A line with a comment */ -#define NINETY 90 -#define TWO 2 -#line NINETY TWO /* A #line line with macro expansion */ -#define FUNCTION_LIKE_MACRO(source, line) source line -#line FUNCTION_LIKE_MACRO(180,2) diff --git a/src/glsl/glcpp/tests/091-hash-line.c.expected 
b/src/glsl/glcpp/tests/091-hash-line.c.expected deleted file mode 100644 index ac9ab252f1e..00000000000 --- a/src/glsl/glcpp/tests/091-hash-line.c.expected +++ /dev/null @@ -1,14 +0,0 @@ -0:0(1): preprocessor error: #error line 0 error -0:25(1): preprocessor error: #error line 25 error -1:0(1): preprocessor error: #error source 1, line 0 error -2:30(1): preprocessor error: #error source 2, line 30 error -#line 0 -#line 25 -#line 0 1 -#line 30 2 -#line 45 2 - - -#line 90 2 - -#line 180 2 diff --git a/src/glsl/glcpp/tests/092-redefine-macro-error-2.c b/src/glsl/glcpp/tests/092-redefine-macro-error-2.c deleted file mode 100644 index 3c161a5c501..00000000000 --- a/src/glsl/glcpp/tests/092-redefine-macro-error-2.c +++ /dev/null @@ -1,5 +0,0 @@ -#define A -#define A 1 - -#define B 1 -#define B diff --git a/src/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected b/src/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected deleted file mode 100644 index 698294d91f2..00000000000 --- a/src/glsl/glcpp/tests/092-redefine-macro-error-2.c.expected +++ /dev/null @@ -1,9 +0,0 @@ -0:2(9): preprocessor error: Redefinition of macro A - -0:5(9): preprocessor error: Redefinition of macro B - - - - - - diff --git a/src/glsl/glcpp/tests/093-divide-by-zero.c b/src/glsl/glcpp/tests/093-divide-by-zero.c deleted file mode 100644 index bf65d4f5271..00000000000 --- a/src/glsl/glcpp/tests/093-divide-by-zero.c +++ /dev/null @@ -1,2 +0,0 @@ -#if (1 / 0) -#endif diff --git a/src/glsl/glcpp/tests/093-divide-by-zero.c.expected b/src/glsl/glcpp/tests/093-divide-by-zero.c.expected deleted file mode 100644 index a858870b794..00000000000 --- a/src/glsl/glcpp/tests/093-divide-by-zero.c.expected +++ /dev/null @@ -1,3 +0,0 @@ -0:1(12): preprocessor error: division by 0 in preprocessor directive - - diff --git a/src/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c b/src/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c deleted file mode 100644 index 04497b17913..00000000000 --- a/src/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c +++ /dev/null @@ -1,13 +0,0 @@ -/* glcpp is generating a division-by-zero error for this case. It's - * easy to argue that it should be short-circuiting the evaluation and - * not generating the diagnostic (which happens to be what gcc does). - * But it doesn't seem like we should force this behavior on our - * pre-processor, (and, as always, the GLSL specification of the - * pre-processor is too vague on this point). - * - * If a short-circuit evaluation optimization does get added to the - * pre-processor then it would legitimate to update the expected file - * for this test. 
-*/ -#if 1 || (1 / 0) -#endif diff --git a/src/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected b/src/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected deleted file mode 100644 index 570952b2454..00000000000 --- a/src/glsl/glcpp/tests/094-divide-by-zero-short-circuit.c.expected +++ /dev/null @@ -1,14 +0,0 @@ -0:12(17): preprocessor error: division by 0 in preprocessor directive - - - - - - - - - - - - - diff --git a/src/glsl/glcpp/tests/095-recursive-define.c b/src/glsl/glcpp/tests/095-recursive-define.c deleted file mode 100644 index 801d90ce2e3..00000000000 --- a/src/glsl/glcpp/tests/095-recursive-define.c +++ /dev/null @@ -1,3 +0,0 @@ -#define A(a, b) B(a, b) -#define C A(0, C) -C diff --git a/src/glsl/glcpp/tests/095-recursive-define.c.expected b/src/glsl/glcpp/tests/095-recursive-define.c.expected deleted file mode 100644 index 493ab091be7..00000000000 --- a/src/glsl/glcpp/tests/095-recursive-define.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - -B(0, C) diff --git a/src/glsl/glcpp/tests/096-paste-twice.c b/src/glsl/glcpp/tests/096-paste-twice.c deleted file mode 100644 index 8da756fcba4..00000000000 --- a/src/glsl/glcpp/tests/096-paste-twice.c +++ /dev/null @@ -1,3 +0,0 @@ -#define paste_twice(a,b,c) a ## b ## c -paste_twice(just, one, token) - diff --git a/src/glsl/glcpp/tests/096-paste-twice.c.expected b/src/glsl/glcpp/tests/096-paste-twice.c.expected deleted file mode 100644 index 96c57d2a6ec..00000000000 --- a/src/glsl/glcpp/tests/096-paste-twice.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - -justonetoken - diff --git a/src/glsl/glcpp/tests/097-paste-with-non-function-macro.c b/src/glsl/glcpp/tests/097-paste-with-non-function-macro.c deleted file mode 100644 index 0f46835c29e..00000000000 --- a/src/glsl/glcpp/tests/097-paste-with-non-function-macro.c +++ /dev/null @@ -1,3 +0,0 @@ -#define PASTE_MACRO one ## token -PASTE_MACRO - diff --git a/src/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected b/src/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected deleted file mode 100644 index 36f66992539..00000000000 --- a/src/glsl/glcpp/tests/097-paste-with-non-function-macro.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - -onetoken - diff --git a/src/glsl/glcpp/tests/098-elif-undefined.c b/src/glsl/glcpp/tests/098-elif-undefined.c deleted file mode 100644 index 1f520d4d432..00000000000 --- a/src/glsl/glcpp/tests/098-elif-undefined.c +++ /dev/null @@ -1,7 +0,0 @@ -#if 0 -Not this -#elif UNDEFINED_MACRO -Nor this -#else -Yes, this. -#endif diff --git a/src/glsl/glcpp/tests/098-elif-undefined.c.expected b/src/glsl/glcpp/tests/098-elif-undefined.c.expected deleted file mode 100644 index c6ef689ce4b..00000000000 --- a/src/glsl/glcpp/tests/098-elif-undefined.c.expected +++ /dev/null @@ -1,7 +0,0 @@ - - - - - -Yes, this. 
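(Test 095 above is the classic self-reference rule: while the macro C is being rescanned, its own name is excluded from further replacement, so expansion stops at the literal text B(0, C) instead of recursing forever. The same two lines behave identically under any conforming C preprocessor:

    #define A(a, b) B(a, b)
    #define C A(0, C)
    C   /* expands to A(0, C), then to B(0, C); the inner C is not re-expanded */
)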
- diff --git a/src/glsl/glcpp/tests/099-c99-example.c b/src/glsl/glcpp/tests/099-c99-example.c deleted file mode 100644 index d1976b1f265..00000000000 --- a/src/glsl/glcpp/tests/099-c99-example.c +++ /dev/null @@ -1,17 +0,0 @@ -#define x 3 -#define f(a) f(x * (a)) -#undef x -#define x 2 -#define g f -#define z z[0] -#define h g(~ -#define m(a) a(w) -#define w 0,1 -#define t(a) a -#define p() int -#define q(x) x -#define r(x,y) x ## y -f(y+1) + f(f(z)) % t(t(g)(0) + t)(1); -g(x +(3,4)-w) | h 5) & m - (f)^m(m); -p() i[q()] = { q(1), r(2,3), r(4,), r(,5), r(,)}; diff --git a/src/glsl/glcpp/tests/099-c99-example.c.expected b/src/glsl/glcpp/tests/099-c99-example.c.expected deleted file mode 100644 index 352bbff48f5..00000000000 --- a/src/glsl/glcpp/tests/099-c99-example.c.expected +++ /dev/null @@ -1,16 +0,0 @@ - - - - - - - - - - - - - -f(2 * (y+1)) + f(2 * (f(2 * (z[0])))) % f(2 * (0)) + t(1); -f(2 * (2 +(3,4)-0,1)) | f(2 * (~ 5)) & f(2 * (0,1))^m(0,1); -int i[] = { 1, 23, 4, 5, }; diff --git a/src/glsl/glcpp/tests/100-macro-with-colon.c b/src/glsl/glcpp/tests/100-macro-with-colon.c deleted file mode 100644 index 31dbb9a9edc..00000000000 --- a/src/glsl/glcpp/tests/100-macro-with-colon.c +++ /dev/null @@ -1,7 +0,0 @@ -#define one 1 -#define two 2 - -switch (1) { - case one + two: - break; -} diff --git a/src/glsl/glcpp/tests/100-macro-with-colon.c.expected b/src/glsl/glcpp/tests/100-macro-with-colon.c.expected deleted file mode 100644 index 09f1f417bdd..00000000000 --- a/src/glsl/glcpp/tests/100-macro-with-colon.c.expected +++ /dev/null @@ -1,7 +0,0 @@ - - - -switch (1) { - case 1 + 2: - break; -} diff --git a/src/glsl/glcpp/tests/101-macros-used-twice.c b/src/glsl/glcpp/tests/101-macros-used-twice.c deleted file mode 100644 index e1693805b6a..00000000000 --- a/src/glsl/glcpp/tests/101-macros-used-twice.c +++ /dev/null @@ -1,16 +0,0 @@ -#define object 1 -#define function(x) 1 - -#if object -once -#endif -#if object -twice -#endif - -#if function(0) -once -#endif -#if function(0) -once again -#endif diff --git a/src/glsl/glcpp/tests/101-macros-used-twice.c.expected b/src/glsl/glcpp/tests/101-macros-used-twice.c.expected deleted file mode 100644 index 1e0b30696cb..00000000000 --- a/src/glsl/glcpp/tests/101-macros-used-twice.c.expected +++ /dev/null @@ -1,16 +0,0 @@ - - - - -once - - -twice - - - -once - - -once again - diff --git a/src/glsl/glcpp/tests/102-garbage-after-endif.c b/src/glsl/glcpp/tests/102-garbage-after-endif.c deleted file mode 100644 index 301779eb948..00000000000 --- a/src/glsl/glcpp/tests/102-garbage-after-endif.c +++ /dev/null @@ -1,2 +0,0 @@ -#if 0 -#endif garbage diff --git a/src/glsl/glcpp/tests/102-garbage-after-endif.c.expected b/src/glsl/glcpp/tests/102-garbage-after-endif.c.expected deleted file mode 100644 index d9f3bdc9465..00000000000 --- a/src/glsl/glcpp/tests/102-garbage-after-endif.c.expected +++ /dev/null @@ -1,2 +0,0 @@ -0:2(8): preprocessor error: syntax error, unexpected IDENTIFIER, expecting NEWLINE - diff --git a/src/glsl/glcpp/tests/103-garbage-after-else-0.c b/src/glsl/glcpp/tests/103-garbage-after-else-0.c deleted file mode 100644 index c460feadf57..00000000000 --- a/src/glsl/glcpp/tests/103-garbage-after-else-0.c +++ /dev/null @@ -1,3 +0,0 @@ -#if 0 -#else garbage -#endif diff --git a/src/glsl/glcpp/tests/103-garbage-after-else-0.c.expected b/src/glsl/glcpp/tests/103-garbage-after-else-0.c.expected deleted file mode 100644 index b053b399775..00000000000 --- a/src/glsl/glcpp/tests/103-garbage-after-else-0.c.expected +++ /dev/null @@ -1,4 +0,0 @@ -0:2(7): 
preprocessor error: syntax error, unexpected IDENTIFIER, expecting NEWLINE -0:1(6): preprocessor error: Unterminated #if - - diff --git a/src/glsl/glcpp/tests/104-hash-line-followed-by-code.c b/src/glsl/glcpp/tests/104-hash-line-followed-by-code.c deleted file mode 100644 index 3fbeec48e4c..00000000000 --- a/src/glsl/glcpp/tests/104-hash-line-followed-by-code.c +++ /dev/null @@ -1,2 +0,0 @@ -#line 2 -int foo(); diff --git a/src/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected b/src/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected deleted file mode 100644 index 3fbeec48e4c..00000000000 --- a/src/glsl/glcpp/tests/104-hash-line-followed-by-code.c.expected +++ /dev/null @@ -1,2 +0,0 @@ -#line 2 -int foo(); diff --git a/src/glsl/glcpp/tests/105-multiline-hash-line.c b/src/glsl/glcpp/tests/105-multiline-hash-line.c deleted file mode 100644 index da156c6a596..00000000000 --- a/src/glsl/glcpp/tests/105-multiline-hash-line.c +++ /dev/null @@ -1,5 +0,0 @@ -#define X(x) x -#line X( \ - 1 \ - ) -#line 2 diff --git a/src/glsl/glcpp/tests/105-multiline-hash-line.c.expected b/src/glsl/glcpp/tests/105-multiline-hash-line.c.expected deleted file mode 100644 index 814cef1b8c5..00000000000 --- a/src/glsl/glcpp/tests/105-multiline-hash-line.c.expected +++ /dev/null @@ -1,5 +0,0 @@ - -#line 1 - - -#line 2 diff --git a/src/glsl/glcpp/tests/106-multiline-hash-if.c b/src/glsl/glcpp/tests/106-multiline-hash-if.c deleted file mode 100644 index 929e93e782f..00000000000 --- a/src/glsl/glcpp/tests/106-multiline-hash-if.c +++ /dev/null @@ -1,6 +0,0 @@ -#define X(x) x -#if X( \ - 1 \ - ) -int foo(); -#endif diff --git a/src/glsl/glcpp/tests/106-multiline-hash-if.c.expected b/src/glsl/glcpp/tests/106-multiline-hash-if.c.expected deleted file mode 100644 index 1c0cbc970f5..00000000000 --- a/src/glsl/glcpp/tests/106-multiline-hash-if.c.expected +++ /dev/null @@ -1,6 +0,0 @@ - - - - -int foo(); - diff --git a/src/glsl/glcpp/tests/107-multiline-hash-elif.c b/src/glsl/glcpp/tests/107-multiline-hash-elif.c deleted file mode 100644 index 8c1c67a4d6b..00000000000 --- a/src/glsl/glcpp/tests/107-multiline-hash-elif.c +++ /dev/null @@ -1,7 +0,0 @@ -#define X(x) x -#if 0 -#elif X( \ - 1 \ - ) -int foo(); -#endif diff --git a/src/glsl/glcpp/tests/107-multiline-hash-elif.c.expected b/src/glsl/glcpp/tests/107-multiline-hash-elif.c.expected deleted file mode 100644 index b0601d7ee42..00000000000 --- a/src/glsl/glcpp/tests/107-multiline-hash-elif.c.expected +++ /dev/null @@ -1,7 +0,0 @@ - - - - - -int foo(); - diff --git a/src/glsl/glcpp/tests/108-no-space-after-hash-version.c b/src/glsl/glcpp/tests/108-no-space-after-hash-version.c deleted file mode 100644 index 0ce36f2eb1b..00000000000 --- a/src/glsl/glcpp/tests/108-no-space-after-hash-version.c +++ /dev/null @@ -1 +0,0 @@ -#version110 diff --git a/src/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected b/src/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected deleted file mode 100644 index 4f4243f947f..00000000000 --- a/src/glsl/glcpp/tests/108-no-space-after-hash-version.c.expected +++ /dev/null @@ -1 +0,0 @@ -0:1(1): preprocessor error: Illegal non-directive after # diff --git a/src/glsl/glcpp/tests/109-no-space-after-hash-line.c b/src/glsl/glcpp/tests/109-no-space-after-hash-line.c deleted file mode 100644 index f52966a8e80..00000000000 --- a/src/glsl/glcpp/tests/109-no-space-after-hash-line.c +++ /dev/null @@ -1 +0,0 @@ -#line2 diff --git a/src/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected 
b/src/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected deleted file mode 100644 index 4f4243f947f..00000000000 --- a/src/glsl/glcpp/tests/109-no-space-after-hash-line.c.expected +++ /dev/null @@ -1 +0,0 @@ -0:1(1): preprocessor error: Illegal non-directive after # diff --git a/src/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c b/src/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c deleted file mode 100644 index 6d7d0f38d9a..00000000000 --- a/src/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c +++ /dev/null @@ -1,3 +0,0 @@ -#if 1 -#elif110 -#endif diff --git a/src/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected b/src/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected deleted file mode 100644 index 4d93de41dd3..00000000000 --- a/src/glsl/glcpp/tests/110-no-space-digits-after-hash-elif.c.expected +++ /dev/null @@ -1,3 +0,0 @@ -0:2(1): preprocessor error: Illegal non-directive after # - - diff --git a/src/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c b/src/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c deleted file mode 100644 index b3413371ec2..00000000000 --- a/src/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c +++ /dev/null @@ -1,19 +0,0 @@ -#if(1) -success -#endif - -#if+1 -success -#endif - -#if-1 -success -#endif - -#if!1 -success -#endif - -#if~1 -success -#endif diff --git a/src/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected b/src/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected deleted file mode 100644 index 5c005c393a9..00000000000 --- a/src/glsl/glcpp/tests/111-no-space-operator-after-hash-if.c.expected +++ /dev/null @@ -1,19 +0,0 @@ - -success - - - -success - - - -success - - - - - - - -success - diff --git a/src/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c b/src/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c deleted file mode 100644 index e8221bc49c3..00000000000 --- a/src/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c +++ /dev/null @@ -1,24 +0,0 @@ -#if 0 -#elif(1) -success -#endif - -#if 0 -#elif+1 -success -#endif - -#if 0 -#elif-1 -success -#endif - -#if 0 -#elif!1 -success -#endif - -#if 0 -#elif~1 -success -#endif diff --git a/src/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected b/src/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected deleted file mode 100644 index 86b37036b65..00000000000 --- a/src/glsl/glcpp/tests/112-no-space-operator-after-hash-elif.c.expected +++ /dev/null @@ -1,24 +0,0 @@ - - -success - - - - -success - - - - -success - - - - - - - - - -success - diff --git a/src/glsl/glcpp/tests/113-line-and-file-macros.c b/src/glsl/glcpp/tests/113-line-and-file-macros.c deleted file mode 100644 index 369c4879260..00000000000 --- a/src/glsl/glcpp/tests/113-line-and-file-macros.c +++ /dev/null @@ -1,7 +0,0 @@ -1. Number of dalmations: __LINE__ __FILE__ __LINE__ -2. Nominal visual acuity: __LINE__ __FILE__ / __LINE__ __FILE__ -3. Battle of Thermopylae, as film: __LINE__ __FILE__ __FILE__ -4. HTTP code for "Not Found": __LINE__ __FILE__ __LINE__ -5. Hexadecimal for 20560: __LINE__ __FILE__ __LINE__ __FILE__ -6: Zip code for Nortonville, KS: __LINE__ __LINE__ __FILE__ __LINE__ __FILE__ -7. 
James Bond, as a number: __FILE__ __FILE__ __LINE__ diff --git a/src/glsl/glcpp/tests/113-line-and-file-macros.c.expected b/src/glsl/glcpp/tests/113-line-and-file-macros.c.expected deleted file mode 100644 index 55bc788ffd7..00000000000 --- a/src/glsl/glcpp/tests/113-line-and-file-macros.c.expected +++ /dev/null @@ -1,7 +0,0 @@ -1. Number of dalmations: 1 0 1 -2. Nominal visual acuity: 2 0 / 2 0 -3. Battle of Thermopylae, as film: 3 0 0 -4. HTTP code for "Not Found": 4 0 4 -5. Hexadecimal for 20560: 5 0 5 0 -6: Zip code for Nortonville, KS: 6 6 0 6 0 -7. James Bond, as a number: 0 0 7 diff --git a/src/glsl/glcpp/tests/114-paste-integer-tokens.c b/src/glsl/glcpp/tests/114-paste-integer-tokens.c deleted file mode 100644 index d80d9c7ef4b..00000000000 --- a/src/glsl/glcpp/tests/114-paste-integer-tokens.c +++ /dev/null @@ -1,7 +0,0 @@ -#define PASTE3(a,b,c) a ## b ## c -#define PASTE4(a,b,c,d) a ## b ## c ## d -#define PASTE5(a,b,c,d,e) a ## b ## c ## d ## e -4. HTTP code for "Not Found": PASTE3(__LINE__, __FILE__ , __LINE__) -5. Hexadecimal for 20560: PASTE4(__LINE__, __FILE__, __LINE__, __FILE__) -6: Zip code for Nortonville, KS: PASTE5(__LINE__, __LINE__, __FILE__, __LINE__, __FILE__) -7. James Bond, as a number: PASTE3(__FILE__, __FILE__, __LINE__) diff --git a/src/glsl/glcpp/tests/114-paste-integer-tokens.c.expected b/src/glsl/glcpp/tests/114-paste-integer-tokens.c.expected deleted file mode 100644 index aa9711034a6..00000000000 --- a/src/glsl/glcpp/tests/114-paste-integer-tokens.c.expected +++ /dev/null @@ -1,7 +0,0 @@ - - - -4. HTTP code for "Not Found": 404 -5. Hexadecimal for 20560: 5050 -6: Zip code for Nortonville, KS: 66060 -7. James Bond, as a number: 007 diff --git a/src/glsl/glcpp/tests/115-line-continuations.c b/src/glsl/glcpp/tests/115-line-continuations.c deleted file mode 100644 index 105590d85ed..00000000000 --- a/src/glsl/glcpp/tests/115-line-continuations.c +++ /dev/null @@ -1,9 +0,0 @@ -// This comment continues to the next line, hiding the define \ -#define CONTINUATION_UNSUPPORTED - -#ifdef CONTINUATION_UNSUPPORTED -failure -#else -success -#endif - diff --git a/src/glsl/glcpp/tests/115-line-continuations.c.expected b/src/glsl/glcpp/tests/115-line-continuations.c.expected deleted file mode 100644 index 428b5e822c5..00000000000 --- a/src/glsl/glcpp/tests/115-line-continuations.c.expected +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - -success - - diff --git a/src/glsl/glcpp/tests/116-disable-line-continuations.c b/src/glsl/glcpp/tests/116-disable-line-continuations.c deleted file mode 100644 index 83d5ddf681b..00000000000 --- a/src/glsl/glcpp/tests/116-disable-line-continuations.c +++ /dev/null @@ -1,13 +0,0 @@ -// glcpp-args: --disable-line-continuations - -// This comments ends with a backslash \\ -#define NO_CONTINUATION - -#ifdef NO_CONTINUATION -success -#else -failure -#endif - - - diff --git a/src/glsl/glcpp/tests/116-disable-line-continuations.c.expected b/src/glsl/glcpp/tests/116-disable-line-continuations.c.expected deleted file mode 100644 index 5ca78928a61..00000000000 --- a/src/glsl/glcpp/tests/116-disable-line-continuations.c.expected +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - -success - - - - - - diff --git a/src/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c b/src/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c deleted file mode 100644 index 6a6f2829883..00000000000 --- a/src/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c +++ /dev/null @@ -1,12 +0,0 @@ -/* This test case is the minimal 
case to replicate the bug reported here: - * - * https://bugs.freedesktop.org/show_bug.cgi?id=65112 - * - * To trigger the bug, there must be a line-continuation sequence - * (backslash newline), then an additional newline character, and - * finally another backslash that is not part of a line-continuation - * sequence. - */ -\ - -/* \ */ diff --git a/src/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected b/src/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected deleted file mode 100644 index 8aaa04d28e4..00000000000 --- a/src/glsl/glcpp/tests/117-line-continuation-and-non-continuation-backslash.c.expected +++ /dev/null @@ -1,12 +0,0 @@ - - - - - - - - - - - - diff --git a/src/glsl/glcpp/tests/118-comment-becomes-space.c b/src/glsl/glcpp/tests/118-comment-becomes-space.c deleted file mode 100644 index 53e80394ab6..00000000000 --- a/src/glsl/glcpp/tests/118-comment-becomes-space.c +++ /dev/null @@ -1,4 +0,0 @@ -#define FOO first/* -*/second - -FOO diff --git a/src/glsl/glcpp/tests/118-comment-becomes-space.c.expected b/src/glsl/glcpp/tests/118-comment-becomes-space.c.expected deleted file mode 100644 index 1fa8135cb13..00000000000 --- a/src/glsl/glcpp/tests/118-comment-becomes-space.c.expected +++ /dev/null @@ -1,4 +0,0 @@ - - - -first second diff --git a/src/glsl/glcpp/tests/119-elif-after-else.c b/src/glsl/glcpp/tests/119-elif-after-else.c deleted file mode 100644 index 9b9e9233bcb..00000000000 --- a/src/glsl/glcpp/tests/119-elif-after-else.c +++ /dev/null @@ -1,6 +0,0 @@ -#if 0 -#else -int foo; -#elif 0 -int bar; -#endif diff --git a/src/glsl/glcpp/tests/119-elif-after-else.c.expected b/src/glsl/glcpp/tests/119-elif-after-else.c.expected deleted file mode 100644 index 636956799cd..00000000000 --- a/src/glsl/glcpp/tests/119-elif-after-else.c.expected +++ /dev/null @@ -1,7 +0,0 @@ -0:4(1): preprocessor error: #elif after #else - - -int foo; - -int bar; - diff --git a/src/glsl/glcpp/tests/120-undef-builtin.c b/src/glsl/glcpp/tests/120-undef-builtin.c deleted file mode 100644 index 49e7696613e..00000000000 --- a/src/glsl/glcpp/tests/120-undef-builtin.c +++ /dev/null @@ -1,3 +0,0 @@ -#undef __LINE__ -#undef __FILE__ -#undef __VERSION__ diff --git a/src/glsl/glcpp/tests/120-undef-builtin.c.expected b/src/glsl/glcpp/tests/120-undef-builtin.c.expected deleted file mode 100644 index 3b736df378e..00000000000 --- a/src/glsl/glcpp/tests/120-undef-builtin.c.expected +++ /dev/null @@ -1,6 +0,0 @@ -0:1(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. -0:2(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. -0:3(1): preprocessor error: Built-in (pre-defined) macro names cannot be undefined. - - - diff --git a/src/glsl/glcpp/tests/121-comment-bug-72686.c b/src/glsl/glcpp/tests/121-comment-bug-72686.c deleted file mode 100644 index 67ebe73e5cf..00000000000 --- a/src/glsl/glcpp/tests/121-comment-bug-72686.c +++ /dev/null @@ -1,2 +0,0 @@ -/* - */ // diff --git a/src/glsl/glcpp/tests/121-comment-bug-72686.c.expected b/src/glsl/glcpp/tests/121-comment-bug-72686.c.expected deleted file mode 100644 index 8cb7cb9891f..00000000000 --- a/src/glsl/glcpp/tests/121-comment-bug-72686.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - - diff --git a/src/glsl/glcpp/tests/122-redefine-whitespace.c b/src/glsl/glcpp/tests/122-redefine-whitespace.c deleted file mode 100644 index ae7ea09f67e..00000000000 --- a/src/glsl/glcpp/tests/122-redefine-whitespace.c +++ /dev/null @@ -1,16 +0,0 @@ -/* Original definitions. 
*/ -#define TWO ( 1+1 ) -#define FOUR (2 + 2) -#define SIX (3 + 3) - -/* Redefinitions with whitespace in same places, but different amounts, (so no - * error). */ -#define TWO ( 1+1 ) -#define FOUR (2 + 2) -#define SIX (3/*comment is whitespace*/+ /* collapsed */ /* to */ /* one */ /* space */ 3) - -/* Redefinitions with whitespace in different places. Each of these should - * trigger an error. */ -#define TWO (1 + 1) -#define FOUR ( 2+2 ) -#define SIX (/*not*/3 + 3/*expected*/) diff --git a/src/glsl/glcpp/tests/122-redefine-whitespace.c.expected b/src/glsl/glcpp/tests/122-redefine-whitespace.c.expected deleted file mode 100644 index 602bdef94c2..00000000000 --- a/src/glsl/glcpp/tests/122-redefine-whitespace.c.expected +++ /dev/null @@ -1,22 +0,0 @@ -0:14(9): preprocessor error: Redefinition of macro TWO - -0:15(9): preprocessor error: Redefinition of macro FOUR - -0:16(9): preprocessor error: Redefinition of macro SIX - - - - - - - - - - - - - - - - - diff --git a/src/glsl/glcpp/tests/123-garbage-after-else-1.c b/src/glsl/glcpp/tests/123-garbage-after-else-1.c deleted file mode 100644 index 0b341a381f8..00000000000 --- a/src/glsl/glcpp/tests/123-garbage-after-else-1.c +++ /dev/null @@ -1,3 +0,0 @@ -#if 1 -#else garbage -#endif diff --git a/src/glsl/glcpp/tests/123-garbage-after-else-1.c.expected b/src/glsl/glcpp/tests/123-garbage-after-else-1.c.expected deleted file mode 100644 index b053b399775..00000000000 --- a/src/glsl/glcpp/tests/123-garbage-after-else-1.c.expected +++ /dev/null @@ -1,4 +0,0 @@ -0:2(7): preprocessor error: syntax error, unexpected IDENTIFIER, expecting NEWLINE -0:1(6): preprocessor error: Unterminated #if - - diff --git a/src/glsl/glcpp/tests/124-preprocessing-numbers.c b/src/glsl/glcpp/tests/124-preprocessing-numbers.c deleted file mode 100644 index 947ba1885ec..00000000000 --- a/src/glsl/glcpp/tests/124-preprocessing-numbers.c +++ /dev/null @@ -1,37 +0,0 @@ -#define e THIS_SHOULD_NOT_BE_EXPANDED -#define E NOR_THIS -#define p NOT_THIS_EITHER -#define P AND_SURELY_NOT_THIS -#define OK CRAZY_BUT_TRUE_THIS_NEITHER - -/* This one is actually meant to be expanded */ -#define MUST_EXPAND GO - -/* The following are "preprocessing numbers" and should not trigger macro - * expansion. */ -1e -1OK - -/* These are also "preprocessing numbers", so no expansion */ -123e+OK -.23E+OK -1.3e-OK -12.E-OK -123p+OK -.23P+OK -1.3p-OK -12.P-OK -123..OK -.23.OK.OK - -/* Importantly, just before the MUST_EXPAND in each of these, the preceding - * "preprocessing number" ends and we have an actual expression. So the - * MUST_EXPAND macro must be expanded (who would have though?) in each case. 
*/ -123ef+MUST_EXPAND -.23E3-MUST_EXPAND -1.3e--MUST_EXPAND -12.E-&MUST_EXPAND -123p+OK+MUST_EXPAND -.23P+OK;MUST_EXPAND -1.3p-OK-MUST_EXPAND -12.P-OK&MUST_EXPAND diff --git a/src/glsl/glcpp/tests/124-preprocessing-numbers.c.expected b/src/glsl/glcpp/tests/124-preprocessing-numbers.c.expected deleted file mode 100644 index 6ec588862d6..00000000000 --- a/src/glsl/glcpp/tests/124-preprocessing-numbers.c.expected +++ /dev/null @@ -1,37 +0,0 @@ - - - - - - - - - - - -1e -1OK - - -123e+OK -.23E+OK -1.3e-OK -12.E-OK -123p+OK -.23P+OK -1.3p-OK -12.P-OK -123..OK -.23.OK.OK - - - - -123ef+GO -.23E3-GO -1.3e--GO -12.E-&GO -123p+OK+GO -.23P+OK;GO -1.3p-OK-GO -12.P-OK&GO diff --git a/src/glsl/glcpp/tests/125-es-short-circuit-undefined.c b/src/glsl/glcpp/tests/125-es-short-circuit-undefined.c deleted file mode 100644 index 4ee29f6d93f..00000000000 --- a/src/glsl/glcpp/tests/125-es-short-circuit-undefined.c +++ /dev/null @@ -1,27 +0,0 @@ -/* For GLSL in OpenGL ES, an undefined macro appearing in an #if or #elif - * expression, (other than as an argument to defined) is an error. - * - * Except in the case of a short-circuiting && or || operator, where the - * specification explicitly mandates that there be no error. - */ -#version 300 es - -/* These yield errors */ -#if NOT_DEFINED -#endif - -#if 0 -#elif ALSO_NOT_DEFINED -#endif - -/* But these yield no errors */ -#if 1 || STILL_NOT_DEFINED -Success -#endif - -#if 0 -#elif 0 && WILL_ANYONE_DEFINE_ANYTHING -#else -More success -#endif - diff --git a/src/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected b/src/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected deleted file mode 100644 index 616aa912e2f..00000000000 --- a/src/glsl/glcpp/tests/125-es-short-circuit-undefined.c.expected +++ /dev/null @@ -1,29 +0,0 @@ -0:10(16): preprocessor error: undefined macro NOT_DEFINED in expression (illegal in GLES) -0:14(23): preprocessor error: undefined macro ALSO_NOT_DEFINED in expression (illegal in GLES) - - - - - - -#version 300 es - - - - - - - - - - - -Success - - - - - -More success - - diff --git a/src/glsl/glcpp/tests/126-garbage-after-directive.c b/src/glsl/glcpp/tests/126-garbage-after-directive.c deleted file mode 100644 index 4c0d29000a3..00000000000 --- a/src/glsl/glcpp/tests/126-garbage-after-directive.c +++ /dev/null @@ -1,5 +0,0 @@ -#ifdef MACRO garbage -#endif - -#ifndef MORE garbage -#endif diff --git a/src/glsl/glcpp/tests/126-garbage-after-directive.c.expected b/src/glsl/glcpp/tests/126-garbage-after-directive.c.expected deleted file mode 100644 index 82a06f8a3b3..00000000000 --- a/src/glsl/glcpp/tests/126-garbage-after-directive.c.expected +++ /dev/null @@ -1,7 +0,0 @@ -0:1(14): preprocessor error: extra tokens at end of directive -0:4(14): preprocessor error: extra tokens at end of directive - - - - - diff --git a/src/glsl/glcpp/tests/127-pragma-empty.c b/src/glsl/glcpp/tests/127-pragma-empty.c deleted file mode 100644 index 0f9b0b3d38f..00000000000 --- a/src/glsl/glcpp/tests/127-pragma-empty.c +++ /dev/null @@ -1,3 +0,0 @@ -/* It seems an odd (and particularly useless) thing to have an empty pragma, - * but we probably shouldn't trigger an error in this case. 
*/ -#pragma diff --git a/src/glsl/glcpp/tests/127-pragma-empty.c.expected b/src/glsl/glcpp/tests/127-pragma-empty.c.expected deleted file mode 100644 index 92371a07c32..00000000000 --- a/src/glsl/glcpp/tests/127-pragma-empty.c.expected +++ /dev/null @@ -1,3 +0,0 @@ - - - diff --git a/src/glsl/glcpp/tests/128-space-before-hash.c b/src/glsl/glcpp/tests/128-space-before-hash.c deleted file mode 100644 index fba9596baf8..00000000000 --- a/src/glsl/glcpp/tests/128-space-before-hash.c +++ /dev/null @@ -1,21 +0,0 @@ - /* Any directive can be preceded by a space. */ - #version 300 - #pragma Testing spaces before hash - # - #line 3 - #define FOO - #ifdef FOO - yes - #endif - #if 0 - #elif defined FOO - yes again - #endif - #if 0 - #else - for the third time, yes! - #endif - #undef FOO - #ifndef FOO - yes, of course - #endif diff --git a/src/glsl/glcpp/tests/128-space-before-hash.c.expected b/src/glsl/glcpp/tests/128-space-before-hash.c.expected deleted file mode 100644 index 9babb6fb078..00000000000 --- a/src/glsl/glcpp/tests/128-space-before-hash.c.expected +++ /dev/null @@ -1,21 +0,0 @@ - -#version 300 -#pragma Testing spaces before hash - -#line 3 - - - yes - - - - yes again - - - - for the third time, yes! - - - - yes, of course - diff --git a/src/glsl/glcpp/tests/129-define-non-identifier.c b/src/glsl/glcpp/tests/129-define-non-identifier.c deleted file mode 100644 index a229179f18c..00000000000 --- a/src/glsl/glcpp/tests/129-define-non-identifier.c +++ /dev/null @@ -1 +0,0 @@ -#define 123 456 diff --git a/src/glsl/glcpp/tests/129-define-non-identifier.c.expected b/src/glsl/glcpp/tests/129-define-non-identifier.c.expected deleted file mode 100644 index fd0b41347fa..00000000000 --- a/src/glsl/glcpp/tests/129-define-non-identifier.c.expected +++ /dev/null @@ -1,2 +0,0 @@ -0:1(9): preprocessor error: #define followed by a non-identifier: 123 -0:1(9): preprocessor error: syntax error, unexpected INTEGER_STRING, expecting FUNC_IDENTIFIER or OBJ_IDENTIFIER diff --git a/src/glsl/glcpp/tests/130-define-comment.c b/src/glsl/glcpp/tests/130-define-comment.c deleted file mode 100644 index 33312362cc7..00000000000 --- a/src/glsl/glcpp/tests/130-define-comment.c +++ /dev/null @@ -1,2 +0,0 @@ -#define /*...*/ FUNC( /*...*/ x /*...*/ ) /*...*/ FOO( /*...*/ x /*...*/ ) -FUNC(bar) diff --git a/src/glsl/glcpp/tests/130-define-comment.c.expected b/src/glsl/glcpp/tests/130-define-comment.c.expected deleted file mode 100644 index d789e29d5a8..00000000000 --- a/src/glsl/glcpp/tests/130-define-comment.c.expected +++ /dev/null @@ -1,2 +0,0 @@ - -FOO( bar ) diff --git a/src/glsl/glcpp/tests/131-eof-without-newline.c b/src/glsl/glcpp/tests/131-eof-without-newline.c deleted file mode 100644 index 240292dad01..00000000000 --- a/src/glsl/glcpp/tests/131-eof-without-newline.c +++ /dev/null @@ -1 +0,0 @@ -this file ends with no newline \ No newline at end of file diff --git a/src/glsl/glcpp/tests/131-eof-without-newline.c.expected b/src/glsl/glcpp/tests/131-eof-without-newline.c.expected deleted file mode 100644 index 57800306322..00000000000 --- a/src/glsl/glcpp/tests/131-eof-without-newline.c.expected +++ /dev/null @@ -1 +0,0 @@ -this file ends with no newline diff --git a/src/glsl/glcpp/tests/132-eof-without-newline-define.c b/src/glsl/glcpp/tests/132-eof-without-newline-define.c deleted file mode 100644 index 6795e35ea00..00000000000 --- a/src/glsl/glcpp/tests/132-eof-without-newline-define.c +++ /dev/null @@ -1 +0,0 @@ -#define \ No newline at end of file diff --git 
a/src/glsl/glcpp/tests/132-eof-without-newline-define.c.expected b/src/glsl/glcpp/tests/132-eof-without-newline-define.c.expected deleted file mode 100644 index 341e5e2aa67..00000000000 --- a/src/glsl/glcpp/tests/132-eof-without-newline-define.c.expected +++ /dev/null @@ -1 +0,0 @@ -0:1(1): preprocessor error: #define without macro name diff --git a/src/glsl/glcpp/tests/133-eof-without-newline-comment.c b/src/glsl/glcpp/tests/133-eof-without-newline-comment.c deleted file mode 100644 index 56ec5f722c0..00000000000 --- a/src/glsl/glcpp/tests/133-eof-without-newline-comment.c +++ /dev/null @@ -1 +0,0 @@ -This file ends with no newline within a comment /* \ No newline at end of file diff --git a/src/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected b/src/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected deleted file mode 100644 index d186f48761f..00000000000 --- a/src/glsl/glcpp/tests/133-eof-without-newline-comment.c.expected +++ /dev/null @@ -1,2 +0,0 @@ -0:1(51): preprocessor error: Unterminated comment -This file ends with no newline within a comment diff --git a/src/glsl/glcpp/tests/134-hash-comment-directive.c b/src/glsl/glcpp/tests/134-hash-comment-directive.c deleted file mode 100644 index 3015f0e886e..00000000000 --- a/src/glsl/glcpp/tests/134-hash-comment-directive.c +++ /dev/null @@ -1,22 +0,0 @@ -/*...*/ # /*...*/ version 300 - /*...*/#/*...*/ extension whatever - /*..*/ # /*..*/ pragma ignored -/**/ # /**/ line 4 - /*...*/# /*...*/ ifdef NOT_DEFINED - /*...*/# /*...*/ else - /*..*/ #/*..*/ endif - /*...*/# /*...*/ ifndef ALSO_NOT_DEFINED - /*...*/# /*...*/ else - /*..*/ #/*..*/ endif -/*...*/ # /*...*/ if 0 - /*...*/#/*...*/ elif 1 - /*..*/ # /*..*/ else - /**/ # /**/ endif - /*...*/# /*...*/ define FOO bar - /*..*/ #/*..*/ define FUNC() baz - /*..*/ # /*..*/ define FUNC2(a,b) b a -FOO -FUNC() -FUNC2(x,y) - - diff --git a/src/glsl/glcpp/tests/134-hash-comment-directive.c.expected b/src/glsl/glcpp/tests/134-hash-comment-directive.c.expected deleted file mode 100644 index 760c960cb62..00000000000 --- a/src/glsl/glcpp/tests/134-hash-comment-directive.c.expected +++ /dev/null @@ -1,22 +0,0 @@ -#version 300 -#extension whatever -#pragma ignored -#line 4 - - - - - - - - - - - - - -bar -baz -y x - - diff --git a/src/glsl/glcpp/tests/135-duplicate-parameter.c b/src/glsl/glcpp/tests/135-duplicate-parameter.c deleted file mode 100644 index fd96bd64c74..00000000000 --- a/src/glsl/glcpp/tests/135-duplicate-parameter.c +++ /dev/null @@ -1,2 +0,0 @@ -#define FOO(a,a) which a? -#define BAR(x,y,z,x) so very x diff --git a/src/glsl/glcpp/tests/135-duplicate-parameter.c.expected b/src/glsl/glcpp/tests/135-duplicate-parameter.c.expected deleted file mode 100644 index bc1a334ed29..00000000000 --- a/src/glsl/glcpp/tests/135-duplicate-parameter.c.expected +++ /dev/null @@ -1,4 +0,0 @@ -0:1(9): preprocessor error: Duplicate macro parameter "a" -0:2(9): preprocessor error: Duplicate macro parameter "x" - - diff --git a/src/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c b/src/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c deleted file mode 100644 index 167d3c8a3cf..00000000000 --- a/src/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c +++ /dev/null @@ -1,8 +0,0 @@ -/* The body can include C expressions with ++ and -- */ -a = x++; -b = ++x; -c = x--; -d = --x; -/* But these are not legal in preprocessor expressions. 
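Test 135 above expects a "Duplicate macro parameter" error for #define FOO(a,a). Since macro parameter lists are tiny, a quadratic scan is the natural check; here is a sketch under assumed types (not glcpp's token-list representation):

   #include <string.h>

   /* Returns the index of the first repeated name, or -1 if all of the
    * n parameter names are distinct. */
   static int find_duplicate_param(const char *const *params, int n)
   {
      for (int i = 0; i < n; i++)
         for (int j = i + 1; j < n; j++)
            if (strcmp(params[i], params[j]) == 0)
               return j;   /* reports "a" in FOO(a,a), "x" in BAR(x,y,z,x) */
      return -1;
   }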
*/ -#if x++ > 4 -#endif diff --git a/src/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected b/src/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected deleted file mode 100644 index 137921b1695..00000000000 --- a/src/glsl/glcpp/tests/136-plus-plus-and-minus-minus.c.expected +++ /dev/null @@ -1,8 +0,0 @@ -0:7(12): preprocessor error: syntax error, unexpected PLUS_PLUS - -a = x++; -b = ++x; -c = x--; -d = --x; - - diff --git a/src/glsl/glcpp/tests/137-expand-macro-after-period.c b/src/glsl/glcpp/tests/137-expand-macro-after-period.c deleted file mode 100644 index c8cd47fb57e..00000000000 --- a/src/glsl/glcpp/tests/137-expand-macro-after-period.c +++ /dev/null @@ -1,4 +0,0 @@ -#define FIELD(x) foo.x -#define FIELD_OF(s, x) s.x -FIELD(bar) -FIELD_OF(foo, bar) diff --git a/src/glsl/glcpp/tests/137-expand-macro-after-period.c.expected b/src/glsl/glcpp/tests/137-expand-macro-after-period.c.expected deleted file mode 100644 index f9f5be13e01..00000000000 --- a/src/glsl/glcpp/tests/137-expand-macro-after-period.c.expected +++ /dev/null @@ -1,4 +0,0 @@ - - -foo.bar -foo.bar diff --git a/src/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c b/src/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c deleted file mode 100644 index 38967dc57d1..00000000000 --- a/src/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c +++ /dev/null @@ -1,7 +0,0 @@ -#if 0 -/* - * This multi-line comment needs to be 3 lines to test what's intended. - */ -#else -SUCCESS -#endif diff --git a/src/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected b/src/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected deleted file mode 100644 index 0d6ef4d9ad4..00000000000 --- a/src/glsl/glcpp/tests/138-multi-line-comment-in-if-0.c.expected +++ /dev/null @@ -1,7 +0,0 @@ - - - - - -SUCCESS - diff --git a/src/glsl/glcpp/tests/139-define-without-macro-name.c b/src/glsl/glcpp/tests/139-define-without-macro-name.c deleted file mode 100644 index 30e128db4c5..00000000000 --- a/src/glsl/glcpp/tests/139-define-without-macro-name.c +++ /dev/null @@ -1,5 +0,0 @@ -#define -#define -#define /*...*/ -#define //... -Errors expected because no macro name is ever given! diff --git a/src/glsl/glcpp/tests/139-define-without-macro-name.c.expected b/src/glsl/glcpp/tests/139-define-without-macro-name.c.expected deleted file mode 100644 index 42b02d1a8a1..00000000000 --- a/src/glsl/glcpp/tests/139-define-without-macro-name.c.expected +++ /dev/null @@ -1,5 +0,0 @@ -0:1(1): preprocessor error: #define without macro name -0:2(1): preprocessor error: #define without macro name -0:3(1): preprocessor error: #define without macro name -0:4(1): preprocessor error: #define without macro name -Errors expected because no macro name is ever given! diff --git a/src/glsl/glcpp/tests/140-null-directive.c b/src/glsl/glcpp/tests/140-null-directive.c deleted file mode 100644 index 1dcb26ef8b9..00000000000 --- a/src/glsl/glcpp/tests/140-null-directive.c +++ /dev/null @@ -1,9 +0,0 @@ -/* GLSL accepts a null directive. Let's test that in several variations: */ -# - # -/*....*/#/*....*/ - /*..*/ # /*..*/ -#//... - # //... -/*....*/#/**///.. 
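Tests 138 through 140 above all check the same invariant from different directions: whatever the preprocessor consumes, whether a skipped #if 0 group, a multi-line comment, or a directive line, it must emit one newline per consumed source line so that __LINE__ and error locations stay accurate. A sketch of that bookkeeping (hypothetical helper, not glcpp's implementation):

   #include <stdio.h>

   /* 'text' is the line with its trailing newline already stripped;
    * 'suppressed' is true inside #if 0 groups and for directive lines. */
   static void emit_line(FILE *out, const char *text, int suppressed)
   {
      if (!suppressed)
         fputs(text, out);
      fputc('\n', out);   /* the newline is emitted unconditionally */
   }

This is why the .expected files above are mostly runs of blank lines.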
- /*..*/ # /**/ // diff --git a/src/glsl/glcpp/tests/140-null-directive.c.expected b/src/glsl/glcpp/tests/140-null-directive.c.expected deleted file mode 100644 index fa103f60e82..00000000000 --- a/src/glsl/glcpp/tests/140-null-directive.c.expected +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - diff --git a/src/glsl/glcpp/tests/141-pragma-and-__LINE__.c b/src/glsl/glcpp/tests/141-pragma-and-__LINE__.c deleted file mode 100644 index a93f3ce35fd..00000000000 --- a/src/glsl/glcpp/tests/141-pragma-and-__LINE__.c +++ /dev/null @@ -1,6 +0,0 @@ -Line 1 /* Test for a bug where #pragma was throwing off the __LINE__ count. */ -Line __LINE__ /* Line 2 */ -#pragma Line 3 -Line __LINE__ /* Line 4 */ -#pragma Line 5 -Line __LINE__ /* Line 6 */ diff --git a/src/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected b/src/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected deleted file mode 100644 index 330731dc800..00000000000 --- a/src/glsl/glcpp/tests/141-pragma-and-__LINE__.c.expected +++ /dev/null @@ -1,6 +0,0 @@ -Line 1 -Line 2 -#pragma Line 3 -Line 4 -#pragma Line 5 -Line 6 diff --git a/src/glsl/glcpp/tests/142-defined-within-macro.c b/src/glsl/glcpp/tests/142-defined-within-macro.c deleted file mode 100644 index b60c04232a6..00000000000 --- a/src/glsl/glcpp/tests/142-defined-within-macro.c +++ /dev/null @@ -1,94 +0,0 @@ -/* Macro using defined with a hard-coded identifier (no parentheses) */ -#define is_foo_defined defined /*...*/ foo -#undef foo -#if is_foo_defined -failure -#else -success -#endif -#define foo -#if is_foo_defined -success -#else -failure -#endif - -/* Macro using defined with a hard-coded identifier within parentheses */ -#define is_foo_defined_parens defined /*...*/ ( /*...*/ foo /*...*/ ) // -#define foo -#if is_foo_defined_parens -success -#else -failure -#endif -#undef foo -#if is_foo_defined_parens -failure -#else -success -#endif - -/* Macro using defined with an argument identifier (no parentheses) */ -#define is_defined(arg) defined /*...*/ arg -#define foo bar -#undef bar -#if is_defined(foo) -failure -#else -success -#endif -#define bar bar -#if is_defined(foo) -success -#else -failure -#endif - -/* Macro using defined with an argument identifier within parentheses */ -#define is_defined_parens(arg) defined /*...*/ ( /*...*/ arg /*...*/ ) // -#define foo bar -#define bar bar -#if is_defined_parens(foo) -success -#else -failure -#endif -#undef bar -#if is_defined_parens(foo) -failure -#else -success -#endif - -/* Multiple levels of macro resulting in defined */ -#define X defined A && Y -#define Y defined B && Z -#define Z defined C -#define A -#define B -#define C -#if X -success -#else -failure -#endif -#undef A -#if X -failure -#else -success -#endif -#define A -#undef B -#if X -failure -#else -success -#endif -#define B -#undef C -#if X -failure -#else -success -#endif diff --git a/src/glsl/glcpp/tests/142-defined-within-macro.c.expected b/src/glsl/glcpp/tests/142-defined-within-macro.c.expected deleted file mode 100644 index 4eca90bc3db..00000000000 --- a/src/glsl/glcpp/tests/142-defined-within-macro.c.expected +++ /dev/null @@ -1,94 +0,0 @@ - - - - - - -success - - - -success - - - - - - - - -success - - - - - - - -success - - - - - - - - - -success - - - -success - - - - - - - - - -success - - - - - - - -success - - - - - - - - - - -success - - - - - - - -success - - - - - - -success - - - - - - -success - diff --git a/src/glsl/glcpp/tests/143-multiple-else.c b/src/glsl/glcpp/tests/143-multiple-else.c deleted file mode 100644 index 62ad49cf7bb..00000000000 --- 
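Test 142 above is the tricky one: defined must still work when its operand arrives via macro expansion, in both the bare and the parenthesized forms. A sketch of resolving it once the #if line has been expanded into a token list; all helper names here are hypothetical:

   struct pp_state;
   struct token_iter;
   extern int accept_tok(struct token_iter *it, const char *tok);
   extern void expect_tok(struct token_iter *it, const char *tok);
   extern const char *expect_identifier(struct token_iter *it);
   extern int macro_is_defined(struct pp_state *s, const char *name);

   /* Replaces "defined NAME" or "defined ( NAME )" with 1 or 0 before
    * the controlling expression is parsed. */
   static int resolve_defined(struct pp_state *s, struct token_iter *it)
   {
      int parens = accept_tok(it, "(");   /* parentheses are optional */
      const char *name = expect_identifier(it);
      if (parens)
         expect_tok(it, ")");
      return macro_is_defined(s, name) ? 1 : 0;
   }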
a/src/glsl/glcpp/tests/143-multiple-else.c +++ /dev/null @@ -1,6 +0,0 @@ -#if 0 -#else -int foo; -#else -int bar; -#endif diff --git a/src/glsl/glcpp/tests/143-multiple-else.c.expected b/src/glsl/glcpp/tests/143-multiple-else.c.expected deleted file mode 100644 index 00b3328c835..00000000000 --- a/src/glsl/glcpp/tests/143-multiple-else.c.expected +++ /dev/null @@ -1,7 +0,0 @@ -0:4(1): preprocessor error: multiple #else - - -int foo; - -int bar; - diff --git a/src/glsl/glcpp/tests/glcpp-test b/src/glsl/glcpp/tests/glcpp-test deleted file mode 100755 index 825c22e71fb..00000000000 --- a/src/glsl/glcpp/tests/glcpp-test +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/sh - -if [ ! -z "$srcdir" ]; then - testdir=$srcdir/glcpp/tests - outdir=`pwd`/glcpp/tests - glcpp=`pwd`/glcpp/glcpp -else - testdir=. - outdir=. - glcpp=../glcpp -fi - -trap 'rm $test.valgrind-errors; exit 1' INT QUIT - -usage () -{ - cat <<EOF -Usage: glcpp-test [options...] - -Valid options include: - --testdir=<dir> Use tests in the given <dir> (default is ".") - --valgrind Run the test suite a second time under valgrind -EOF -} - -test_specific_args () -{ - test="$1" - - tr "\r" "\n" < "$test" | grep 'glcpp-args:' | sed -e 's,^.*glcpp-args: *,,' -} - -# Parse command-line options -for option; do - case "${option}" in - "--help") - usage - exit 0 - ;; - "--valgrind") - do_valgrind=yes - ;; - "--testdir="*) - testdir="${option#--testdir=}" - outdir="${outdir}/${option#--testdir=}" - ;; - *) - echo "Unrecognized option: $option" >&2 - echo >&2 - usage - exit 1 - ;; - esac -done - -total=0 -pass=0 -clean=0 - -mkdir -p $outdir - -echo "====== Testing for correctness ======" -for test in $testdir/*.c; do - out=$outdir/${test##*/}.out - - printf "Testing $test... > $out ($test.expected) " - $glcpp $(test_specific_args $test) < $test > $out 2>&1 - total=$((total+1)) - if cmp $test.expected $out >/dev/null 2>&1; then - echo "PASS" - pass=$((pass+1)) - else - echo "FAIL" - diff -u $test.expected $out - fi -done - -echo "" -echo "$pass/$total tests returned correct results" -echo "" - -if [ "$do_valgrind" = "yes" ]; then - echo "====== Testing for valgrind cleanliness ======" - for test in $testdir/*.c; do - printf "Testing $test with valgrind..." - valgrind --error-exitcode=31 --log-file=$test.valgrind-errors $glcpp $(test_specific_args $test) < $test >/dev/null 2>&1 - if [ "$?" = "31" ]; then - echo "ERRORS" - cat $test.valgrind-errors - else - echo "CLEAN" - clean=$((clean+1)) - rm $test.valgrind-errors - fi - done - - echo "" - echo "$pass/$total tests returned correct results" - echo "$clean/$total tests are valgrind-clean" -fi - -if [ "$pass" = "$total" ]; then - exit 0 -else - exit 1 -fi - diff --git a/src/glsl/glcpp/tests/glcpp-test-cr-lf b/src/glsl/glcpp/tests/glcpp-test-cr-lf deleted file mode 100755 index c4ed0b86d74..00000000000 --- a/src/glsl/glcpp/tests/glcpp-test-cr-lf +++ /dev/null @@ -1,141 +0,0 @@ -#!/bin/sh - -# The build system runs this test from a different working directory, and may -# be in a build directory entirely separate from the source. So if the -# "srcdir" variable is set, we must use it to locate the test files and the -# glcpp-test script. - -if [ ! -z "$srcdir" ]; then - testdir="$srcdir/glcpp/tests" - glcpp_test="$srcdir/glcpp/tests/glcpp-test" -else - testdir=.
- glcpp_test=./glcpp-test -fi - -total=0 -pass=0 - -# This supports a pipe that doesn't destroy the exit status of first command - -# -# http://unix.stackexchange.com/questions/14270/get-exit-status-of-process-thats-piped-to-another -stdintoexitstatus() { - read exitstatus - return $exitstatus -} - -run_test () -{ - cmd="$1" - - total=$((total+1)) - - if [ "$VERBOSE" = "yes" ]; then - if $cmd; then - echo "PASS" - pass=$((pass+1)) - else - echo "FAIL" - fi - else - # This is "$cmd | tail -2" but with the exit status of "$cmd" not "tail -2" - if (((($cmd; echo $? >&3) | tail -2 | head -1 >&4) 3>&1) | stdintoexitstatus) 4>&1; then - echo "PASS" - pass=$((pass+1)) - else - echo "FAIL" - fi - fi -} - -usage () -{ - cat <<EOF -Usage: glcpp-test-cr-lf [options...] - -Run the glcpp test suite several times, each time with the source files -converted to a different line-termination convention. - -Valid options include: - --verbose Print each test result instead of a summary -EOF -} - -# Parse command-line options -for option; do - case "${option}" in - "--help") - usage - exit 0 - ;; - "--verbose") - VERBOSE=yes - ;; - *) - echo "Unrecognized option: $option" >&2 - echo >&2 - usage - exit 1 - ;; - esac -done - -# All tests depend on the .out files being present. So first do a -# normal run of the test suite, (silently) just to create the .out -# files as a side effect. -rm -rf ./subtest-lf -mkdir subtest-lf -for file in "$testdir"/*.c; do - base=$(basename "$file") - cp "$file" subtest-lf -done - -${glcpp_test} --testdir=subtest-lf >/dev/null 2>&1 - -echo "===== Testing with \\\\r line terminators (old Mac format) =====" - -# Prepare test files with '\r' instead of '\n' -rm -rf ./subtest-cr -mkdir subtest-cr -for file in "$testdir"/*.c; do - base=$(basename "$file") - tr "\n" "\r" < "$file" > subtest-cr/"$base" - cp `pwd`/glcpp/tests/subtest-lf/"$base".out subtest-cr/"$base".expected -done - -run_test "${glcpp_test} --testdir=subtest-cr" - -echo "===== Testing with \\\\r\\\\n line terminators (DOS format) =====" - -# Prepare test files with '\r\n' instead of '\n' -rm -rf ./subtest-cr-lf -mkdir subtest-cr-lf -for file in "$testdir"/*.c; do - base=$(basename "$file") - sed -e 's/$/\r/' < "$file" > subtest-cr-lf/"$base" - cp `pwd`/glcpp/tests/subtest-lf/"$base".out subtest-cr-lf/"$base".expected -done - -run_test "${glcpp_test} --testdir=subtest-cr-lf" - -echo "===== Testing with \\\\n\\\\r (bizarre, but allowed by GLSL spec.) =====" - -# Prepare test files with '\n\r' instead of '\n' -rm -rf ./subtest-lf-cr -mkdir subtest-lf-cr -for file in "$testdir"/*.c; do - base=$(basename "$file") - sed -e 's/$/\r/' < "$file" | tr "\n\r" "\r\n" > subtest-lf-cr/"$base" - cp `pwd`/glcpp/tests/subtest-lf/"$base".out subtest-lf-cr/"$base".expected -done - -run_test "${glcpp_test} --testdir=subtest-lf-cr" - -echo "" -echo "$pass/$total tests returned correct results" -echo "" - -if [ "$pass" = "$total" ]; then - exit 0 -else - exit 1 -fi diff --git a/src/glsl/glsl_lexer.ll b/src/glsl/glsl_lexer.ll deleted file mode 100644 index e59f93e10ef..00000000000 --- a/src/glsl/glsl_lexer.ll +++ /dev/null @@ -1,635 +0,0 @@ -%{ -/* - * Copyright © 2008, 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ -#include <ctype.h> -#include <limits.h> -#include "util/strtod.h" -#include "ast.h" -#include "glsl_parser_extras.h" -#include "glsl_parser.h" - -static int classify_identifier(struct _mesa_glsl_parse_state *, const char *); - -#ifdef _MSC_VER -#define YY_NO_UNISTD_H -#endif - -#define YY_USER_ACTION \ - do { \ - yylloc->first_column = yycolumn + 1; \ - yylloc->first_line = yylloc->last_line = yylineno + 1; \ - yycolumn += yyleng; \ - yylloc->last_column = yycolumn + 1; \ - } while(0); - -#define YY_USER_INIT yylineno = 0; yycolumn = 0; yylloc->source = 0; - -/* A macro for handling reserved words and keywords across language versions. - * - * Certain words start out as identifiers, become reserved words in - * later language revisions, and finally become language keywords. - * This may happen at different times in desktop GLSL and GLSL ES. - * - * For example, consider the following lexer rule: - * samplerBuffer KEYWORD(130, 0, 140, 0, SAMPLERBUFFER) - * - * This means that "samplerBuffer" will be treated as: - * - a keyword (SAMPLERBUFFER token) ...in GLSL >= 1.40 - * - a reserved word - error ...in GLSL >= 1.30 - * - an identifier ...in GLSL < 1.30 or GLSL ES - */ -#define KEYWORD(reserved_glsl, reserved_glsl_es, \ - allowed_glsl, allowed_glsl_es, token) \ - KEYWORD_WITH_ALT(reserved_glsl, reserved_glsl_es, \ - allowed_glsl, allowed_glsl_es, false, token) - -/** - * Like the KEYWORD macro, but the word is also treated as a keyword - * if the given boolean expression is true. - */ -#define KEYWORD_WITH_ALT(reserved_glsl, reserved_glsl_es, \ - allowed_glsl, allowed_glsl_es, \ - alt_expr, token) \ - do { \ - if (yyextra->is_version(allowed_glsl, allowed_glsl_es) \ - || (alt_expr)) { \ - return token; \ - } else if (yyextra->is_version(reserved_glsl, \ - reserved_glsl_es)) { \ - _mesa_glsl_error(yylloc, yyextra, \ - "illegal use of reserved word `%s'", yytext); \ - return ERROR_TOK; \ - } else { \ - void *mem_ctx = yyextra; \ - yylval->identifier = ralloc_strdup(mem_ctx, yytext); \ - return classify_identifier(yyextra, yytext); \ - } \ - } while (0) - -/** - * A macro for handling keywords that have been present in GLSL since - * its origin, but were changed into reserved words in GLSL 3.00 ES. - */ -#define DEPRECATED_ES_KEYWORD(token) \ - do { \ - if (yyextra->is_version(0, 300)) { \ - _mesa_glsl_error(yylloc, yyextra, \ - "illegal use of reserved word `%s'", yytext); \ - return ERROR_TOK; \ - } else { \ - return token; \ - } \ - } while (0) - -static int -literal_integer(char *text, int len, struct _mesa_glsl_parse_state *state, - YYSTYPE *lval, YYLTYPE *lloc, int base) -{ - bool is_uint = (text[len - 1] == 'u' || - text[len - 1] == 'U'); - const char *digits = text; - - /* Skip "0x" */ - if (base == 16) - digits += 2; - -#ifdef _MSC_VER - unsigned __int64 value = _strtoui64(digits, NULL, base); -#else - unsigned long long value = strtoull(digits, NULL, base); -#endif - - lval->n = (int)value; - - if (value > UINT_MAX) { - /* Note that signed 0xffffffff is valid, not out of range!
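The KEYWORD macro above folds a four-column version table into one decision. Here is the same logic written out as a plain function, as a sketch only (the real macro additionally consults alt_expr for extension-enabled keywords):

   enum word_class { WORD_KEYWORD, WORD_RESERVED, WORD_IDENTIFIER };

   /* at_least(): mirrors is_version() in _mesa_glsl_parse_state; a column
    * value of 0 means "never" for that API. */
   static int at_least(int ver, int es, int v_glsl, int v_es)
   {
      return es ? (v_es != 0 && ver >= v_es) : (v_glsl != 0 && ver >= v_glsl);
   }

   static enum word_class
   classify_word(int ver, int es,
                 int reserved_glsl, int reserved_es,
                 int allowed_glsl, int allowed_es)
   {
      if (at_least(ver, es, allowed_glsl, allowed_es))
         return WORD_KEYWORD;    /* e.g. samplerBuffer in desktop GLSL >= 1.40 */
      if (at_least(ver, es, reserved_glsl, reserved_es))
         return WORD_RESERVED;   /* using it is a compile error */
      return WORD_IDENTIFIER;    /* older versions: an ordinary name */
   }

For samplerBuffer's row (130, 0, 140, 0), desktop GLSL 1.20 classifies it as an identifier, 1.30 as reserved, and 1.40 as the keyword, which is exactly the behavior the comment above describes.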
*/ - if (state->is_version(130, 300)) { - _mesa_glsl_error(lloc, state, - "literal value `%s' out of range", text); - } else { - _mesa_glsl_warning(lloc, state, - "literal value `%s' out of range", text); - } - } else if (base == 10 && !is_uint && (unsigned)value > (unsigned)INT_MAX + 1) { - /* Tries to catch unintentionally providing a negative value. - * Note that -2147483648 is parsed as -(2147483648), so we don't - * want to warn for INT_MAX. - */ - _mesa_glsl_warning(lloc, state, - "signed literal value `%s' is interpreted as %d", - text, lval->n); - } - return is_uint ? UINTCONSTANT : INTCONSTANT; -} - -#define LITERAL_INTEGER(base) \ - literal_integer(yytext, yyleng, yyextra, yylval, yylloc, base) - -%} - -%option bison-bridge bison-locations reentrant noyywrap -%option nounput noyy_top_state -%option never-interactive -%option prefix="_mesa_glsl_lexer_" -%option extra-type="struct _mesa_glsl_parse_state *" -%option warn nodefault - - /* Note: When adding any start conditions to this list, you must also - * update the "Internal compiler error" catch-all rule near the end of - * this file. */ -%x PP PRAGMA - -DEC_INT [1-9][0-9]* -HEX_INT 0[xX][0-9a-fA-F]+ -OCT_INT 0[0-7]* -INT ({DEC_INT}|{HEX_INT}|{OCT_INT}) -SPC [ \t]* -SPCP [ \t]+ -HASH ^{SPC}#{SPC} -%% - -[ \r\t]+ ; - - /* Preprocessor tokens. */ -^[ \t]*#[ \t]*$ ; -^[ \t]*#[ \t]*version { BEGIN PP; return VERSION_TOK; } -^[ \t]*#[ \t]*extension { BEGIN PP; return EXTENSION; } -{HASH}line{SPCP}{INT}{SPCP}{INT}{SPC}$ { - /* Eat characters until the first digit is - * encountered - */ - char *ptr = yytext; - while (!isdigit(*ptr)) - ptr++; - - /* Subtract one from the line number because - * yylineno is zero-based instead of - * one-based. - */ - yylineno = strtol(ptr, &ptr, 0) - 1; - - /* From GLSL 3.30 and GLSL ES on, after processing the - * line directive (including its new-line), the implementation - * will behave as if it is compiling at the line number passed - * as argument. It was line number + 1 in older specifications. - */ - if (yyextra->is_version(330, 100)) - yylineno--; - - yylloc->source = strtol(ptr, NULL, 0); - } -{HASH}line{SPCP}{INT}{SPC}$ { - /* Eat characters until the first digit is - * encountered - */ - char *ptr = yytext; - while (!isdigit(*ptr)) - ptr++; - - /* Subtract one from the line number because - * yylineno is zero-based instead of - * one-based. - */ - yylineno = strtol(ptr, &ptr, 0) - 1; - - /* From GLSL 3.30 and GLSL ES on, after processing the - * line directive (including its new-line), the implementation - * will behave as if it is compiling at the line number passed - * as argument. It was line number + 1 in older specifications. - */ - if (yyextra->is_version(330, 100)) - yylineno--; - } -^{SPC}#{SPC}pragma{SPCP}debug{SPC}\({SPC}on{SPC}\) { - BEGIN PP; - return PRAGMA_DEBUG_ON; - } -^{SPC}#{SPC}pragma{SPCP}debug{SPC}\({SPC}off{SPC}\) { - BEGIN PP; - return PRAGMA_DEBUG_OFF; - } -^{SPC}#{SPC}pragma{SPCP}optimize{SPC}\({SPC}on{SPC}\) { - BEGIN PP; - return PRAGMA_OPTIMIZE_ON; - } -^{SPC}#{SPC}pragma{SPCP}optimize{SPC}\({SPC}off{SPC}\) { - BEGIN PP; - return PRAGMA_OPTIMIZE_OFF; - } -^{SPC}#{SPC}pragma{SPCP}STDGL{SPCP}invariant{SPC}\({SPC}all{SPC}\) { - BEGIN PP; - return PRAGMA_INVARIANT_ALL; - } -^{SPC}#{SPC}pragma{SPCP} { BEGIN PRAGMA; } - -\n { BEGIN 0; yylineno++; yycolumn = 0; } -. 
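The two #line rules above do their arithmetic in two steps that are easy to misread. A worked sketch of the net effect (hypothetical function; in the real lexer the directive's trailing newline is consumed by the separate \n rule, which increments yylineno once more):

   /* directive_arg: the integer argument of the #line directive.
    * modern: GLSL >= 3.30 or any GLSL ES version, i.e. is_version(330, 100). */
   static int yylineno_after_hash_line(int directive_arg, int modern)
   {
      int lineno = directive_arg - 1;   /* yylineno is zero-based */
      if (modern)
         lineno--;   /* modern rule: the *next* line carries the number */
      return lineno;
   }

After "#line 4" in a modern shader, yylineno becomes 2; consuming the directive's newline makes it 3, so tokens on the following line report 3 + 1 == 4, as mandated. Pre-3.30 desktop GLSL reported line 5 at the same spot, matching the "line number + 1" note above.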
{ } - -\/\/[^\n]* { } -[ \t\r]* { } -: return COLON; -[_a-zA-Z][_a-zA-Z0-9]* { - void *mem_ctx = yyextra; - yylval->identifier = ralloc_strdup(mem_ctx, yytext); - return IDENTIFIER; - } -[1-9][0-9]* { - yylval->n = strtol(yytext, NULL, 10); - return INTCONSTANT; - } -\n { BEGIN 0; yylineno++; yycolumn = 0; return EOL; } -. { return yytext[0]; } - -\n { yylineno++; yycolumn = 0; } - -attribute DEPRECATED_ES_KEYWORD(ATTRIBUTE); -const return CONST_TOK; -bool return BOOL_TOK; -float return FLOAT_TOK; -int return INT_TOK; -uint KEYWORD(130, 300, 130, 300, UINT_TOK); - -break return BREAK; -continue return CONTINUE; -do return DO; -while return WHILE; -else return ELSE; -for return FOR; -if return IF; -discard return DISCARD; -return return RETURN; - -bvec2 return BVEC2; -bvec3 return BVEC3; -bvec4 return BVEC4; -ivec2 return IVEC2; -ivec3 return IVEC3; -ivec4 return IVEC4; -uvec2 KEYWORD(130, 300, 130, 300, UVEC2); -uvec3 KEYWORD(130, 300, 130, 300, UVEC3); -uvec4 KEYWORD(130, 300, 130, 300, UVEC4); -vec2 return VEC2; -vec3 return VEC3; -vec4 return VEC4; -mat2 return MAT2X2; -mat3 return MAT3X3; -mat4 return MAT4X4; -mat2x2 KEYWORD(120, 300, 120, 300, MAT2X2); -mat2x3 KEYWORD(120, 300, 120, 300, MAT2X3); -mat2x4 KEYWORD(120, 300, 120, 300, MAT2X4); -mat3x2 KEYWORD(120, 300, 120, 300, MAT3X2); -mat3x3 KEYWORD(120, 300, 120, 300, MAT3X3); -mat3x4 KEYWORD(120, 300, 120, 300, MAT3X4); -mat4x2 KEYWORD(120, 300, 120, 300, MAT4X2); -mat4x3 KEYWORD(120, 300, 120, 300, MAT4X3); -mat4x4 KEYWORD(120, 300, 120, 300, MAT4X4); - -in return IN_TOK; -out return OUT_TOK; -inout return INOUT_TOK; -uniform return UNIFORM; -buffer return BUFFER; -varying DEPRECATED_ES_KEYWORD(VARYING); -centroid KEYWORD(120, 300, 120, 300, CENTROID); -invariant KEYWORD(120, 100, 120, 100, INVARIANT); -flat KEYWORD(130, 100, 130, 300, FLAT); -smooth KEYWORD(130, 300, 130, 300, SMOOTH); -noperspective KEYWORD(130, 300, 130, 0, NOPERSPECTIVE); -patch KEYWORD_WITH_ALT(0, 300, 400, 0, yyextra->ARB_tessellation_shader_enable, PATCH); - -sampler1D DEPRECATED_ES_KEYWORD(SAMPLER1D); -sampler2D return SAMPLER2D; -sampler3D return SAMPLER3D; -samplerCube return SAMPLERCUBE; -sampler1DArray KEYWORD(130, 300, 130, 0, SAMPLER1DARRAY); -sampler2DArray KEYWORD(130, 300, 130, 300, SAMPLER2DARRAY); -sampler1DShadow DEPRECATED_ES_KEYWORD(SAMPLER1DSHADOW); -sampler2DShadow return SAMPLER2DSHADOW; -samplerCubeShadow KEYWORD(130, 300, 130, 300, SAMPLERCUBESHADOW); -sampler1DArrayShadow KEYWORD(130, 300, 130, 0, SAMPLER1DARRAYSHADOW); -sampler2DArrayShadow KEYWORD(130, 300, 130, 300, SAMPLER2DARRAYSHADOW); -isampler1D KEYWORD(130, 300, 130, 0, ISAMPLER1D); -isampler2D KEYWORD(130, 300, 130, 300, ISAMPLER2D); -isampler3D KEYWORD(130, 300, 130, 300, ISAMPLER3D); -isamplerCube KEYWORD(130, 300, 130, 300, ISAMPLERCUBE); -isampler1DArray KEYWORD(130, 300, 130, 0, ISAMPLER1DARRAY); -isampler2DArray KEYWORD(130, 300, 130, 300, ISAMPLER2DARRAY); -usampler1D KEYWORD(130, 300, 130, 0, USAMPLER1D); -usampler2D KEYWORD(130, 300, 130, 300, USAMPLER2D); -usampler3D KEYWORD(130, 300, 130, 300, USAMPLER3D); -usamplerCube KEYWORD(130, 300, 130, 300, USAMPLERCUBE); -usampler1DArray KEYWORD(130, 300, 130, 0, USAMPLER1DARRAY); -usampler2DArray KEYWORD(130, 300, 130, 300, USAMPLER2DARRAY); - - /* additional keywords in ARB_texture_multisample, included in GLSL 1.50 */ - /* these are reserved but not defined in GLSL 3.00 */ - /* [iu]sampler2DMS are defined in GLSL ES 3.10 */ -sampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, 
SAMPLER2DMS); -isampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, ISAMPLER2DMS); -usampler2DMS KEYWORD_WITH_ALT(150, 300, 150, 310, yyextra->ARB_texture_multisample_enable, USAMPLER2DMS); -sampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, SAMPLER2DMSARRAY); -isampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, ISAMPLER2DMSARRAY); -usampler2DMSArray KEYWORD_WITH_ALT(150, 300, 150, 320, yyextra->ARB_texture_multisample_enable || yyextra->OES_texture_storage_multisample_2d_array_enable, USAMPLER2DMSARRAY); - - /* keywords available with ARB_texture_cube_map_array_enable extension on desktop GLSL */ -samplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAY); -isamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, ISAMPLERCUBEARRAY); -usamplerCubeArray KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, USAMPLERCUBEARRAY); -samplerCubeArrayShadow KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_texture_cube_map_array_enable, SAMPLERCUBEARRAYSHADOW); - -samplerExternalOES { - if (yyextra->OES_EGL_image_external_enable) - return SAMPLEREXTERNALOES; - else - return IDENTIFIER; - } - - /* keywords available with ARB_gpu_shader5 */ -precise KEYWORD_WITH_ALT(400, 0, 400, 0, yyextra->ARB_gpu_shader5_enable, PRECISE); - - /* keywords available with ARB_shader_image_load_store */ -image1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1D); -image2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2D); -image3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE3D); -image2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DRECT); -imageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGECUBE); -imageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGEBUFFER); -image1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE1DARRAY); -image2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IMAGE2DARRAY); -imageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGECUBEARRAY); -image2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMS); -image2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IMAGE2DMSARRAY); -iimage1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE1D); -iimage2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2D); -iimage3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE3D); -iimage2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DRECT); -iimageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBE); -iimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGEBUFFER); -iimage1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, 
IIMAGE1DARRAY); -iimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DARRAY); -iimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGECUBEARRAY); -iimage2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMS); -iimage2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, IIMAGE2DMSARRAY); -uimage1D KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1D); -uimage2D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2D); -uimage3D KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE3D); -uimage2DRect KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DRECT); -uimageCube KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBE); -uimageBuffer KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGEBUFFER); -uimage1DArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE1DARRAY); -uimage2DArray KEYWORD_WITH_ALT(130, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DARRAY); -uimageCubeArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGECUBEARRAY); -uimage2DMS KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMS); -uimage2DMSArray KEYWORD_WITH_ALT(130, 300, 420, 0, yyextra->ARB_shader_image_load_store_enable, UIMAGE2DMSARRAY); -image1DShadow KEYWORD(130, 300, 0, 0, IMAGE1DSHADOW); -image2DShadow KEYWORD(130, 300, 0, 0, IMAGE2DSHADOW); -image1DArrayShadow KEYWORD(130, 300, 0, 0, IMAGE1DARRAYSHADOW); -image2DArrayShadow KEYWORD(130, 300, 0, 0, IMAGE2DARRAYSHADOW); - -coherent KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, COHERENT); -volatile KEYWORD_WITH_ALT(110, 100, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, VOLATILE); -restrict KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, RESTRICT); -readonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, READONLY); -writeonly KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_image_load_store_enable || yyextra->ARB_shader_storage_buffer_object_enable, WRITEONLY); - -atomic_uint KEYWORD_WITH_ALT(420, 300, 420, 310, yyextra->ARB_shader_atomic_counters_enable, ATOMIC_UINT); - -shared KEYWORD_WITH_ALT(430, 310, 430, 310, yyextra->ARB_compute_shader_enable, SHARED); - -struct return STRUCT; -void return VOID_TOK; - -layout { - if ((yyextra->is_version(140, 300)) - || yyextra->AMD_conservative_depth_enable - || yyextra->ARB_conservative_depth_enable - || yyextra->ARB_explicit_attrib_location_enable - || yyextra->ARB_explicit_uniform_location_enable - || yyextra->has_separate_shader_objects() - || yyextra->ARB_uniform_buffer_object_enable - || yyextra->ARB_fragment_coord_conventions_enable - || yyextra->ARB_shading_language_420pack_enable - || yyextra->ARB_compute_shader_enable - || yyextra->ARB_tessellation_shader_enable) { - return LAYOUT_TOK; - } else { - void *mem_ctx = yyextra; - yylval->identifier = ralloc_strdup(mem_ctx, 
yytext); - return classify_identifier(yyextra, yytext); - } - } - -\+\+ return INC_OP; --- return DEC_OP; -\<= return LE_OP; ->= return GE_OP; -== return EQ_OP; -!= return NE_OP; -&& return AND_OP; -\|\| return OR_OP; -"^^" return XOR_OP; -"<<" return LEFT_OP; -">>" return RIGHT_OP; - -\*= return MUL_ASSIGN; -\/= return DIV_ASSIGN; -\+= return ADD_ASSIGN; -\%= return MOD_ASSIGN; -\<\<= return LEFT_ASSIGN; ->>= return RIGHT_ASSIGN; -&= return AND_ASSIGN; -"^=" return XOR_ASSIGN; -\|= return OR_ASSIGN; --= return SUB_ASSIGN; - -[1-9][0-9]*[uU]? { - return LITERAL_INTEGER(10); - } -0[xX][0-9a-fA-F]+[uU]? { - return LITERAL_INTEGER(16); - } -0[0-7]*[uU]? { - return LITERAL_INTEGER(8); - } - -[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?[fF]? | -\.[0-9]+([eE][+-]?[0-9]+)?[fF]? | -[0-9]+\.([eE][+-]?[0-9]+)?[fF]? | -[0-9]+[eE][+-]?[0-9]+[fF]? { - yylval->real = _mesa_strtof(yytext, NULL); - return FLOATCONSTANT; - } - -[0-9]+\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF) | -\.[0-9]+([eE][+-]?[0-9]+)?(lf|LF) | -[0-9]+\.([eE][+-]?[0-9]+)?(lf|LF) | -[0-9]+[eE][+-]?[0-9]+(lf|LF) { - if (!yyextra->is_version(400, 0) && - !yyextra->ARB_gpu_shader_fp64_enable) - return ERROR_TOK; - yylval->dreal = _mesa_strtod(yytext, NULL); - return DOUBLECONSTANT; - } - -true { - yylval->n = 1; - return BOOLCONSTANT; - } -false { - yylval->n = 0; - return BOOLCONSTANT; - } - - - /* Reserved words in GLSL 1.10. */ -asm KEYWORD(110, 100, 0, 0, ASM); -class KEYWORD(110, 100, 0, 0, CLASS); -union KEYWORD(110, 100, 0, 0, UNION); -enum KEYWORD(110, 100, 0, 0, ENUM); -typedef KEYWORD(110, 100, 0, 0, TYPEDEF); -template KEYWORD(110, 100, 0, 0, TEMPLATE); -this KEYWORD(110, 100, 0, 0, THIS); -packed KEYWORD_WITH_ALT(110, 100, 140, 300, yyextra->ARB_uniform_buffer_object_enable, PACKED_TOK); -goto KEYWORD(110, 100, 0, 0, GOTO); -switch KEYWORD(110, 100, 130, 300, SWITCH); -default KEYWORD(110, 100, 130, 300, DEFAULT); -inline KEYWORD(110, 100, 0, 0, INLINE_TOK); -noinline KEYWORD(110, 100, 0, 0, NOINLINE); -public KEYWORD(110, 100, 0, 0, PUBLIC_TOK); -static KEYWORD(110, 100, 0, 0, STATIC); -extern KEYWORD(110, 100, 0, 0, EXTERN); -external KEYWORD(110, 100, 0, 0, EXTERNAL); -interface KEYWORD(110, 100, 0, 0, INTERFACE); -long KEYWORD(110, 100, 0, 0, LONG_TOK); -short KEYWORD(110, 100, 0, 0, SHORT_TOK); -double KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DOUBLE_TOK); -half KEYWORD(110, 100, 0, 0, HALF); -fixed KEYWORD(110, 100, 0, 0, FIXED_TOK); -unsigned KEYWORD(110, 100, 0, 0, UNSIGNED); -input KEYWORD(110, 100, 0, 0, INPUT_TOK); -output KEYWORD(110, 100, 0, 0, OUTPUT); -hvec2 KEYWORD(110, 100, 0, 0, HVEC2); -hvec3 KEYWORD(110, 100, 0, 0, HVEC3); -hvec4 KEYWORD(110, 100, 0, 0, HVEC4); -dvec2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC2); -dvec3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC3); -dvec4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DVEC4); -dmat2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2); -dmat3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3); -dmat4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4); -dmat2x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X2); -dmat2x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X3); -dmat2x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT2X4); -dmat3x2 KEYWORD_WITH_ALT(110, 100, 400, 0, 
yyextra->ARB_gpu_shader_fp64_enable, DMAT3X2); -dmat3x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X3); -dmat3x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT3X4); -dmat4x2 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X2); -dmat4x3 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X3); -dmat4x4 KEYWORD_WITH_ALT(110, 100, 400, 0, yyextra->ARB_gpu_shader_fp64_enable, DMAT4X4); -fvec2 KEYWORD(110, 100, 0, 0, FVEC2); -fvec3 KEYWORD(110, 100, 0, 0, FVEC3); -fvec4 KEYWORD(110, 100, 0, 0, FVEC4); -sampler2DRect DEPRECATED_ES_KEYWORD(SAMPLER2DRECT); -sampler3DRect KEYWORD(110, 100, 0, 0, SAMPLER3DRECT); -sampler2DRectShadow DEPRECATED_ES_KEYWORD(SAMPLER2DRECTSHADOW); -sizeof KEYWORD(110, 100, 0, 0, SIZEOF); -cast KEYWORD(110, 100, 0, 0, CAST); -namespace KEYWORD(110, 100, 0, 0, NAMESPACE); -using KEYWORD(110, 100, 0, 0, USING); - - /* Additional reserved words in GLSL 1.20. */ -lowp KEYWORD(120, 100, 130, 100, LOWP); -mediump KEYWORD(120, 100, 130, 100, MEDIUMP); -highp KEYWORD(120, 100, 130, 100, HIGHP); -precision KEYWORD(120, 100, 130, 100, PRECISION); - - /* Additional reserved words in GLSL 1.30. */ -case KEYWORD(130, 300, 130, 300, CASE); -common KEYWORD(130, 300, 0, 0, COMMON); -partition KEYWORD(130, 300, 0, 0, PARTITION); -active KEYWORD(130, 300, 0, 0, ACTIVE); -superp KEYWORD(130, 100, 0, 0, SUPERP); -samplerBuffer KEYWORD(130, 300, 140, 0, SAMPLERBUFFER); -filter KEYWORD(130, 300, 0, 0, FILTER); -row_major KEYWORD_WITH_ALT(130, 0, 140, 0, yyextra->ARB_uniform_buffer_object_enable && !yyextra->es_shader, ROW_MAJOR); - - /* Additional reserved words in GLSL 1.40 */ -isampler2DRect KEYWORD(140, 300, 140, 0, ISAMPLER2DRECT); -usampler2DRect KEYWORD(140, 300, 140, 0, USAMPLER2DRECT); -isamplerBuffer KEYWORD(140, 300, 140, 0, ISAMPLERBUFFER); -usamplerBuffer KEYWORD(140, 300, 140, 0, USAMPLERBUFFER); - - /* Additional reserved words in GLSL ES 3.00 */ -resource KEYWORD(0, 300, 0, 0, RESOURCE); -sample KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_gpu_shader5_enable, SAMPLE); -subroutine KEYWORD_WITH_ALT(400, 300, 400, 0, yyextra->ARB_shader_subroutine_enable, SUBROUTINE); - - -[_a-zA-Z][_a-zA-Z0-9]* { - struct _mesa_glsl_parse_state *state = yyextra; - void *ctx = state; - if (state->es_shader && strlen(yytext) > 1024) { - _mesa_glsl_error(yylloc, state, - "Identifier `%s' exceeds 1024 characters", - yytext); - } else { - yylval->identifier = ralloc_strdup(ctx, yytext); - } - return classify_identifier(state, yytext); - } - -\. { struct _mesa_glsl_parse_state *state = yyextra; - state->is_field = true; - return DOT_TOK; } - -. 
{ return yytext[0]; } - -%% - -int -classify_identifier(struct _mesa_glsl_parse_state *state, const char *name) -{ - if (state->is_field) { - state->is_field = false; - return FIELD_SELECTION; - } - if (state->symbols->get_variable(name) || state->symbols->get_function(name)) - return IDENTIFIER; - else if (state->symbols->get_type(name)) - return TYPE_IDENTIFIER; - else - return NEW_IDENTIFIER; -} - -void -_mesa_glsl_lexer_ctor(struct _mesa_glsl_parse_state *state, const char *string) -{ - yylex_init_extra(state, & state->scanner); - yy_scan_string(string, state->scanner); -} - -void -_mesa_glsl_lexer_dtor(struct _mesa_glsl_parse_state *state) -{ - yylex_destroy(state->scanner); -} diff --git a/src/glsl/glsl_parser.yy b/src/glsl/glsl_parser.yy deleted file mode 100644 index 99bd0e61d0e..00000000000 --- a/src/glsl/glsl_parser.yy +++ /dev/null @@ -1,2855 +0,0 @@ -%{ -/* - * Copyright © 2008, 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#ifndef _MSC_VER -#include <strings.h> -#endif -#include <assert.h> - -#include "ast.h" -#include "glsl_parser_extras.h" -#include "compiler/glsl_types.h" -#include "main/context.h" - -#ifdef _MSC_VER -#pragma warning( disable : 4065 ) // switch statement contains 'default' but no 'case' labels -#endif - -#undef yyerror - -static void yyerror(YYLTYPE *loc, _mesa_glsl_parse_state *st, const char *msg) -{ - _mesa_glsl_error(loc, st, "%s", msg); -} - -static int -_mesa_glsl_lex(YYSTYPE *val, YYLTYPE *loc, _mesa_glsl_parse_state *state) -{ - return _mesa_glsl_lexer_lex(val, loc, state->scanner); -} - -static bool match_layout_qualifier(const char *s1, const char *s2, - _mesa_glsl_parse_state *state) -{ - /* From the GLSL 1.50 spec, section 4.3.8 (Layout Qualifiers): - * - * "The tokens in any layout-qualifier-id-list ... are not case - * sensitive, unless explicitly noted otherwise." - * - * The text "unless explicitly noted otherwise" appears to be - * vacuous--no desktop GLSL spec (up through GLSL 4.40) notes - * otherwise. - * - * However, the GLSL ES 3.00 spec says, in section 4.3.8 (Layout - * Qualifiers): - * - * "As for other identifiers, they are case sensitive." - * - * So we need to do a case-sensitive or a case-insensitive match, - * depending on whether we are compiling for GLSL ES.
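Note the convention here: despite its name, match_layout_qualifier() follows the strcmp()/strcasecmp() contract and yields 0 (false) on a match, so callers in the grammar compare its result against zero rather than treating it as a boolean. A self-contained sketch of the same contract (hypothetical wrapper, not the parser's code):

   #include <string.h>
   #include <strings.h>   /* strcasecmp; hence the #ifndef _MSC_VER include above */

   /* 0 == matched, mirroring match_layout_qualifier(). */
   static int match_qual(const char *s1, const char *s2, int es_shader)
   {
      return es_shader ? strcmp(s1, s2) : strcasecmp(s1, s2);
   }

   int main(void)
   {
      /* Desktop GLSL is case-insensitive here; GLSL ES is not. */
      int desktop_ok = match_qual("ROW_major", "row_major", 0) == 0;
      int es_ok      = match_qual("ROW_major", "row_major", 1) == 0;
      return (desktop_ok && !es_ok) ? 0 : 1;
   }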
- */ - if (state->es_shader) - return strcmp(s1, s2); - else - return strcasecmp(s1, s2); -} -%} - -%expect 0 - -%pure-parser -%error-verbose - -%locations -%initial-action { - @$.first_line = 1; - @$.first_column = 1; - @$.last_line = 1; - @$.last_column = 1; - @$.source = 0; -} - -%lex-param {struct _mesa_glsl_parse_state *state} -%parse-param {struct _mesa_glsl_parse_state *state} - -%union { - int n; - float real; - double dreal; - const char *identifier; - - struct ast_type_qualifier type_qualifier; - - ast_node *node; - ast_type_specifier *type_specifier; - ast_array_specifier *array_specifier; - ast_fully_specified_type *fully_specified_type; - ast_function *function; - ast_parameter_declarator *parameter_declarator; - ast_function_definition *function_definition; - ast_compound_statement *compound_statement; - ast_expression *expression; - ast_declarator_list *declarator_list; - ast_struct_specifier *struct_specifier; - ast_declaration *declaration; - ast_switch_body *switch_body; - ast_case_label *case_label; - ast_case_label_list *case_label_list; - ast_case_statement *case_statement; - ast_case_statement_list *case_statement_list; - ast_interface_block *interface_block; - ast_subroutine_list *subroutine_list; - struct { - ast_node *cond; - ast_expression *rest; - } for_rest_statement; - - struct { - ast_node *then_statement; - ast_node *else_statement; - } selection_rest_statement; -} - -%token ATTRIBUTE CONST_TOK BOOL_TOK FLOAT_TOK INT_TOK UINT_TOK DOUBLE_TOK -%token BREAK BUFFER CONTINUE DO ELSE FOR IF DISCARD RETURN SWITCH CASE DEFAULT -%token BVEC2 BVEC3 BVEC4 IVEC2 IVEC3 IVEC4 UVEC2 UVEC3 UVEC4 VEC2 VEC3 VEC4 DVEC2 DVEC3 DVEC4 -%token CENTROID IN_TOK OUT_TOK INOUT_TOK UNIFORM VARYING SAMPLE -%token NOPERSPECTIVE FLAT SMOOTH -%token MAT2X2 MAT2X3 MAT2X4 -%token MAT3X2 MAT3X3 MAT3X4 -%token MAT4X2 MAT4X3 MAT4X4 -%token DMAT2X2 DMAT2X3 DMAT2X4 -%token DMAT3X2 DMAT3X3 DMAT3X4 -%token DMAT4X2 DMAT4X3 DMAT4X4 -%token SAMPLER1D SAMPLER2D SAMPLER3D SAMPLERCUBE SAMPLER1DSHADOW SAMPLER2DSHADOW -%token SAMPLERCUBESHADOW SAMPLER1DARRAY SAMPLER2DARRAY SAMPLER1DARRAYSHADOW -%token SAMPLER2DARRAYSHADOW SAMPLERCUBEARRAY SAMPLERCUBEARRAYSHADOW -%token ISAMPLER1D ISAMPLER2D ISAMPLER3D ISAMPLERCUBE -%token ISAMPLER1DARRAY ISAMPLER2DARRAY ISAMPLERCUBEARRAY -%token USAMPLER1D USAMPLER2D USAMPLER3D USAMPLERCUBE USAMPLER1DARRAY -%token USAMPLER2DARRAY USAMPLERCUBEARRAY -%token SAMPLER2DRECT ISAMPLER2DRECT USAMPLER2DRECT SAMPLER2DRECTSHADOW -%token SAMPLERBUFFER ISAMPLERBUFFER USAMPLERBUFFER -%token SAMPLER2DMS ISAMPLER2DMS USAMPLER2DMS -%token SAMPLER2DMSARRAY ISAMPLER2DMSARRAY USAMPLER2DMSARRAY -%token SAMPLEREXTERNALOES -%token IMAGE1D IMAGE2D IMAGE3D IMAGE2DRECT IMAGECUBE IMAGEBUFFER -%token IMAGE1DARRAY IMAGE2DARRAY IMAGECUBEARRAY IMAGE2DMS IMAGE2DMSARRAY -%token IIMAGE1D IIMAGE2D IIMAGE3D IIMAGE2DRECT IIMAGECUBE IIMAGEBUFFER -%token IIMAGE1DARRAY IIMAGE2DARRAY IIMAGECUBEARRAY IIMAGE2DMS IIMAGE2DMSARRAY -%token UIMAGE1D UIMAGE2D UIMAGE3D UIMAGE2DRECT UIMAGECUBE UIMAGEBUFFER -%token UIMAGE1DARRAY UIMAGE2DARRAY UIMAGECUBEARRAY UIMAGE2DMS UIMAGE2DMSARRAY -%token IMAGE1DSHADOW IMAGE2DSHADOW IMAGE1DARRAYSHADOW IMAGE2DARRAYSHADOW -%token COHERENT VOLATILE RESTRICT READONLY WRITEONLY -%token ATOMIC_UINT -%token SHARED -%token STRUCT VOID_TOK WHILE -%token IDENTIFIER TYPE_IDENTIFIER NEW_IDENTIFIER -%type any_identifier -%type instance_name_opt -%type buffer_instance_name_opt -%token FLOATCONSTANT -%token DOUBLECONSTANT -%token INTCONSTANT UINTCONSTANT BOOLCONSTANT -%token FIELD_SELECTION -%token 
LEFT_OP RIGHT_OP -%token INC_OP DEC_OP LE_OP GE_OP EQ_OP NE_OP -%token AND_OP OR_OP XOR_OP MUL_ASSIGN DIV_ASSIGN ADD_ASSIGN -%token MOD_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN XOR_ASSIGN OR_ASSIGN -%token SUB_ASSIGN -%token INVARIANT PRECISE -%token LOWP MEDIUMP HIGHP SUPERP PRECISION - -%token VERSION_TOK EXTENSION LINE COLON EOL INTERFACE OUTPUT -%token PRAGMA_DEBUG_ON PRAGMA_DEBUG_OFF -%token PRAGMA_OPTIMIZE_ON PRAGMA_OPTIMIZE_OFF -%token PRAGMA_INVARIANT_ALL -%token LAYOUT_TOK -%token DOT_TOK - /* Reserved words that are not actually used in the grammar. - */ -%token ASM CLASS UNION ENUM TYPEDEF TEMPLATE THIS PACKED_TOK GOTO -%token INLINE_TOK NOINLINE PUBLIC_TOK STATIC EXTERN EXTERNAL -%token LONG_TOK SHORT_TOK HALF FIXED_TOK UNSIGNED INPUT_TOK -%token HVEC2 HVEC3 HVEC4 FVEC2 FVEC3 FVEC4 -%token SAMPLER3DRECT -%token SIZEOF CAST NAMESPACE USING -%token RESOURCE PATCH -%token SUBROUTINE - -%token ERROR_TOK - -%token COMMON PARTITION ACTIVE FILTER ROW_MAJOR - -%type variable_identifier -%type statement -%type statement_list -%type simple_statement -%type precision_qualifier -%type type_qualifier -%type auxiliary_storage_qualifier -%type storage_qualifier -%type interpolation_qualifier -%type layout_qualifier -%type layout_qualifier_id_list layout_qualifier_id -%type interface_block_layout_qualifier -%type memory_qualifier -%type subroutine_qualifier -%type subroutine_type_list -%type interface_qualifier -%type buffer_interface_qualifier -%type type_specifier -%type type_specifier_nonarray -%type array_specifier -%type basic_type_specifier_nonarray -%type fully_specified_type -%type function_prototype -%type function_header -%type function_header_with_parameters -%type function_declarator -%type parameter_declarator -%type parameter_declaration -%type parameter_qualifier -%type parameter_direction_qualifier -%type parameter_type_specifier -%type function_definition -%type compound_statement_no_new_scope -%type compound_statement -%type statement_no_new_scope -%type expression_statement -%type expression -%type primary_expression -%type assignment_expression -%type conditional_expression -%type logical_or_expression -%type logical_xor_expression -%type logical_and_expression -%type inclusive_or_expression -%type exclusive_or_expression -%type and_expression -%type equality_expression -%type relational_expression -%type shift_expression -%type additive_expression -%type multiplicative_expression -%type unary_expression -%type constant_expression -%type integer_expression -%type postfix_expression -%type function_call_header_with_parameters -%type function_call_header_no_parameters -%type function_call_header -%type function_call_generic -%type function_call_or_method -%type function_call -%type assignment_operator -%type unary_operator -%type function_identifier -%type external_declaration -%type init_declarator_list -%type single_declaration -%type initializer -%type initializer_list -%type declaration -%type declaration_statement -%type jump_statement -%type interface_block -%type basic_interface_block -%type struct_specifier -%type struct_declaration_list -%type struct_declaration -%type struct_declarator -%type struct_declarator_list -%type member_list -%type member_declaration -%type selection_statement -%type selection_rest_statement -%type switch_statement -%type switch_body -%type case_label_list -%type case_label -%type case_statement -%type case_statement_list -%type iteration_statement -%type condition -%type conditionopt -%type for_init_statement -%type for_rest_statement 
-%type layout_defaults -%type layout_uniform_defaults -%type layout_buffer_defaults -%type layout_in_defaults -%type layout_out_defaults - -%right THEN ELSE -%% - -translation_unit: - version_statement extension_statement_list - { - _mesa_glsl_initialize_types(state); - } - external_declaration_list - { - delete state->symbols; - state->symbols = new(ralloc_parent(state)) glsl_symbol_table; - if (state->es_shader) { - if (state->stage == MESA_SHADER_FRAGMENT) { - state->symbols->add_default_precision_qualifier("int", ast_precision_medium); - } else { - state->symbols->add_default_precision_qualifier("float", ast_precision_high); - state->symbols->add_default_precision_qualifier("int", ast_precision_high); - } - state->symbols->add_default_precision_qualifier("sampler2D", ast_precision_low); - state->symbols->add_default_precision_qualifier("samplerExternalOES", ast_precision_low); - state->symbols->add_default_precision_qualifier("samplerCube", ast_precision_low); - state->symbols->add_default_precision_qualifier("atomic_uint", ast_precision_high); - } - _mesa_glsl_initialize_types(state); - } - ; - -version_statement: - /* blank - no #version specified: defaults are already set */ - | VERSION_TOK INTCONSTANT EOL - { - state->process_version_directive(&@2, $2, NULL); - if (state->error) { - YYERROR; - } - } - | VERSION_TOK INTCONSTANT any_identifier EOL - { - state->process_version_directive(&@2, $2, $3); - if (state->error) { - YYERROR; - } - } - ; - -pragma_statement: - PRAGMA_DEBUG_ON EOL - | PRAGMA_DEBUG_OFF EOL - | PRAGMA_OPTIMIZE_ON EOL - | PRAGMA_OPTIMIZE_OFF EOL - | PRAGMA_INVARIANT_ALL EOL - { - /* Pragma invariant(all) cannot be used in a fragment shader. - * - * Page 27 of the GLSL 1.20 spec, Page 53 of the GLSL ES 3.00 spec: - * - * "It is an error to use this pragma in a fragment shader." - */ - if (state->is_version(120, 300) && - state->stage == MESA_SHADER_FRAGMENT) { - _mesa_glsl_error(& @1, state, - "pragma `invariant(all)' cannot be used " - "in a fragment shader."); - } else if (!state->is_version(120, 100)) { - _mesa_glsl_warning(& @1, state, - "pragma `invariant(all)' not supported in %s " - "(GLSL ES 1.00 or GLSL 1.20 required)", - state->get_version_string()); - } else { - state->all_invariant = true; - } - } - ; - -extension_statement_list: - - | extension_statement_list extension_statement - ; - -any_identifier: - IDENTIFIER - | TYPE_IDENTIFIER - | NEW_IDENTIFIER - ; - -extension_statement: - EXTENSION any_identifier COLON any_identifier EOL - { - if (!_mesa_glsl_process_extension($2, & @2, $4, & @4, state)) { - YYERROR; - } - } - ; - -external_declaration_list: - external_declaration - { - /* FINISHME: The NULL test is required because pragmas are set to - * FINISHME: NULL. (See production rule for external_declaration.) - */ - if ($1 != NULL) - state->translation_unit.push_tail(& $1->link); - } - | external_declaration_list external_declaration - { - /* FINISHME: The NULL test is required because pragmas are set to - * FINISHME: NULL. (See production rule for external_declaration.) 
- */ - if ($2 != NULL) - state->translation_unit.push_tail(& $2->link); - } - | external_declaration_list extension_statement { - if (!state->allow_extension_directive_midshader) { - _mesa_glsl_error(& @2, state, - "#extension directive is not allowed " - "in the middle of a shader"); - YYERROR; - } - } - ; - -variable_identifier: - IDENTIFIER - | NEW_IDENTIFIER - ; - -primary_expression: - variable_identifier - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_identifier, NULL, NULL, NULL); - $$->set_location(@1); - $$->primary_expression.identifier = $1; - } - | INTCONSTANT - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_int_constant, NULL, NULL, NULL); - $$->set_location(@1); - $$->primary_expression.int_constant = $1; - } - | UINTCONSTANT - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_uint_constant, NULL, NULL, NULL); - $$->set_location(@1); - $$->primary_expression.uint_constant = $1; - } - | FLOATCONSTANT - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_float_constant, NULL, NULL, NULL); - $$->set_location(@1); - $$->primary_expression.float_constant = $1; - } - | DOUBLECONSTANT - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_double_constant, NULL, NULL, NULL); - $$->set_location(@1); - $$->primary_expression.double_constant = $1; - } - | BOOLCONSTANT - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_bool_constant, NULL, NULL, NULL); - $$->set_location(@1); - $$->primary_expression.bool_constant = $1; - } - | '(' expression ')' - { - $$ = $2; - } - ; - -postfix_expression: - primary_expression - | postfix_expression '[' integer_expression ']' - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_array_index, $1, $3, NULL); - $$->set_location_range(@1, @4); - } - | function_call - { - $$ = $1; - } - | postfix_expression DOT_TOK FIELD_SELECTION - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_field_selection, $1, NULL, NULL); - $$->set_location_range(@1, @3); - $$->primary_expression.identifier = $3; - } - | postfix_expression INC_OP - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_post_inc, $1, NULL, NULL); - $$->set_location_range(@1, @2); - } - | postfix_expression DEC_OP - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_post_dec, $1, NULL, NULL); - $$->set_location_range(@1, @2); - } - ; - -integer_expression: - expression - ; - -function_call: - function_call_or_method - ; - -function_call_or_method: - function_call_generic - ; - -function_call_generic: - function_call_header_with_parameters ')' - | function_call_header_no_parameters ')' - ; - -function_call_header_no_parameters: - function_call_header VOID_TOK - | function_call_header - ; - -function_call_header_with_parameters: - function_call_header assignment_expression - { - $$ = $1; - $$->set_location(@1); - $$->expressions.push_tail(& $2->link); - } - | function_call_header_with_parameters ',' assignment_expression - { - $$ = $1; - $$->set_location(@1); - $$->expressions.push_tail(& $3->link); - } - ; - - // Grammar Note: Constructors look like functions, but lexical - // analysis recognized most of them as keywords. They are now - // recognized through "type_specifier". 
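The grammar note above is the whole trick: because a type_specifier is accepted as a function_identifier, a constructor such as vec4(...) needs no dedicated syntax and parses through the same function-call productions as an ordinary call; the two cases are only distinguished later, semantically. In miniature, and purely as an illustration (the real compiler checks the resolved type, not a string table):

   #include <string.h>

   static int is_constructor(const char *callee)
   {
      static const char *const type_names[] = { "vec2", "vec3", "vec4", "mat4", 0 };
      for (int i = 0; type_names[i]; i++)
         if (strcmp(callee, type_names[i]) == 0)
            return 1;   /* vec4(1.0) constructs a value */
      return 0;          /* normalize(v) is an ordinary call */
   }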
-function_call_header: - function_identifier '(' - ; - -function_identifier: - type_specifier - { - void *ctx = state; - $$ = new(ctx) ast_function_expression($1); - $$->set_location(@1); - } - | postfix_expression - { - void *ctx = state; - $$ = new(ctx) ast_function_expression($1); - $$->set_location(@1); - } - ; - - // Grammar Note: Constructors look like methods, but lexical - // analysis recognized most of them as keywords. They are now - // recognized through "type_specifier". - - // Grammar Note: No traditional style type casts. -unary_expression: - postfix_expression - | INC_OP unary_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_pre_inc, $2, NULL, NULL); - $$->set_location(@1); - } - | DEC_OP unary_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_pre_dec, $2, NULL, NULL); - $$->set_location(@1); - } - | unary_operator unary_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression($1, $2, NULL, NULL); - $$->set_location_range(@1, @2); - } - ; - - // Grammar Note: No '*' or '&' unary ops. Pointers are not supported. -unary_operator: - '+' { $$ = ast_plus; } - | '-' { $$ = ast_neg; } - | '!' { $$ = ast_logic_not; } - | '~' { $$ = ast_bit_not; } - ; - -multiplicative_expression: - unary_expression - | multiplicative_expression '*' unary_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_mul, $1, $3); - $$->set_location_range(@1, @3); - } - | multiplicative_expression '/' unary_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_div, $1, $3); - $$->set_location_range(@1, @3); - } - | multiplicative_expression '%' unary_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_mod, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -additive_expression: - multiplicative_expression - | additive_expression '+' multiplicative_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_add, $1, $3); - $$->set_location_range(@1, @3); - } - | additive_expression '-' multiplicative_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_sub, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -shift_expression: - additive_expression - | shift_expression LEFT_OP additive_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_lshift, $1, $3); - $$->set_location_range(@1, @3); - } - | shift_expression RIGHT_OP additive_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_rshift, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -relational_expression: - shift_expression - | relational_expression '<' shift_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_less, $1, $3); - $$->set_location_range(@1, @3); - } - | relational_expression '>' shift_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_greater, $1, $3); - $$->set_location_range(@1, @3); - } - | relational_expression LE_OP shift_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_lequal, $1, $3); - $$->set_location_range(@1, @3); - } - | relational_expression GE_OP shift_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_gequal, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -equality_expression: - relational_expression - | equality_expression EQ_OP relational_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_equal, $1, $3); - $$->set_location_range(@1, @3); - } - | equality_expression NE_OP relational_expression - { - 
void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_nequal, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -and_expression: - equality_expression - | and_expression '&' equality_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_bit_and, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -exclusive_or_expression: - and_expression - | exclusive_or_expression '^' and_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_bit_xor, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -inclusive_or_expression: - exclusive_or_expression - | inclusive_or_expression '|' exclusive_or_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_bit_or, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -logical_and_expression: - inclusive_or_expression - | logical_and_expression AND_OP inclusive_or_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_logic_and, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -logical_xor_expression: - logical_and_expression - | logical_xor_expression XOR_OP logical_and_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_logic_xor, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -logical_or_expression: - logical_xor_expression - | logical_or_expression OR_OP logical_xor_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression_bin(ast_logic_or, $1, $3); - $$->set_location_range(@1, @3); - } - ; - -conditional_expression: - logical_or_expression - | logical_or_expression '?' expression ':' assignment_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression(ast_conditional, $1, $3, $5); - $$->set_location_range(@1, @5); - } - ; - -assignment_expression: - conditional_expression - | unary_expression assignment_operator assignment_expression - { - void *ctx = state; - $$ = new(ctx) ast_expression($2, $1, $3, NULL); - $$->set_location_range(@1, @3); - } - ; - -assignment_operator: - '=' { $$ = ast_assign; } - | MUL_ASSIGN { $$ = ast_mul_assign; } - | DIV_ASSIGN { $$ = ast_div_assign; } - | MOD_ASSIGN { $$ = ast_mod_assign; } - | ADD_ASSIGN { $$ = ast_add_assign; } - | SUB_ASSIGN { $$ = ast_sub_assign; } - | LEFT_ASSIGN { $$ = ast_ls_assign; } - | RIGHT_ASSIGN { $$ = ast_rs_assign; } - | AND_ASSIGN { $$ = ast_and_assign; } - | XOR_ASSIGN { $$ = ast_xor_assign; } - | OR_ASSIGN { $$ = ast_or_assign; } - ; - -expression: - assignment_expression - { - $$ = $1; - } - | expression ',' assignment_expression - { - void *ctx = state; - if ($1->oper != ast_sequence) { - $$ = new(ctx) ast_expression(ast_sequence, NULL, NULL, NULL); - $$->set_location_range(@1, @3); - $$->expressions.push_tail(& $1->link); - } else { - $$ = $1; - } - - $$->expressions.push_tail(& $3->link); - } - ; - -constant_expression: - conditional_expression - ; - -declaration: - function_prototype ';' - { - state->symbols->pop_scope(); - $$ = $1; - } - | init_declarator_list ';' - { - $$ = $1; - } - | PRECISION precision_qualifier type_specifier ';' - { - $3->default_precision = $2; - $$ = $3; - } - | interface_block - { - $$ = $1; - } - ; - -function_prototype: - function_declarator ')' - ; - -function_declarator: - function_header - | function_header_with_parameters - ; - -function_header_with_parameters: - function_header parameter_declaration - { - $$ = $1; - $$->parameters.push_tail(& $2->link); - } - | function_header_with_parameters ',' parameter_declaration - { - $$ = $1; - $$->parameters.push_tail(& $3->link); - } - ; - -function_header: - 
fully_specified_type variable_identifier '(' - { - void *ctx = state; - $$ = new(ctx) ast_function(); - $$->set_location(@2); - $$->return_type = $1; - $$->identifier = $2; - - if ($1->qualifier.flags.q.subroutine) { - /* add type for IDENTIFIER search */ - state->symbols->add_type($2, glsl_type::get_subroutine_instance($2)); - } else - state->symbols->add_function(new(state) ir_function($2)); - state->symbols->push_scope(); - } - ; - -parameter_declarator: - type_specifier any_identifier - { - void *ctx = state; - $$ = new(ctx) ast_parameter_declarator(); - $$->set_location_range(@1, @2); - $$->type = new(ctx) ast_fully_specified_type(); - $$->type->set_location(@1); - $$->type->specifier = $1; - $$->identifier = $2; - } - | type_specifier any_identifier array_specifier - { - void *ctx = state; - $$ = new(ctx) ast_parameter_declarator(); - $$->set_location_range(@1, @3); - $$->type = new(ctx) ast_fully_specified_type(); - $$->type->set_location(@1); - $$->type->specifier = $1; - $$->identifier = $2; - $$->array_specifier = $3; - } - ; - -parameter_declaration: - parameter_qualifier parameter_declarator - { - $$ = $2; - $$->type->qualifier = $1; - } - | parameter_qualifier parameter_type_specifier - { - void *ctx = state; - $$ = new(ctx) ast_parameter_declarator(); - $$->set_location(@2); - $$->type = new(ctx) ast_fully_specified_type(); - $$->type->set_location_range(@1, @2); - $$->type->qualifier = $1; - $$->type->specifier = $2; - } - ; - -parameter_qualifier: - /* empty */ - { - memset(& $$, 0, sizeof($$)); - } - | CONST_TOK parameter_qualifier - { - if ($2.flags.q.constant) - _mesa_glsl_error(&@1, state, "duplicate const qualifier"); - - $$ = $2; - $$.flags.q.constant = 1; - } - | PRECISE parameter_qualifier - { - if ($2.flags.q.precise) - _mesa_glsl_error(&@1, state, "duplicate precise qualifier"); - - $$ = $2; - $$.flags.q.precise = 1; - } - | parameter_direction_qualifier parameter_qualifier - { - if (($1.flags.q.in || $1.flags.q.out) && ($2.flags.q.in || $2.flags.q.out)) - _mesa_glsl_error(&@1, state, "duplicate in/out/inout qualifier"); - - if (!state->has_420pack_or_es31() && $2.flags.q.constant) - _mesa_glsl_error(&@1, state, "in/out/inout must come after const " - "or precise"); - - $$ = $1; - $$.merge_qualifier(&@1, state, $2, false); - } - | precision_qualifier parameter_qualifier - { - if ($2.precision != ast_precision_none) - _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); - - if (!state->has_420pack_or_es31() && - $2.flags.i != 0) - _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); - - $$ = $2; - $$.precision = $1; - } - | memory_qualifier parameter_qualifier - { - $$ = $1; - $$.merge_qualifier(&@1, state, $2, false); - } - -parameter_direction_qualifier: - IN_TOK - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.in = 1; - } - | OUT_TOK - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.out = 1; - } - | INOUT_TOK - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.in = 1; - $$.flags.q.out = 1; - } - ; - -parameter_type_specifier: - type_specifier - ; - -init_declarator_list: - single_declaration - | init_declarator_list ',' any_identifier - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($3, NULL, NULL); - decl->set_location(@3); - - $$ = $1; - $$->declarations.push_tail(&decl->link); - state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); - } - | init_declarator_list ',' any_identifier array_specifier - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($3, $4, NULL); - 
decl->set_location_range(@3, @4); - - $$ = $1; - $$->declarations.push_tail(&decl->link); - state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); - } - | init_declarator_list ',' any_identifier array_specifier '=' initializer - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($3, $4, $6); - decl->set_location_range(@3, @4); - - $$ = $1; - $$->declarations.push_tail(&decl->link); - state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); - } - | init_declarator_list ',' any_identifier '=' initializer - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($3, NULL, $5); - decl->set_location(@3); - - $$ = $1; - $$->declarations.push_tail(&decl->link); - state->symbols->add_variable(new(state) ir_variable(NULL, $3, ir_var_auto)); - } - ; - - // Grammar Note: No 'enum', or 'typedef'. -single_declaration: - fully_specified_type - { - void *ctx = state; - /* Empty declaration list is valid. */ - $$ = new(ctx) ast_declarator_list($1); - $$->set_location(@1); - } - | fully_specified_type any_identifier - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($2, NULL, NULL); - decl->set_location(@2); - - $$ = new(ctx) ast_declarator_list($1); - $$->set_location_range(@1, @2); - $$->declarations.push_tail(&decl->link); - } - | fully_specified_type any_identifier array_specifier - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($2, $3, NULL); - decl->set_location_range(@2, @3); - - $$ = new(ctx) ast_declarator_list($1); - $$->set_location_range(@1, @3); - $$->declarations.push_tail(&decl->link); - } - | fully_specified_type any_identifier array_specifier '=' initializer - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($2, $3, $5); - decl->set_location_range(@2, @3); - - $$ = new(ctx) ast_declarator_list($1); - $$->set_location_range(@1, @3); - $$->declarations.push_tail(&decl->link); - } - | fully_specified_type any_identifier '=' initializer - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($2, NULL, $4); - decl->set_location(@2); - - $$ = new(ctx) ast_declarator_list($1); - $$->set_location_range(@1, @2); - $$->declarations.push_tail(&decl->link); - } - | INVARIANT variable_identifier - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($2, NULL, NULL); - decl->set_location(@2); - - $$ = new(ctx) ast_declarator_list(NULL); - $$->set_location_range(@1, @2); - $$->invariant = true; - - $$->declarations.push_tail(&decl->link); - } - | PRECISE variable_identifier - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($2, NULL, NULL); - decl->set_location(@2); - - $$ = new(ctx) ast_declarator_list(NULL); - $$->set_location_range(@1, @2); - $$->precise = true; - - $$->declarations.push_tail(&decl->link); - } - ; - -fully_specified_type: - type_specifier - { - void *ctx = state; - $$ = new(ctx) ast_fully_specified_type(); - $$->set_location(@1); - $$->specifier = $1; - } - | type_qualifier type_specifier - { - void *ctx = state; - $$ = new(ctx) ast_fully_specified_type(); - $$->set_location_range(@1, @2); - $$->qualifier = $1; - $$->specifier = $2; - if ($$->specifier->structure != NULL && - $$->specifier->structure->is_declaration) { - $$->specifier->structure->layout = &$$->qualifier; - } - } - ; - -layout_qualifier: - LAYOUT_TOK '(' layout_qualifier_id_list ')' - { - $$ = $3; - } - ; - -layout_qualifier_id_list: - layout_qualifier_id - | layout_qualifier_id_list ',' 
layout_qualifier_id - { - $$ = $1; - if (!$$.merge_qualifier(& @3, state, $3, true)) { - YYERROR; - } - } - ; - -layout_qualifier_id: - any_identifier - { - memset(& $$, 0, sizeof($$)); - - /* Layout qualifiers for ARB_fragment_coord_conventions. */ - if (!$$.flags.i && (state->ARB_fragment_coord_conventions_enable || - state->is_version(150, 0))) { - if (match_layout_qualifier($1, "origin_upper_left", state) == 0) { - $$.flags.q.origin_upper_left = 1; - } else if (match_layout_qualifier($1, "pixel_center_integer", - state) == 0) { - $$.flags.q.pixel_center_integer = 1; - } - - if ($$.flags.i && state->ARB_fragment_coord_conventions_warn) { - _mesa_glsl_warning(& @1, state, - "GL_ARB_fragment_coord_conventions layout " - "identifier `%s' used", $1); - } - } - - /* Layout qualifiers for AMD/ARB_conservative_depth. */ - if (!$$.flags.i && - (state->AMD_conservative_depth_enable || - state->ARB_conservative_depth_enable || - state->is_version(420, 0))) { - if (match_layout_qualifier($1, "depth_any", state) == 0) { - $$.flags.q.depth_any = 1; - } else if (match_layout_qualifier($1, "depth_greater", state) == 0) { - $$.flags.q.depth_greater = 1; - } else if (match_layout_qualifier($1, "depth_less", state) == 0) { - $$.flags.q.depth_less = 1; - } else if (match_layout_qualifier($1, "depth_unchanged", - state) == 0) { - $$.flags.q.depth_unchanged = 1; - } - - if ($$.flags.i && state->AMD_conservative_depth_warn) { - _mesa_glsl_warning(& @1, state, - "GL_AMD_conservative_depth " - "layout qualifier `%s' is used", $1); - } - if ($$.flags.i && state->ARB_conservative_depth_warn) { - _mesa_glsl_warning(& @1, state, - "GL_ARB_conservative_depth " - "layout qualifier `%s' is used", $1); - } - } - - /* See also interface_block_layout_qualifier. */ - if (!$$.flags.i && state->has_uniform_buffer_objects()) { - if (match_layout_qualifier($1, "std140", state) == 0) { - $$.flags.q.std140 = 1; - } else if (match_layout_qualifier($1, "shared", state) == 0) { - $$.flags.q.shared = 1; - } else if (match_layout_qualifier($1, "std430", state) == 0) { - $$.flags.q.std430 = 1; - } else if (match_layout_qualifier($1, "column_major", state) == 0) { - $$.flags.q.column_major = 1; - /* "row_major" is a reserved word in GLSL 1.30+. Its token is parsed - * below in the interface_block_layout_qualifier rule. - * - * It is not a reserved word in GLSL ES 3.00, so it's handled here as - * an identifier. - * - * Also, this takes care of alternate capitalizations of - * "row_major" (which is necessary because layout qualifiers - * are case-insensitive in desktop GLSL). - */ - } else if (match_layout_qualifier($1, "row_major", state) == 0) { - $$.flags.q.row_major = 1; - /* "packed" is a reserved word in GLSL, and its token is - * parsed below in the interface_block_layout_qualifier rule. - * However, we must take care of alternate capitalizations of - * "packed", because layout qualifiers are case-insensitive - * in desktop GLSL. - */ - } else if (match_layout_qualifier($1, "packed", state) == 0) { - $$.flags.q.packed = 1; - } - - if ($$.flags.i && state->ARB_uniform_buffer_object_warn) { - _mesa_glsl_warning(& @1, state, - "#version 140 / GL_ARB_uniform_buffer_object " - "layout qualifier `%s' is used", $1); - } - } - - /* Layout qualifiers for GLSL 1.50 geometry shaders. 
*/ - if (!$$.flags.i) { - static const struct { - const char *s; - GLenum e; - } map[] = { - { "points", GL_POINTS }, - { "lines", GL_LINES }, - { "lines_adjacency", GL_LINES_ADJACENCY }, - { "line_strip", GL_LINE_STRIP }, - { "triangles", GL_TRIANGLES }, - { "triangles_adjacency", GL_TRIANGLES_ADJACENCY }, - { "triangle_strip", GL_TRIANGLE_STRIP }, - }; - for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { - if (match_layout_qualifier($1, map[i].s, state) == 0) { - $$.flags.q.prim_type = 1; - $$.prim_type = map[i].e; - break; - } - } - - if ($$.flags.i && !state->has_geometry_shader()) { - _mesa_glsl_error(& @1, state, "#version 150 layout " - "qualifier `%s' used", $1); - } - } - - /* Layout qualifiers for ARB_shader_image_load_store. */ - if (state->ARB_shader_image_load_store_enable || - state->is_version(420, 310)) { - if (!$$.flags.i) { - static const struct { - const char *name; - GLenum format; - glsl_base_type base_type; - /** Minimum desktop GLSL version required for the image - * format. Use 130 if already present in the original - * ARB extension. - */ - unsigned required_glsl; - /** Minimum GLSL ES version required for the image format. */ - unsigned required_essl; - } map[] = { - { "rgba32f", GL_RGBA32F, GLSL_TYPE_FLOAT, 130, 310 }, - { "rgba16f", GL_RGBA16F, GLSL_TYPE_FLOAT, 130, 310 }, - { "rg32f", GL_RG32F, GLSL_TYPE_FLOAT, 130, 0 }, - { "rg16f", GL_RG16F, GLSL_TYPE_FLOAT, 130, 0 }, - { "r11f_g11f_b10f", GL_R11F_G11F_B10F, GLSL_TYPE_FLOAT, 130, 0 }, - { "r32f", GL_R32F, GLSL_TYPE_FLOAT, 130, 310 }, - { "r16f", GL_R16F, GLSL_TYPE_FLOAT, 130, 0 }, - { "rgba32ui", GL_RGBA32UI, GLSL_TYPE_UINT, 130, 310 }, - { "rgba16ui", GL_RGBA16UI, GLSL_TYPE_UINT, 130, 310 }, - { "rgb10_a2ui", GL_RGB10_A2UI, GLSL_TYPE_UINT, 130, 0 }, - { "rgba8ui", GL_RGBA8UI, GLSL_TYPE_UINT, 130, 310 }, - { "rg32ui", GL_RG32UI, GLSL_TYPE_UINT, 130, 0 }, - { "rg16ui", GL_RG16UI, GLSL_TYPE_UINT, 130, 0 }, - { "rg8ui", GL_RG8UI, GLSL_TYPE_UINT, 130, 0 }, - { "r32ui", GL_R32UI, GLSL_TYPE_UINT, 130, 310 }, - { "r16ui", GL_R16UI, GLSL_TYPE_UINT, 130, 0 }, - { "r8ui", GL_R8UI, GLSL_TYPE_UINT, 130, 0 }, - { "rgba32i", GL_RGBA32I, GLSL_TYPE_INT, 130, 310 }, - { "rgba16i", GL_RGBA16I, GLSL_TYPE_INT, 130, 310 }, - { "rgba8i", GL_RGBA8I, GLSL_TYPE_INT, 130, 310 }, - { "rg32i", GL_RG32I, GLSL_TYPE_INT, 130, 0 }, - { "rg16i", GL_RG16I, GLSL_TYPE_INT, 130, 0 }, - { "rg8i", GL_RG8I, GLSL_TYPE_INT, 130, 0 }, - { "r32i", GL_R32I, GLSL_TYPE_INT, 130, 310 }, - { "r16i", GL_R16I, GLSL_TYPE_INT, 130, 0 }, - { "r8i", GL_R8I, GLSL_TYPE_INT, 130, 0 }, - { "rgba16", GL_RGBA16, GLSL_TYPE_FLOAT, 130, 0 }, - { "rgb10_a2", GL_RGB10_A2, GLSL_TYPE_FLOAT, 130, 0 }, - { "rgba8", GL_RGBA8, GLSL_TYPE_FLOAT, 130, 310 }, - { "rg16", GL_RG16, GLSL_TYPE_FLOAT, 130, 0 }, - { "rg8", GL_RG8, GLSL_TYPE_FLOAT, 130, 0 }, - { "r16", GL_R16, GLSL_TYPE_FLOAT, 130, 0 }, - { "r8", GL_R8, GLSL_TYPE_FLOAT, 130, 0 }, - { "rgba16_snorm", GL_RGBA16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, - { "rgba8_snorm", GL_RGBA8_SNORM, GLSL_TYPE_FLOAT, 130, 310 }, - { "rg16_snorm", GL_RG16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, - { "rg8_snorm", GL_RG8_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, - { "r16_snorm", GL_R16_SNORM, GLSL_TYPE_FLOAT, 130, 0 }, - { "r8_snorm", GL_R8_SNORM, GLSL_TYPE_FLOAT, 130, 0 } - }; - - for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { - if (state->is_version(map[i].required_glsl, - map[i].required_essl) && - match_layout_qualifier($1, map[i].name, state) == 0) { - $$.flags.q.explicit_image_format = 1; - $$.image_format = map[i].format; - $$.image_base_type = 
map[i].base_type; - break; - } - } - } - - if (!$$.flags.i && - match_layout_qualifier($1, "early_fragment_tests", state) == 0) { - /* From section 4.4.1.3 of the GLSL 4.50 specification - * (Fragment Shader Inputs): - * - * "Fragment shaders also allow the following layout - * qualifier on in only (not with variable declarations) - * layout-qualifier-id - * early_fragment_tests - * [...]" - */ - if (state->stage != MESA_SHADER_FRAGMENT) { - _mesa_glsl_error(& @1, state, - "early_fragment_tests layout qualifier only " - "valid in fragment shaders"); - } - - $$.flags.q.early_fragment_tests = 1; - } - } - - /* Layout qualifiers for tessellation evaluation shaders. */ - if (!$$.flags.i) { - struct { - const char *s; - GLenum e; - } map[] = { - /* triangles already parsed by gs-specific code */ - { "quads", GL_QUADS }, - { "isolines", GL_ISOLINES }, - }; - for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { - if (match_layout_qualifier($1, map[i].s, state) == 0) { - $$.flags.q.prim_type = 1; - $$.prim_type = map[i].e; - break; - } - } - - if ($$.flags.i && - !state->ARB_tessellation_shader_enable && - !state->is_version(400, 0)) { - _mesa_glsl_error(& @1, state, - "primitive mode qualifier `%s' requires " - "GLSL 4.00 or ARB_tessellation_shader", $1); - } - } - if (!$$.flags.i) { - struct { - const char *s; - GLenum e; - } map[] = { - { "equal_spacing", GL_EQUAL }, - { "fractional_odd_spacing", GL_FRACTIONAL_ODD }, - { "fractional_even_spacing", GL_FRACTIONAL_EVEN }, - }; - for (unsigned i = 0; i < ARRAY_SIZE(map); i++) { - if (match_layout_qualifier($1, map[i].s, state) == 0) { - $$.flags.q.vertex_spacing = 1; - $$.vertex_spacing = map[i].e; - break; - } - } - - if ($$.flags.i && - !state->ARB_tessellation_shader_enable && - !state->is_version(400, 0)) { - _mesa_glsl_error(& @1, state, - "vertex spacing qualifier `%s' requires " - "GLSL 4.00 or ARB_tessellation_shader", $1); - } - } - if (!$$.flags.i) { - if (match_layout_qualifier($1, "cw", state) == 0) { - $$.flags.q.ordering = 1; - $$.ordering = GL_CW; - } else if (match_layout_qualifier($1, "ccw", state) == 0) { - $$.flags.q.ordering = 1; - $$.ordering = GL_CCW; - } - - if ($$.flags.i && - !state->ARB_tessellation_shader_enable && - !state->is_version(400, 0)) { - _mesa_glsl_error(& @1, state, - "ordering qualifier `%s' requires " - "GLSL 4.00 or ARB_tessellation_shader", $1); - } - } - if (!$$.flags.i) { - if (match_layout_qualifier($1, "point_mode", state) == 0) { - $$.flags.q.point_mode = 1; - $$.point_mode = true; - } - - if ($$.flags.i && - !state->ARB_tessellation_shader_enable && - !state->is_version(400, 0)) { - _mesa_glsl_error(& @1, state, - "qualifier `point_mode' requires " - "GLSL 4.00 or ARB_tessellation_shader"); - } - } - - if (!$$.flags.i) { - _mesa_glsl_error(& @1, state, "unrecognized layout identifier " - "`%s'", $1); - YYERROR; - } - } - | any_identifier '=' constant_expression - { - memset(& $$, 0, sizeof($$)); - void *ctx = state; - - if ($3->oper != ast_int_constant && - $3->oper != ast_uint_constant && - !state->has_enhanced_layouts()) { - _mesa_glsl_error(& @1, state, - "compile-time constant expressions require " - "GLSL 4.40 or ARB_enhanced_layouts"); - } - - if (match_layout_qualifier("location", $1, state) == 0) { - $$.flags.q.explicit_location = 1; - - if ($$.flags.q.attribute == 1 && - state->ARB_explicit_attrib_location_warn) { - _mesa_glsl_warning(& @1, state, - "GL_ARB_explicit_attrib_location layout " - "identifier `%s' used", $1); - } - $$.location = $3; - } - - if (match_layout_qualifier("index", $1, 
state) == 0) { - if (state->es_shader && !state->EXT_blend_func_extended_enable) { - _mesa_glsl_error(& @3, state, "index layout qualifier requires EXT_blend_func_extended"); - YYERROR; - } - - $$.flags.q.explicit_index = 1; - $$.index = $3; - } - - if ((state->has_420pack_or_es31() || - state->has_atomic_counters() || - state->has_shader_storage_buffer_objects()) && - match_layout_qualifier("binding", $1, state) == 0) { - $$.flags.q.explicit_binding = 1; - $$.binding = $3; - } - - if (state->has_atomic_counters() && - match_layout_qualifier("offset", $1, state) == 0) { - $$.flags.q.explicit_offset = 1; - $$.offset = $3; - } - - if (match_layout_qualifier("max_vertices", $1, state) == 0) { - $$.flags.q.max_vertices = 1; - $$.max_vertices = new(ctx) ast_layout_expression(@1, $3); - if (!state->has_geometry_shader()) { - _mesa_glsl_error(& @3, state, - "#version 150 max_vertices qualifier " - "specified", $3); - } - } - - if (state->stage == MESA_SHADER_GEOMETRY) { - if (match_layout_qualifier("stream", $1, state) == 0 && - state->check_explicit_attrib_stream_allowed(& @3)) { - $$.flags.q.stream = 1; - $$.flags.q.explicit_stream = 1; - $$.stream = $3; - } - } - - static const char * const local_size_qualifiers[3] = { - "local_size_x", - "local_size_y", - "local_size_z", - }; - for (int i = 0; i < 3; i++) { - if (match_layout_qualifier(local_size_qualifiers[i], $1, - state) == 0) { - if (!state->has_compute_shader()) { - _mesa_glsl_error(& @3, state, - "%s qualifier requires GLSL 4.30 or " - "GLSL ES 3.10 or ARB_compute_shader", - local_size_qualifiers[i]); - YYERROR; - } else { - $$.flags.q.local_size |= (1 << i); - $$.local_size[i] = new(ctx) ast_layout_expression(@1, $3); - } - break; - } - } - - if (match_layout_qualifier("invocations", $1, state) == 0) { - $$.flags.q.invocations = 1; - $$.invocations = new(ctx) ast_layout_expression(@1, $3); - if (!state->is_version(400, 0) && - !state->ARB_gpu_shader5_enable) { - _mesa_glsl_error(& @3, state, - "GL_ARB_gpu_shader5 invocations " - "qualifier specified", $3); - } - } - - /* Layout qualifiers for tessellation control shaders. */ - if (match_layout_qualifier("vertices", $1, state) == 0) { - $$.flags.q.vertices = 1; - $$.vertices = new(ctx) ast_layout_expression(@1, $3); - if (!state->ARB_tessellation_shader_enable && - !state->is_version(400, 0)) { - _mesa_glsl_error(& @1, state, - "vertices qualifier requires GLSL 4.00 or " - "ARB_tessellation_shader"); - } - } - - /* If the identifier didn't match any known layout identifiers, - * emit an error. - */ - if (!$$.flags.i) { - _mesa_glsl_error(& @1, state, "unrecognized layout identifier " - "`%s'", $1); - YYERROR; - } - } - | interface_block_layout_qualifier - { - $$ = $1; - /* Layout qualifiers for ARB_uniform_buffer_object. */ - if ($$.flags.q.uniform && !state->has_uniform_buffer_objects()) { - _mesa_glsl_error(& @1, state, - "#version 140 / GL_ARB_uniform_buffer_object " - "layout qualifier `%s' is used", $1); - } else if ($$.flags.q.uniform && state->ARB_uniform_buffer_object_warn) { - _mesa_glsl_warning(& @1, state, - "#version 140 / GL_ARB_uniform_buffer_object " - "layout qualifier `%s' is used", $1); - } - } - ; - -/* This is a separate language rule because we parse these as tokens - * (due to them being reserved keywords) instead of identifiers like - * most qualifiers. See the any_identifier path of - * layout_qualifier_id for the others. 
- * - * Note that since layout qualifiers are case-insensitive in desktop - * GLSL, all of these qualifiers need to be handled as identifiers as - * well (by the any_identifier path of layout_qualifier_id). - */ -interface_block_layout_qualifier: - ROW_MAJOR - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.row_major = 1; - } - | PACKED_TOK - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.packed = 1; - } - | SHARED - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.shared = 1; - } - ; - -subroutine_qualifier: - SUBROUTINE - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.subroutine = 1; - } - | SUBROUTINE '(' subroutine_type_list ')' - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.subroutine_def = 1; - $$.subroutine_list = $3; - } - ; - -subroutine_type_list: - any_identifier - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($1, NULL, NULL); - decl->set_location(@1); - - $$ = new(ctx) ast_subroutine_list(); - $$->declarations.push_tail(&decl->link); - } - | subroutine_type_list ',' any_identifier - { - void *ctx = state; - ast_declaration *decl = new(ctx) ast_declaration($3, NULL, NULL); - decl->set_location(@3); - - $$ = $1; - $$->declarations.push_tail(&decl->link); - } - ; - -interpolation_qualifier: - SMOOTH - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.smooth = 1; - } - | FLAT - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.flat = 1; - } - | NOPERSPECTIVE - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.noperspective = 1; - } - ; - -type_qualifier: - /* Single qualifiers */ - INVARIANT - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.invariant = 1; - } - | PRECISE - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.precise = 1; - } - | auxiliary_storage_qualifier - | storage_qualifier - | interpolation_qualifier - | layout_qualifier - | memory_qualifier - | subroutine_qualifier - | precision_qualifier - { - memset(&$$, 0, sizeof($$)); - $$.precision = $1; - } - - /* Multiple qualifiers: - * In GLSL 4.20, these can be specified in any order. In earlier versions, - * they appear in this order (see GLSL 1.50 section 4.7 & comments below): - * - * invariant interpolation auxiliary storage precision ...or... - * layout storage precision - * - * Each qualifier's rule ensures that the accumulated qualifiers on the right - * side don't contain any that must appear on the left hand side. - * For example, when processing a storage qualifier, we check that there are - * no auxiliary, interpolation, layout, invariant, or precise qualifiers to the right. - */ - | PRECISE type_qualifier - { - if ($2.flags.q.precise) - _mesa_glsl_error(&@1, state, "duplicate \"precise\" qualifier"); - - $$ = $2; - $$.flags.q.precise = 1; - } - | INVARIANT type_qualifier - { - if ($2.flags.q.invariant) - _mesa_glsl_error(&@1, state, "duplicate \"invariant\" qualifier"); - - if (!state->has_420pack_or_es31() && $2.flags.q.precise) - _mesa_glsl_error(&@1, state, - "\"invariant\" must come after \"precise\""); - - $$ = $2; - $$.flags.q.invariant = 1; - - /* GLSL ES 3.00 spec, section 4.6.1 "The Invariant Qualifier": - * - * "Only variables output from a shader can be candidates for invariance. - * This includes user-defined output variables and the built-in output - * variables. As only outputs can be declared as invariant, an invariant - * output from one shader stage will still match an input of a subsequent - * stage without the input being declared as invariant." 
- */ - if (state->es_shader && state->language_version >= 300 && $$.flags.q.in) - _mesa_glsl_error(&@1, state, "invariant qualifiers cannot be used with shader inputs"); - } - | interpolation_qualifier type_qualifier - { - /* Section 4.3 of the GLSL 1.40 specification states: - * "...qualified with one of these interpolation qualifiers" - * - * GLSL 1.30 claims to allow "one or more", but insists that: - * "These interpolation qualifiers may only precede the qualifiers in, - * centroid in, out, or centroid out in a declaration." - * - * ...which means that e.g. smooth can't precede smooth, so there can be - * only one after all, and the 1.40 text is a clarification, not a change. - */ - if ($2.has_interpolation()) - _mesa_glsl_error(&@1, state, "duplicate interpolation qualifier"); - - if (!state->has_420pack_or_es31() && - ($2.flags.q.precise || $2.flags.q.invariant)) { - _mesa_glsl_error(&@1, state, "interpolation qualifiers must come " - "after \"precise\" or \"invariant\""); - } - - $$ = $1; - $$.merge_qualifier(&@1, state, $2, false); - } - | layout_qualifier type_qualifier - { - /* In the absence of ARB_shading_language_420pack, layout qualifiers may - * appear no later than auxiliary storage qualifiers. There is no - * particularly clear spec language mandating this, but in all examples - * the layout qualifier precedes the storage qualifier. - * - * We allow combinations of layout with interpolation, invariant or - * precise qualifiers since these are useful in ARB_separate_shader_objects. - * There is no clear spec guidance on this either. - */ - if (!state->has_420pack_or_es31() && $2.has_layout()) - _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); - - $$ = $1; - $$.merge_qualifier(&@1, state, $2, false); - } - | subroutine_qualifier type_qualifier - { - $$ = $1; - $$.merge_qualifier(&@1, state, $2, false); - } - | auxiliary_storage_qualifier type_qualifier - { - if ($2.has_auxiliary_storage()) { - _mesa_glsl_error(&@1, state, - "duplicate auxiliary storage qualifier (centroid or sample)"); - } - - if (!state->has_420pack_or_es31() && - ($2.flags.q.precise || $2.flags.q.invariant || - $2.has_interpolation() || $2.has_layout())) { - _mesa_glsl_error(&@1, state, "auxiliary storage qualifiers must come " - "just before storage qualifiers"); - } - $$ = $1; - $$.merge_qualifier(&@1, state, $2, false); - } - | storage_qualifier type_qualifier - { - /* Section 4.3 of the GLSL 1.20 specification states: - * "Variable declarations may have a storage qualifier specified..." - * 1.30 clarifies this to "may have one storage qualifier". 
- */ - if ($2.has_storage()) - _mesa_glsl_error(&@1, state, "duplicate storage qualifier"); - - if (!state->has_420pack_or_es31() && - ($2.flags.q.precise || $2.flags.q.invariant || $2.has_interpolation() || - $2.has_layout() || $2.has_auxiliary_storage())) { - _mesa_glsl_error(&@1, state, "storage qualifiers must come after " - "precise, invariant, interpolation, layout and auxiliary " - "storage qualifiers"); - } - - $$ = $1; - $$.merge_qualifier(&@1, state, $2, false); - } - | precision_qualifier type_qualifier - { - if ($2.precision != ast_precision_none) - _mesa_glsl_error(&@1, state, "duplicate precision qualifier"); - - if (!(state->has_420pack_or_es31()) && - $2.flags.i != 0) - _mesa_glsl_error(&@1, state, "precision qualifiers must come last"); - - $$ = $2; - $$.precision = $1; - } - | memory_qualifier type_qualifier - { - $$ = $1; - $$.merge_qualifier(&@1, state, $2, false); - } - ; - -auxiliary_storage_qualifier: - CENTROID - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.centroid = 1; - } - | SAMPLE - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.sample = 1; - } - | PATCH - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.patch = 1; - } - -storage_qualifier: - CONST_TOK - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.constant = 1; - } - | ATTRIBUTE - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.attribute = 1; - } - | VARYING - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.varying = 1; - } - | IN_TOK - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.in = 1; - } - | OUT_TOK - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.out = 1; - - if (state->stage == MESA_SHADER_GEOMETRY && - state->has_explicit_attrib_stream()) { - /* Section 4.3.8.2 (Output Layout Qualifiers) of the GLSL 4.00 - * spec says: - * - * "If the block or variable is declared with the stream - * identifier, it is associated with the specified stream; - * otherwise, it is associated with the current default stream." 
- */ - $$.flags.q.stream = 1; - $$.flags.q.explicit_stream = 0; - $$.stream = state->out_qualifier->stream; - } - } - | UNIFORM - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.uniform = 1; - } - | BUFFER - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.buffer = 1; - } - | SHARED - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.shared_storage = 1; - } - ; - -memory_qualifier: - COHERENT - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.coherent = 1; - } - | VOLATILE - { - memset(& $$, 0, sizeof($$)); - $$.flags.q._volatile = 1; - } - | RESTRICT - { - STATIC_ASSERT(sizeof($$.flags.q) <= sizeof($$.flags.i)); - memset(& $$, 0, sizeof($$)); - $$.flags.q.restrict_flag = 1; - } - | READONLY - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.read_only = 1; - } - | WRITEONLY - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.write_only = 1; - } - ; - -array_specifier: - '[' ']' - { - void *ctx = state; - $$ = new(ctx) ast_array_specifier(@1, new(ctx) ast_expression( - ast_unsized_array_dim, NULL, - NULL, NULL)); - $$->set_location_range(@1, @2); - } - | '[' constant_expression ']' - { - void *ctx = state; - $$ = new(ctx) ast_array_specifier(@1, $2); - $$->set_location_range(@1, @3); - } - | array_specifier '[' ']' - { - void *ctx = state; - $$ = $1; - - if (state->check_arrays_of_arrays_allowed(& @1)) { - $$->add_dimension(new(ctx) ast_expression(ast_unsized_array_dim, NULL, - NULL, NULL)); - } - } - | array_specifier '[' constant_expression ']' - { - $$ = $1; - - if (state->check_arrays_of_arrays_allowed(& @1)) { - $$->add_dimension($3); - } - } - ; - -type_specifier: - type_specifier_nonarray - | type_specifier_nonarray array_specifier - { - $$ = $1; - $$->array_specifier = $2; - } - ; - -type_specifier_nonarray: - basic_type_specifier_nonarray - { - void *ctx = state; - $$ = new(ctx) ast_type_specifier($1); - $$->set_location(@1); - } - | struct_specifier - { - void *ctx = state; - $$ = new(ctx) ast_type_specifier($1); - $$->set_location(@1); - } - | TYPE_IDENTIFIER - { - void *ctx = state; - $$ = new(ctx) ast_type_specifier($1); - $$->set_location(@1); - } - ; - -basic_type_specifier_nonarray: - VOID_TOK { $$ = "void"; } - | FLOAT_TOK { $$ = "float"; } - | DOUBLE_TOK { $$ = "double"; } - | INT_TOK { $$ = "int"; } - | UINT_TOK { $$ = "uint"; } - | BOOL_TOK { $$ = "bool"; } - | VEC2 { $$ = "vec2"; } - | VEC3 { $$ = "vec3"; } - | VEC4 { $$ = "vec4"; } - | BVEC2 { $$ = "bvec2"; } - | BVEC3 { $$ = "bvec3"; } - | BVEC4 { $$ = "bvec4"; } - | IVEC2 { $$ = "ivec2"; } - | IVEC3 { $$ = "ivec3"; } - | IVEC4 { $$ = "ivec4"; } - | UVEC2 { $$ = "uvec2"; } - | UVEC3 { $$ = "uvec3"; } - | UVEC4 { $$ = "uvec4"; } - | DVEC2 { $$ = "dvec2"; } - | DVEC3 { $$ = "dvec3"; } - | DVEC4 { $$ = "dvec4"; } - | MAT2X2 { $$ = "mat2"; } - | MAT2X3 { $$ = "mat2x3"; } - | MAT2X4 { $$ = "mat2x4"; } - | MAT3X2 { $$ = "mat3x2"; } - | MAT3X3 { $$ = "mat3"; } - | MAT3X4 { $$ = "mat3x4"; } - | MAT4X2 { $$ = "mat4x2"; } - | MAT4X3 { $$ = "mat4x3"; } - | MAT4X4 { $$ = "mat4"; } - | DMAT2X2 { $$ = "dmat2"; } - | DMAT2X3 { $$ = "dmat2x3"; } - | DMAT2X4 { $$ = "dmat2x4"; } - | DMAT3X2 { $$ = "dmat3x2"; } - | DMAT3X3 { $$ = "dmat3"; } - | DMAT3X4 { $$ = "dmat3x4"; } - | DMAT4X2 { $$ = "dmat4x2"; } - | DMAT4X3 { $$ = "dmat4x3"; } - | DMAT4X4 { $$ = "dmat4"; } - | SAMPLER1D { $$ = "sampler1D"; } - | SAMPLER2D { $$ = "sampler2D"; } - | SAMPLER2DRECT { $$ = "sampler2DRect"; } - | SAMPLER3D { $$ = "sampler3D"; } - | SAMPLERCUBE { $$ = "samplerCube"; } - | SAMPLEREXTERNALOES { $$ = "samplerExternalOES"; } - | SAMPLER1DSHADOW { $$ = "sampler1DShadow"; } - 
| SAMPLER2DSHADOW { $$ = "sampler2DShadow"; } - | SAMPLER2DRECTSHADOW { $$ = "sampler2DRectShadow"; } - | SAMPLERCUBESHADOW { $$ = "samplerCubeShadow"; } - | SAMPLER1DARRAY { $$ = "sampler1DArray"; } - | SAMPLER2DARRAY { $$ = "sampler2DArray"; } - | SAMPLER1DARRAYSHADOW { $$ = "sampler1DArrayShadow"; } - | SAMPLER2DARRAYSHADOW { $$ = "sampler2DArrayShadow"; } - | SAMPLERBUFFER { $$ = "samplerBuffer"; } - | SAMPLERCUBEARRAY { $$ = "samplerCubeArray"; } - | SAMPLERCUBEARRAYSHADOW { $$ = "samplerCubeArrayShadow"; } - | ISAMPLER1D { $$ = "isampler1D"; } - | ISAMPLER2D { $$ = "isampler2D"; } - | ISAMPLER2DRECT { $$ = "isampler2DRect"; } - | ISAMPLER3D { $$ = "isampler3D"; } - | ISAMPLERCUBE { $$ = "isamplerCube"; } - | ISAMPLER1DARRAY { $$ = "isampler1DArray"; } - | ISAMPLER2DARRAY { $$ = "isampler2DArray"; } - | ISAMPLERBUFFER { $$ = "isamplerBuffer"; } - | ISAMPLERCUBEARRAY { $$ = "isamplerCubeArray"; } - | USAMPLER1D { $$ = "usampler1D"; } - | USAMPLER2D { $$ = "usampler2D"; } - | USAMPLER2DRECT { $$ = "usampler2DRect"; } - | USAMPLER3D { $$ = "usampler3D"; } - | USAMPLERCUBE { $$ = "usamplerCube"; } - | USAMPLER1DARRAY { $$ = "usampler1DArray"; } - | USAMPLER2DARRAY { $$ = "usampler2DArray"; } - | USAMPLERBUFFER { $$ = "usamplerBuffer"; } - | USAMPLERCUBEARRAY { $$ = "usamplerCubeArray"; } - | SAMPLER2DMS { $$ = "sampler2DMS"; } - | ISAMPLER2DMS { $$ = "isampler2DMS"; } - | USAMPLER2DMS { $$ = "usampler2DMS"; } - | SAMPLER2DMSARRAY { $$ = "sampler2DMSArray"; } - | ISAMPLER2DMSARRAY { $$ = "isampler2DMSArray"; } - | USAMPLER2DMSARRAY { $$ = "usampler2DMSArray"; } - | IMAGE1D { $$ = "image1D"; } - | IMAGE2D { $$ = "image2D"; } - | IMAGE3D { $$ = "image3D"; } - | IMAGE2DRECT { $$ = "image2DRect"; } - | IMAGECUBE { $$ = "imageCube"; } - | IMAGEBUFFER { $$ = "imageBuffer"; } - | IMAGE1DARRAY { $$ = "image1DArray"; } - | IMAGE2DARRAY { $$ = "image2DArray"; } - | IMAGECUBEARRAY { $$ = "imageCubeArray"; } - | IMAGE2DMS { $$ = "image2DMS"; } - | IMAGE2DMSARRAY { $$ = "image2DMSArray"; } - | IIMAGE1D { $$ = "iimage1D"; } - | IIMAGE2D { $$ = "iimage2D"; } - | IIMAGE3D { $$ = "iimage3D"; } - | IIMAGE2DRECT { $$ = "iimage2DRect"; } - | IIMAGECUBE { $$ = "iimageCube"; } - | IIMAGEBUFFER { $$ = "iimageBuffer"; } - | IIMAGE1DARRAY { $$ = "iimage1DArray"; } - | IIMAGE2DARRAY { $$ = "iimage2DArray"; } - | IIMAGECUBEARRAY { $$ = "iimageCubeArray"; } - | IIMAGE2DMS { $$ = "iimage2DMS"; } - | IIMAGE2DMSARRAY { $$ = "iimage2DMSArray"; } - | UIMAGE1D { $$ = "uimage1D"; } - | UIMAGE2D { $$ = "uimage2D"; } - | UIMAGE3D { $$ = "uimage3D"; } - | UIMAGE2DRECT { $$ = "uimage2DRect"; } - | UIMAGECUBE { $$ = "uimageCube"; } - | UIMAGEBUFFER { $$ = "uimageBuffer"; } - | UIMAGE1DARRAY { $$ = "uimage1DArray"; } - | UIMAGE2DARRAY { $$ = "uimage2DArray"; } - | UIMAGECUBEARRAY { $$ = "uimageCubeArray"; } - | UIMAGE2DMS { $$ = "uimage2DMS"; } - | UIMAGE2DMSARRAY { $$ = "uimage2DMSArray"; } - | ATOMIC_UINT { $$ = "atomic_uint"; } - ; - -precision_qualifier: - HIGHP - { - state->check_precision_qualifiers_allowed(&@1); - $$ = ast_precision_high; - } - | MEDIUMP - { - state->check_precision_qualifiers_allowed(&@1); - $$ = ast_precision_medium; - } - | LOWP - { - state->check_precision_qualifiers_allowed(&@1); - $$ = ast_precision_low; - } - ; - -struct_specifier: - STRUCT any_identifier '{' struct_declaration_list '}' - { - void *ctx = state; - $$ = new(ctx) ast_struct_specifier($2, $4); - $$->set_location_range(@2, @5); - state->symbols->add_type($2, glsl_type::void_type); - } - | STRUCT '{' struct_declaration_list '}' - { - void 
*ctx = state; - $$ = new(ctx) ast_struct_specifier(NULL, $3); - $$->set_location_range(@2, @4); - } - ; - -struct_declaration_list: - struct_declaration - { - $$ = $1; - $1->link.self_link(); - } - | struct_declaration_list struct_declaration - { - $$ = $1; - $$->link.insert_before(& $2->link); - } - ; - -struct_declaration: - fully_specified_type struct_declarator_list ';' - { - void *ctx = state; - ast_fully_specified_type *const type = $1; - type->set_location(@1); - - if (type->qualifier.flags.i != 0) - _mesa_glsl_error(&@1, state, - "only precision qualifiers may be applied to " - "structure members"); - - $$ = new(ctx) ast_declarator_list(type); - $$->set_location(@2); - - $$->declarations.push_degenerate_list_at_head(& $2->link); - } - ; - -struct_declarator_list: - struct_declarator - { - $$ = $1; - $1->link.self_link(); - } - | struct_declarator_list ',' struct_declarator - { - $$ = $1; - $$->link.insert_before(& $3->link); - } - ; - -struct_declarator: - any_identifier - { - void *ctx = state; - $$ = new(ctx) ast_declaration($1, NULL, NULL); - $$->set_location(@1); - } - | any_identifier array_specifier - { - void *ctx = state; - $$ = new(ctx) ast_declaration($1, $2, NULL); - $$->set_location_range(@1, @2); - } - ; - -initializer: - assignment_expression - | '{' initializer_list '}' - { - $$ = $2; - } - | '{' initializer_list ',' '}' - { - $$ = $2; - } - ; - -initializer_list: - initializer - { - void *ctx = state; - $$ = new(ctx) ast_aggregate_initializer(); - $$->set_location(@1); - $$->expressions.push_tail(& $1->link); - } - | initializer_list ',' initializer - { - $1->expressions.push_tail(& $3->link); - } - ; - -declaration_statement: - declaration - ; - - // Grammar Note: labeled statements for SWITCH only; 'goto' is not - // supported. 
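(For reference, a minimal GLSL sketch of the initializer forms the productions above accept; the names are illustrative. The brace forms parse via '{' initializer_list '}'; as far as this grammar is concerned they are always accepted, with the version requirement for aggregate initializers (GLSL 4.20 / ARB_shading_language_420pack) enforced later, outside the parser:

    float scale = 2.0;                       // initializer: assignment_expression
    vec2 pts[2] = { vec2(0.0), vec2(1.0) };  // '{' initializer_list '}'
    vec2 pad[2] = { vec2(0.0), vec2(1.0), }; // trailing-comma form
)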
-statement:
-   compound_statement { $$ = (ast_node *) $1; }
-   | simple_statement
-   ;
-
-simple_statement:
-   declaration_statement
-   | expression_statement
-   | selection_statement
-   | switch_statement
-   | iteration_statement
-   | jump_statement
-   ;
-
-compound_statement:
-   '{' '}'
-   {
-      void *ctx = state;
-      $$ = new(ctx) ast_compound_statement(true, NULL);
-      $$->set_location_range(@1, @2);
-   }
-   | '{'
-   {
-      state->symbols->push_scope();
-   }
-   statement_list '}'
-   {
-      void *ctx = state;
-      $$ = new(ctx) ast_compound_statement(true, $3);
-      $$->set_location_range(@1, @4);
-      state->symbols->pop_scope();
-   }
-   ;
-
-statement_no_new_scope:
-   compound_statement_no_new_scope { $$ = (ast_node *) $1; }
-   | simple_statement
-   ;
-
-compound_statement_no_new_scope:
-   '{' '}'
-   {
-      void *ctx = state;
-      $$ = new(ctx) ast_compound_statement(false, NULL);
-      $$->set_location_range(@1, @2);
-   }
-   | '{' statement_list '}'
-   {
-      void *ctx = state;
-      $$ = new(ctx) ast_compound_statement(false, $2);
-      $$->set_location_range(@1, @3);
-   }
-   ;
-
-statement_list:
-   statement
-   {
-      if ($1 == NULL) {
-         _mesa_glsl_error(& @1, state, "<nil> statement");
-         assert($1 != NULL);
-      }
-
-      $$ = $1;
-      $$->link.self_link();
-   }
-   | statement_list statement
-   {
-      if ($2 == NULL) {
-         _mesa_glsl_error(& @2, state, "<nil> statement");
-         assert($2 != NULL);
-      }
-      $$ = $1;
-      $$->link.insert_before(& $2->link);
-   }
-   ;
-
-expression_statement:
-   ';'
-   {
-      void *ctx = state;
-      $$ = new(ctx) ast_expression_statement(NULL);
-      $$->set_location(@1);
-   }
-   | expression ';'
-   {
-      void *ctx = state;
-      $$ = new(ctx) ast_expression_statement($1);
-      $$->set_location(@1);
-   }
-   ;
-
-selection_statement:
-   IF '(' expression ')' selection_rest_statement
-   {
-      $$ = new(state) ast_selection_statement($3, $5.then_statement,
-                                              $5.else_statement);
-      $$->set_location_range(@1, @5);
-   }
-   ;
-
-selection_rest_statement:
-   statement ELSE statement
-   {
-      $$.then_statement = $1;
-      $$.else_statement = $3;
-   }
-   | statement %prec THEN
-   {
-      $$.then_statement = $1;
-      $$.else_statement = NULL;
-   }
-   ;
-
-condition:
-   expression
-   {
-      $$ = (ast_node *) $1;
-   }
-   | fully_specified_type any_identifier '=' initializer
-   {
-      void *ctx = state;
-      ast_declaration *decl = new(ctx) ast_declaration($2, NULL, $4);
-      ast_declarator_list *declarator = new(ctx) ast_declarator_list($1);
-      decl->set_location_range(@2, @4);
-      declarator->set_location(@1);
-
-      declarator->declarations.push_tail(&decl->link);
-      $$ = declarator;
-   }
-   ;
-
-/*
- * switch_statement grammar is based on the syntax described in the body
- * of the GLSL spec, not in its appendix!!!
- */ -switch_statement: - SWITCH '(' expression ')' switch_body - { - $$ = new(state) ast_switch_statement($3, $5); - $$->set_location_range(@1, @5); - } - ; - -switch_body: - '{' '}' - { - $$ = new(state) ast_switch_body(NULL); - $$->set_location_range(@1, @2); - } - | '{' case_statement_list '}' - { - $$ = new(state) ast_switch_body($2); - $$->set_location_range(@1, @3); - } - ; - -case_label: - CASE expression ':' - { - $$ = new(state) ast_case_label($2); - $$->set_location(@2); - } - | DEFAULT ':' - { - $$ = new(state) ast_case_label(NULL); - $$->set_location(@2); - } - ; - -case_label_list: - case_label - { - ast_case_label_list *labels = new(state) ast_case_label_list(); - - labels->labels.push_tail(& $1->link); - $$ = labels; - $$->set_location(@1); - } - | case_label_list case_label - { - $$ = $1; - $$->labels.push_tail(& $2->link); - } - ; - -case_statement: - case_label_list statement - { - ast_case_statement *stmts = new(state) ast_case_statement($1); - stmts->set_location(@2); - - stmts->stmts.push_tail(& $2->link); - $$ = stmts; - } - | case_statement statement - { - $$ = $1; - $$->stmts.push_tail(& $2->link); - } - ; - -case_statement_list: - case_statement - { - ast_case_statement_list *cases= new(state) ast_case_statement_list(); - cases->set_location(@1); - - cases->cases.push_tail(& $1->link); - $$ = cases; - } - | case_statement_list case_statement - { - $$ = $1; - $$->cases.push_tail(& $2->link); - } - ; - -iteration_statement: - WHILE '(' condition ')' statement_no_new_scope - { - void *ctx = state; - $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_while, - NULL, $3, NULL, $5); - $$->set_location_range(@1, @4); - } - | DO statement WHILE '(' expression ')' ';' - { - void *ctx = state; - $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_do_while, - NULL, $5, NULL, $2); - $$->set_location_range(@1, @6); - } - | FOR '(' for_init_statement for_rest_statement ')' statement_no_new_scope - { - void *ctx = state; - $$ = new(ctx) ast_iteration_statement(ast_iteration_statement::ast_for, - $3, $4.cond, $4.rest, $6); - $$->set_location_range(@1, @6); - } - ; - -for_init_statement: - expression_statement - | declaration_statement - ; - -conditionopt: - condition - | /* empty */ - { - $$ = NULL; - } - ; - -for_rest_statement: - conditionopt ';' - { - $$.cond = $1; - $$.rest = NULL; - } - | conditionopt ';' expression - { - $$.cond = $1; - $$.rest = $3; - } - ; - - // Grammar Note: No 'goto'. Gotos are not supported. -jump_statement: - CONTINUE ';' - { - void *ctx = state; - $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_continue, NULL); - $$->set_location(@1); - } - | BREAK ';' - { - void *ctx = state; - $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_break, NULL); - $$->set_location(@1); - } - | RETURN ';' - { - void *ctx = state; - $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_return, NULL); - $$->set_location(@1); - } - | RETURN expression ';' - { - void *ctx = state; - $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_return, $2); - $$->set_location_range(@1, @2); - } - | DISCARD ';' // Fragment shader only. 
- { - void *ctx = state; - $$ = new(ctx) ast_jump_statement(ast_jump_statement::ast_discard, NULL); - $$->set_location(@1); - } - ; - -external_declaration: - function_definition { $$ = $1; } - | declaration { $$ = $1; } - | pragma_statement { $$ = NULL; } - | layout_defaults { $$ = $1; } - ; - -function_definition: - function_prototype compound_statement_no_new_scope - { - void *ctx = state; - $$ = new(ctx) ast_function_definition(); - $$->set_location_range(@1, @2); - $$->prototype = $1; - $$->body = $2; - - state->symbols->pop_scope(); - } - ; - -/* layout_qualifieropt is packed into this rule */ -interface_block: - basic_interface_block - { - $$ = $1; - } - | layout_qualifier interface_block - { - ast_interface_block *block = (ast_interface_block *) $2; - - if (!state->has_420pack_or_es31() && block->layout.has_layout() && - !block->layout.is_default_qualifier) { - _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); - YYERROR; - } - - if (!block->layout.merge_qualifier(& @1, state, $1, false)) { - YYERROR; - } - - block->layout.is_default_qualifier = false; - - $$ = block; - } - | memory_qualifier interface_block - { - ast_interface_block *block = (ast_interface_block *)$2; - - if (!block->layout.flags.q.buffer) { - _mesa_glsl_error(& @1, state, - "memory qualifiers can only be used in the " - "declaration of shader storage blocks"); - } - if (!block->layout.merge_qualifier(& @1, state, $1, false)) { - YYERROR; - } - $$ = block; - } - ; - -basic_interface_block: - interface_qualifier NEW_IDENTIFIER '{' member_list '}' instance_name_opt ';' - { - ast_interface_block *const block = $6; - - block->block_name = $2; - block->declarations.push_degenerate_list_at_head(& $4->link); - - _mesa_ast_process_interface_block(& @1, state, block, $1); - - $$ = block; - } - | buffer_interface_qualifier NEW_IDENTIFIER '{' member_list '}' buffer_instance_name_opt ';' - { - ast_interface_block *const block = $6; - - block->block_name = $2; - block->declarations.push_degenerate_list_at_head(& $4->link); - - _mesa_ast_process_interface_block(& @1, state, block, $1); - - $$ = block; - } - ; - -interface_qualifier: - IN_TOK - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.in = 1; - } - | OUT_TOK - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.out = 1; - } - | UNIFORM - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.uniform = 1; - } - ; - -buffer_interface_qualifier: - BUFFER - { - memset(& $$, 0, sizeof($$)); - $$.flags.q.buffer = 1; - } - ; - -instance_name_opt: - /* empty */ - { - $$ = new(state) ast_interface_block(*state->default_uniform_qualifier, - NULL, NULL); - } - | NEW_IDENTIFIER - { - $$ = new(state) ast_interface_block(*state->default_uniform_qualifier, - $1, NULL); - $$->set_location(@1); - } - | NEW_IDENTIFIER array_specifier - { - $$ = new(state) ast_interface_block(*state->default_uniform_qualifier, - $1, $2); - $$->set_location_range(@1, @2); - } - ; - -buffer_instance_name_opt: - /* empty */ - { - $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier, - NULL, NULL); - } - | NEW_IDENTIFIER - { - $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier, - $1, NULL); - $$->set_location(@1); - } - | NEW_IDENTIFIER array_specifier - { - $$ = new(state) ast_interface_block(*state->default_shader_storage_qualifier, - $1, $2); - $$->set_location_range(@1, @2); - } - ; - -member_list: - member_declaration - { - $$ = $1; - $1->link.self_link(); - } - | member_declaration member_list - { - $$ = $1; - $2->link.insert_before(& $$->link); - } - ; - 
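(A minimal GLSL sketch of the interface-block forms these productions admit; block and instance names are made up for illustration:

    layout(std140) uniform Transforms {   // layout_qualifier interface_block
       mat4 mvp;                          // member_list, see member_declaration below
    } xforms;                             // instance_name_opt

    buffer Results {                      // buffer_interface_qualifier
       float values[];
    };                                    // buffer_instance_name_opt: empty
)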
-member_declaration: - fully_specified_type struct_declarator_list ';' - { - void *ctx = state; - ast_fully_specified_type *type = $1; - type->set_location(@1); - - if (type->qualifier.flags.q.attribute) { - _mesa_glsl_error(& @1, state, - "keyword 'attribute' cannot be used with " - "interface block member"); - } else if (type->qualifier.flags.q.varying) { - _mesa_glsl_error(& @1, state, - "keyword 'varying' cannot be used with " - "interface block member"); - } - - $$ = new(ctx) ast_declarator_list(type); - $$->set_location(@2); - - $$->declarations.push_degenerate_list_at_head(& $2->link); - } - ; - -layout_uniform_defaults: - layout_qualifier layout_uniform_defaults - { - $$ = NULL; - if (!state->has_420pack_or_es31()) { - _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); - YYERROR; - } else { - if (!state->default_uniform_qualifier-> - merge_qualifier(& @1, state, $1, false)) { - YYERROR; - } - } - } - | layout_qualifier UNIFORM ';' - { - if (!state->default_uniform_qualifier-> - merge_qualifier(& @1, state, $1, false)) { - YYERROR; - } - $$ = NULL; - } - ; - -layout_buffer_defaults: - layout_qualifier layout_buffer_defaults - { - $$ = NULL; - if (!state->has_420pack_or_es31()) { - _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); - YYERROR; - } else { - if (!state->default_shader_storage_qualifier-> - merge_qualifier(& @1, state, $1, false)) { - YYERROR; - } - } - } - | layout_qualifier BUFFER ';' - { - if (!state->default_shader_storage_qualifier-> - merge_qualifier(& @1, state, $1, false)) { - YYERROR; - } - - /* From the GLSL 4.50 spec, section 4.4.5: - * - * "It is a compile-time error to specify the binding identifier for - * the global scope or for block member declarations." - */ - if (state->default_shader_storage_qualifier->flags.q.explicit_binding) { - _mesa_glsl_error(& @1, state, - "binding qualifier cannot be set for default layout"); - } - - $$ = NULL; - } - ; - -layout_in_defaults: - layout_qualifier layout_in_defaults - { - $$ = NULL; - if (!state->has_420pack_or_es31()) { - _mesa_glsl_error(&@1, state, "duplicate layout(...) qualifiers"); - YYERROR; - } else { - if (!state->in_qualifier-> - merge_in_qualifier(& @1, state, $1, $$, false)) { - YYERROR; - } - } - } - | layout_qualifier IN_TOK ';' - { - $$ = NULL; - if (!state->in_qualifier-> - merge_in_qualifier(& @1, state, $1, $$, true)) { - YYERROR; - } - } - ; - -layout_out_defaults: - layout_qualifier layout_out_defaults - { - $$ = NULL; - if (!state->has_420pack_or_es31()) { - _mesa_glsl_error(&@1, state, "duplicate layout(...) 
qualifiers"); - YYERROR; - } else { - if (!state->out_qualifier-> - merge_out_qualifier(& @1, state, $1, $$, false)) { - YYERROR; - } - } - } - | layout_qualifier OUT_TOK ';' - { - $$ = NULL; - if (!state->out_qualifier-> - merge_out_qualifier(& @1, state, $1, $$, true)) - YYERROR; - } - ; - -layout_defaults: - layout_uniform_defaults - | layout_buffer_defaults - | layout_in_defaults - | layout_out_defaults - ; diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp deleted file mode 100644 index 603895497d1..00000000000 --- a/src/glsl/glsl_parser_extras.cpp +++ /dev/null @@ -1,1952 +0,0 @@ -/* - * Copyright © 2008, 2009 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ -#include -#include -#include -#include - -#include "main/core.h" /* for struct gl_context */ -#include "main/context.h" -#include "main/shaderobj.h" -#include "util/u_atomic.h" /* for p_atomic_cmpxchg */ -#include "util/ralloc.h" -#include "ast.h" -#include "glsl_parser_extras.h" -#include "glsl_parser.h" -#include "ir_optimization.h" -#include "loop_analysis.h" - -/** - * Format a short human-readable description of the given GLSL version. - */ -const char * -glsl_compute_version_string(void *mem_ctx, bool is_es, unsigned version) -{ - return ralloc_asprintf(mem_ctx, "GLSL%s %d.%02d", is_es ? " ES" : "", - version / 100, version % 100); -} - - -static const unsigned known_desktop_glsl_versions[] = - { 110, 120, 130, 140, 150, 330, 400, 410, 420, 430, 440, 450 }; - - -_mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, - gl_shader_stage stage, - void *mem_ctx) - : ctx(_ctx), cs_input_local_size_specified(false), cs_input_local_size(), - switch_state() -{ - assert(stage < MESA_SHADER_STAGES); - this->stage = stage; - - this->scanner = NULL; - this->translation_unit.make_empty(); - this->symbols = new(mem_ctx) glsl_symbol_table; - - this->info_log = ralloc_strdup(mem_ctx, ""); - this->error = false; - this->loop_nesting_ast = NULL; - - this->struct_specifier_depth = 0; - - this->uses_builtin_functions = false; - - /* Set default language version and extensions */ - this->language_version = 110; - this->forced_language_version = ctx->Const.ForceGLSLVersion; - this->es_shader = false; - this->ARB_texture_rectangle_enable = true; - - /* OpenGL ES 2.0 has different defaults from desktop GL. 
*/ - if (ctx->API == API_OPENGLES2) { - this->language_version = 100; - this->es_shader = true; - this->ARB_texture_rectangle_enable = false; - } - - this->extensions = &ctx->Extensions; - - this->Const.MaxLights = ctx->Const.MaxLights; - this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes; - this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits; - this->Const.MaxTextureCoords = ctx->Const.MaxTextureCoordUnits; - this->Const.MaxVertexAttribs = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs; - this->Const.MaxVertexUniformComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents; - this->Const.MaxVertexTextureImageUnits = ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits; - this->Const.MaxCombinedTextureImageUnits = ctx->Const.MaxCombinedTextureImageUnits; - this->Const.MaxTextureImageUnits = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; - this->Const.MaxFragmentUniformComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents; - this->Const.MinProgramTexelOffset = ctx->Const.MinProgramTexelOffset; - this->Const.MaxProgramTexelOffset = ctx->Const.MaxProgramTexelOffset; - - this->Const.MaxDrawBuffers = ctx->Const.MaxDrawBuffers; - - this->Const.MaxDualSourceDrawBuffers = ctx->Const.MaxDualSourceDrawBuffers; - - /* 1.50 constants */ - this->Const.MaxVertexOutputComponents = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; - this->Const.MaxGeometryInputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents; - this->Const.MaxGeometryOutputComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents; - this->Const.MaxFragmentInputComponents = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents; - this->Const.MaxGeometryTextureImageUnits = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits; - this->Const.MaxGeometryOutputVertices = ctx->Const.MaxGeometryOutputVertices; - this->Const.MaxGeometryTotalOutputComponents = ctx->Const.MaxGeometryTotalOutputComponents; - this->Const.MaxGeometryUniformComponents = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents; - - this->Const.MaxVertexAtomicCounters = ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicCounters; - this->Const.MaxTessControlAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicCounters; - this->Const.MaxTessEvaluationAtomicCounters = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicCounters; - this->Const.MaxGeometryAtomicCounters = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicCounters; - this->Const.MaxFragmentAtomicCounters = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicCounters; - this->Const.MaxCombinedAtomicCounters = ctx->Const.MaxCombinedAtomicCounters; - this->Const.MaxAtomicBufferBindings = ctx->Const.MaxAtomicBufferBindings; - this->Const.MaxVertexAtomicCounterBuffers = - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAtomicBuffers; - this->Const.MaxTessControlAtomicCounterBuffers = - ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxAtomicBuffers; - this->Const.MaxTessEvaluationAtomicCounterBuffers = - ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxAtomicBuffers; - this->Const.MaxGeometryAtomicCounterBuffers = - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxAtomicBuffers; - this->Const.MaxFragmentAtomicCounterBuffers = - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxAtomicBuffers; - this->Const.MaxCombinedAtomicCounterBuffers = - ctx->Const.MaxCombinedAtomicBuffers; - this->Const.MaxAtomicCounterBufferSize = - ctx->Const.MaxAtomicBufferSize; - - /* Compute shader constants */ - for 
(unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupCount); i++) - this->Const.MaxComputeWorkGroupCount[i] = ctx->Const.MaxComputeWorkGroupCount[i]; - for (unsigned i = 0; i < ARRAY_SIZE(this->Const.MaxComputeWorkGroupSize); i++) - this->Const.MaxComputeWorkGroupSize[i] = ctx->Const.MaxComputeWorkGroupSize[i]; - - this->Const.MaxImageUnits = ctx->Const.MaxImageUnits; - this->Const.MaxCombinedShaderOutputResources = ctx->Const.MaxCombinedShaderOutputResources; - this->Const.MaxImageSamples = ctx->Const.MaxImageSamples; - this->Const.MaxVertexImageUniforms = ctx->Const.Program[MESA_SHADER_VERTEX].MaxImageUniforms; - this->Const.MaxTessControlImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxImageUniforms; - this->Const.MaxTessEvaluationImageUniforms = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxImageUniforms; - this->Const.MaxGeometryImageUniforms = ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxImageUniforms; - this->Const.MaxFragmentImageUniforms = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxImageUniforms; - this->Const.MaxCombinedImageUniforms = ctx->Const.MaxCombinedImageUniforms; - - /* ARB_viewport_array */ - this->Const.MaxViewports = ctx->Const.MaxViewports; - - /* tessellation shader constants */ - this->Const.MaxPatchVertices = ctx->Const.MaxPatchVertices; - this->Const.MaxTessGenLevel = ctx->Const.MaxTessGenLevel; - this->Const.MaxTessControlInputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxInputComponents; - this->Const.MaxTessControlOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxOutputComponents; - this->Const.MaxTessControlTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxTextureImageUnits; - this->Const.MaxTessEvaluationInputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxInputComponents; - this->Const.MaxTessEvaluationOutputComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxOutputComponents; - this->Const.MaxTessEvaluationTextureImageUnits = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxTextureImageUnits; - this->Const.MaxTessPatchComponents = ctx->Const.MaxTessPatchComponents; - this->Const.MaxTessControlTotalOutputComponents = ctx->Const.MaxTessControlTotalOutputComponents; - this->Const.MaxTessControlUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_CTRL].MaxUniformComponents; - this->Const.MaxTessEvaluationUniformComponents = ctx->Const.Program[MESA_SHADER_TESS_EVAL].MaxUniformComponents; - - this->current_function = NULL; - this->toplevel_ir = NULL; - this->found_return = false; - this->all_invariant = false; - this->user_structures = NULL; - this->num_user_structures = 0; - this->num_subroutines = 0; - this->subroutines = NULL; - this->num_subroutine_types = 0; - this->subroutine_types = NULL; - - /* supported_versions should be large enough to support the known desktop - * GLSL versions plus 3 GLES versions (ES 1.00, ES 3.00, and ES 3.10)) - */ - STATIC_ASSERT((ARRAY_SIZE(known_desktop_glsl_versions) + 3) == - ARRAY_SIZE(this->supported_versions)); - - /* Populate the list of supported GLSL versions */ - /* FINISHME: Once the OpenGL 3.0 'forward compatible' context or - * the OpenGL 3.2 Core context is supported, this logic will need - * change. Older versions of GLSL are no longer supported - * outside the compatibility contexts of 3.x. 
- */ - this->num_supported_versions = 0; - if (_mesa_is_desktop_gl(ctx)) { - for (unsigned i = 0; i < ARRAY_SIZE(known_desktop_glsl_versions); i++) { - if (known_desktop_glsl_versions[i] <= ctx->Const.GLSLVersion) { - this->supported_versions[this->num_supported_versions].ver - = known_desktop_glsl_versions[i]; - this->supported_versions[this->num_supported_versions].es = false; - this->num_supported_versions++; - } - } - } - if (ctx->API == API_OPENGLES2 || ctx->Extensions.ARB_ES2_compatibility) { - this->supported_versions[this->num_supported_versions].ver = 100; - this->supported_versions[this->num_supported_versions].es = true; - this->num_supported_versions++; - } - if (_mesa_is_gles3(ctx) || ctx->Extensions.ARB_ES3_compatibility) { - this->supported_versions[this->num_supported_versions].ver = 300; - this->supported_versions[this->num_supported_versions].es = true; - this->num_supported_versions++; - } - if (_mesa_is_gles31(ctx)) { - this->supported_versions[this->num_supported_versions].ver = 310; - this->supported_versions[this->num_supported_versions].es = true; - this->num_supported_versions++; - } - - /* Create a string for use in error messages to tell the user which GLSL - * versions are supported. - */ - char *supported = ralloc_strdup(this, ""); - for (unsigned i = 0; i < this->num_supported_versions; i++) { - unsigned ver = this->supported_versions[i].ver; - const char *const prefix = (i == 0) - ? "" - : ((i == this->num_supported_versions - 1) ? ", and " : ", "); - const char *const suffix = (this->supported_versions[i].es) ? " ES" : ""; - - ralloc_asprintf_append(& supported, "%s%u.%02u%s", - prefix, - ver / 100, ver % 100, - suffix); - } - - this->supported_version_string = supported; - - if (ctx->Const.ForceGLSLExtensionsWarn) - _mesa_glsl_process_extension("all", NULL, "warn", NULL, this); - - this->default_uniform_qualifier = new(this) ast_type_qualifier(); - this->default_uniform_qualifier->flags.q.shared = 1; - this->default_uniform_qualifier->flags.q.column_major = 1; - this->default_uniform_qualifier->is_default_qualifier = true; - - this->default_shader_storage_qualifier = new(this) ast_type_qualifier(); - this->default_shader_storage_qualifier->flags.q.shared = 1; - this->default_shader_storage_qualifier->flags.q.column_major = 1; - this->default_shader_storage_qualifier->is_default_qualifier = true; - - this->fs_uses_gl_fragcoord = false; - this->fs_redeclares_gl_fragcoord = false; - this->fs_origin_upper_left = false; - this->fs_pixel_center_integer = false; - this->fs_redeclares_gl_fragcoord_with_no_layout_qualifiers = false; - - this->gs_input_prim_type_specified = false; - this->tcs_output_vertices_specified = false; - this->gs_input_size = 0; - this->in_qualifier = new(this) ast_type_qualifier(); - this->out_qualifier = new(this) ast_type_qualifier(); - this->fs_early_fragment_tests = false; - memset(this->atomic_counter_offsets, 0, - sizeof(this->atomic_counter_offsets)); - this->allow_extension_directive_midshader = - ctx->Const.AllowGLSLExtensionDirectiveMidShader; -} - -/** - * Determine whether the current GLSL version is sufficiently high to support - * a certain feature, and generate an error message if it isn't. - * - * \param required_glsl_version and \c required_glsl_es_version are - * interpreted as they are in _mesa_glsl_parse_state::is_version(). - * - * \param locp is the parser location where the error should be reported. 
- * - * \param fmt (and additional arguments) constitute a printf-style error - * message to report if the version check fails. Information about the - * current and required GLSL versions will be appended. So, for example, if - * the GLSL version being compiled is 1.20, and check_version(130, 300, locp, - * "foo unsupported") is called, the error message will be "foo unsupported in - * GLSL 1.20 (GLSL 1.30 or GLSL 3.00 ES required)". - */ -bool -_mesa_glsl_parse_state::check_version(unsigned required_glsl_version, - unsigned required_glsl_es_version, - YYLTYPE *locp, const char *fmt, ...) -{ - if (this->is_version(required_glsl_version, required_glsl_es_version)) - return true; - - va_list args; - va_start(args, fmt); - char *problem = ralloc_vasprintf(this, fmt, args); - va_end(args); - const char *glsl_version_string - = glsl_compute_version_string(this, false, required_glsl_version); - const char *glsl_es_version_string - = glsl_compute_version_string(this, true, required_glsl_es_version); - const char *requirement_string = ""; - if (required_glsl_version && required_glsl_es_version) { - requirement_string = ralloc_asprintf(this, " (%s or %s required)", - glsl_version_string, - glsl_es_version_string); - } else if (required_glsl_version) { - requirement_string = ralloc_asprintf(this, " (%s required)", - glsl_version_string); - } else if (required_glsl_es_version) { - requirement_string = ralloc_asprintf(this, " (%s required)", - glsl_es_version_string); - } - _mesa_glsl_error(locp, this, "%s in %s%s", - problem, this->get_version_string(), - requirement_string); - - return false; -} - -/** - * Process a GLSL #version directive. - * - * \param version is the integer that follows the #version token. - * - * \param ident is a string identifier that follows the integer, if any is - * present. Otherwise NULL. - */ -void -_mesa_glsl_parse_state::process_version_directive(YYLTYPE *locp, int version, - const char *ident) -{ - bool es_token_present = false; - if (ident) { - if (strcmp(ident, "es") == 0) { - es_token_present = true; - } else if (version >= 150) { - if (strcmp(ident, "core") == 0) { - /* Accept the token. There's no need to record that this is - * a core profile shader since that's the only profile we support. - */ - } else if (strcmp(ident, "compatibility") == 0) { - _mesa_glsl_error(locp, this, - "the compatibility profile is not supported"); - } else { - _mesa_glsl_error(locp, this, - "\"%s\" is not a valid shading language profile; " - "if present, it must be \"core\"", ident); - } - } else { - _mesa_glsl_error(locp, this, - "illegal text following version number"); - } - } - - this->es_shader = es_token_present; - if (version == 100) { - if (es_token_present) { - _mesa_glsl_error(locp, this, - "GLSL 1.00 ES should be selected using " - "`#version 100'"); - } else { - this->es_shader = true; - } - } - - if (this->es_shader) { - this->ARB_texture_rectangle_enable = false; - } - - if (this->forced_language_version) - this->language_version = this->forced_language_version; - else - this->language_version = version; - - bool supported = false; - for (unsigned i = 0; i < this->num_supported_versions; i++) { - if (this->supported_versions[i].ver == this->language_version - && this->supported_versions[i].es == this->es_shader) { - supported = true; - break; - } - } - - if (!supported) { - _mesa_glsl_error(locp, this, "%s is not supported. 
" - "Supported versions are: %s", - this->get_version_string(), - this->supported_version_string); - - /* On exit, the language_version must be set to a valid value. - * Later calls to _mesa_glsl_initialize_types will misbehave if - * the version is invalid. - */ - switch (this->ctx->API) { - case API_OPENGL_COMPAT: - case API_OPENGL_CORE: - this->language_version = this->ctx->Const.GLSLVersion; - break; - - case API_OPENGLES: - assert(!"Should not get here."); - /* FALLTHROUGH */ - - case API_OPENGLES2: - this->language_version = 100; - break; - } - } -} - - -/* This helper function will append the given message to the shader's - info log and report it via GL_ARB_debug_output. Per that extension, - 'type' is one of the enum values classifying the message, and - 'id' is the implementation-defined ID of the given message. */ -static void -_mesa_glsl_msg(const YYLTYPE *locp, _mesa_glsl_parse_state *state, - GLenum type, const char *fmt, va_list ap) -{ - bool error = (type == MESA_DEBUG_TYPE_ERROR); - GLuint msg_id = 0; - - assert(state->info_log != NULL); - - /* Get the offset that the new message will be written to. */ - int msg_offset = strlen(state->info_log); - - ralloc_asprintf_append(&state->info_log, "%u:%u(%u): %s: ", - locp->source, - locp->first_line, - locp->first_column, - error ? "error" : "warning"); - ralloc_vasprintf_append(&state->info_log, fmt, ap); - - const char *const msg = &state->info_log[msg_offset]; - struct gl_context *ctx = state->ctx; - - /* Report the error via GL_ARB_debug_output. */ - _mesa_shader_debug(ctx, type, &msg_id, msg); - - ralloc_strcat(&state->info_log, "\n"); -} - -void -_mesa_glsl_error(YYLTYPE *locp, _mesa_glsl_parse_state *state, - const char *fmt, ...) -{ - va_list ap; - - state->error = true; - - va_start(ap, fmt); - _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_ERROR, fmt, ap); - va_end(ap); -} - - -void -_mesa_glsl_warning(const YYLTYPE *locp, _mesa_glsl_parse_state *state, - const char *fmt, ...) -{ - va_list ap; - - va_start(ap, fmt); - _mesa_glsl_msg(locp, state, MESA_DEBUG_TYPE_OTHER, fmt, ap); - va_end(ap); -} - - -/** - * Enum representing the possible behaviors that can be specified in - * an #extension directive. - */ -enum ext_behavior { - extension_disable, - extension_enable, - extension_require, - extension_warn -}; - -/** - * Element type for _mesa_glsl_supported_extensions - */ -struct _mesa_glsl_extension { - /** - * Name of the extension when referred to in a GLSL extension - * statement - */ - const char *name; - - /** True if this extension is available to desktop GL shaders */ - bool avail_in_GL; - - /** True if this extension is available to GLES shaders */ - bool avail_in_ES; - - /** - * Flag in the gl_extensions struct indicating whether this - * extension is supported by the driver, or - * &gl_extensions::dummy_true if supported by all drivers. - * - * Note: the type (GLboolean gl_extensions::*) is a "pointer to - * member" type, the type-safe alternative to the "offsetof" macro. - * In a nutshell: - * - * - foo bar::* p declares p to be an "offset" to a field of type - * foo that exists within struct bar - * - &bar::baz computes the "offset" of field baz within struct bar - * - x.*p accesses the field of x that exists at "offset" p - * - x->*p is equivalent to (*x).*p - */ - const GLboolean gl_extensions::* supported_flag; - - /** - * Flag in the _mesa_glsl_parse_state struct that should be set - * when this extension is enabled. 
- * - * See note in _mesa_glsl_extension::supported_flag about "pointer - * to member" types. - */ - bool _mesa_glsl_parse_state::* enable_flag; - - /** - * Flag in the _mesa_glsl_parse_state struct that should be set - * when the shader requests "warn" behavior for this extension. - * - * See note in _mesa_glsl_extension::supported_flag about "pointer - * to member" types. - */ - bool _mesa_glsl_parse_state::* warn_flag; - - - bool compatible_with_state(const _mesa_glsl_parse_state *state) const; - void set_flags(_mesa_glsl_parse_state *state, ext_behavior behavior) const; -}; - -#define EXT(NAME, GL, ES, SUPPORTED_FLAG) \ - { "GL_" #NAME, GL, ES, &gl_extensions::SUPPORTED_FLAG, \ - &_mesa_glsl_parse_state::NAME##_enable, \ - &_mesa_glsl_parse_state::NAME##_warn } - -/** - * Table of extensions that can be enabled/disabled within a shader, - * and the conditions under which they are supported. - */ -static const _mesa_glsl_extension _mesa_glsl_supported_extensions[] = { - /* API availability */ - /* name GL ES supported flag */ - - /* ARB extensions go here, sorted alphabetically. - */ - EXT(ARB_arrays_of_arrays, true, false, ARB_arrays_of_arrays), - EXT(ARB_compute_shader, true, false, ARB_compute_shader), - EXT(ARB_conservative_depth, true, false, ARB_conservative_depth), - EXT(ARB_derivative_control, true, false, ARB_derivative_control), - EXT(ARB_draw_buffers, true, false, dummy_true), - EXT(ARB_draw_instanced, true, false, ARB_draw_instanced), - EXT(ARB_enhanced_layouts, true, false, ARB_enhanced_layouts), - EXT(ARB_explicit_attrib_location, true, false, ARB_explicit_attrib_location), - EXT(ARB_explicit_uniform_location, true, false, ARB_explicit_uniform_location), - EXT(ARB_fragment_coord_conventions, true, false, ARB_fragment_coord_conventions), - EXT(ARB_fragment_layer_viewport, true, false, ARB_fragment_layer_viewport), - EXT(ARB_gpu_shader5, true, false, ARB_gpu_shader5), - EXT(ARB_gpu_shader_fp64, true, false, ARB_gpu_shader_fp64), - EXT(ARB_sample_shading, true, false, ARB_sample_shading), - EXT(ARB_separate_shader_objects, true, false, dummy_true), - EXT(ARB_shader_atomic_counters, true, false, ARB_shader_atomic_counters), - EXT(ARB_shader_bit_encoding, true, false, ARB_shader_bit_encoding), - EXT(ARB_shader_clock, true, false, ARB_shader_clock), - EXT(ARB_shader_draw_parameters, true, false, ARB_shader_draw_parameters), - EXT(ARB_shader_image_load_store, true, false, ARB_shader_image_load_store), - EXT(ARB_shader_image_size, true, false, ARB_shader_image_size), - EXT(ARB_shader_precision, true, false, ARB_shader_precision), - EXT(ARB_shader_stencil_export, true, false, ARB_shader_stencil_export), - EXT(ARB_shader_storage_buffer_object, true, true, ARB_shader_storage_buffer_object), - EXT(ARB_shader_subroutine, true, false, ARB_shader_subroutine), - EXT(ARB_shader_texture_image_samples, true, false, ARB_shader_texture_image_samples), - EXT(ARB_shader_texture_lod, true, false, ARB_shader_texture_lod), - EXT(ARB_shading_language_420pack, true, false, ARB_shading_language_420pack), - EXT(ARB_shading_language_packing, true, false, ARB_shading_language_packing), - EXT(ARB_tessellation_shader, true, false, ARB_tessellation_shader), - EXT(ARB_texture_cube_map_array, true, false, ARB_texture_cube_map_array), - EXT(ARB_texture_gather, true, false, ARB_texture_gather), - EXT(ARB_texture_multisample, true, false, ARB_texture_multisample), - EXT(ARB_texture_query_levels, true, false, ARB_texture_query_levels), - EXT(ARB_texture_query_lod, true, false, ARB_texture_query_lod), - 
EXT(ARB_texture_rectangle, true, false, dummy_true), - EXT(ARB_uniform_buffer_object, true, false, ARB_uniform_buffer_object), - EXT(ARB_vertex_attrib_64bit, true, false, ARB_vertex_attrib_64bit), - EXT(ARB_viewport_array, true, false, ARB_viewport_array), - - /* KHR extensions go here, sorted alphabetically. - */ - - /* OES extensions go here, sorted alphabetically. - */ - EXT(OES_EGL_image_external, false, true, OES_EGL_image_external), - EXT(OES_geometry_shader, false, true, OES_geometry_shader), - EXT(OES_standard_derivatives, false, true, OES_standard_derivatives), - EXT(OES_texture_3D, false, true, dummy_true), - EXT(OES_texture_storage_multisample_2d_array, false, true, ARB_texture_multisample), - - /* All other extensions go here, sorted alphabetically. - */ - EXT(AMD_conservative_depth, true, false, ARB_conservative_depth), - EXT(AMD_shader_stencil_export, true, false, ARB_shader_stencil_export), - EXT(AMD_shader_trinary_minmax, true, false, dummy_true), - EXT(AMD_vertex_shader_layer, true, false, AMD_vertex_shader_layer), - EXT(AMD_vertex_shader_viewport_index, true, false, AMD_vertex_shader_viewport_index), - EXT(EXT_blend_func_extended, false, true, ARB_blend_func_extended), - EXT(EXT_draw_buffers, false, true, dummy_true), - EXT(EXT_separate_shader_objects, false, true, dummy_true), - EXT(EXT_shader_integer_mix, true, true, EXT_shader_integer_mix), - EXT(EXT_shader_samples_identical, true, true, EXT_shader_samples_identical), - EXT(EXT_texture_array, true, false, EXT_texture_array), -}; - -#undef EXT - - -/** - * Determine whether a given extension is compatible with the target, - * API, and extension information in the current parser state. - */ -bool _mesa_glsl_extension::compatible_with_state(const _mesa_glsl_parse_state * - state) const -{ - /* Check that this extension matches whether we are compiling - * for desktop GL or GLES. - */ - if (state->es_shader) { - if (!this->avail_in_ES) return false; - } else { - if (!this->avail_in_GL) return false; - } - - /* Check that this extension is supported by the OpenGL - * implementation. - * - * Note: the ->* operator indexes into state->extensions by the - * offset this->supported_flag. See - * _mesa_glsl_extension::supported_flag for more info. - */ - return state->extensions->*(this->supported_flag); -} - -/** - * Set the appropriate flags in the parser state to establish the - * given behavior for this extension. - */ -void _mesa_glsl_extension::set_flags(_mesa_glsl_parse_state *state, - ext_behavior behavior) const -{ - /* Note: the ->* operator indexes into state by the - * offsets this->enable_flag and this->warn_flag. See - * _mesa_glsl_extension::supported_flag for more info. - */ - state->*(this->enable_flag) = (behavior != extension_disable); - state->*(this->warn_flag) = (behavior == extension_warn); -} - -/** - * Find an extension by name in _mesa_glsl_supported_extensions. If - * the name is not found, return NULL. 
- */ -static const _mesa_glsl_extension *find_extension(const char *name) -{ - for (unsigned i = 0; i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) { - if (strcmp(name, _mesa_glsl_supported_extensions[i].name) == 0) { - return &_mesa_glsl_supported_extensions[i]; - } - } - return NULL; -} - - -bool -_mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp, - const char *behavior_string, YYLTYPE *behavior_locp, - _mesa_glsl_parse_state *state) -{ - ext_behavior behavior; - if (strcmp(behavior_string, "warn") == 0) { - behavior = extension_warn; - } else if (strcmp(behavior_string, "require") == 0) { - behavior = extension_require; - } else if (strcmp(behavior_string, "enable") == 0) { - behavior = extension_enable; - } else if (strcmp(behavior_string, "disable") == 0) { - behavior = extension_disable; - } else { - _mesa_glsl_error(behavior_locp, state, - "unknown extension behavior `%s'", - behavior_string); - return false; - } - - if (strcmp(name, "all") == 0) { - if ((behavior == extension_enable) || (behavior == extension_require)) { - _mesa_glsl_error(name_locp, state, "cannot %s all extensions", - (behavior == extension_enable) - ? "enable" : "require"); - return false; - } else { - for (unsigned i = 0; - i < ARRAY_SIZE(_mesa_glsl_supported_extensions); ++i) { - const _mesa_glsl_extension *extension - = &_mesa_glsl_supported_extensions[i]; - if (extension->compatible_with_state(state)) { - _mesa_glsl_supported_extensions[i].set_flags(state, behavior); - } - } - } - } else { - const _mesa_glsl_extension *extension = find_extension(name); - if (extension && extension->compatible_with_state(state)) { - extension->set_flags(state, behavior); - } else { - static const char fmt[] = "extension `%s' unsupported in %s shader"; - - if (behavior == extension_require) { - _mesa_glsl_error(name_locp, state, fmt, - name, _mesa_shader_stage_to_string(state->stage)); - return false; - } else { - _mesa_glsl_warning(name_locp, state, fmt, - name, _mesa_shader_stage_to_string(state->stage)); - } - } - } - - return true; -} - - -/** - * Recurses through <type> and <expr> if <expr> is an aggregate initializer - * and sets <expr>'s <constructor_type> field to <type>. Gives later functions - * (process_array_constructor, et al) sufficient information to do type - * checking. - * - * Operates on assignments involving an aggregate initializer. E.g., - * - * vec4 pos = {1.0, -1.0, 0.0, 1.0}; - * - * or more ridiculously, - * - * struct S { - * vec4 v[2]; - * }; - * - * struct { - * S a[2], b; - * int c; - * } aggregate = { - * { - * { - * { - * {1.0, 2.0, 3.0, 4.0}, // a[0].v[0] - * {5.0, 6.0, 7.0, 8.0} // a[0].v[1] - * } // a[0].v - * }, // a[0] - * { - * { - * {1.0, 2.0, 3.0, 4.0}, // a[1].v[0] - * {5.0, 6.0, 7.0, 8.0} // a[1].v[1] - * } // a[1].v - * } // a[1] - * }, // a - * { - * { - * {1.0, 2.0, 3.0, 4.0}, // b.v[0] - * {5.0, 6.0, 7.0, 8.0} // b.v[1] - * } // b.v - * }, // b - * 4 // c - * }; - * - * This pass is necessary because the right-hand side of <type> e = { ... } - * doesn't contain sufficient information to determine if the types match. - */ -void -_mesa_ast_set_aggregate_type(const glsl_type *type, - ast_expression *expr) -{ - ast_aggregate_initializer *ai = (ast_aggregate_initializer *)expr; - ai->constructor_type = type; - - /* If the aggregate is an array, recursively set its elements' types. */ - if (type->is_array()) { - /* Each array element has the type type->fields.array. - * - * E.g., if <type> is struct S[2] we want to set each element's type to - * struct S. 
- */ - for (exec_node *expr_node = ai->expressions.head; - !expr_node->is_tail_sentinel(); - expr_node = expr_node->next) { - ast_expression *expr = exec_node_data(ast_expression, expr_node, - link); - - if (expr->oper == ast_aggregate) - _mesa_ast_set_aggregate_type(type->fields.array, expr); - } - - /* If the aggregate is a struct, recursively set its fields' types. */ - } else if (type->is_record()) { - exec_node *expr_node = ai->expressions.head; - - /* Iterate through the struct's fields. */ - for (unsigned i = 0; !expr_node->is_tail_sentinel() && i < type->length; - i++, expr_node = expr_node->next) { - ast_expression *expr = exec_node_data(ast_expression, expr_node, - link); - - if (expr->oper == ast_aggregate) { - _mesa_ast_set_aggregate_type(type->fields.structure[i].type, expr); - } - } - /* If the aggregate is a matrix, set its columns' types. */ - } else if (type->is_matrix()) { - for (exec_node *expr_node = ai->expressions.head; - !expr_node->is_tail_sentinel(); - expr_node = expr_node->next) { - ast_expression *expr = exec_node_data(ast_expression, expr_node, - link); - - if (expr->oper == ast_aggregate) - _mesa_ast_set_aggregate_type(type->column_type(), expr); - } - } -} - -void -_mesa_ast_process_interface_block(YYLTYPE *locp, - _mesa_glsl_parse_state *state, - ast_interface_block *const block, - const struct ast_type_qualifier &q) -{ - if (q.flags.q.buffer) { - if (!state->has_shader_storage_buffer_objects()) { - _mesa_glsl_error(locp, state, - "#version 430 / GL_ARB_shader_storage_buffer_object " - "required for defining shader storage blocks"); - } else if (state->ARB_shader_storage_buffer_object_warn) { - _mesa_glsl_warning(locp, state, - "#version 430 / GL_ARB_shader_storage_buffer_object " - "required for defining shader storage blocks"); - } - } else if (q.flags.q.uniform) { - if (!state->has_uniform_buffer_objects()) { - _mesa_glsl_error(locp, state, - "#version 140 / GL_ARB_uniform_buffer_object " - "required for defining uniform blocks"); - } else if (state->ARB_uniform_buffer_object_warn) { - _mesa_glsl_warning(locp, state, - "#version 140 / GL_ARB_uniform_buffer_object " - "required for defining uniform blocks"); - } - } else { - if (state->es_shader || state->language_version < 150) { - _mesa_glsl_error(locp, state, - "#version 150 required for using " - "interface blocks"); - } - } - - /* From the GLSL 1.50.11 spec, section 4.3.7 ("Interface Blocks"): - * "It is illegal to have an input block in a vertex shader - * or an output block in a fragment shader" - */ - if ((state->stage == MESA_SHADER_VERTEX) && q.flags.q.in) { - _mesa_glsl_error(locp, state, - "`in' interface block is not allowed for " - "a vertex shader"); - } else if ((state->stage == MESA_SHADER_FRAGMENT) && q.flags.q.out) { - _mesa_glsl_error(locp, state, - "`out' interface block is not allowed for " - "a fragment shader"); - } - - /* Since block arrays require names, and both features are added in - * the same language versions, we don't have to explicitly - * version-check both things. - */ - if (block->instance_name != NULL) { - state->check_version(150, 300, locp, "interface blocks with " - "an instance name are not allowed"); - } - - uint64_t interface_type_mask; - struct ast_type_qualifier temp_type_qualifier; - - /* Get a bitmask containing only the in/out/uniform/buffer - * flags, allowing us to ignore other irrelevant flags like - * interpolation qualifiers. 
- */ - temp_type_qualifier.flags.i = 0; - temp_type_qualifier.flags.q.uniform = true; - temp_type_qualifier.flags.q.in = true; - temp_type_qualifier.flags.q.out = true; - temp_type_qualifier.flags.q.buffer = true; - interface_type_mask = temp_type_qualifier.flags.i; - - /* Get the block's interface qualifier. The interface_qualifier - * production rule guarantees that only one bit will be set (and - * it will be in/out/uniform). - */ - uint64_t block_interface_qualifier = q.flags.i; - - block->layout.flags.i |= block_interface_qualifier; - - if (state->stage == MESA_SHADER_GEOMETRY && - state->has_explicit_attrib_stream()) { - /* Assign global layout's stream value. */ - block->layout.flags.q.stream = 1; - block->layout.flags.q.explicit_stream = 0; - block->layout.stream = state->out_qualifier->stream; - } - - foreach_list_typed (ast_declarator_list, member, link, &block->declarations) { - ast_type_qualifier& qualifier = member->type->qualifier; - if ((qualifier.flags.i & interface_type_mask) == 0) { - /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks): - * "If no optional qualifier is used in a member declaration, the - * qualifier of the variable is just in, out, or uniform as declared - * by interface-qualifier." - */ - qualifier.flags.i |= block_interface_qualifier; - } else if ((qualifier.flags.i & interface_type_mask) != - block_interface_qualifier) { - /* GLSLangSpec.1.50.11, 4.3.7 (Interface Blocks): - * "If optional qualifiers are used, they can include interpolation - * and storage qualifiers and they must declare an input, output, - * or uniform variable consistent with the interface qualifier of - * the block." - */ - _mesa_glsl_error(locp, state, - "uniform/in/out qualifier on " - "interface block member does not match " - "the interface block"); - } - - /* From GLSL ES 3.0, chapter 4.3.7 "Interface Blocks": - * - * "GLSL ES 3.0 does not support interface blocks for shader inputs or - * outputs." - * - * And from GLSL ES 3.0, chapter 4.6.1 "The invariant qualifier":. - * - * "Only variables output from a shader can be candidates for - * invariance." 
- * - * From GLSL 4.40 and GLSL 1.50, section "Interface Blocks": - * - * "If optional qualifiers are used, they can include interpolation - * qualifiers, auxiliary storage qualifiers, and storage qualifiers - * and they must declare an input, output, or uniform member - * consistent with the interface qualifier of the block" - */ - if (qualifier.flags.q.invariant) - _mesa_glsl_error(locp, state, - "invariant qualifiers cannot be used " - "with interface blocks members"); - } -} - -void -_mesa_ast_type_qualifier_print(const struct ast_type_qualifier *q) -{ - if (q->flags.q.subroutine) - printf("subroutine "); - - if (q->flags.q.subroutine_def) { - printf("subroutine ("); - q->subroutine_list->print(); - printf(")"); - } - - if (q->flags.q.constant) - printf("const "); - - if (q->flags.q.invariant) - printf("invariant "); - - if (q->flags.q.attribute) - printf("attribute "); - - if (q->flags.q.varying) - printf("varying "); - - if (q->flags.q.in && q->flags.q.out) - printf("inout "); - else { - if (q->flags.q.in) - printf("in "); - - if (q->flags.q.out) - printf("out "); - } - - if (q->flags.q.centroid) - printf("centroid "); - if (q->flags.q.sample) - printf("sample "); - if (q->flags.q.patch) - printf("patch "); - if (q->flags.q.uniform) - printf("uniform "); - if (q->flags.q.buffer) - printf("buffer "); - if (q->flags.q.smooth) - printf("smooth "); - if (q->flags.q.flat) - printf("flat "); - if (q->flags.q.noperspective) - printf("noperspective "); -} - - -void -ast_node::print(void) const -{ - printf("unhandled node "); -} - - -ast_node::ast_node(void) -{ - this->location.source = 0; - this->location.first_line = 0; - this->location.first_column = 0; - this->location.last_line = 0; - this->location.last_column = 0; -} - - -static void -ast_opt_array_dimensions_print(const ast_array_specifier *array_specifier) -{ - if (array_specifier) - array_specifier->print(); -} - - -void -ast_compound_statement::print(void) const -{ - printf("{\n"); - - foreach_list_typed(ast_node, ast, link, &this->statements) { - ast->print(); - } - - printf("}\n"); -} - - -ast_compound_statement::ast_compound_statement(int new_scope, - ast_node *statements) -{ - this->new_scope = new_scope; - - if (statements != NULL) { - this->statements.push_degenerate_list_at_head(&statements->link); - } -} - - -void -ast_expression::print(void) const -{ - switch (oper) { - case ast_assign: - case ast_mul_assign: - case ast_div_assign: - case ast_mod_assign: - case ast_add_assign: - case ast_sub_assign: - case ast_ls_assign: - case ast_rs_assign: - case ast_and_assign: - case ast_xor_assign: - case ast_or_assign: - subexpressions[0]->print(); - printf("%s ", operator_string(oper)); - subexpressions[1]->print(); - break; - - case ast_field_selection: - subexpressions[0]->print(); - printf(". %s ", primary_expression.identifier); - break; - - case ast_plus: - case ast_neg: - case ast_bit_not: - case ast_logic_not: - case ast_pre_inc: - case ast_pre_dec: - printf("%s ", operator_string(oper)); - subexpressions[0]->print(); - break; - - case ast_post_inc: - case ast_post_dec: - subexpressions[0]->print(); - printf("%s ", operator_string(oper)); - break; - - case ast_conditional: - subexpressions[0]->print(); - printf("? 
"); - subexpressions[1]->print(); - printf(": "); - subexpressions[2]->print(); - break; - - case ast_array_index: - subexpressions[0]->print(); - printf("[ "); - subexpressions[1]->print(); - printf("] "); - break; - - case ast_function_call: { - subexpressions[0]->print(); - printf("( "); - - foreach_list_typed (ast_node, ast, link, &this->expressions) { - if (&ast->link != this->expressions.get_head()) - printf(", "); - - ast->print(); - } - - printf(") "); - break; - } - - case ast_identifier: - printf("%s ", primary_expression.identifier); - break; - - case ast_int_constant: - printf("%d ", primary_expression.int_constant); - break; - - case ast_uint_constant: - printf("%u ", primary_expression.uint_constant); - break; - - case ast_float_constant: - printf("%f ", primary_expression.float_constant); - break; - - case ast_double_constant: - printf("%f ", primary_expression.double_constant); - break; - - case ast_bool_constant: - printf("%s ", - primary_expression.bool_constant - ? "true" : "false"); - break; - - case ast_sequence: { - printf("( "); - foreach_list_typed (ast_node, ast, link, & this->expressions) { - if (&ast->link != this->expressions.get_head()) - printf(", "); - - ast->print(); - } - printf(") "); - break; - } - - case ast_aggregate: { - printf("{ "); - foreach_list_typed (ast_node, ast, link, & this->expressions) { - if (&ast->link != this->expressions.get_head()) - printf(", "); - - ast->print(); - } - printf("} "); - break; - } - - default: - assert(0); - break; - } -} - -ast_expression::ast_expression(int oper, - ast_expression *ex0, - ast_expression *ex1, - ast_expression *ex2) : - primary_expression() -{ - this->oper = ast_operators(oper); - this->subexpressions[0] = ex0; - this->subexpressions[1] = ex1; - this->subexpressions[2] = ex2; - this->non_lvalue_description = NULL; -} - - -void -ast_expression_statement::print(void) const -{ - if (expression) - expression->print(); - - printf("; "); -} - - -ast_expression_statement::ast_expression_statement(ast_expression *ex) : - expression(ex) -{ - /* empty */ -} - - -void -ast_function::print(void) const -{ - return_type->print(); - printf(" %s (", identifier); - - foreach_list_typed(ast_node, ast, link, & this->parameters) { - ast->print(); - } - - printf(")"); -} - - -ast_function::ast_function(void) - : return_type(NULL), identifier(NULL), is_definition(false), - signature(NULL) -{ - /* empty */ -} - - -void -ast_fully_specified_type::print(void) const -{ - _mesa_ast_type_qualifier_print(& qualifier); - specifier->print(); -} - - -void -ast_parameter_declarator::print(void) const -{ - type->print(); - if (identifier) - printf("%s ", identifier); - ast_opt_array_dimensions_print(array_specifier); -} - - -void -ast_function_definition::print(void) const -{ - prototype->print(); - body->print(); -} - - -void -ast_declaration::print(void) const -{ - printf("%s ", identifier); - ast_opt_array_dimensions_print(array_specifier); - - if (initializer) { - printf("= "); - initializer->print(); - } -} - - -ast_declaration::ast_declaration(const char *identifier, - ast_array_specifier *array_specifier, - ast_expression *initializer) -{ - this->identifier = identifier; - this->array_specifier = array_specifier; - this->initializer = initializer; -} - - -void -ast_declarator_list::print(void) const -{ - assert(type || invariant); - - if (type) - type->print(); - else if (invariant) - printf("invariant "); - else - printf("precise "); - - foreach_list_typed (ast_node, ast, link, & this->declarations) { - if (&ast->link != 
this->declarations.get_head()) - printf(", "); - - ast->print(); - } - - printf("; "); -} - - -ast_declarator_list::ast_declarator_list(ast_fully_specified_type *type) -{ - this->type = type; - this->invariant = false; - this->precise = false; -} - -void -ast_jump_statement::print(void) const -{ - switch (mode) { - case ast_continue: - printf("continue; "); - break; - case ast_break: - printf("break; "); - break; - case ast_return: - printf("return "); - if (opt_return_value) - opt_return_value->print(); - - printf("; "); - break; - case ast_discard: - printf("discard; "); - break; - } -} - - -ast_jump_statement::ast_jump_statement(int mode, ast_expression *return_value) - : opt_return_value(NULL) -{ - this->mode = ast_jump_modes(mode); - - if (mode == ast_return) - opt_return_value = return_value; -} - - -void -ast_selection_statement::print(void) const -{ - printf("if ( "); - condition->print(); - printf(") "); - - then_statement->print(); - - if (else_statement) { - printf("else "); - else_statement->print(); - } -} - - -ast_selection_statement::ast_selection_statement(ast_expression *condition, - ast_node *then_statement, - ast_node *else_statement) -{ - this->condition = condition; - this->then_statement = then_statement; - this->else_statement = else_statement; -} - - -void -ast_switch_statement::print(void) const -{ - printf("switch ( "); - test_expression->print(); - printf(") "); - - body->print(); -} - - -ast_switch_statement::ast_switch_statement(ast_expression *test_expression, - ast_node *body) -{ - this->test_expression = test_expression; - this->body = body; -} - - -void -ast_switch_body::print(void) const -{ - printf("{\n"); - if (stmts != NULL) { - stmts->print(); - } - printf("}\n"); -} - - -ast_switch_body::ast_switch_body(ast_case_statement_list *stmts) -{ - this->stmts = stmts; -} - - -void ast_case_label::print(void) const -{ - if (test_value != NULL) { - printf("case "); - test_value->print(); - printf(": "); - } else { - printf("default: "); - } -} - - -ast_case_label::ast_case_label(ast_expression *test_value) -{ - this->test_value = test_value; -} - - -void ast_case_label_list::print(void) const -{ - foreach_list_typed(ast_node, ast, link, & this->labels) { - ast->print(); - } - printf("\n"); -} - - -ast_case_label_list::ast_case_label_list(void) -{ -} - - -void ast_case_statement::print(void) const -{ - labels->print(); - foreach_list_typed(ast_node, ast, link, & this->stmts) { - ast->print(); - printf("\n"); - } -} - - -ast_case_statement::ast_case_statement(ast_case_label_list *labels) -{ - this->labels = labels; -} - - -void ast_case_statement_list::print(void) const -{ - foreach_list_typed(ast_node, ast, link, & this->cases) { - ast->print(); - } -} - - -ast_case_statement_list::ast_case_statement_list(void) -{ -} - - -void -ast_iteration_statement::print(void) const -{ - switch (mode) { - case ast_for: - printf("for( "); - if (init_statement) - init_statement->print(); - printf("; "); - - if (condition) - condition->print(); - printf("; "); - - if (rest_expression) - rest_expression->print(); - printf(") "); - - body->print(); - break; - - case ast_while: - printf("while ( "); - if (condition) - condition->print(); - printf(") "); - body->print(); - break; - - case ast_do_while: - printf("do "); - body->print(); - printf("while ( "); - if (condition) - condition->print(); - printf("); "); - break; - } -} - - -ast_iteration_statement::ast_iteration_statement(int mode, - ast_node *init, - ast_node *condition, - ast_expression *rest_expression, - ast_node 
*body) -{ - this->mode = ast_iteration_modes(mode); - this->init_statement = init; - this->condition = condition; - this->rest_expression = rest_expression; - this->body = body; -} - - -void -ast_struct_specifier::print(void) const -{ - printf("struct %s { ", name); - foreach_list_typed(ast_node, ast, link, &this->declarations) { - ast->print(); - } - printf("} "); -} - - -ast_struct_specifier::ast_struct_specifier(const char *identifier, - ast_declarator_list *declarator_list) -{ - if (identifier == NULL) { - static mtx_t mutex = _MTX_INITIALIZER_NP; - static unsigned anon_count = 1; - unsigned count; - - mtx_lock(&mutex); - count = anon_count++; - mtx_unlock(&mutex); - - identifier = ralloc_asprintf(this, "#anon_struct_%04x", count); - } - name = identifier; - this->declarations.push_degenerate_list_at_head(&declarator_list->link); - is_declaration = true; -} - -void ast_subroutine_list::print(void) const -{ - foreach_list_typed (ast_node, ast, link, & this->declarations) { - if (&ast->link != this->declarations.get_head()) - printf(", "); - ast->print(); - } -} - -static void -set_shader_inout_layout(struct gl_shader *shader, - struct _mesa_glsl_parse_state *state) -{ - /* Should have been prevented by the parser. */ - if (shader->Stage == MESA_SHADER_TESS_CTRL) { - assert(!state->in_qualifier->flags.i); - } else if (shader->Stage == MESA_SHADER_TESS_EVAL) { - assert(!state->out_qualifier->flags.i); - } else if (shader->Stage != MESA_SHADER_GEOMETRY) { - assert(!state->in_qualifier->flags.i); - assert(!state->out_qualifier->flags.i); - } - - if (shader->Stage != MESA_SHADER_COMPUTE) { - /* Should have been prevented by the parser. */ - assert(!state->cs_input_local_size_specified); - } - - if (shader->Stage != MESA_SHADER_FRAGMENT) { - /* Should have been prevented by the parser. 
*/ - assert(!state->fs_uses_gl_fragcoord); - assert(!state->fs_redeclares_gl_fragcoord); - assert(!state->fs_pixel_center_integer); - assert(!state->fs_origin_upper_left); - assert(!state->fs_early_fragment_tests); - } - - switch (shader->Stage) { - case MESA_SHADER_TESS_CTRL: - shader->TessCtrl.VerticesOut = 0; - if (state->tcs_output_vertices_specified) { - unsigned vertices; - if (state->out_qualifier->vertices-> - process_qualifier_constant(state, "vertices", &vertices, - false)) { - - YYLTYPE loc = state->out_qualifier->vertices->get_location(); - if (vertices > state->Const.MaxPatchVertices) { - _mesa_glsl_error(&loc, state, "vertices (%d) exceeds " - "GL_MAX_PATCH_VERTICES", vertices); - } - shader->TessCtrl.VerticesOut = vertices; - } - } - break; - case MESA_SHADER_TESS_EVAL: - shader->TessEval.PrimitiveMode = PRIM_UNKNOWN; - if (state->in_qualifier->flags.q.prim_type) - shader->TessEval.PrimitiveMode = state->in_qualifier->prim_type; - - shader->TessEval.Spacing = 0; - if (state->in_qualifier->flags.q.vertex_spacing) - shader->TessEval.Spacing = state->in_qualifier->vertex_spacing; - - shader->TessEval.VertexOrder = 0; - if (state->in_qualifier->flags.q.ordering) - shader->TessEval.VertexOrder = state->in_qualifier->ordering; - - shader->TessEval.PointMode = -1; - if (state->in_qualifier->flags.q.point_mode) - shader->TessEval.PointMode = state->in_qualifier->point_mode; - break; - case MESA_SHADER_GEOMETRY: - shader->Geom.VerticesOut = 0; - if (state->out_qualifier->flags.q.max_vertices) { - unsigned qual_max_vertices; - if (state->out_qualifier->max_vertices-> - process_qualifier_constant(state, "max_vertices", - &qual_max_vertices, true)) { - shader->Geom.VerticesOut = qual_max_vertices; - } - } - - if (state->gs_input_prim_type_specified) { - shader->Geom.InputType = state->in_qualifier->prim_type; - } else { - shader->Geom.InputType = PRIM_UNKNOWN; - } - - if (state->out_qualifier->flags.q.prim_type) { - shader->Geom.OutputType = state->out_qualifier->prim_type; - } else { - shader->Geom.OutputType = PRIM_UNKNOWN; - } - - shader->Geom.Invocations = 0; - if (state->in_qualifier->flags.q.invocations) { - unsigned invocations; - if (state->in_qualifier->invocations-> - process_qualifier_constant(state, "invocations", - &invocations, false)) { - - YYLTYPE loc = state->in_qualifier->invocations->get_location(); - if (invocations > MAX_GEOMETRY_SHADER_INVOCATIONS) { - _mesa_glsl_error(&loc, state, - "invocations (%d) exceeds " - "GL_MAX_GEOMETRY_SHADER_INVOCATIONS", - invocations); - } - shader->Geom.Invocations = invocations; - } - } - break; - - case MESA_SHADER_COMPUTE: - if (state->cs_input_local_size_specified) { - for (int i = 0; i < 3; i++) - shader->Comp.LocalSize[i] = state->cs_input_local_size[i]; - } else { - for (int i = 0; i < 3; i++) - shader->Comp.LocalSize[i] = 0; - } - break; - - case MESA_SHADER_FRAGMENT: - shader->redeclares_gl_fragcoord = state->fs_redeclares_gl_fragcoord; - shader->uses_gl_fragcoord = state->fs_uses_gl_fragcoord; - shader->pixel_center_integer = state->fs_pixel_center_integer; - shader->origin_upper_left = state->fs_origin_upper_left; - shader->ARB_fragment_coord_conventions_enable = - state->ARB_fragment_coord_conventions_enable; - shader->EarlyFragmentTests = state->fs_early_fragment_tests; - break; - - default: - /* Nothing to do. 
*/ - break; - } -} - -extern "C" { - -void -_mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, - bool dump_ast, bool dump_hir) -{ - struct _mesa_glsl_parse_state *state = - new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader); - const char *source = shader->Source; - - if (ctx->Const.GenerateTemporaryNames) - (void) p_atomic_cmpxchg(&ir_variable::temporaries_allocate_names, - false, true); - - state->error = glcpp_preprocess(state, &source, &state->info_log, - &ctx->Extensions, ctx); - - if (!state->error) { - _mesa_glsl_lexer_ctor(state, source); - _mesa_glsl_parse(state); - _mesa_glsl_lexer_dtor(state); - } - - if (dump_ast) { - foreach_list_typed(ast_node, ast, link, &state->translation_unit) { - ast->print(); - } - printf("\n\n"); - } - - ralloc_free(shader->ir); - shader->ir = new(shader) exec_list; - if (!state->error && !state->translation_unit.is_empty()) - _mesa_ast_to_hir(shader->ir, state); - - if (!state->error) { - validate_ir_tree(shader->ir); - - /* Print out the unoptimized IR. */ - if (dump_hir) { - _mesa_print_ir(stdout, shader->ir, state); - } - } - - - if (!state->error && !shader->ir->is_empty()) { - struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[shader->Stage]; - - lower_subroutine(shader->ir, state); - /* Do some optimization at compile time to reduce shader IR size - * and reduce later work if the same shader is linked multiple times - */ - while (do_common_optimization(shader->ir, false, false, options, - ctx->Const.NativeIntegers)) - ; - - validate_ir_tree(shader->ir); - - enum ir_variable_mode other; - switch (shader->Stage) { - case MESA_SHADER_VERTEX: - other = ir_var_shader_in; - break; - case MESA_SHADER_FRAGMENT: - other = ir_var_shader_out; - break; - default: - /* Something invalid to ensure optimize_dead_builtin_uniforms - * doesn't remove anything other than uniforms or constants. - */ - other = ir_var_mode_count; - break; - } - - optimize_dead_builtin_variables(shader->ir, other); - - validate_ir_tree(shader->ir); - } - - if (shader->InfoLog) - ralloc_free(shader->InfoLog); - - if (!state->error) - set_shader_inout_layout(shader, state); - - shader->symbols = new(shader->ir) glsl_symbol_table; - shader->CompileStatus = !state->error; - shader->InfoLog = state->info_log; - shader->Version = state->language_version; - shader->IsES = state->es_shader; - shader->uses_builtin_functions = state->uses_builtin_functions; - - /* Retain any live IR, but trash the rest. */ - reparent_ir(shader->ir, shader->ir); - - /* Destroy the symbol table. Create a new symbol table that contains only - * the variables and functions that still exist in the IR. The symbol - * table will be used later during linking. - * - * There must NOT be any freed objects still referenced by the symbol - * table. That could cause the linker to dereference freed memory. - * - * We don't have to worry about types or interface-types here because those - * are fly-weights that are looked up by glsl_type. 
- */ - foreach_in_list (ir_instruction, ir, shader->ir) { - switch (ir->ir_type) { - case ir_type_function: - shader->symbols->add_function((ir_function *) ir); - break; - case ir_type_variable: { - ir_variable *const var = (ir_variable *) ir; - - if (var->data.mode != ir_var_temporary) - shader->symbols->add_variable(var); - break; - } - default: - break; - } - } - - _mesa_glsl_initialize_derived_variables(shader); - - delete state->symbols; - ralloc_free(state); -} - -} /* extern "C" */ -/** - * Do the set of common optimizations passes - * - * \param ir List of instructions to be optimized - * \param linked Is the shader linked? This enables - * optimizations passes that remove code at - * global scope and could cause linking to - * fail. - * \param uniform_locations_assigned Have locations already been assigned for - * uniforms? This prevents the declarations - * of unused uniforms from being removed. - * The setting of this flag only matters if - * \c linked is \c true. - * \param max_unroll_iterations Maximum number of loop iterations to be - * unrolled. Setting to 0 disables loop - * unrolling. - * \param options The driver's preferred shader options. - */ -bool -do_common_optimization(exec_list *ir, bool linked, - bool uniform_locations_assigned, - const struct gl_shader_compiler_options *options, - bool native_integers) -{ - GLboolean progress = GL_FALSE; - - progress = lower_instructions(ir, SUB_TO_ADD_NEG) || progress; - - if (linked) { - progress = do_function_inlining(ir) || progress; - progress = do_dead_functions(ir) || progress; - progress = do_structure_splitting(ir) || progress; - } - progress = do_if_simplification(ir) || progress; - progress = opt_flatten_nested_if_blocks(ir) || progress; - progress = opt_conditional_discard(ir) || progress; - progress = do_copy_propagation(ir) || progress; - progress = do_copy_propagation_elements(ir) || progress; - - if (options->OptimizeForAOS && !linked) - progress = opt_flip_matrices(ir) || progress; - - if (linked && options->OptimizeForAOS) { - progress = do_vectorize(ir) || progress; - } - - if (linked) - progress = do_dead_code(ir, uniform_locations_assigned) || progress; - else - progress = do_dead_code_unlinked(ir) || progress; - progress = do_dead_code_local(ir) || progress; - progress = do_tree_grafting(ir) || progress; - progress = do_constant_propagation(ir) || progress; - if (linked) - progress = do_constant_variable(ir) || progress; - else - progress = do_constant_variable_unlinked(ir) || progress; - progress = do_constant_folding(ir) || progress; - progress = do_minmax_prune(ir) || progress; - progress = do_rebalance_tree(ir) || progress; - progress = do_algebraic(ir, native_integers, options) || progress; - progress = do_lower_jumps(ir) || progress; - progress = do_vec_index_to_swizzle(ir) || progress; - progress = lower_vector_insert(ir, false) || progress; - progress = do_swizzle_swizzle(ir) || progress; - progress = do_noop_swizzle(ir) || progress; - - progress = optimize_split_arrays(ir, linked) || progress; - progress = optimize_redundant_jumps(ir) || progress; - - loop_state *ls = analyze_loop_variables(ir); - if (ls->loop_found) { - progress = set_loop_controls(ir, ls) || progress; - progress = unroll_loops(ir, ls, options) || progress; - } - delete ls; - - return progress; -} - -extern "C" { - -/** - * To be called at GL teardown time, this frees compiler datastructures. - * - * After calling this, any previously compiled shaders and shader - * programs would be invalid. 
So this should happen at approximately - * program exit. - */ -void -_mesa_destroy_shader_compiler(void) -{ - _mesa_destroy_shader_compiler_caches(); - - _mesa_glsl_release_types(); -} - -/** - * Releases compiler caches to trade off performance for memory. - * - * Intended to be used with glReleaseShaderCompiler(). - */ -void -_mesa_destroy_shader_compiler_caches(void) -{ - _mesa_glsl_release_builtin_functions(); -} - -} diff --git a/src/glsl/glsl_parser_extras.h b/src/glsl/glsl_parser_extras.h deleted file mode 100644 index 3f88e01d599..00000000000 --- a/src/glsl/glsl_parser_extras.h +++ /dev/null @@ -1,752 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once -#ifndef GLSL_PARSER_EXTRAS_H -#define GLSL_PARSER_EXTRAS_H - -/* - * Most of the definitions here only apply to C++ - */ -#ifdef __cplusplus - - -#include <stdlib.h> -#include "glsl_symbol_table.h" - -struct gl_context; - -struct glsl_switch_state { - /** Temporary variables needed for switch statement. */ - ir_variable *test_var; - ir_variable *is_fallthru_var; - class ast_switch_statement *switch_nesting_ast; - - /** Used to detect if 'continue' was called inside a switch. */ - ir_variable *continue_inside; - - /** Used to set condition if 'default' label should be chosen. */ - ir_variable *run_default; - - /** Table of constant values already used in case labels */ - struct hash_table *labels_ht; - class ast_case_label *previous_default; - - bool is_switch_innermost; // if switch stmt is closest to break, ... -}; - -const char * -glsl_compute_version_string(void *mem_ctx, bool is_es, unsigned version); - -typedef struct YYLTYPE { - int first_line; - int first_column; - int last_line; - int last_column; - unsigned source; -} YYLTYPE; -# define YYLTYPE_IS_DECLARED 1 -# define YYLTYPE_IS_TRIVIAL 1 - -extern void _mesa_glsl_error(YYLTYPE *locp, _mesa_glsl_parse_state *state, - const char *fmt, ...); - - -struct _mesa_glsl_parse_state { - _mesa_glsl_parse_state(struct gl_context *_ctx, gl_shader_stage stage, - void *mem_ctx); - - DECLARE_RALLOC_CXX_OPERATORS(_mesa_glsl_parse_state); - - /** - * Generate a string representing the GLSL version currently being compiled - * (useful for error messages). 
- */ - const char *get_version_string() - { - return glsl_compute_version_string(this, this->es_shader, - this->language_version); - } - - /** - * Determine whether the current GLSL version is sufficiently high to - * support a certain feature. - * - * \param required_glsl_version is the desktop GLSL version that is - * required to support the feature, or 0 if no version of desktop GLSL - * supports the feature. - * - * \param required_glsl_es_version is the GLSL ES version that is required - * to support the feature, or 0 if no version of GLSL ES supports the - * feature. - */ - bool is_version(unsigned required_glsl_version, - unsigned required_glsl_es_version) const - { - unsigned required_version = this->es_shader ? - required_glsl_es_version : required_glsl_version; - unsigned this_version = this->forced_language_version - ? this->forced_language_version : this->language_version; - return required_version != 0 - && this_version >= required_version; - } - - bool check_version(unsigned required_glsl_version, - unsigned required_glsl_es_version, - YYLTYPE *locp, const char *fmt, ...) PRINTFLIKE(5, 6); - - bool check_arrays_of_arrays_allowed(YYLTYPE *locp) - { - if (!(ARB_arrays_of_arrays_enable || is_version(430, 310))) { - const char *const requirement = this->es_shader - ? "GLSL ES 3.10" - : "GL_ARB_arrays_of_arrays or GLSL 4.30"; - _mesa_glsl_error(locp, this, - "%s required for defining arrays of arrays.", - requirement); - return false; - } - return true; - } - - bool check_precision_qualifiers_allowed(YYLTYPE *locp) - { - return check_version(130, 100, locp, - "precision qualifiers are forbidden"); - } - - bool check_bitwise_operations_allowed(YYLTYPE *locp) - { - return check_version(130, 300, locp, "bit-wise operations are forbidden"); - } - - bool check_explicit_attrib_stream_allowed(YYLTYPE *locp) - { - if (!this->has_explicit_attrib_stream()) { - const char *const requirement = "GL_ARB_gpu_shader5 extension or GLSL 4.00"; - - _mesa_glsl_error(locp, this, "explicit stream requires %s", - requirement); - return false; - } - - return true; - } - - bool check_explicit_attrib_location_allowed(YYLTYPE *locp, - const ir_variable *var) - { - if (!this->has_explicit_attrib_location()) { - const char *const requirement = this->es_shader - ? "GLSL ES 3.00" - : "GL_ARB_explicit_attrib_location extension or GLSL 3.30"; - - _mesa_glsl_error(locp, this, "%s explicit location requires %s", - mode_string(var), requirement); - return false; - } - - return true; - } - - bool check_separate_shader_objects_allowed(YYLTYPE *locp, - const ir_variable *var) - { - if (!this->has_separate_shader_objects()) { - const char *const requirement = this->es_shader - ? "GL_EXT_separate_shader_objects extension or GLSL ES 3.10" - : "GL_ARB_separate_shader_objects extension or GLSL 4.20"; - - _mesa_glsl_error(locp, this, "%s explicit location requires %s", - mode_string(var), requirement); - return false; - } - - return true; - } - - bool check_explicit_uniform_location_allowed(YYLTYPE *locp, - const ir_variable *) - { - if (!this->has_explicit_attrib_location() || - !this->has_explicit_uniform_location()) { - const char *const requirement = this->es_shader - ? 
"GLSL ES 3.10" - : "GL_ARB_explicit_uniform_location and either " - "GL_ARB_explicit_attrib_location or GLSL 3.30."; - - _mesa_glsl_error(locp, this, - "uniform explicit location requires %s", - requirement); - return false; - } - - return true; - } - - bool has_atomic_counters() const - { - return ARB_shader_atomic_counters_enable || is_version(420, 310); - } - - bool has_enhanced_layouts() const - { - return ARB_enhanced_layouts_enable || is_version(440, 0); - } - - bool has_explicit_attrib_stream() const - { - return ARB_gpu_shader5_enable || is_version(400, 0); - } - - bool has_explicit_attrib_location() const - { - return ARB_explicit_attrib_location_enable || is_version(330, 300); - } - - bool has_explicit_uniform_location() const - { - return ARB_explicit_uniform_location_enable || is_version(430, 310); - } - - bool has_uniform_buffer_objects() const - { - return ARB_uniform_buffer_object_enable || is_version(140, 300); - } - - bool has_shader_storage_buffer_objects() const - { - return ARB_shader_storage_buffer_object_enable || is_version(430, 310); - } - - bool has_separate_shader_objects() const - { - return ARB_separate_shader_objects_enable || is_version(410, 310) - || EXT_separate_shader_objects_enable; - } - - bool has_double() const - { - return ARB_gpu_shader_fp64_enable || is_version(400, 0); - } - - bool has_420pack() const - { - return ARB_shading_language_420pack_enable || is_version(420, 0); - } - - bool has_420pack_or_es31() const - { - return ARB_shading_language_420pack_enable || is_version(420, 310); - } - - bool has_compute_shader() const - { - return ARB_compute_shader_enable || is_version(430, 310); - } - - bool has_geometry_shader() const - { - return OES_geometry_shader_enable || is_version(150, 320); - } - - void process_version_directive(YYLTYPE *locp, int version, - const char *ident); - - struct gl_context *const ctx; - void *scanner; - exec_list translation_unit; - glsl_symbol_table *symbols; - - unsigned num_supported_versions; - struct { - unsigned ver; - bool es; - } supported_versions[15]; - - bool es_shader; - unsigned language_version; - unsigned forced_language_version; - gl_shader_stage stage; - - /** - * Number of nested struct_specifier levels - * - * Outside a struct_specifier, this is zero. - */ - unsigned struct_specifier_depth; - - /** - * Default uniform layout qualifiers tracked during parsing. - * Currently affects uniform blocks and uniform buffer variables in - * those blocks. - */ - struct ast_type_qualifier *default_uniform_qualifier; - - /** - * Default shader storage layout qualifiers tracked during parsing. - * Currently affects shader storage blocks and shader storage buffer - * variables in those blocks. - */ - struct ast_type_qualifier *default_shader_storage_qualifier; - - /** - * Variables to track different cases if a fragment shader redeclares - * built-in variable gl_FragCoord. - * - * Note: These values are computed at ast_to_hir time rather than at parse - * time. - */ - bool fs_redeclares_gl_fragcoord; - bool fs_origin_upper_left; - bool fs_pixel_center_integer; - bool fs_redeclares_gl_fragcoord_with_no_layout_qualifiers; - - /** - * True if a geometry shader input primitive type or tessellation control - * output vertices were specified using a layout directive. - * - * Note: these values are computed at ast_to_hir time rather than at parse - * time. 
- */
-   bool gs_input_prim_type_specified;
-   bool tcs_output_vertices_specified;
-
-   /**
-    * Input layout qualifiers from GLSL 1.50 (geometry shader controls),
-    * and GLSL 4.00 (tessellation evaluation shader)
-    */
-   struct ast_type_qualifier *in_qualifier;
-
-   /**
-    * True if a compute shader input local size was specified using a layout
-    * directive.
-    *
-    * Note: this value is computed at ast_to_hir time rather than at parse
-    * time.
-    */
-   bool cs_input_local_size_specified;
-
-   /**
-    * If cs_input_local_size_specified is true, the local size that was
-    * specified.  Otherwise ignored.
-    */
-   unsigned cs_input_local_size[3];
-
-   /**
-    * Output layout qualifiers from GLSL 1.50 (geometry shader controls),
-    * and GLSL 4.00 (tessellation control shader).
-    */
-   struct ast_type_qualifier *out_qualifier;
-
-   /**
-    * Printable list of GLSL versions supported by the current context
-    *
-    * \note
-    * This string should probably be generated per-context instead of per
-    * invocation of the compiler.  This should be changed when the method of
-    * tracking supported GLSL versions changes.
-    */
-   const char *supported_version_string;
-
-   /**
-    * Implementation defined limits that affect built-in variables, etc.
-    *
-    * \sa struct gl_constants (in mtypes.h)
-    */
-   struct {
-      /* 1.10 */
-      unsigned MaxLights;
-      unsigned MaxClipPlanes;
-      unsigned MaxTextureUnits;
-      unsigned MaxTextureCoords;
-      unsigned MaxVertexAttribs;
-      unsigned MaxVertexUniformComponents;
-      unsigned MaxVertexTextureImageUnits;
-      unsigned MaxCombinedTextureImageUnits;
-      unsigned MaxTextureImageUnits;
-      unsigned MaxFragmentUniformComponents;
-
-      /* ARB_draw_buffers */
-      unsigned MaxDrawBuffers;
-
-      /* ARB_blend_func_extended */
-      unsigned MaxDualSourceDrawBuffers;
-
-      /* 3.00 ES */
-      int MinProgramTexelOffset;
-      int MaxProgramTexelOffset;
-
-      /* 1.50 */
-      unsigned MaxVertexOutputComponents;
-      unsigned MaxGeometryInputComponents;
-      unsigned MaxGeometryOutputComponents;
-      unsigned MaxFragmentInputComponents;
-      unsigned MaxGeometryTextureImageUnits;
-      unsigned MaxGeometryOutputVertices;
-      unsigned MaxGeometryTotalOutputComponents;
-      unsigned MaxGeometryUniformComponents;
-
-      /* ARB_shader_atomic_counters */
-      unsigned MaxVertexAtomicCounters;
-      unsigned MaxTessControlAtomicCounters;
-      unsigned MaxTessEvaluationAtomicCounters;
-      unsigned MaxGeometryAtomicCounters;
-      unsigned MaxFragmentAtomicCounters;
-      unsigned MaxCombinedAtomicCounters;
-      unsigned MaxAtomicBufferBindings;
-
-      /* These are also atomic counter related, but they weren't added
-       * until atomic counters were added to core in GLSL 4.20 and GLSL ES
-       * 3.10.
- */ - unsigned MaxVertexAtomicCounterBuffers; - unsigned MaxTessControlAtomicCounterBuffers; - unsigned MaxTessEvaluationAtomicCounterBuffers; - unsigned MaxGeometryAtomicCounterBuffers; - unsigned MaxFragmentAtomicCounterBuffers; - unsigned MaxCombinedAtomicCounterBuffers; - unsigned MaxAtomicCounterBufferSize; - - /* ARB_compute_shader */ - unsigned MaxComputeWorkGroupCount[3]; - unsigned MaxComputeWorkGroupSize[3]; - - /* ARB_shader_image_load_store */ - unsigned MaxImageUnits; - unsigned MaxCombinedShaderOutputResources; - unsigned MaxImageSamples; - unsigned MaxVertexImageUniforms; - unsigned MaxTessControlImageUniforms; - unsigned MaxTessEvaluationImageUniforms; - unsigned MaxGeometryImageUniforms; - unsigned MaxFragmentImageUniforms; - unsigned MaxCombinedImageUniforms; - - /* ARB_viewport_array */ - unsigned MaxViewports; - - /* ARB_tessellation_shader */ - unsigned MaxPatchVertices; - unsigned MaxTessGenLevel; - unsigned MaxTessControlInputComponents; - unsigned MaxTessControlOutputComponents; - unsigned MaxTessControlTextureImageUnits; - unsigned MaxTessEvaluationInputComponents; - unsigned MaxTessEvaluationOutputComponents; - unsigned MaxTessEvaluationTextureImageUnits; - unsigned MaxTessPatchComponents; - unsigned MaxTessControlTotalOutputComponents; - unsigned MaxTessControlUniformComponents; - unsigned MaxTessEvaluationUniformComponents; - } Const; - - /** - * During AST to IR conversion, pointer to current IR function - * - * Will be \c NULL whenever the AST to IR conversion is not inside a - * function definition. - */ - class ir_function_signature *current_function; - - /** - * During AST to IR conversion, pointer to the toplevel IR - * instruction list being generated. - */ - exec_list *toplevel_ir; - - /** Have we found a return statement in this function? */ - bool found_return; - - /** Was there an error during compilation? */ - bool error; - - /** - * Are all shader inputs / outputs invariant? - * - * This is set when the 'STDGL invariant(all)' pragma is used. - */ - bool all_invariant; - - /** Loop or switch statement containing the current instructions. */ - class ast_iteration_statement *loop_nesting_ast; - - struct glsl_switch_state switch_state; - - /** List of structures defined in user code. */ - const glsl_type **user_structures; - unsigned num_user_structures; - - char *info_log; - - /** - * \name Enable bits for GLSL extensions - */ - /*@{*/ - /* ARB extensions go here, sorted alphabetically. 
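/* Sketch of how paired *_enable / *_warn flags like the ones above are
 * typically driven by "#extension <name> : <behavior>".  The stand-in type
 * and the exact warn semantics here are assumptions, not Mesa's actual
 * extension-table code.
 */
#include <cstring>

struct ext_flags {
   bool enable;
   bool warn;
};

static bool
sketch_apply_behavior(ext_flags &f, const char *behavior)
{
   if (strcmp(behavior, "enable") == 0 || strcmp(behavior, "require") == 0) {
      f.enable = true;
      f.warn = false;
   } else if (strcmp(behavior, "warn") == 0) {
      f.enable = true;    /* usable, but each use draws a warning */
      f.warn = true;
   } else if (strcmp(behavior, "disable") == 0) {
      f.enable = false;
      f.warn = false;
   } else {
      return false;       /* unknown behavior: reject the directive */
   }
   return true;
}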
- */ - bool ARB_arrays_of_arrays_enable; - bool ARB_arrays_of_arrays_warn; - bool ARB_compute_shader_enable; - bool ARB_compute_shader_warn; - bool ARB_conservative_depth_enable; - bool ARB_conservative_depth_warn; - bool ARB_derivative_control_enable; - bool ARB_derivative_control_warn; - bool ARB_draw_buffers_enable; - bool ARB_draw_buffers_warn; - bool ARB_draw_instanced_enable; - bool ARB_draw_instanced_warn; - bool ARB_enhanced_layouts_enable; - bool ARB_enhanced_layouts_warn; - bool ARB_explicit_attrib_location_enable; - bool ARB_explicit_attrib_location_warn; - bool ARB_explicit_uniform_location_enable; - bool ARB_explicit_uniform_location_warn; - bool ARB_fragment_coord_conventions_enable; - bool ARB_fragment_coord_conventions_warn; - bool ARB_fragment_layer_viewport_enable; - bool ARB_fragment_layer_viewport_warn; - bool ARB_gpu_shader5_enable; - bool ARB_gpu_shader5_warn; - bool ARB_gpu_shader_fp64_enable; - bool ARB_gpu_shader_fp64_warn; - bool ARB_sample_shading_enable; - bool ARB_sample_shading_warn; - bool ARB_separate_shader_objects_enable; - bool ARB_separate_shader_objects_warn; - bool ARB_shader_atomic_counters_enable; - bool ARB_shader_atomic_counters_warn; - bool ARB_shader_bit_encoding_enable; - bool ARB_shader_bit_encoding_warn; - bool ARB_shader_clock_enable; - bool ARB_shader_clock_warn; - bool ARB_shader_draw_parameters_enable; - bool ARB_shader_draw_parameters_warn; - bool ARB_shader_image_load_store_enable; - bool ARB_shader_image_load_store_warn; - bool ARB_shader_image_size_enable; - bool ARB_shader_image_size_warn; - bool ARB_shader_precision_enable; - bool ARB_shader_precision_warn; - bool ARB_shader_stencil_export_enable; - bool ARB_shader_stencil_export_warn; - bool ARB_shader_storage_buffer_object_enable; - bool ARB_shader_storage_buffer_object_warn; - bool ARB_shader_subroutine_enable; - bool ARB_shader_subroutine_warn; - bool ARB_shader_texture_image_samples_enable; - bool ARB_shader_texture_image_samples_warn; - bool ARB_shader_texture_lod_enable; - bool ARB_shader_texture_lod_warn; - bool ARB_shading_language_420pack_enable; - bool ARB_shading_language_420pack_warn; - bool ARB_shading_language_packing_enable; - bool ARB_shading_language_packing_warn; - bool ARB_tessellation_shader_enable; - bool ARB_tessellation_shader_warn; - bool ARB_texture_cube_map_array_enable; - bool ARB_texture_cube_map_array_warn; - bool ARB_texture_gather_enable; - bool ARB_texture_gather_warn; - bool ARB_texture_multisample_enable; - bool ARB_texture_multisample_warn; - bool ARB_texture_query_levels_enable; - bool ARB_texture_query_levels_warn; - bool ARB_texture_query_lod_enable; - bool ARB_texture_query_lod_warn; - bool ARB_texture_rectangle_enable; - bool ARB_texture_rectangle_warn; - bool ARB_uniform_buffer_object_enable; - bool ARB_uniform_buffer_object_warn; - bool ARB_vertex_attrib_64bit_enable; - bool ARB_vertex_attrib_64bit_warn; - bool ARB_viewport_array_enable; - bool ARB_viewport_array_warn; - - /* KHR extensions go here, sorted alphabetically. - */ - - /* OES extensions go here, sorted alphabetically. - */ - bool OES_EGL_image_external_enable; - bool OES_EGL_image_external_warn; - bool OES_geometry_shader_enable; - bool OES_geometry_shader_warn; - bool OES_standard_derivatives_enable; - bool OES_standard_derivatives_warn; - bool OES_texture_3D_enable; - bool OES_texture_3D_warn; - bool OES_texture_storage_multisample_2d_array_enable; - bool OES_texture_storage_multisample_2d_array_warn; - - /* All other extensions go here, sorted alphabetically. 
- */ - bool AMD_conservative_depth_enable; - bool AMD_conservative_depth_warn; - bool AMD_shader_stencil_export_enable; - bool AMD_shader_stencil_export_warn; - bool AMD_shader_trinary_minmax_enable; - bool AMD_shader_trinary_minmax_warn; - bool AMD_vertex_shader_layer_enable; - bool AMD_vertex_shader_layer_warn; - bool AMD_vertex_shader_viewport_index_enable; - bool AMD_vertex_shader_viewport_index_warn; - bool EXT_blend_func_extended_enable; - bool EXT_blend_func_extended_warn; - bool EXT_draw_buffers_enable; - bool EXT_draw_buffers_warn; - bool EXT_separate_shader_objects_enable; - bool EXT_separate_shader_objects_warn; - bool EXT_shader_integer_mix_enable; - bool EXT_shader_integer_mix_warn; - bool EXT_shader_samples_identical_enable; - bool EXT_shader_samples_identical_warn; - bool EXT_texture_array_enable; - bool EXT_texture_array_warn; - /*@}*/ - - /** Extensions supported by the OpenGL implementation. */ - const struct gl_extensions *extensions; - - bool uses_builtin_functions; - bool fs_uses_gl_fragcoord; - - /** - * For geometry shaders, size of the most recently seen input declaration - * that was a sized array, or 0 if no sized input array declarations have - * been seen. - * - * Unused for other shader types. - */ - unsigned gs_input_size; - - bool fs_early_fragment_tests; - - /** - * For tessellation control shaders, size of the most recently seen output - * declaration that was a sized array, or 0 if no sized output array - * declarations have been seen. - * - * Unused for other shader types. - */ - unsigned tcs_output_size; - - /** Atomic counter offsets by binding */ - unsigned atomic_counter_offsets[MAX_COMBINED_ATOMIC_BUFFERS]; - - bool allow_extension_directive_midshader; - - /** - * Known subroutine type declarations. - */ - int num_subroutine_types; - ir_function **subroutine_types; - - /** - * Functions that are associated with - * subroutine types. - */ - int num_subroutines; - ir_function **subroutines; - - /** - * field selection temporary parser storage - - * did the parser just parse a dot. - */ - bool is_field; -}; - -# define YYLLOC_DEFAULT(Current, Rhs, N) \ -do { \ - if (N) \ - { \ - (Current).first_line = YYRHSLOC(Rhs, 1).first_line; \ - (Current).first_column = YYRHSLOC(Rhs, 1).first_column; \ - (Current).last_line = YYRHSLOC(Rhs, N).last_line; \ - (Current).last_column = YYRHSLOC(Rhs, N).last_column; \ - } \ - else \ - { \ - (Current).first_line = (Current).last_line = \ - YYRHSLOC(Rhs, 0).last_line; \ - (Current).first_column = (Current).last_column = \ - YYRHSLOC(Rhs, 0).last_column; \ - } \ - (Current).source = 0; \ -} while (0) - -/** - * Emit a warning to the shader log - * - * \sa _mesa_glsl_error - */ -extern void _mesa_glsl_warning(const YYLTYPE *locp, - _mesa_glsl_parse_state *state, - const char *fmt, ...); - -extern void _mesa_glsl_lexer_ctor(struct _mesa_glsl_parse_state *state, - const char *string); - -extern void _mesa_glsl_lexer_dtor(struct _mesa_glsl_parse_state *state); - -union YYSTYPE; -extern int _mesa_glsl_lexer_lex(union YYSTYPE *yylval, YYLTYPE *yylloc, - void *scanner); - -extern int _mesa_glsl_parse(struct _mesa_glsl_parse_state *); - -/** - * Process elements of the #extension directive - * - * \return - * If \c name and \c behavior are valid, \c true is returned. Otherwise - * \c false is returned. 
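/* Standalone restatement of what the YYLLOC_DEFAULT macro above computes:
 * a production's location spans from the start of its first symbol to the
 * end of its last symbol.  The type is a stand-in for YYLTYPE.
 */
struct sketch_loc {
   int first_line, first_column;
   int last_line, last_column;
   unsigned source;
};

static sketch_loc
sketch_span(const sketch_loc &first, const sketch_loc &last)
{
   sketch_loc l;
   l.first_line   = first.first_line;
   l.first_column = first.first_column;
   l.last_line    = last.last_line;
   l.last_column  = last.last_column;
   l.source       = 0;
   return l;
}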
- */ -extern bool _mesa_glsl_process_extension(const char *name, YYLTYPE *name_locp, - const char *behavior, - YYLTYPE *behavior_locp, - _mesa_glsl_parse_state *state); - -#endif /* __cplusplus */ - - -/* - * These definitions apply to C and C++ - */ -#ifdef __cplusplus -extern "C" { -#endif - -extern int glcpp_preprocess(void *ctx, const char **shader, char **info_log, - const struct gl_extensions *extensions, struct gl_context *gl_ctx); - -extern void _mesa_destroy_shader_compiler(void); -extern void _mesa_destroy_shader_compiler_caches(void); - -#ifdef __cplusplus -} -#endif - - -#endif /* GLSL_PARSER_EXTRAS_H */ diff --git a/src/glsl/glsl_symbol_table.cpp b/src/glsl/glsl_symbol_table.cpp deleted file mode 100644 index 6c682acf560..00000000000 --- a/src/glsl/glsl_symbol_table.cpp +++ /dev/null @@ -1,280 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
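/* Minimal sketch of the dual C/C++ header pattern used above: C++-only
 * declarations sit behind __cplusplus, while shared entry points are given
 * unmangled C linkage.  The function name is hypothetical.
 */
#ifdef __cplusplus
extern "C" {
#endif

void sketch_entry_point(void);   /* callable from both C and C++ */

#ifdef __cplusplus
}
#endif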
- */ - -#include "glsl_symbol_table.h" -#include "ast.h" - -class symbol_table_entry { -public: - DECLARE_RALLOC_CXX_OPERATORS(symbol_table_entry); - - bool add_interface(const glsl_type *i, enum ir_variable_mode mode) - { - const glsl_type **dest; - - switch (mode) { - case ir_var_uniform: - dest = &ibu; - break; - case ir_var_shader_storage: - dest = &iss; - break; - case ir_var_shader_in: - dest = &ibi; - break; - case ir_var_shader_out: - dest = &ibo; - break; - default: - assert(!"Unsupported interface variable mode!"); - return false; - } - - if (*dest != NULL) { - return false; - } else { - *dest = i; - return true; - } - } - - const glsl_type *get_interface(enum ir_variable_mode mode) - { - switch (mode) { - case ir_var_uniform: - return ibu; - case ir_var_shader_storage: - return iss; - case ir_var_shader_in: - return ibi; - case ir_var_shader_out: - return ibo; - default: - assert(!"Unsupported interface variable mode!"); - return NULL; - } - } - - symbol_table_entry(ir_variable *v) : - v(v), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) {} - symbol_table_entry(ir_function *f) : - v(0), f(f), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) {} - symbol_table_entry(const glsl_type *t) : - v(0), f(0), t(t), ibu(0), iss(0), ibi(0), ibo(0), a(0) {} - symbol_table_entry(const glsl_type *t, enum ir_variable_mode mode) : - v(0), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(0) - { - assert(t->is_interface()); - add_interface(t, mode); - } - symbol_table_entry(const class ast_type_specifier *a): - v(0), f(0), t(0), ibu(0), iss(0), ibi(0), ibo(0), a(a) {} - - ir_variable *v; - ir_function *f; - const glsl_type *t; - const glsl_type *ibu; - const glsl_type *iss; - const glsl_type *ibi; - const glsl_type *ibo; - const class ast_type_specifier *a; -}; - -glsl_symbol_table::glsl_symbol_table() -{ - this->separate_function_namespace = false; - this->table = _mesa_symbol_table_ctor(); - this->mem_ctx = ralloc_context(NULL); -} - -glsl_symbol_table::~glsl_symbol_table() -{ - _mesa_symbol_table_dtor(table); - ralloc_free(mem_ctx); -} - -void glsl_symbol_table::push_scope() -{ - _mesa_symbol_table_push_scope(table); -} - -void glsl_symbol_table::pop_scope() -{ - _mesa_symbol_table_pop_scope(table); -} - -bool glsl_symbol_table::name_declared_this_scope(const char *name) -{ - return _mesa_symbol_table_symbol_scope(table, -1, name) == 0; -} - -bool glsl_symbol_table::add_variable(ir_variable *v) -{ - assert(v->data.mode != ir_var_temporary); - - if (this->separate_function_namespace) { - /* In 1.10, functions and variables have separate namespaces. */ - symbol_table_entry *existing = get_entry(v->name); - if (name_declared_this_scope(v->name)) { - /* If there's already an existing function (not a constructor!) in - * the current scope, just update the existing entry to include 'v'. - */ - if (existing->v == NULL && existing->t == NULL) { - existing->v = v; - return true; - } - } else { - /* If not declared at this scope, add a new entry. But if an existing - * entry includes a function, propagate that to this block - otherwise - * the new variable declaration would shadow the function. 
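/* Self-contained sketch of the GLSL 1.10 separate-namespace rule being
 * implemented above: one table entry carries both a variable slot and a
 * function slot, so declaring a variable merges into an existing function
 * entry rather than shadowing it.  Minimal stand-in types.
 */
struct sketch_entry {
   const void *var;   /* variable slot */
   const void *fn;    /* function slot */
};

static bool
sketch_add_variable_110(sketch_entry &e, const void *v)
{
   if (e.var != nullptr)
      return false;   /* true redeclaration in this scope */
   e.var = v;         /* coexists with e.fn under 1.10 rules */
   return true;
}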
- */ - symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(v); - if (existing != NULL) - entry->f = existing->f; - int added = _mesa_symbol_table_add_symbol(table, -1, v->name, entry); - assert(added == 0); - (void)added; - return true; - } - return false; - } - - /* 1.20+ rules: */ - symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(v); - return _mesa_symbol_table_add_symbol(table, -1, v->name, entry) == 0; -} - -bool glsl_symbol_table::add_type(const char *name, const glsl_type *t) -{ - symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(t); - return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0; -} - -bool glsl_symbol_table::add_interface(const char *name, const glsl_type *i, - enum ir_variable_mode mode) -{ - assert(i->is_interface()); - symbol_table_entry *entry = get_entry(name); - if (entry == NULL) { - symbol_table_entry *entry = - new(mem_ctx) symbol_table_entry(i, mode); - bool add_interface_symbol_result = - _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0; - assert(add_interface_symbol_result); - return add_interface_symbol_result; - } else { - return entry->add_interface(i, mode); - } -} - -bool glsl_symbol_table::add_function(ir_function *f) -{ - if (this->separate_function_namespace && name_declared_this_scope(f->name)) { - /* In 1.10, functions and variables have separate namespaces. */ - symbol_table_entry *existing = get_entry(f->name); - if ((existing->f == NULL) && (existing->t == NULL)) { - existing->f = f; - return true; - } - } - symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f); - return _mesa_symbol_table_add_symbol(table, -1, f->name, entry) == 0; -} - -bool glsl_symbol_table::add_default_precision_qualifier(const char *type_name, - int precision) -{ - char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name); - - ast_type_specifier *default_specifier = new(mem_ctx) ast_type_specifier(name); - default_specifier->default_precision = precision; - - symbol_table_entry *entry = - new(mem_ctx) symbol_table_entry(default_specifier); - - return _mesa_symbol_table_add_symbol(table, -1, name, entry) == 0; -} - -void glsl_symbol_table::add_global_function(ir_function *f) -{ - symbol_table_entry *entry = new(mem_ctx) symbol_table_entry(f); - int added = _mesa_symbol_table_add_global_symbol(table, -1, f->name, entry); - assert(added == 0); - (void)added; -} - -ir_variable *glsl_symbol_table::get_variable(const char *name) -{ - symbol_table_entry *entry = get_entry(name); - return entry != NULL ? entry->v : NULL; -} - -const glsl_type *glsl_symbol_table::get_type(const char *name) -{ - symbol_table_entry *entry = get_entry(name); - return entry != NULL ? entry->t : NULL; -} - -const glsl_type *glsl_symbol_table::get_interface(const char *name, - enum ir_variable_mode mode) -{ - symbol_table_entry *entry = get_entry(name); - return entry != NULL ? entry->get_interface(mode) : NULL; -} - -ir_function *glsl_symbol_table::get_function(const char *name) -{ - symbol_table_entry *entry = get_entry(name); - return entry != NULL ? 
entry->f : NULL;
-}
-
-int glsl_symbol_table::get_default_precision_qualifier(const char *type_name)
-{
-   char *name = ralloc_asprintf(mem_ctx, "#default_precision_%s", type_name);
-   symbol_table_entry *entry = get_entry(name);
-   if (!entry)
-      return ast_precision_none;
-   return entry->a->default_precision;
-}
-
-symbol_table_entry *glsl_symbol_table::get_entry(const char *name)
-{
-   return (symbol_table_entry *)
-      _mesa_symbol_table_find_symbol(table, -1, name);
-}
-
-void
-glsl_symbol_table::disable_variable(const char *name)
-{
-   /* Ideally we would remove the variable's entry from the symbol table, but
-    * that would be difficult.  Fortunately, since this is only used for
-    * built-in variables, it won't be possible for the shader to re-introduce
-    * the variable later, so all we really need to do is to make sure that
-    * further attempts to access it using get_variable() will return NULL.
-    */
-   symbol_table_entry *entry = get_entry(name);
-   if (entry != NULL) {
-      entry->v = NULL;
-   }
-}
diff --git a/src/glsl/glsl_symbol_table.h b/src/glsl/glsl_symbol_table.h
deleted file mode 100644
index 5d654e5e6a7..00000000000
--- a/src/glsl/glsl_symbol_table.h
+++ /dev/null
@@ -1,110 +0,0 @@
-/* -*- c++ -*- */
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#pragma once
-#ifndef GLSL_SYMBOL_TABLE
-#define GLSL_SYMBOL_TABLE
-
-#include <new>
-
-extern "C" {
-#include "program/symbol_table.h"
-}
-#include "ir.h"
-
-class symbol_table_entry;
-struct glsl_type;
-
-/**
- * Facade class for _mesa_symbol_table
- *
- * Wraps the existing \c _mesa_symbol_table data structure to enforce some
- * type safety and some symbol table invariants.
- */
-struct glsl_symbol_table {
-   DECLARE_RALLOC_CXX_OPERATORS(glsl_symbol_table)
-
-   glsl_symbol_table();
-   ~glsl_symbol_table();
-
-   /* In 1.10, functions and variables have separate namespaces. */
-   bool separate_function_namespace;
-
-   void push_scope();
-   void pop_scope();
-
-   /**
-    * Determine whether a name was declared at the current scope
-    */
-   bool name_declared_this_scope(const char *name);
-
-   /**
-    * \name Methods to add symbols to the table
-    *
-    * There is some temptation to rename all these functions to \c add_symbol
-    * or similar.  However, this breaks symmetry with the getter functions and
-    * reduces the clarity of the intention of code that uses these methods.
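/* Sketch of the name-mangling trick used by the default-precision methods
 * above: defaults live in the same symbol table under a key that can never
 * collide with a user identifier, because '#' cannot begin a GLSL name.
 */
#include <string>

static std::string
sketch_default_precision_key(const char *type_name)
{
   /* "precision highp float;" would be stored under
    * "#default_precision_float" */
   return std::string("#default_precision_") + type_name;
}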
- */
-   /*@{*/
-   bool add_variable(ir_variable *v);
-   bool add_type(const char *name, const glsl_type *t);
-   bool add_function(ir_function *f);
-   bool add_interface(const char *name, const glsl_type *i,
-                      enum ir_variable_mode mode);
-   bool add_default_precision_qualifier(const char *type_name, int precision);
-   /*@}*/
-
-   /**
-    * Add a function at global scope without checking for scoping conflicts.
-    */
-   void add_global_function(ir_function *f);
-
-   /**
-    * \name Methods to get symbols from the table
-    */
-   /*@{*/
-   ir_variable *get_variable(const char *name);
-   const glsl_type *get_type(const char *name);
-   ir_function *get_function(const char *name);
-   const glsl_type *get_interface(const char *name,
-                                  enum ir_variable_mode mode);
-   int get_default_precision_qualifier(const char *type_name);
-   /*@}*/
-
-   /**
-    * Disable a previously-added variable so that it no longer appears to be
-    * in the symbol table.  This is necessary when gl_PerVertex is redeclared,
-    * to ensure that previously-available built-in variables are no longer
-    * available.
-    */
-   void disable_variable(const char *name);
-
-private:
-   symbol_table_entry *get_entry(const char *name);
-
-   struct _mesa_symbol_table *table;
-   void *mem_ctx;
-};
-
-#endif /* GLSL_SYMBOL_TABLE */
diff --git a/src/glsl/hir_field_selection.cpp b/src/glsl/hir_field_selection.cpp
deleted file mode 100644
index eab08ad8235..00000000000
--- a/src/glsl/hir_field_selection.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "ir.h"
-#include "glsl_parser_extras.h"
-#include "ast.h"
-#include "compiler/glsl_types.h"
-
-ir_rvalue *
-_mesa_ast_field_selection_to_hir(const ast_expression *expr,
-                                 exec_list *instructions,
-                                 struct _mesa_glsl_parse_state *state)
-{
-   void *ctx = state;
-   ir_rvalue *result = NULL;
-   ir_rvalue *op;
-
-   op = expr->subexpressions[0]->hir(instructions, state);
-
-   /* There are two kinds of field selection.  There is the selection of a
-    * specific field from a structure, and there is the selection of a
-    * swizzle / mask from a vector.  Which is which is determined entirely
-    * by the base type of the thing to which the field selection operator is
-    * being applied.
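/* Minimal restatement of that dispatch: the same ".foo" syntax resolves to
 * a record access or to a swizzle based only on the operand's base type.
 * The enum and strings are illustrative stand-ins.
 */
enum sketch_base_type { SKETCH_STRUCT, SKETCH_VECTOR, SKETCH_ERROR };

static const char *
sketch_classify_field_selection(sketch_base_type t)
{
   switch (t) {
   case SKETCH_STRUCT: return "record dereference";
   case SKETCH_VECTOR: return "swizzle / mask";
   default:            return "error (silently propagated)";
   }
}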
- */ - YYLTYPE loc = expr->get_location(); - if (op->type->is_error()) { - /* silently propagate the error */ - } else if (op->type->base_type == GLSL_TYPE_STRUCT - || op->type->base_type == GLSL_TYPE_INTERFACE) { - result = new(ctx) ir_dereference_record(op, - expr->primary_expression.identifier); - - if (result->type->is_error()) { - _mesa_glsl_error(& loc, state, "cannot access field `%s' of " - "structure", - expr->primary_expression.identifier); - } - } else if (op->type->is_vector() || - (state->has_420pack() && op->type->is_scalar())) { - ir_swizzle *swiz = ir_swizzle::create(op, - expr->primary_expression.identifier, - op->type->vector_elements); - if (swiz != NULL) { - result = swiz; - } else { - /* FINISHME: Logging of error messages should be moved into - * FINISHME: ir_swizzle::create. This allows the generation of more - * FINISHME: specific error messages. - */ - _mesa_glsl_error(& loc, state, "invalid swizzle / mask `%s'", - expr->primary_expression.identifier); - } - } else { - _mesa_glsl_error(& loc, state, "cannot access field `%s' of " - "non-structure / non-vector", - expr->primary_expression.identifier); - } - - return result ? result : ir_rvalue::error_value(ctx); -} diff --git a/src/glsl/ir.cpp b/src/glsl/ir.cpp deleted file mode 100644 index de9d314bae4..00000000000 --- a/src/glsl/ir.cpp +++ /dev/null @@ -1,2039 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */
-#include <string.h>
-#include "main/core.h" /* for MAX2 */
-#include "ir.h"
-#include "compiler/glsl_types.h"
-
-ir_rvalue::ir_rvalue(enum ir_node_type t)
-   : ir_instruction(t)
-{
-   this->type = glsl_type::error_type;
-}
-
-bool ir_rvalue::is_zero() const
-{
-   return false;
-}
-
-bool ir_rvalue::is_one() const
-{
-   return false;
-}
-
-bool ir_rvalue::is_negative_one() const
-{
-   return false;
-}
-
-/**
- * Modify the swizzle mask to move one component to another
- *
- * \param m     IR swizzle to be modified
- * \param from  Component in the RHS that is to be swizzled
- * \param to    Desired swizzle location of \c from
- */
-static void
-update_rhs_swizzle(ir_swizzle_mask &m, unsigned from, unsigned to)
-{
-   switch (to) {
-   case 0: m.x = from; break;
-   case 1: m.y = from; break;
-   case 2: m.z = from; break;
-   case 3: m.w = from; break;
-   default: assert(!"Should not get here.");
-   }
-}
-
-void
-ir_assignment::set_lhs(ir_rvalue *lhs)
-{
-   void *mem_ctx = this;
-   bool swizzled = false;
-
-   while (lhs != NULL) {
-      ir_swizzle *swiz = lhs->as_swizzle();
-
-      if (swiz == NULL)
-         break;
-
-      unsigned write_mask = 0;
-      ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 };
-
-      for (unsigned i = 0; i < swiz->mask.num_components; i++) {
-         unsigned c = 0;
-
-         switch (i) {
-         case 0: c = swiz->mask.x; break;
-         case 1: c = swiz->mask.y; break;
-         case 2: c = swiz->mask.z; break;
-         case 3: c = swiz->mask.w; break;
-         default: assert(!"Should not get here.");
-         }
-
-         write_mask |= (((this->write_mask >> i) & 1) << c);
-         update_rhs_swizzle(rhs_swiz, i, c);
-         rhs_swiz.num_components = swiz->val->type->vector_elements;
-      }
-
-      this->write_mask = write_mask;
-      lhs = swiz->val;
-
-      this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz);
-      swizzled = true;
-   }
-
-   if (swizzled) {
-      /* Now, RHS channels line up with the LHS writemask.  Collapse it
-       * to just the channels that will be written.
-       */
-      ir_swizzle_mask rhs_swiz = { 0, 0, 0, 0, 0, 0 };
-      int rhs_chan = 0;
-      for (int i = 0; i < 4; i++) {
-         if (write_mask & (1 << i))
-            update_rhs_swizzle(rhs_swiz, i, rhs_chan++);
-      }
-      rhs_swiz.num_components = rhs_chan;
-      this->rhs = new(mem_ctx) ir_swizzle(this->rhs, rhs_swiz);
-   }
-
-   assert((lhs == NULL) || lhs->as_dereference());
-
-   this->lhs = (ir_dereference *) lhs;
-}
-
-ir_variable *
-ir_assignment::whole_variable_written()
-{
-   ir_variable *v = this->lhs->whole_variable_referenced();
-
-   if (v == NULL)
-      return NULL;
-
-   if (v->type->is_scalar())
-      return v;
-
-   if (v->type->is_vector()) {
-      const unsigned mask = (1U << v->type->vector_elements) - 1;
-
-      if (mask != this->write_mask)
-         return NULL;
-   }
-
-   /* Either all the vector components are assigned or the variable is some
-    * composite type (and the whole thing is assigned).
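/* Worked, self-contained example of the writemask collapse performed in
 * set_lhs() above.  For "v.zx = rhs" the swizzle is {2, 0}; writing both
 * swizzle components therefore writes channels x and z of v, with the RHS
 * reordered to match.  Helper name is hypothetical.
 */
static unsigned
sketch_collapse_mask(unsigned write_mask, const unsigned *swiz, unsigned count)
{
   unsigned out = 0;
   for (unsigned i = 0; i < count; i++) {
      if (write_mask & (1u << i))
         out |= 1u << swiz[i];   /* swizzle component i lands in channel swiz[i] */
   }
   return out;
}
/* v.zx = rhs:  swiz = {2, 0}, write_mask = 0x3  ->  0x5 (channels x and z) */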
- */ - return v; -} - -ir_assignment::ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, - ir_rvalue *condition, unsigned write_mask) - : ir_instruction(ir_type_assignment) -{ - this->condition = condition; - this->rhs = rhs; - this->lhs = lhs; - this->write_mask = write_mask; - - if (lhs->type->is_scalar() || lhs->type->is_vector()) { - int lhs_components = 0; - for (int i = 0; i < 4; i++) { - if (write_mask & (1 << i)) - lhs_components++; - } - - assert(lhs_components == this->rhs->type->vector_elements); - } -} - -ir_assignment::ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, - ir_rvalue *condition) - : ir_instruction(ir_type_assignment) -{ - this->condition = condition; - this->rhs = rhs; - - /* If the RHS is a vector type, assume that all components of the vector - * type are being written to the LHS. The write mask comes from the RHS - * because we can have a case where the LHS is a vec4 and the RHS is a - * vec3. In that case, the assignment is: - * - * (assign (...) (xyz) (var_ref lhs) (var_ref rhs)) - */ - if (rhs->type->is_vector()) - this->write_mask = (1U << rhs->type->vector_elements) - 1; - else if (rhs->type->is_scalar()) - this->write_mask = 1; - else - this->write_mask = 0; - - this->set_lhs(lhs); -} - -ir_expression::ir_expression(int op, const struct glsl_type *type, - ir_rvalue *op0, ir_rvalue *op1, - ir_rvalue *op2, ir_rvalue *op3) - : ir_rvalue(ir_type_expression) -{ - this->type = type; - this->operation = ir_expression_operation(op); - this->operands[0] = op0; - this->operands[1] = op1; - this->operands[2] = op2; - this->operands[3] = op3; -#ifndef NDEBUG - int num_operands = get_num_operands(this->operation); - for (int i = num_operands; i < 4; i++) { - assert(this->operands[i] == NULL); - } -#endif -} - -ir_expression::ir_expression(int op, ir_rvalue *op0) - : ir_rvalue(ir_type_expression) -{ - this->operation = ir_expression_operation(op); - this->operands[0] = op0; - this->operands[1] = NULL; - this->operands[2] = NULL; - this->operands[3] = NULL; - - assert(op <= ir_last_unop); - - switch (this->operation) { - case ir_unop_bit_not: - case ir_unop_logic_not: - case ir_unop_neg: - case ir_unop_abs: - case ir_unop_sign: - case ir_unop_rcp: - case ir_unop_rsq: - case ir_unop_sqrt: - case ir_unop_exp: - case ir_unop_log: - case ir_unop_exp2: - case ir_unop_log2: - case ir_unop_trunc: - case ir_unop_ceil: - case ir_unop_floor: - case ir_unop_fract: - case ir_unop_round_even: - case ir_unop_sin: - case ir_unop_cos: - case ir_unop_dFdx: - case ir_unop_dFdx_coarse: - case ir_unop_dFdx_fine: - case ir_unop_dFdy: - case ir_unop_dFdy_coarse: - case ir_unop_dFdy_fine: - case ir_unop_bitfield_reverse: - case ir_unop_interpolate_at_centroid: - case ir_unop_saturate: - this->type = op0->type; - break; - - case ir_unop_f2i: - case ir_unop_b2i: - case ir_unop_u2i: - case ir_unop_d2i: - case ir_unop_bitcast_f2i: - case ir_unop_bit_count: - case ir_unop_find_msb: - case ir_unop_find_lsb: - case ir_unop_subroutine_to_int: - this->type = glsl_type::get_instance(GLSL_TYPE_INT, - op0->type->vector_elements, 1); - break; - - case ir_unop_b2f: - case ir_unop_i2f: - case ir_unop_u2f: - case ir_unop_d2f: - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_u2f: - this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, - op0->type->vector_elements, 1); - break; - - case ir_unop_f2b: - case ir_unop_i2b: - case ir_unop_d2b: - this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, - op0->type->vector_elements, 1); - break; - - case ir_unop_f2d: - case ir_unop_i2d: - case ir_unop_u2d: - this->type = 
glsl_type::get_instance(GLSL_TYPE_DOUBLE, - op0->type->vector_elements, 1); - break; - - case ir_unop_i2u: - case ir_unop_f2u: - case ir_unop_d2u: - case ir_unop_bitcast_f2u: - this->type = glsl_type::get_instance(GLSL_TYPE_UINT, - op0->type->vector_elements, 1); - break; - - case ir_unop_noise: - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: - this->type = glsl_type::float_type; - break; - - case ir_unop_unpack_double_2x32: - this->type = glsl_type::uvec2_type; - break; - - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_snorm_4x8: - case ir_unop_pack_unorm_2x16: - case ir_unop_pack_unorm_4x8: - case ir_unop_pack_half_2x16: - this->type = glsl_type::uint_type; - break; - - case ir_unop_pack_double_2x32: - this->type = glsl_type::double_type; - break; - - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_half_2x16: - this->type = glsl_type::vec2_type; - break; - - case ir_unop_unpack_snorm_4x8: - case ir_unop_unpack_unorm_4x8: - this->type = glsl_type::vec4_type; - break; - - case ir_unop_frexp_sig: - this->type = op0->type; - break; - case ir_unop_frexp_exp: - this->type = glsl_type::get_instance(GLSL_TYPE_INT, - op0->type->vector_elements, 1); - break; - - case ir_unop_get_buffer_size: - case ir_unop_ssbo_unsized_array_length: - this->type = glsl_type::int_type; - break; - - default: - assert(!"not reached: missing automatic type setup for ir_expression"); - this->type = op0->type; - break; - } -} - -ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1) - : ir_rvalue(ir_type_expression) -{ - this->operation = ir_expression_operation(op); - this->operands[0] = op0; - this->operands[1] = op1; - this->operands[2] = NULL; - this->operands[3] = NULL; - - assert(op > ir_last_unop); - - switch (this->operation) { - case ir_binop_all_equal: - case ir_binop_any_nequal: - this->type = glsl_type::bool_type; - break; - - case ir_binop_add: - case ir_binop_sub: - case ir_binop_min: - case ir_binop_max: - case ir_binop_pow: - case ir_binop_mul: - case ir_binop_div: - case ir_binop_mod: - if (op0->type->is_scalar()) { - this->type = op1->type; - } else if (op1->type->is_scalar()) { - this->type = op0->type; - } else { - if (this->operation == ir_binop_mul) { - this->type = glsl_type::get_mul_type(op0->type, op1->type); - } else { - assert(op0->type == op1->type); - this->type = op0->type; - } - } - break; - - case ir_binop_logic_and: - case ir_binop_logic_xor: - case ir_binop_logic_or: - case ir_binop_bit_and: - case ir_binop_bit_xor: - case ir_binop_bit_or: - assert(!op0->type->is_matrix()); - assert(!op1->type->is_matrix()); - if (op0->type->is_scalar()) { - this->type = op1->type; - } else if (op1->type->is_scalar()) { - this->type = op0->type; - } else { - assert(op0->type->vector_elements == op1->type->vector_elements); - this->type = op0->type; - } - break; - - case ir_binop_equal: - case ir_binop_nequal: - case ir_binop_lequal: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_greater: - assert(op0->type == op1->type); - this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, - op0->type->vector_elements, 1); - break; - - case ir_binop_dot: - this->type = op0->type->get_base_type(); - break; - - case ir_binop_pack_half_2x16_split: - this->type = glsl_type::uint_type; - break; - - case ir_binop_imul_high: - case ir_binop_carry: - case ir_binop_borrow: - case ir_binop_lshift: - case ir_binop_rshift: - case ir_binop_ldexp: - case ir_binop_interpolate_at_offset: - case ir_binop_interpolate_at_sample: - 
this->type = op0->type; - break; - - case ir_binop_vector_extract: - this->type = op0->type->get_scalar_type(); - break; - - default: - assert(!"not reached: missing automatic type setup for ir_expression"); - this->type = glsl_type::float_type; - } -} - -ir_expression::ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, - ir_rvalue *op2) - : ir_rvalue(ir_type_expression) -{ - this->operation = ir_expression_operation(op); - this->operands[0] = op0; - this->operands[1] = op1; - this->operands[2] = op2; - this->operands[3] = NULL; - - assert(op > ir_last_binop && op <= ir_last_triop); - - switch (this->operation) { - case ir_triop_fma: - case ir_triop_lrp: - case ir_triop_bitfield_extract: - case ir_triop_vector_insert: - this->type = op0->type; - break; - - case ir_triop_csel: - this->type = op1->type; - break; - - default: - assert(!"not reached: missing automatic type setup for ir_expression"); - this->type = glsl_type::float_type; - } -} - -unsigned int -ir_expression::get_num_operands(ir_expression_operation op) -{ - assert(op <= ir_last_opcode); - - if (op <= ir_last_unop) - return 1; - - if (op <= ir_last_binop) - return 2; - - if (op <= ir_last_triop) - return 3; - - if (op <= ir_last_quadop) - return 4; - - assert(false); - return 0; -} - -static const char *const operator_strs[] = { - "~", - "!", - "neg", - "abs", - "sign", - "rcp", - "rsq", - "sqrt", - "exp", - "log", - "exp2", - "log2", - "f2i", - "f2u", - "i2f", - "f2b", - "b2f", - "i2b", - "b2i", - "u2f", - "i2u", - "u2i", - "d2f", - "f2d", - "d2i", - "i2d", - "d2u", - "u2d", - "d2b", - "bitcast_i2f", - "bitcast_f2i", - "bitcast_u2f", - "bitcast_f2u", - "trunc", - "ceil", - "floor", - "fract", - "round_even", - "sin", - "cos", - "dFdx", - "dFdxCoarse", - "dFdxFine", - "dFdy", - "dFdyCoarse", - "dFdyFine", - "packSnorm2x16", - "packSnorm4x8", - "packUnorm2x16", - "packUnorm4x8", - "packHalf2x16", - "unpackSnorm2x16", - "unpackSnorm4x8", - "unpackUnorm2x16", - "unpackUnorm4x8", - "unpackHalf2x16", - "unpackHalf2x16_split_x", - "unpackHalf2x16_split_y", - "bitfield_reverse", - "bit_count", - "find_msb", - "find_lsb", - "sat", - "packDouble2x32", - "unpackDouble2x32", - "frexp_sig", - "frexp_exp", - "noise", - "subroutine_to_int", - "interpolate_at_centroid", - "get_buffer_size", - "ssbo_unsized_array_length", - "+", - "-", - "*", - "imul_high", - "/", - "carry", - "borrow", - "%", - "<", - ">", - "<=", - ">=", - "==", - "!=", - "all_equal", - "any_nequal", - "<<", - ">>", - "&", - "^", - "|", - "&&", - "^^", - "||", - "dot", - "min", - "max", - "pow", - "packHalf2x16_split", - "ubo_load", - "ldexp", - "vector_extract", - "interpolate_at_offset", - "interpolate_at_sample", - "fma", - "lrp", - "csel", - "bitfield_extract", - "vector_insert", - "bitfield_insert", - "vector", -}; - -const char *ir_expression::operator_string(ir_expression_operation op) -{ - assert((unsigned int) op < ARRAY_SIZE(operator_strs)); - assert(ARRAY_SIZE(operator_strs) == (ir_quadop_vector + 1)); - return operator_strs[op]; -} - -const char *ir_expression::operator_string() -{ - return operator_string(this->operation); -} - -const char* -depth_layout_string(ir_depth_layout layout) -{ - switch(layout) { - case ir_depth_layout_none: return ""; - case ir_depth_layout_any: return "depth_any"; - case ir_depth_layout_greater: return "depth_greater"; - case ir_depth_layout_less: return "depth_less"; - case ir_depth_layout_unchanged: return "depth_unchanged"; - - default: - assert(0); - return ""; - } -} - -ir_expression_operation 
-ir_expression::get_operator(const char *str) -{ - const int operator_count = sizeof(operator_strs) / sizeof(operator_strs[0]); - for (int op = 0; op < operator_count; op++) { - if (strcmp(str, operator_strs[op]) == 0) - return (ir_expression_operation) op; - } - return (ir_expression_operation) -1; -} - -ir_variable * -ir_expression::variable_referenced() const -{ - switch (operation) { - case ir_binop_vector_extract: - case ir_triop_vector_insert: - /* We get these for things like a[0] where a is a vector type. In these - * cases we want variable_referenced() to return the actual vector - * variable this is wrapping. - */ - return operands[0]->variable_referenced(); - default: - return ir_rvalue::variable_referenced(); - } -} - -ir_constant::ir_constant() - : ir_rvalue(ir_type_constant) -{ -} - -ir_constant::ir_constant(const struct glsl_type *type, - const ir_constant_data *data) - : ir_rvalue(ir_type_constant) -{ - assert((type->base_type >= GLSL_TYPE_UINT) - && (type->base_type <= GLSL_TYPE_BOOL)); - - this->type = type; - memcpy(& this->value, data, sizeof(this->value)); -} - -ir_constant::ir_constant(float f, unsigned vector_elements) - : ir_rvalue(ir_type_constant) -{ - assert(vector_elements <= 4); - this->type = glsl_type::get_instance(GLSL_TYPE_FLOAT, vector_elements, 1); - for (unsigned i = 0; i < vector_elements; i++) { - this->value.f[i] = f; - } - for (unsigned i = vector_elements; i < 16; i++) { - this->value.f[i] = 0; - } -} - -ir_constant::ir_constant(double d, unsigned vector_elements) - : ir_rvalue(ir_type_constant) -{ - assert(vector_elements <= 4); - this->type = glsl_type::get_instance(GLSL_TYPE_DOUBLE, vector_elements, 1); - for (unsigned i = 0; i < vector_elements; i++) { - this->value.d[i] = d; - } - for (unsigned i = vector_elements; i < 16; i++) { - this->value.d[i] = 0.0; - } -} - -ir_constant::ir_constant(unsigned int u, unsigned vector_elements) - : ir_rvalue(ir_type_constant) -{ - assert(vector_elements <= 4); - this->type = glsl_type::get_instance(GLSL_TYPE_UINT, vector_elements, 1); - for (unsigned i = 0; i < vector_elements; i++) { - this->value.u[i] = u; - } - for (unsigned i = vector_elements; i < 16; i++) { - this->value.u[i] = 0; - } -} - -ir_constant::ir_constant(int integer, unsigned vector_elements) - : ir_rvalue(ir_type_constant) -{ - assert(vector_elements <= 4); - this->type = glsl_type::get_instance(GLSL_TYPE_INT, vector_elements, 1); - for (unsigned i = 0; i < vector_elements; i++) { - this->value.i[i] = integer; - } - for (unsigned i = vector_elements; i < 16; i++) { - this->value.i[i] = 0; - } -} - -ir_constant::ir_constant(bool b, unsigned vector_elements) - : ir_rvalue(ir_type_constant) -{ - assert(vector_elements <= 4); - this->type = glsl_type::get_instance(GLSL_TYPE_BOOL, vector_elements, 1); - for (unsigned i = 0; i < vector_elements; i++) { - this->value.b[i] = b; - } - for (unsigned i = vector_elements; i < 16; i++) { - this->value.b[i] = false; - } -} - -ir_constant::ir_constant(const ir_constant *c, unsigned i) - : ir_rvalue(ir_type_constant) -{ - this->type = c->type->get_base_type(); - - switch (this->type->base_type) { - case GLSL_TYPE_UINT: this->value.u[0] = c->value.u[i]; break; - case GLSL_TYPE_INT: this->value.i[0] = c->value.i[i]; break; - case GLSL_TYPE_FLOAT: this->value.f[0] = c->value.f[i]; break; - case GLSL_TYPE_BOOL: this->value.b[0] = c->value.b[i]; break; - case GLSL_TYPE_DOUBLE: this->value.d[0] = c->value.d[i]; break; - default: assert(!"Should not get here."); break; - } -} - -ir_constant::ir_constant(const 
struct glsl_type *type, exec_list *value_list) - : ir_rvalue(ir_type_constant) -{ - this->type = type; - - assert(type->is_scalar() || type->is_vector() || type->is_matrix() - || type->is_record() || type->is_array()); - - if (type->is_array()) { - this->array_elements = ralloc_array(this, ir_constant *, type->length); - unsigned i = 0; - foreach_in_list(ir_constant, value, value_list) { - assert(value->as_constant() != NULL); - - this->array_elements[i++] = value; - } - return; - } - - /* If the constant is a record, the types of each of the entries in - * value_list must be a 1-for-1 match with the structure components. Each - * entry must also be a constant. Just move the nodes from the value_list - * to the list in the ir_constant. - */ - /* FINISHME: Should there be some type checking and / or assertions here? */ - /* FINISHME: Should the new constant take ownership of the nodes from - * FINISHME: value_list, or should it make copies? - */ - if (type->is_record()) { - value_list->move_nodes_to(& this->components); - return; - } - - for (unsigned i = 0; i < 16; i++) { - this->value.u[i] = 0; - } - - ir_constant *value = (ir_constant *) (value_list->head); - - /* Constructors with exactly one scalar argument are special for vectors - * and matrices. For vectors, the scalar value is replicated to fill all - * the components. For matrices, the scalar fills the components of the - * diagonal while the rest is filled with 0. - */ - if (value->type->is_scalar() && value->next->is_tail_sentinel()) { - if (type->is_matrix()) { - /* Matrix - fill diagonal (rest is already set to 0) */ - assert(type->base_type == GLSL_TYPE_FLOAT || - type->base_type == GLSL_TYPE_DOUBLE); - for (unsigned i = 0; i < type->matrix_columns; i++) { - if (type->base_type == GLSL_TYPE_FLOAT) - this->value.f[i * type->vector_elements + i] = - value->value.f[0]; - else - this->value.d[i * type->vector_elements + i] = - value->value.d[0]; - } - } else { - /* Vector or scalar - fill all components */ - switch (type->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - for (unsigned i = 0; i < type->components(); i++) - this->value.u[i] = value->value.u[0]; - break; - case GLSL_TYPE_FLOAT: - for (unsigned i = 0; i < type->components(); i++) - this->value.f[i] = value->value.f[0]; - break; - case GLSL_TYPE_DOUBLE: - for (unsigned i = 0; i < type->components(); i++) - this->value.d[i] = value->value.d[0]; - break; - case GLSL_TYPE_BOOL: - for (unsigned i = 0; i < type->components(); i++) - this->value.b[i] = value->value.b[0]; - break; - default: - assert(!"Should not get here."); - break; - } - } - return; - } - - if (type->is_matrix() && value->type->is_matrix()) { - assert(value->next->is_tail_sentinel()); - - /* From section 5.4.2 of the GLSL 1.20 spec: - * "If a matrix is constructed from a matrix, then each component - * (column i, row j) in the result that has a corresponding component - * (column i, row j) in the argument will be initialized from there." - */ - unsigned cols = MIN2(type->matrix_columns, value->type->matrix_columns); - unsigned rows = MIN2(type->vector_elements, value->type->vector_elements); - for (unsigned i = 0; i < cols; i++) { - for (unsigned j = 0; j < rows; j++) { - const unsigned src = i * value->type->vector_elements + j; - const unsigned dst = i * type->vector_elements + j; - this->value.f[dst] = value->value.f[src]; - } - } - - /* "All other components will be initialized to the identity matrix." 
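/* Standalone illustration of the matrix-from-matrix rule quoted above:
 * copy the overlapping block, put 1.0 on the remaining diagonal and 0
 * elsewhere.  Column-major float storage, matching the IR's layout.
 */
static void
sketch_mat_from_mat(float *dst, unsigned dst_cols, unsigned dst_rows,
                    const float *src, unsigned src_cols, unsigned src_rows)
{
   for (unsigned i = 0; i < dst_cols; i++) {
      for (unsigned j = 0; j < dst_rows; j++) {
         if (i < src_cols && j < src_rows)
            dst[i * dst_rows + j] = src[i * src_rows + j];
         else
            dst[i * dst_rows + j] = (i == j) ? 1.0f : 0.0f;
      }
   }
}
/* e.g. mat4 m = mat4(m2); copies the 2x2 block, identity elsewhere */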
*/ - for (unsigned i = cols; i < type->matrix_columns; i++) - this->value.f[i * type->vector_elements + i] = 1.0; - - return; - } - - /* Use each component from each entry in the value_list to initialize one - * component of the constant being constructed. - */ - for (unsigned i = 0; i < type->components(); /* empty */) { - assert(value->as_constant() != NULL); - assert(!value->is_tail_sentinel()); - - for (unsigned j = 0; j < value->type->components(); j++) { - switch (type->base_type) { - case GLSL_TYPE_UINT: - this->value.u[i] = value->get_uint_component(j); - break; - case GLSL_TYPE_INT: - this->value.i[i] = value->get_int_component(j); - break; - case GLSL_TYPE_FLOAT: - this->value.f[i] = value->get_float_component(j); - break; - case GLSL_TYPE_BOOL: - this->value.b[i] = value->get_bool_component(j); - break; - case GLSL_TYPE_DOUBLE: - this->value.d[i] = value->get_double_component(j); - break; - default: - /* FINISHME: What to do? Exceptions are not the answer. - */ - break; - } - - i++; - if (i >= type->components()) - break; - } - - value = (ir_constant *) value->next; - } -} - -ir_constant * -ir_constant::zero(void *mem_ctx, const glsl_type *type) -{ - assert(type->is_scalar() || type->is_vector() || type->is_matrix() - || type->is_record() || type->is_array()); - - ir_constant *c = new(mem_ctx) ir_constant; - c->type = type; - memset(&c->value, 0, sizeof(c->value)); - - if (type->is_array()) { - c->array_elements = ralloc_array(c, ir_constant *, type->length); - - for (unsigned i = 0; i < type->length; i++) - c->array_elements[i] = ir_constant::zero(c, type->fields.array); - } - - if (type->is_record()) { - for (unsigned i = 0; i < type->length; i++) { - ir_constant *comp = ir_constant::zero(mem_ctx, type->fields.structure[i].type); - c->components.push_tail(comp); - } - } - - return c; -} - -bool -ir_constant::get_bool_component(unsigned i) const -{ - switch (this->type->base_type) { - case GLSL_TYPE_UINT: return this->value.u[i] != 0; - case GLSL_TYPE_INT: return this->value.i[i] != 0; - case GLSL_TYPE_FLOAT: return ((int)this->value.f[i]) != 0; - case GLSL_TYPE_BOOL: return this->value.b[i]; - case GLSL_TYPE_DOUBLE: return this->value.d[i] != 0.0; - default: assert(!"Should not get here."); break; - } - - /* Must return something to make the compiler happy. This is clearly an - * error case. - */ - return false; -} - -float -ir_constant::get_float_component(unsigned i) const -{ - switch (this->type->base_type) { - case GLSL_TYPE_UINT: return (float) this->value.u[i]; - case GLSL_TYPE_INT: return (float) this->value.i[i]; - case GLSL_TYPE_FLOAT: return this->value.f[i]; - case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0f : 0.0f; - case GLSL_TYPE_DOUBLE: return (float) this->value.d[i]; - default: assert(!"Should not get here."); break; - } - - /* Must return something to make the compiler happy. This is clearly an - * error case. - */ - return 0.0; -} - -double -ir_constant::get_double_component(unsigned i) const -{ - switch (this->type->base_type) { - case GLSL_TYPE_UINT: return (double) this->value.u[i]; - case GLSL_TYPE_INT: return (double) this->value.i[i]; - case GLSL_TYPE_FLOAT: return (double) this->value.f[i]; - case GLSL_TYPE_BOOL: return this->value.b[i] ? 1.0 : 0.0; - case GLSL_TYPE_DOUBLE: return this->value.d[i]; - default: assert(!"Should not get here."); break; - } - - /* Must return something to make the compiler happy. This is clearly an - * error case. 
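/* Side note on get_bool_component() above: the GLSL_TYPE_FLOAT case casts
 * to int before comparing, so the conversion truncates.  Restated alone:
 */
static bool
sketch_float_to_bool(float f)
{
   return ((int) f) != 0;   /* 0.5f -> 0 -> false; 1.5f -> 1 -> true */
}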
- */ - return 0.0; -} - -int -ir_constant::get_int_component(unsigned i) const -{ - switch (this->type->base_type) { - case GLSL_TYPE_UINT: return this->value.u[i]; - case GLSL_TYPE_INT: return this->value.i[i]; - case GLSL_TYPE_FLOAT: return (int) this->value.f[i]; - case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0; - case GLSL_TYPE_DOUBLE: return (int) this->value.d[i]; - default: assert(!"Should not get here."); break; - } - - /* Must return something to make the compiler happy. This is clearly an - * error case. - */ - return 0; -} - -unsigned -ir_constant::get_uint_component(unsigned i) const -{ - switch (this->type->base_type) { - case GLSL_TYPE_UINT: return this->value.u[i]; - case GLSL_TYPE_INT: return this->value.i[i]; - case GLSL_TYPE_FLOAT: return (unsigned) this->value.f[i]; - case GLSL_TYPE_BOOL: return this->value.b[i] ? 1 : 0; - case GLSL_TYPE_DOUBLE: return (unsigned) this->value.d[i]; - default: assert(!"Should not get here."); break; - } - - /* Must return something to make the compiler happy. This is clearly an - * error case. - */ - return 0; -} - -ir_constant * -ir_constant::get_array_element(unsigned i) const -{ - assert(this->type->is_array()); - - /* From page 35 (page 41 of the PDF) of the GLSL 1.20 spec: - * - * "Behavior is undefined if a shader subscripts an array with an index - * less than 0 or greater than or equal to the size the array was - * declared with." - * - * Most out-of-bounds accesses are removed before things could get this far. - * There are cases where non-constant array index values can get constant - * folded. - */ - if (int(i) < 0) - i = 0; - else if (i >= this->type->length) - i = this->type->length - 1; - - return array_elements[i]; -} - -ir_constant * -ir_constant::get_record_field(const char *name) -{ - int idx = this->type->field_index(name); - - if (idx < 0) - return NULL; - - if (this->components.is_empty()) - return NULL; - - exec_node *node = this->components.head; - for (int i = 0; i < idx; i++) { - node = node->next; - - /* If the end of the list is encountered before the element matching the - * requested field is found, return NULL. 
- */ - if (node->is_tail_sentinel()) - return NULL; - } - - return (ir_constant *) node; -} - -void -ir_constant::copy_offset(ir_constant *src, int offset) -{ - switch (this->type->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - case GLSL_TYPE_FLOAT: - case GLSL_TYPE_DOUBLE: - case GLSL_TYPE_BOOL: { - unsigned int size = src->type->components(); - assert (size <= this->type->components() - offset); - for (unsigned int i=0; i<size; i++) { - switch (this->type->base_type) { - case GLSL_TYPE_UINT: - value.u[i+offset] = src->get_uint_component(i); - break; - case GLSL_TYPE_INT: - value.i[i+offset] = src->get_int_component(i); - break; - case GLSL_TYPE_FLOAT: - value.f[i+offset] = src->get_float_component(i); - break; - case GLSL_TYPE_BOOL: - value.b[i+offset] = src->get_bool_component(i); - break; - case GLSL_TYPE_DOUBLE: - value.d[i+offset] = src->get_double_component(i); - break; - default: // Shut up the compiler - break; - } - } - break; - } - - case GLSL_TYPE_STRUCT: { - assert (src->type == this->type); - this->components.make_empty(); - foreach_in_list(ir_constant, orig, &src->components) { - this->components.push_tail(orig->clone(this, NULL)); - } - break; - } - - case GLSL_TYPE_ARRAY: { - assert (src->type == this->type); - for (unsigned i = 0; i < this->type->length; i++) { - this->array_elements[i] = src->array_elements[i]->clone(this, NULL); - } - break; - } - - default: - assert(!"Should not get here."); - break; - } -} - -void -ir_constant::copy_masked_offset(ir_constant *src, int offset, unsigned int mask) -{ - assert (!type->is_array() && !type->is_record()); - - if (!type->is_vector() && !type->is_matrix()) { - offset = 0; - mask = 1; - } - - int id = 0; - for (int i=0; i<4; i++) { - if (mask & (1 << i)) { - switch (this->type->base_type) { - case GLSL_TYPE_UINT: - value.u[i+offset] = src->get_uint_component(id++); - break; - case GLSL_TYPE_INT: - value.i[i+offset] = src->get_int_component(id++); - break; - case GLSL_TYPE_FLOAT: - value.f[i+offset] = src->get_float_component(id++); - break; - case GLSL_TYPE_BOOL: - value.b[i+offset] = src->get_bool_component(id++); - break; - case GLSL_TYPE_DOUBLE: - value.d[i+offset] = src->get_double_component(id++); - break; - default: - assert(!"Should not get here."); - return; - } - } - } -} - -bool -ir_constant::has_value(const ir_constant *c) const -{ - if (this->type != c->type) - return false; - - if (this->type->is_array()) { - for (unsigned i = 0; i < this->type->length; i++) { - if (!this->array_elements[i]->has_value(c->array_elements[i])) - return false; - } - return true; - } - - if (this->type->base_type == GLSL_TYPE_STRUCT) { - const exec_node *a_node = this->components.head; - const exec_node *b_node = c->components.head; - - while (!a_node->is_tail_sentinel()) { - assert(!b_node->is_tail_sentinel()); - - const ir_constant *const a_field = (ir_constant *) a_node; - const ir_constant *const b_field = (ir_constant *) b_node; - - if (!a_field->has_value(b_field)) - return false; - - a_node = a_node->next; - b_node = b_node->next; - } - - return true; - } - - for (unsigned i = 0; i < this->type->components(); i++) { - switch (this->type->base_type) { - case GLSL_TYPE_UINT: - if (this->value.u[i] != c->value.u[i]) - return false; - break; - case GLSL_TYPE_INT: - if (this->value.i[i] != c->value.i[i]) - return false; - break; - case GLSL_TYPE_FLOAT: - if (this->value.f[i] != c->value.f[i]) - return false; - break; - case GLSL_TYPE_BOOL: - if (this->value.b[i] != c->value.b[i]) - return false; - break; - case GLSL_TYPE_DOUBLE: - if (this->value.d[i]
!= c->value.d[i]) - return false; - break; - default: - assert(!"Should not get here."); - return false; - } - } - - return true; -} - -bool -ir_constant::is_value(float f, int i) const -{ - if (!this->type->is_scalar() && !this->type->is_vector()) - return false; - - /* Only accept boolean values for 0/1. */ - if (int(bool(i)) != i && this->type->is_boolean()) - return false; - - for (unsigned c = 0; c < this->type->vector_elements; c++) { - switch (this->type->base_type) { - case GLSL_TYPE_FLOAT: - if (this->value.f[c] != f) - return false; - break; - case GLSL_TYPE_INT: - if (this->value.i[c] != i) - return false; - break; - case GLSL_TYPE_UINT: - if (this->value.u[c] != unsigned(i)) - return false; - break; - case GLSL_TYPE_BOOL: - if (this->value.b[c] != bool(i)) - return false; - break; - case GLSL_TYPE_DOUBLE: - if (this->value.d[c] != double(f)) - return false; - break; - default: - /* The only other base types are structures, arrays, and samplers. - * Samplers cannot be constants, and the others should have been - * filtered out above. - */ - assert(!"Should not get here."); - return false; - } - } - - return true; -} - -bool -ir_constant::is_zero() const -{ - return is_value(0.0, 0); -} - -bool -ir_constant::is_one() const -{ - return is_value(1.0, 1); -} - -bool -ir_constant::is_negative_one() const -{ - return is_value(-1.0, -1); -} - -bool -ir_constant::is_uint16_constant() const -{ - if (!type->is_integer()) - return false; - - return value.u[0] < (1 << 16); -} - -ir_loop::ir_loop() - : ir_instruction(ir_type_loop) -{ -} - - -ir_dereference_variable::ir_dereference_variable(ir_variable *var) - : ir_dereference(ir_type_dereference_variable) -{ - assert(var != NULL); - - this->var = var; - this->type = var->type; -} - - -ir_dereference_array::ir_dereference_array(ir_rvalue *value, - ir_rvalue *array_index) - : ir_dereference(ir_type_dereference_array) -{ - this->array_index = array_index; - this->set_array(value); -} - - -ir_dereference_array::ir_dereference_array(ir_variable *var, - ir_rvalue *array_index) - : ir_dereference(ir_type_dereference_array) -{ - void *ctx = ralloc_parent(var); - - this->array_index = array_index; - this->set_array(new(ctx) ir_dereference_variable(var)); -} - - -void -ir_dereference_array::set_array(ir_rvalue *value) -{ - assert(value != NULL); - - this->array = value; - - const glsl_type *const vt = this->array->type; - - if (vt->is_array()) { - type = vt->fields.array; - } else if (vt->is_matrix()) { - type = vt->column_type(); - } else if (vt->is_vector()) { - type = vt->get_base_type(); - } -} - - -ir_dereference_record::ir_dereference_record(ir_rvalue *value, - const char *field) - : ir_dereference(ir_type_dereference_record) -{ - assert(value != NULL); - - this->record = value; - this->field = ralloc_strdup(this, field); - this->type = this->record->type->field_type(field); -} - - -ir_dereference_record::ir_dereference_record(ir_variable *var, - const char *field) - : ir_dereference(ir_type_dereference_record) -{ - void *ctx = ralloc_parent(var); - - this->record = new(ctx) ir_dereference_variable(var); - this->field = ralloc_strdup(this, field); - this->type = this->record->type->field_type(field); -} - -bool -ir_dereference::is_lvalue() const -{ - ir_variable *var = this->variable_referenced(); - - /* Every l-value dereference chain eventually ends in a variable.
- */ - if ((var == NULL) || var->data.read_only) - return false; - - /* From section 4.1.7 of the GLSL 4.40 spec: - * - * "Opaque variables cannot be treated as l-values; hence cannot - * be used as out or inout function parameters, nor can they be - * assigned into." - */ - if (this->type->contains_opaque()) - return false; - - return true; -} - - -static const char * const tex_opcode_strs[] = { "tex", "txb", "txl", "txd", "txf", "txf_ms", "txs", "lod", "tg4", "query_levels", "texture_samples", "samples_identical" }; - -const char *ir_texture::opcode_string() -{ - assert((unsigned int) op < ARRAY_SIZE(tex_opcode_strs)); - return tex_opcode_strs[op]; -} - -ir_texture_opcode -ir_texture::get_opcode(const char *str) -{ - const int count = sizeof(tex_opcode_strs) / sizeof(tex_opcode_strs[0]); - for (int op = 0; op < count; op++) { - if (strcmp(str, tex_opcode_strs[op]) == 0) - return (ir_texture_opcode) op; - } - return (ir_texture_opcode) -1; -} - - -void -ir_texture::set_sampler(ir_dereference *sampler, const glsl_type *type) -{ - assert(sampler != NULL); - assert(type != NULL); - this->sampler = sampler; - this->type = type; - - if (this->op == ir_txs || this->op == ir_query_levels || - this->op == ir_texture_samples) { - assert(type->base_type == GLSL_TYPE_INT); - } else if (this->op == ir_lod) { - assert(type->vector_elements == 2); - assert(type->base_type == GLSL_TYPE_FLOAT); - } else if (this->op == ir_samples_identical) { - assert(type == glsl_type::bool_type); - assert(sampler->type->base_type == GLSL_TYPE_SAMPLER); - assert(sampler->type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS); - } else { - assert(sampler->type->sampler_type == (int) type->base_type); - if (sampler->type->sampler_shadow) - assert(type->vector_elements == 4 || type->vector_elements == 1); - else - assert(type->vector_elements == 4); - } -} - - -void -ir_swizzle::init_mask(const unsigned *comp, unsigned count) -{ - assert((count >= 1) && (count <= 4)); - - memset(&this->mask, 0, sizeof(this->mask)); - this->mask.num_components = count; - - unsigned dup_mask = 0; - switch (count) { - case 4: - assert(comp[3] <= 3); - dup_mask |= (1U << comp[3]) - & ((1U << comp[0]) | (1U << comp[1]) | (1U << comp[2])); - this->mask.w = comp[3]; - - case 3: - assert(comp[2] <= 3); - dup_mask |= (1U << comp[2]) - & ((1U << comp[0]) | (1U << comp[1])); - this->mask.z = comp[2]; - - case 2: - assert(comp[1] <= 3); - dup_mask |= (1U << comp[1]) - & ((1U << comp[0])); - this->mask.y = comp[1]; - - case 1: - assert(comp[0] <= 3); - this->mask.x = comp[0]; - } - - this->mask.has_duplicates = dup_mask != 0; - - /* Based on the number of elements in the swizzle and the base type - * (i.e., float, int, unsigned, or bool) of the vector being swizzled, - * generate the type of the resulting value. 
- */ - type = glsl_type::get_instance(val->type->base_type, mask.num_components, 1); -} - -ir_swizzle::ir_swizzle(ir_rvalue *val, unsigned x, unsigned y, unsigned z, - unsigned w, unsigned count) - : ir_rvalue(ir_type_swizzle), val(val) -{ - const unsigned components[4] = { x, y, z, w }; - this->init_mask(components, count); -} - -ir_swizzle::ir_swizzle(ir_rvalue *val, const unsigned *comp, - unsigned count) - : ir_rvalue(ir_type_swizzle), val(val) -{ - this->init_mask(comp, count); -} - -ir_swizzle::ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask) - : ir_rvalue(ir_type_swizzle) -{ - this->val = val; - this->mask = mask; - this->type = glsl_type::get_instance(val->type->base_type, - mask.num_components, 1); -} - -#define X 1 -#define R 5 -#define S 9 -#define I 13 - -ir_swizzle * -ir_swizzle::create(ir_rvalue *val, const char *str, unsigned vector_length) -{ - void *ctx = ralloc_parent(val); - - /* For each possible swizzle character, this table encodes the value in - * \c idx_map that represents the 0th element of the vector. For invalid - * swizzle characters (e.g., 'k'), a special value is used that will allow - * detection of errors. - */ - static const unsigned char base_idx[26] = { - /* a b c d e f g h i j k l m */ - R, R, I, I, I, I, R, I, I, I, I, I, I, - /* n o p q r s t u v w x y z */ - I, I, S, S, R, S, S, I, I, X, X, X, X - }; - - /* Each valid swizzle character has an entry in the previous table. This - * table encodes the base index encoded in the previous table plus the actual - * index of the swizzle character. When processing swizzles, the first - * character in the string is indexed in the previous table. Each character - * in the string is indexed in this table, and the value found there has the - * value from the first table subtracted. The result must be in the range - * [0,3]. - * - * For example, the string "wzyx" will get X from the first table. Each of - * the characters will get X+3, X+2, X+1, and X+0 from this table. After - * subtraction, the swizzle values are { 3, 2, 1, 0 }. - * - * The string "wzrg" will get X from the first table. Each of the characters - * will get X+3, X+2, R+0, and R+1 from this table. After subtraction, the - * swizzle values are { 3, 2, 4, 5 }. Since 4 and 5 are outside the range - * [0,3], the error is detected. - */ - static const unsigned char idx_map[26] = { - /* a b c d e f g h i j k l m */ - R+3, R+2, 0, 0, 0, 0, R+1, 0, 0, 0, 0, 0, 0, - /* n o p q r s t u v w x y z */ - 0, 0, S+2, S+3, R+0, S+0, S+1, 0, 0, X+3, X+0, X+1, X+2 - }; - - int swiz_idx[4] = { 0, 0, 0, 0 }; - unsigned i; - - - /* Validate the first character in the swizzle string and look up the base - * index value as described above. - */ - if ((str[0] < 'a') || (str[0] > 'z')) - return NULL; - - const unsigned base = base_idx[str[0] - 'a']; - - - for (i = 0; (i < 4) && (str[i] != '\0'); i++) { - /* Validate the next character, and, as described above, convert it to a - * swizzle index.
- */ - if ((str[i] < 'a') || (str[i] > 'z')) - return NULL; - - swiz_idx[i] = idx_map[str[i] - 'a'] - base; - if ((swiz_idx[i] < 0) || (swiz_idx[i] >= (int) vector_length)) - return NULL; - } - - if (str[i] != '\0') - return NULL; - - return new(ctx) ir_swizzle(val, swiz_idx[0], swiz_idx[1], swiz_idx[2], - swiz_idx[3], i); -} - -#undef X -#undef R -#undef S -#undef I - -ir_variable * -ir_swizzle::variable_referenced() const -{ - return this->val->variable_referenced(); -} - - -bool ir_variable::temporaries_allocate_names = false; - -const char ir_variable::tmp_name[] = "compiler_temp"; - -ir_variable::ir_variable(const struct glsl_type *type, const char *name, - ir_variable_mode mode) - : ir_instruction(ir_type_variable) -{ - this->type = type; - - if (mode == ir_var_temporary && !ir_variable::temporaries_allocate_names) - name = NULL; - - /* The ir_variable clone method may call this constructor with name set to - * tmp_name. - */ - assert(name != NULL - || mode == ir_var_temporary - || mode == ir_var_function_in - || mode == ir_var_function_out - || mode == ir_var_function_inout); - assert(name != ir_variable::tmp_name - || mode == ir_var_temporary); - if (mode == ir_var_temporary - && (name == NULL || name == ir_variable::tmp_name)) { - this->name = ir_variable::tmp_name; - } else { - this->name = ralloc_strdup(this, name); - } - - this->u.max_ifc_array_access = NULL; - - this->data.explicit_location = false; - this->data.has_initializer = false; - this->data.location = -1; - this->data.location_frac = 0; - this->data.binding = 0; - this->data.warn_extension_index = 0; - this->constant_value = NULL; - this->constant_initializer = NULL; - this->data.origin_upper_left = false; - this->data.pixel_center_integer = false; - this->data.depth_layout = ir_depth_layout_none; - this->data.used = false; - this->data.always_active_io = false; - this->data.read_only = false; - this->data.centroid = false; - this->data.sample = false; - this->data.patch = false; - this->data.invariant = false; - this->data.how_declared = ir_var_declared_normally; - this->data.mode = mode; - this->data.interpolation = INTERP_QUALIFIER_NONE; - this->data.max_array_access = 0; - this->data.offset = 0; - this->data.precision = GLSL_PRECISION_NONE; - this->data.image_read_only = false; - this->data.image_write_only = false; - this->data.image_coherent = false; - this->data.image_volatile = false; - this->data.image_restrict = false; - this->data.from_ssbo_unsized_array = false; - - if (type != NULL) { - if (type->base_type == GLSL_TYPE_SAMPLER) - this->data.read_only = true; - - if (type->is_interface()) - this->init_interface_type(type); - else if (type->without_array()->is_interface()) - this->init_interface_type(type->without_array()); - } -} - - -const char * -interpolation_string(unsigned interpolation) -{ - switch (interpolation) { - case INTERP_QUALIFIER_NONE: return "no"; - case INTERP_QUALIFIER_SMOOTH: return "smooth"; - case INTERP_QUALIFIER_FLAT: return "flat"; - case INTERP_QUALIFIER_NOPERSPECTIVE: return "noperspective"; - } - - assert(!"Should not get here."); - return ""; -} - - -glsl_interp_qualifier -ir_variable::determine_interpolation_mode(bool flat_shade) -{ - if (this->data.interpolation != INTERP_QUALIFIER_NONE) - return (glsl_interp_qualifier) this->data.interpolation; - int location = this->data.location; - bool is_gl_Color = - location == VARYING_SLOT_COL0 || location == VARYING_SLOT_COL1; - if (flat_shade && is_gl_Color) - return INTERP_QUALIFIER_FLAT; - else - return INTERP_QUALIFIER_SMOOTH; -} 
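To make the fallback logic in determine_interpolation_mode() above concrete, here is a minimal usage sketch, not part of the patch: mem_ctx is an assumed valid ralloc context and the variable setup is hypothetical, while ir_variable, glsl_type::vec4_type, VARYING_SLOT_COL0 and the INTERP_QUALIFIER_* values are the ones used in the surrounding code. An input with no explicit qualifier resolves to flat interpolation only when it is a color built-in and flat shading is enabled; everything else resolves to smooth.

/* Hypothetical sketch: resolve the effective interpolation qualifier for an
 * unqualified built-in color varying.  mem_ctx is an assumed ralloc context.
 */
ir_variable *color = new(mem_ctx) ir_variable(glsl_type::vec4_type, "gl_Color",
                                              ir_var_shader_in);
color->data.location = VARYING_SLOT_COL0;   /* one of the two color slots */

/* No explicit qualifier, so the result depends only on the flat-shade state. */
assert(color->determine_interpolation_mode(true) == INTERP_QUALIFIER_FLAT);
assert(color->determine_interpolation_mode(false) == INTERP_QUALIFIER_SMOOTH);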
- -const char *const ir_variable::warn_extension_table[] = { - "", - "GL_ARB_shader_stencil_export", - "GL_AMD_shader_stencil_export", -}; - -void -ir_variable::enable_extension_warning(const char *extension) -{ - for (unsigned i = 0; i < ARRAY_SIZE(warn_extension_table); i++) { - if (strcmp(warn_extension_table[i], extension) == 0) { - this->data.warn_extension_index = i; - return; - } - } - - assert(!"Should not get here."); - this->data.warn_extension_index = 0; -} - -const char * -ir_variable::get_extension_warning() const -{ - return this->data.warn_extension_index == 0 - ? NULL : warn_extension_table[this->data.warn_extension_index]; -} - -ir_function_signature::ir_function_signature(const glsl_type *return_type, - builtin_available_predicate b) - : ir_instruction(ir_type_function_signature), - return_type(return_type), is_defined(false), is_intrinsic(false), - builtin_avail(b), _function(NULL) -{ - this->origin = NULL; -} - - -bool -ir_function_signature::is_builtin() const -{ - return builtin_avail != NULL; -} - - -bool -ir_function_signature::is_builtin_available(const _mesa_glsl_parse_state *state) const -{ - /* We can't call the predicate without a state pointer, so just say that - * the signature is available. At compile time, we need the filtering, - * but also receive a valid state pointer. At link time, we're resolving - * imported built-in prototypes to their definitions, which will always - * be an exact match. So we can skip the filtering. - */ - if (state == NULL) - return true; - - assert(builtin_avail != NULL); - return builtin_avail(state); -} - - -static bool -modes_match(unsigned a, unsigned b) -{ - if (a == b) - return true; - - /* Accept "in" vs. "const in" */ - if ((a == ir_var_const_in && b == ir_var_function_in) || - (b == ir_var_const_in && a == ir_var_function_in)) - return true; - - return false; -} - - -const char * -ir_function_signature::qualifiers_match(exec_list *params) -{ - /* check that the qualifiers match. */ - foreach_two_lists(a_node, &this->parameters, b_node, params) { - ir_variable *a = (ir_variable *) a_node; - ir_variable *b = (ir_variable *) b_node; - - if (a->data.read_only != b->data.read_only || - !modes_match(a->data.mode, b->data.mode) || - a->data.interpolation != b->data.interpolation || - a->data.centroid != b->data.centroid || - a->data.sample != b->data.sample || - a->data.patch != b->data.patch || - a->data.image_read_only != b->data.image_read_only || - a->data.image_write_only != b->data.image_write_only || - a->data.image_coherent != b->data.image_coherent || - a->data.image_volatile != b->data.image_volatile || - a->data.image_restrict != b->data.image_restrict) { - - /* parameter a's qualifiers don't match */ - return a->name; - } - } - return NULL; -} - - -void -ir_function_signature::replace_parameters(exec_list *new_params) -{ - /* Destroy all of the previous parameter information. If the previous - * parameter information comes from the function prototype, it may either - * specify incorrect parameter names or not have names at all. 
- */ - new_params->move_nodes_to(&parameters); -} - - -ir_function::ir_function(const char *name) - : ir_instruction(ir_type_function) -{ - this->subroutine_index = -1; - this->name = ralloc_strdup(this, name); -} - - -bool -ir_function::has_user_signature() -{ - foreach_in_list(ir_function_signature, sig, &this->signatures) { - if (!sig->is_builtin()) - return true; - } - return false; -} - - -ir_rvalue * -ir_rvalue::error_value(void *mem_ctx) -{ - ir_rvalue *v = new(mem_ctx) ir_rvalue(ir_type_unset); - - v->type = glsl_type::error_type; - return v; -} - - -void -visit_exec_list(exec_list *list, ir_visitor *visitor) -{ - foreach_in_list_safe(ir_instruction, node, list) { - node->accept(visitor); - } -} - - -static void -steal_memory(ir_instruction *ir, void *new_ctx) -{ - ir_variable *var = ir->as_variable(); - ir_function *fn = ir->as_function(); - ir_constant *constant = ir->as_constant(); - if (var != NULL && var->constant_value != NULL) - steal_memory(var->constant_value, ir); - - if (var != NULL && var->constant_initializer != NULL) - steal_memory(var->constant_initializer, ir); - - if (fn != NULL && fn->subroutine_types) - ralloc_steal(new_ctx, fn->subroutine_types); - - /* The components of aggregate constants are not visited by the normal - * visitor, so steal their values by hand. - */ - if (constant != NULL) { - if (constant->type->is_record()) { - foreach_in_list(ir_constant, field, &constant->components) { - steal_memory(field, ir); - } - } else if (constant->type->is_array()) { - for (unsigned int i = 0; i < constant->type->length; i++) { - steal_memory(constant->array_elements[i], ir); - } - } - } - - ralloc_steal(new_ctx, ir); -} - - -void -reparent_ir(exec_list *list, void *mem_ctx) -{ - foreach_in_list(ir_instruction, node, list) { - visit_tree(node, steal_memory, mem_ctx); - } -} - - -static ir_rvalue * -try_min_one(ir_rvalue *ir) -{ - ir_expression *expr = ir->as_expression(); - - if (!expr || expr->operation != ir_binop_min) - return NULL; - - if (expr->operands[0]->is_one()) - return expr->operands[1]; - - if (expr->operands[1]->is_one()) - return expr->operands[0]; - - return NULL; -} - -static ir_rvalue * -try_max_zero(ir_rvalue *ir) -{ - ir_expression *expr = ir->as_expression(); - - if (!expr || expr->operation != ir_binop_max) - return NULL; - - if (expr->operands[0]->is_zero()) - return expr->operands[1]; - - if (expr->operands[1]->is_zero()) - return expr->operands[0]; - - return NULL; -} - -ir_rvalue * -ir_rvalue::as_rvalue_to_saturate() -{ - ir_expression *expr = this->as_expression(); - - if (!expr) - return NULL; - - ir_rvalue *max_zero = try_max_zero(expr); - if (max_zero) { - return try_min_one(max_zero); - } else { - ir_rvalue *min_one = try_min_one(expr); - if (min_one) { - return try_max_zero(min_one); - } - } - - return NULL; -} - - -unsigned -vertices_per_prim(GLenum prim) -{ - switch (prim) { - case GL_POINTS: - return 1; - case GL_LINES: - return 2; - case GL_TRIANGLES: - return 3; - case GL_LINES_ADJACENCY: - return 4; - case GL_TRIANGLES_ADJACENCY: - return 6; - default: - assert(!"Bad primitive"); - return 3; - } -} - -/** - * Generate a string describing the mode of a variable - */ -const char * -mode_string(const ir_variable *var) -{ - switch (var->data.mode) { - case ir_var_auto: - return (var->data.read_only) ?
"global constant" : "global variable"; - - case ir_var_uniform: - return "uniform"; - - case ir_var_shader_storage: - return "buffer"; - - case ir_var_shader_in: - return "shader input"; - - case ir_var_shader_out: - return "shader output"; - - case ir_var_function_in: - case ir_var_const_in: - return "function input"; - - case ir_var_function_out: - return "function output"; - - case ir_var_function_inout: - return "function inout"; - - case ir_var_system_value: - return "shader input"; - - case ir_var_temporary: - return "compiler temporary"; - - case ir_var_mode_count: - break; - } - - assert(!"Should not get here."); - return "invalid variable"; -} diff --git a/src/glsl/ir.h b/src/glsl/ir.h deleted file mode 100644 index bd7b5506343..00000000000 --- a/src/glsl/ir.h +++ /dev/null @@ -1,2632 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once -#ifndef IR_H -#define IR_H - -#include -#include - -#include "util/ralloc.h" -#include "compiler/glsl_types.h" -#include "list.h" -#include "ir_visitor.h" -#include "ir_hierarchical_visitor.h" -#include "main/mtypes.h" - -#ifdef __cplusplus - -/** - * \defgroup IR Intermediate representation nodes - * - * @{ - */ - -/** - * Class tags - * - * Each concrete class derived from \c ir_instruction has a value in this - * enumerant. The value for the type is stored in \c ir_instruction::ir_type - * by the constructor. While using type tags is not very C++, it is extremely - * convenient. For example, during debugging you can simply inspect - * \c ir_instruction::ir_type to find out the actual type of the object. - * - * In addition, it is possible to use a switch-statement based on \c - * \c ir_instruction::ir_type to select different behavior for different object - * types. For functions that have only slight differences for several object - * types, this allows writing very straightforward, readable code. 
- */ -enum ir_node_type { - ir_type_dereference_array, - ir_type_dereference_record, - ir_type_dereference_variable, - ir_type_constant, - ir_type_expression, - ir_type_swizzle, - ir_type_texture, - ir_type_variable, - ir_type_assignment, - ir_type_call, - ir_type_function, - ir_type_function_signature, - ir_type_if, - ir_type_loop, - ir_type_loop_jump, - ir_type_return, - ir_type_discard, - ir_type_emit_vertex, - ir_type_end_primitive, - ir_type_barrier, - ir_type_max, /**< maximum ir_type enum number, for validation */ - ir_type_unset = ir_type_max -}; - - -/** - * Base class of all IR instructions - */ -class ir_instruction : public exec_node { -public: - enum ir_node_type ir_type; - - /** - * GCC 4.7+ and clang warn when deleting an ir_instruction unless - * there's a virtual destructor present. Because we almost - * universally use ralloc for our memory management of - * ir_instructions, the destructor doesn't need to do any work. - */ - virtual ~ir_instruction() - { - } - - /** ir_print_visitor helper for debugging. */ - void print(void) const; - void fprint(FILE *f) const; - - virtual void accept(ir_visitor *) = 0; - virtual ir_visitor_status accept(ir_hierarchical_visitor *) = 0; - virtual ir_instruction *clone(void *mem_ctx, - struct hash_table *ht) const = 0; - - bool is_rvalue() const - { - return ir_type == ir_type_dereference_array || - ir_type == ir_type_dereference_record || - ir_type == ir_type_dereference_variable || - ir_type == ir_type_constant || - ir_type == ir_type_expression || - ir_type == ir_type_swizzle || - ir_type == ir_type_texture; - } - - bool is_dereference() const - { - return ir_type == ir_type_dereference_array || - ir_type == ir_type_dereference_record || - ir_type == ir_type_dereference_variable; - } - - bool is_jump() const - { - return ir_type == ir_type_loop_jump || - ir_type == ir_type_return || - ir_type == ir_type_discard; - } - - /** - * \name IR instruction downcast functions - * - * These functions either cast the object to a derived class or return - * \c NULL if the object's type does not match the specified derived class. - * Additional downcast functions will be added as needed. - */ - /*@{*/ - #define AS_BASE(TYPE) \ - class ir_##TYPE *as_##TYPE() \ - { \ - assume(this != NULL); \ - return is_##TYPE() ? (ir_##TYPE *) this : NULL; \ - } \ - const class ir_##TYPE *as_##TYPE() const \ - { \ - assume(this != NULL); \ - return is_##TYPE() ? (ir_##TYPE *) this : NULL; \ - } - - AS_BASE(rvalue) - AS_BASE(dereference) - AS_BASE(jump) - #undef AS_BASE - - #define AS_CHILD(TYPE) \ - class ir_##TYPE * as_##TYPE() \ - { \ - assume(this != NULL); \ - return ir_type == ir_type_##TYPE ? (ir_##TYPE *) this : NULL; \ - } \ - const class ir_##TYPE * as_##TYPE() const \ - { \ - assume(this != NULL); \ - return ir_type == ir_type_##TYPE ? (const ir_##TYPE *) this : NULL; \ - } - AS_CHILD(variable) - AS_CHILD(function) - AS_CHILD(dereference_array) - AS_CHILD(dereference_variable) - AS_CHILD(dereference_record) - AS_CHILD(expression) - AS_CHILD(loop) - AS_CHILD(assignment) - AS_CHILD(call) - AS_CHILD(return) - AS_CHILD(if) - AS_CHILD(swizzle) - AS_CHILD(texture) - AS_CHILD(constant) - AS_CHILD(discard) - #undef AS_CHILD - /*@}*/ - - /** - * IR equality method: Return true if the referenced instruction would - * return the same value as this one. - * - * This is intended to be used for CSE and algebraic optimizations, on rvalues - * in particular. No support for other instruction types (assignments, - * jumps, calls, etc.) is planned.
- */ - virtual bool equals(const ir_instruction *ir, - enum ir_node_type ignore = ir_type_unset) const; - -protected: - ir_instruction(enum ir_node_type t) - : ir_type(t) - { - } - -private: - ir_instruction() - { - assert(!"Should not get here."); - } -}; - - -/** - * The base class for all "values"/expression trees. - */ -class ir_rvalue : public ir_instruction { -public: - const struct glsl_type *type; - - virtual ir_rvalue *clone(void *mem_ctx, struct hash_table *) const; - - virtual void accept(ir_visitor *v) - { - v->visit(this); - } - - virtual ir_visitor_status accept(ir_hierarchical_visitor *); - - virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); - - ir_rvalue *as_rvalue_to_saturate(); - - virtual bool is_lvalue() const - { - return false; - } - - /** - * Get the variable that is ultimately referenced by an r-value - */ - virtual ir_variable *variable_referenced() const - { - return NULL; - } - - - /** - * If an r-value is a reference to a whole variable, get that variable - * - * \return - * Pointer to a variable that is completely dereferenced by the r-value. If - * the r-value is not a dereference or the dereference does not access the - * entire variable (i.e., it's just one array element, struct field), \c NULL - * is returned. - */ - virtual ir_variable *whole_variable_referenced() - { - return NULL; - } - - /** - * Determine if an r-value has the value zero - * - * The base implementation of this function always returns \c false. The - * \c ir_constant class over-rides this function to return \c true \b only - * for vector and scalar types that have all elements set to the value - * zero (or \c false for booleans). - * - * \sa ir_constant::has_value, ir_rvalue::is_one, ir_rvalue::is_negative_one - */ - virtual bool is_zero() const; - - /** - * Determine if an r-value has the value one - * - * The base implementation of this function always returns \c false. The - * \c ir_constant class over-rides this function to return \c true \b only - * for vector and scalar types that have all elements set to the value - * one (or \c true for booleans). - * - * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_negative_one - */ - virtual bool is_one() const; - - /** - * Determine if an r-value has the value negative one - * - * The base implementation of this function always returns \c false. The - * \c ir_constant class over-rides this function to return \c true \b only - * for vector and scalar types that have all elements set to the value - * negative one. For boolean types, the result is always \c false. - * - * \sa ir_constant::has_value, ir_rvalue::is_zero, ir_rvalue::is_one - */ - virtual bool is_negative_one() const; - - /** - * Determine if an r-value is an unsigned integer constant which can be - * stored in 16 bits. - * - * \sa ir_constant::is_uint16_constant. - */ - virtual bool is_uint16_constant() const { return false; } - - /** - * Return a generic value of error_type. - * - * Allocation will be performed with 'mem_ctx' as ralloc owner. - */ - static ir_rvalue *error_value(void *mem_ctx); - -protected: - ir_rvalue(enum ir_node_type t); -}; - - -/** - * Variable storage classes - */ -enum ir_variable_mode { - ir_var_auto = 0, /**< Function local variables and globals. */ - ir_var_uniform, /**< Variable declared as a uniform. */ - ir_var_shader_storage, /**< Variable declared as an ssbo. */ - ir_var_shader_shared, /**< Variable declared as shared. 
*/ - ir_var_shader_in, - ir_var_shader_out, - ir_var_function_in, - ir_var_function_out, - ir_var_function_inout, - ir_var_const_in, /**< "in" param that must be a constant expression */ - ir_var_system_value, /**< Ex: front-face, instance-id, etc. */ - ir_var_temporary, /**< Temporary variable generated during compilation. */ - ir_var_mode_count /**< Number of variable modes */ -}; - -/** - * Enum keeping track of how a variable was declared. For error checking of - * the gl_PerVertex redeclaration rules. - */ -enum ir_var_declaration_type { - /** - * Normal declaration (for most variables, this means an explicit - * declaration. Exception: temporaries are always implicitly declared, but - * they still use ir_var_declared_normally). - * - * Note: an ir_variable that represents a named interface block uses - * ir_var_declared_normally. - */ - ir_var_declared_normally = 0, - - /** - * Variable was explicitly declared (or re-declared) in an unnamed - * interface block. - */ - ir_var_declared_in_block, - - /** - * Variable is an implicitly declared built-in that has not been explicitly - * re-declared by the shader. - */ - ir_var_declared_implicitly, - - /** - * Variable is implicitly generated by the compiler and should not be - * visible via the API. - */ - ir_var_hidden, -}; - -/** - * \brief Layout qualifiers for gl_FragDepth. - * - * The AMD/ARB_conservative_depth extensions allow gl_FragDepth to be redeclared - * with a layout qualifier. - */ -enum ir_depth_layout { - ir_depth_layout_none, /**< No depth layout is specified. */ - ir_depth_layout_any, - ir_depth_layout_greater, - ir_depth_layout_less, - ir_depth_layout_unchanged -}; - -/** - * \brief Convert depth layout qualifier to string. - */ -const char* -depth_layout_string(ir_depth_layout layout); - -/** - * Description of built-in state associated with a uniform - * - * \sa ir_variable::state_slots - */ -struct ir_state_slot { - int tokens[5]; - int swizzle; -}; - - -/** - * Get the string value for an interpolation qualifier - * - * \return The string that would be used in a shader to specify \c - * mode will be returned. - * - * This function is used to generate error messages of the form "shader - * uses %s interpolation qualifier", so in the case where there is no - * interpolation qualifier, it returns "no". - * - * This function should only be used on a shader input or output variable. - */ -const char *interpolation_string(unsigned interpolation); - - -class ir_variable : public ir_instruction { -public: - ir_variable(const struct glsl_type *, const char *, ir_variable_mode); - - virtual ir_variable *clone(void *mem_ctx, struct hash_table *ht) const; - - virtual void accept(ir_visitor *v) - { - v->visit(this); - } - - virtual ir_visitor_status accept(ir_hierarchical_visitor *); - - - /** - * Determine how this variable should be interpolated based on its - * interpolation qualifier (if present), whether it is gl_Color or - * gl_SecondaryColor, and whether flatshading is enabled in the current GL - * state. - * - * The return value will always be either INTERP_QUALIFIER_SMOOTH, - * INTERP_QUALIFIER_NOPERSPECTIVE, or INTERP_QUALIFIER_FLAT. - */ - glsl_interp_qualifier determine_interpolation_mode(bool flat_shade); - - /** - * Determine whether or not a variable is part of a uniform or - * shader storage block. 
- */ - inline bool is_in_buffer_block() const - { - return (this->data.mode == ir_var_uniform || - this->data.mode == ir_var_shader_storage) && - this->interface_type != NULL; - } - - /** - * Determine whether or not a variable is part of a shader storage block. - */ - inline bool is_in_shader_storage_block() const - { - return this->data.mode == ir_var_shader_storage && - this->interface_type != NULL; - } - - /** - * Determine whether or not a variable is the declaration of an interface - * block - * - * For the first declaration below, there will be an \c ir_variable named - * "instance" whose type and whose instance_type will be the same - * \c glsl_type. For the second declaration, there will be an \c ir_variable - * named "f" whose type is float and whose instance_type is B2. - * - * "instance" is an interface instance variable, but "f" is not. - * - * uniform B1 { - * float f; - * } instance; - * - * uniform B2 { - * float f; - * }; - */ - inline bool is_interface_instance() const - { - return this->type->without_array() == this->interface_type; - } - - /** - * Set this->interface_type on a newly created variable. - */ - void init_interface_type(const struct glsl_type *type) - { - assert(this->interface_type == NULL); - this->interface_type = type; - if (this->is_interface_instance()) { - this->u.max_ifc_array_access = - rzalloc_array(this, unsigned, type->length); - } - } - - /** - * Change this->interface_type on a variable that previously had a - * different, but compatible, interface_type. This is used during linking - * to set the size of arrays in interface blocks. - */ - void change_interface_type(const struct glsl_type *type) - { - if (this->u.max_ifc_array_access != NULL) { - /* max_ifc_array_access has already been allocated, so make sure the - * new interface has the same number of fields as the old one. - */ - assert(this->interface_type->length == type->length); - } - this->interface_type = type; - } - - /** - * Change this->interface_type on a variable that previously had a - * different, and incompatible, interface_type. This is used during - * compilation to handle redeclaration of the built-in gl_PerVertex - * interface block. - */ - void reinit_interface_type(const struct glsl_type *type) - { - if (this->u.max_ifc_array_access != NULL) { -#ifndef NDEBUG - /* Redeclaring gl_PerVertex is only allowed if none of the built-ins - * it defines have been accessed yet; so it's safe to throw away the - * old max_ifc_array_access pointer, since all of its values are - * zero. - */ - for (unsigned i = 0; i < this->interface_type->length; i++) - assert(this->u.max_ifc_array_access[i] == 0); -#endif - ralloc_free(this->u.max_ifc_array_access); - this->u.max_ifc_array_access = NULL; - } - this->interface_type = NULL; - init_interface_type(type); - } - - const glsl_type *get_interface_type() const - { - return this->interface_type; - } - - /** - * Get the max_ifc_array_access pointer - * - * A "set" function is not needed because the array is dynamically allocated - * as necessary.
- */ - inline unsigned *get_max_ifc_array_access() - { - assert(this->data._num_state_slots == 0); - return this->u.max_ifc_array_access; - } - - inline unsigned get_num_state_slots() const - { - assert(!this->is_interface_instance() - || this->data._num_state_slots == 0); - return this->data._num_state_slots; - } - - inline void set_num_state_slots(unsigned n) - { - assert(!this->is_interface_instance() - || n == 0); - this->data._num_state_slots = n; - } - - inline ir_state_slot *get_state_slots() - { - return this->is_interface_instance() ? NULL : this->u.state_slots; - } - - inline const ir_state_slot *get_state_slots() const - { - return this->is_interface_instance() ? NULL : this->u.state_slots; - } - - inline ir_state_slot *allocate_state_slots(unsigned n) - { - assert(!this->is_interface_instance()); - - this->u.state_slots = ralloc_array(this, ir_state_slot, n); - this->data._num_state_slots = 0; - - if (this->u.state_slots != NULL) - this->data._num_state_slots = n; - - return this->u.state_slots; - } - - inline bool is_name_ralloced() const - { - return this->name != ir_variable::tmp_name; - } - - /** - * Enable emitting extension warnings for this variable - */ - void enable_extension_warning(const char *extension); - - /** - * Get the extension warning string for this variable - * - * If warnings are not enabled, \c NULL is returned. - */ - const char *get_extension_warning() const; - - /** - * Declared type of the variable - */ - const struct glsl_type *type; - - /** - * Declared name of the variable - */ - const char *name; - - struct ir_variable_data { - - /** - * Is the variable read-only? - * - * This is set for variables declared as \c const, shader inputs, - * and uniforms. - */ - unsigned read_only:1; - unsigned centroid:1; - unsigned sample:1; - unsigned patch:1; - unsigned invariant:1; - unsigned precise:1; - - /** - * Has this variable been used for reading or writing? - * - * Several GLSL semantic checks require knowledge of whether or not a - * variable has been used. For example, it is an error to redeclare a - * variable as invariant after it has been used. - * - * This is only maintained in the ast_to_hir.cpp path, not in - * Mesa's fixed function or ARB program paths. - */ - unsigned used:1; - - /** - * Has this variable been statically assigned? - * - * This answers whether the variable was assigned in any path of - * the shader during ast_to_hir. This doesn't answer whether it is - * still written after dead code removal, nor is it maintained in - * non-ast_to_hir.cpp (GLSL parsing) paths. - */ - unsigned assigned:1; - - /** - * When separate shader programs are enabled, only input/outputs between - * the stages of a multi-stage separate program can be safely removed - * from the shader interface. Other input/outputs must remain active. - */ - unsigned always_active_io:1; - - /** - * Enum indicating how the variable was declared. See - * ir_var_declaration_type. - * - * This is used to detect certain kinds of illegal variable redeclarations. - */ - unsigned how_declared:2; - - /** - * Storage class of the variable. - * - * \sa ir_variable_mode - */ - unsigned mode:4; - - /** - * Interpolation mode for shader inputs / outputs - * - * \sa ir_variable_interpolation - */ - unsigned interpolation:2; - - /** - * \name ARB_fragment_coord_conventions - * @{ - */ - unsigned origin_upper_left:1; - unsigned pixel_center_integer:1; - /*@}*/ - - /** - * Was the location explicitly set in the shader?
- * - * If the location is explicitly set in the shader, it \b cannot be changed - * by the linker or by the API (e.g., calls to \c glBindAttribLocation have - * no effect). - */ - unsigned explicit_location:1; - unsigned explicit_index:1; - - /** - * Was an initial binding explicitly set in the shader? - * - * If so, constant_value contains an integer ir_constant representing the - * initial binding point. - */ - unsigned explicit_binding:1; - - /** - * Does this variable have an initializer? - * - * This is used by the linker to cross-validate initializers of global - * variables. - */ - unsigned has_initializer:1; - - /** - * Is this variable a generic output or input that has not yet been matched - * up to a variable in another stage of the pipeline? - * - * This is used by the linker as scratch storage while assigning locations - * to generic inputs and outputs. - */ - unsigned is_unmatched_generic_inout:1; - - /** - * If non-zero, then this variable may be packed along with other variables - * into a single varying slot, so this offset should be applied when - * accessing components. For example, an offset of 1 means that the x - * component of this variable is actually stored in component y of the - * location specified by \c location. - */ - unsigned location_frac:2; - - /** - * Layout of the matrix. Uses glsl_matrix_layout values. - */ - unsigned matrix_layout:2; - - /** - * Non-zero if this variable was created by lowering a named interface - * block which was not an array. - * - * Note that this variable and \c from_named_ifc_block_array will never - * both be non-zero. - */ - unsigned from_named_ifc_block_nonarray:1; - - /** - * Non-zero if this variable was created by lowering a named interface - * block which was an array. - * - * Note that this variable and \c from_named_ifc_block_nonarray will never - * both be non-zero. - */ - unsigned from_named_ifc_block_array:1; - - /** - * Non-zero if the variable must be a shader input. This is useful for - * constraints on function parameters. - */ - unsigned must_be_shader_input:1; - - /** - * Output index for dual source blending. - * - * \note - * The GLSL spec only allows the values 0 or 1 for the index in \b dual - * source blending. - */ - unsigned index:1; - - /** - * Precision qualifier. - * - * In desktop GLSL we do not care about precision qualifiers at all; in - * fact, the spec says that precision qualifiers are ignored. - * - * To make things easy, we make it so that this field is always - * GLSL_PRECISION_NONE on desktop shaders. This way all the variables - * have the same precision value and the checks we add in the compiler - * for this field will never break a desktop shader compile. - */ - unsigned precision:2; - - /** - * \brief Layout qualifier for gl_FragDepth. - * - * This is not equal to \c ir_depth_layout_none if and only if this - * variable is \c gl_FragDepth and a layout qualifier is specified. - */ - ir_depth_layout depth_layout:3; - - /** - * ARB_shader_image_load_store qualifiers. - */ - unsigned image_read_only:1; /**< "readonly" qualifier. */ - unsigned image_write_only:1; /**< "writeonly" qualifier. */ - unsigned image_coherent:1; - unsigned image_volatile:1; - unsigned image_restrict:1; - - /** - * ARB_shader_storage_buffer_object - */ - unsigned from_ssbo_unsized_array:1; /**< unsized array buffer variable. */ - - /** - * Emit a warning if this variable is accessed. - */ - private: - uint8_t warn_extension_index; - - public: - /** Image internal format if specified explicitly, otherwise GL_NONE.
*/ - uint16_t image_format; - - private: - /** - * Number of state slots used - * - * \note - * This could be stored in as few as 7-bits, if necessary. If it is made - * smaller, add an assertion to \c ir_variable::allocate_state_slots to - * be safe. - */ - uint16_t _num_state_slots; - - public: - /** - * Initial binding point for a sampler, atomic, or UBO. - * - * For array types, this represents the binding point for the first element. - */ - int16_t binding; - - /** - * Storage location of the base of this variable - * - * The precise meaning of this field depends on the nature of the variable. - * - * - Vertex shader input: one of the values from \c gl_vert_attrib. - * - Vertex shader output: one of the values from \c gl_varying_slot. - * - Geometry shader input: one of the values from \c gl_varying_slot. - * - Geometry shader output: one of the values from \c gl_varying_slot. - * - Fragment shader input: one of the values from \c gl_varying_slot. - * - Fragment shader output: one of the values from \c gl_frag_result. - * - Uniforms: Per-stage uniform slot number for default uniform block. - * - Uniforms: Index within the uniform block definition for UBO members. - * - Non-UBO Uniforms: explicit location until linking then reused to - * store uniform slot number. - * - Other: This field is not currently used. - * - * If the variable is a uniform, shader input, or shader output, and the - * slot has not been assigned, the value will be -1. - */ - int location; - - /** - * Vertex stream output identifier. - */ - unsigned stream; - - /** - * Location an atomic counter is stored at. - */ - unsigned offset; - - /** - * Highest element accessed with a constant expression array index - * - * Not used for non-array variables. - */ - unsigned max_array_access; - - /** - * Allow (only) ir_variable direct access private members. - */ - friend class ir_variable; - } data; - - /** - * Value assigned in the initializer of a variable declared "const" - */ - ir_constant *constant_value; - - /** - * Constant expression assigned in the initializer of the variable - * - * \warning - * This field and \c ::constant_value are distinct. Even if the two fields - * refer to constants with the same value, they must point to separate - * objects. - */ - ir_constant *constant_initializer; - -private: - static const char *const warn_extension_table[]; - - union { - /** - * For variables which satisfy the is_interface_instance() predicate, - * this points to an array of integers such that if the ith member of - * the interface block is an array, max_ifc_array_access[i] is the - * maximum array element of that member that has been accessed. If the - * ith member of the interface block is not an array, - * max_ifc_array_access[i] is unused. - * - * For variables whose type is not an interface block, this pointer is - * NULL. - */ - unsigned *max_ifc_array_access; - - /** - * Built-in state that backs this uniform - * - * Once set at variable creation, \c state_slots must remain invariant. - * - * If the variable is not a uniform, \c _num_state_slots will be zero - * and \c state_slots will be \c NULL. - */ - ir_state_slot *state_slots; - } u; - - /** - * For variables that are in an interface block or are an instance of an - * interface block, this is the \c GLSL_TYPE_INTERFACE type for that block. 
- * - * \sa ir_variable::location - */ - const glsl_type *interface_type; - - /** - * Name used for anonymous compiler temporaries - */ - static const char tmp_name[]; - -public: - /** - * Should the constructor keep names for ir_var_temporary variables? - * - * When this global is false, names passed to the constructor for - * \c ir_var_temporary variables will be dropped. Instead, the variable will - * be named "compiler_temp". This name will be in static storage. - * - * \warning - * \b NEVER change the mode of an \c ir_var_temporary. - * - * \warning - * This variable is \b not thread-safe. It is global, \b not - * per-context. It begins life false. A context can, at some point, make - * it true. From that point on, it will be true forever. This should be - * okay since it will only be set true while debugging. - */ - static bool temporaries_allocate_names; -}; - -/** - * A function that returns whether a built-in function is available in the - * current shading language (based on version, ES or desktop, and extensions). - */ -typedef bool (*builtin_available_predicate)(const _mesa_glsl_parse_state *); - -/*@{*/ -/** - * The representation of a function instance; may be the full definition or - * simply a prototype. - */ -class ir_function_signature : public ir_instruction { - /* An ir_function_signature will be part of the list of signatures in - * an ir_function. - */ -public: - ir_function_signature(const glsl_type *return_type, - builtin_available_predicate builtin_avail = NULL); - - virtual ir_function_signature *clone(void *mem_ctx, - struct hash_table *ht) const; - ir_function_signature *clone_prototype(void *mem_ctx, - struct hash_table *ht) const; - - virtual void accept(ir_visitor *v) - { - v->visit(this); - } - - virtual ir_visitor_status accept(ir_hierarchical_visitor *); - - /** - * Attempt to evaluate this function as a constant expression, - * given a list of the actual parameters and the variable context. - * Returns NULL for non-built-ins. - */ - ir_constant *constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context); - - /** - * Get the name of the function for which this is a signature - */ - const char *function_name() const; - - /** - * Get a handle to the function for which this is a signature - * - * There is no setter function; this function returns a \c const pointer, - * and \c ir_function_signature::_function is private for a reason. The - * only way to make a connection between a function and function signature - * is via \c ir_function::add_signature. This helps ensure that certain - * invariants (i.e., a function signature is in the list of signatures for - * its \c _function) are met. - * - * \sa ir_function::add_signature - */ - inline const class ir_function *function() const - { - return this->_function; - } - - /** - * Check whether the qualifiers match between this signature's parameters - * and the supplied parameter list. If not, returns the name of the first - * parameter with mismatched qualifiers (for use in error messages). - */ - const char *qualifiers_match(exec_list *params); - - /** - * Replace the current parameter list with the given one. This is useful - * if the current information came from a prototype, and either has invalid - * or missing parameter names. - */ - void replace_parameters(exec_list *new_params); - - /** - * Function return type. - * - * \note This discards the optional precision qualifier. - */ - const struct glsl_type *return_type; - - /** - * List of ir_variable of function parameters.
- * - * This represents the storage. The parameters passed in a particular - * call will be in ir_call::actual_parameters. - */ - struct exec_list parameters; - - /** Whether or not this function has a body (which may be empty). */ - unsigned is_defined:1; - - /** Whether or not this function signature is a built-in. */ - bool is_builtin() const; - - /** - * Whether or not this function is an intrinsic to be implemented - * by the driver. - */ - bool is_intrinsic; - - /** Whether or not a built-in is available for this shader. */ - bool is_builtin_available(const _mesa_glsl_parse_state *state) const; - - /** Body of instructions in the function. */ - struct exec_list body; - -private: - /** - * A function pointer to a predicate that answers whether a built-in - * function is available in the current shader. NULL if not a built-in. - */ - builtin_available_predicate builtin_avail; - - /** Function of which this signature is one overload. */ - class ir_function *_function; - - /** Function signature of which this one is a prototype clone */ - const ir_function_signature *origin; - - friend class ir_function; - - /** - * Helper function to run a list of instructions for constant - * expression evaluation. - * - * The hash table represents the values of the visible variables. - * There are no scoping issues because the table is indexed on - * ir_variable pointers, not variable names. - * - * Returns false if the expression is not constant, true otherwise, - * and the value in *result if result is non-NULL. - */ - bool constant_expression_evaluate_expression_list(const struct exec_list &body, - struct hash_table *variable_context, - ir_constant **result); -}; - - -/** - * Header for tracking multiple overloaded functions with the same name. - * Contains a list of ir_function_signatures representing each of the - * actual functions. - */ -class ir_function : public ir_instruction { -public: - ir_function(const char *name); - - virtual ir_function *clone(void *mem_ctx, struct hash_table *ht) const; - - virtual void accept(ir_visitor *v) - { - v->visit(this); - } - - virtual ir_visitor_status accept(ir_hierarchical_visitor *); - - void add_signature(ir_function_signature *sig) - { - sig->_function = this; - this->signatures.push_tail(sig); - } - - /** - * Find a signature that matches a set of actual parameters, taking implicit - * conversions into account. Also flags whether the match was exact. - */ - ir_function_signature *matching_signature(_mesa_glsl_parse_state *state, - const exec_list *actual_param, - bool allow_builtins, - bool *match_is_exact); - - /** - * Find a signature that matches a set of actual parameters, taking implicit - * conversions into account. - */ - ir_function_signature *matching_signature(_mesa_glsl_parse_state *state, - const exec_list *actual_param, - bool allow_builtins); - - /** - * Find a signature that exactly matches a set of actual parameters without - * any implicit type conversions. - */ - ir_function_signature *exact_matching_signature(_mesa_glsl_parse_state *state, - const exec_list *actual_ps); - - /** - * Name of the function. - */ - const char *name; - - /** Whether or not this function has a signature that isn't a built-in. */ - bool has_user_signature(); - - /** - * List of ir_function_signature for each overloaded function with this name. - */ - struct exec_list signatures; - - /** - * is this function a subroutine type declaration - * e.g.
-
-
-/**
- * Header for tracking multiple overloaded functions with the same name.
- * Contains a list of ir_function_signatures representing each of the
- * actual functions.
- */
-class ir_function : public ir_instruction {
-public:
-   ir_function(const char *name);
-
-   virtual ir_function *clone(void *mem_ctx, struct hash_table *ht) const;
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   void add_signature(ir_function_signature *sig)
-   {
-      sig->_function = this;
-      this->signatures.push_tail(sig);
-   }
-
-   /**
-    * Find a signature that matches a set of actual parameters, taking implicit
-    * conversions into account. Also flags whether the match was exact.
-    */
-   ir_function_signature *matching_signature(_mesa_glsl_parse_state *state,
-                                             const exec_list *actual_param,
-                                             bool allow_builtins,
-                                             bool *match_is_exact);
-
-   /**
-    * Find a signature that matches a set of actual parameters, taking implicit
-    * conversions into account.
-    */
-   ir_function_signature *matching_signature(_mesa_glsl_parse_state *state,
-                                             const exec_list *actual_param,
-                                             bool allow_builtins);
-
-   /**
-    * Find a signature that exactly matches a set of actual parameters without
-    * any implicit type conversions.
-    */
-   ir_function_signature *exact_matching_signature(_mesa_glsl_parse_state *state,
-                                                   const exec_list *actual_ps);
-
-   /**
-    * Name of the function.
-    */
-   const char *name;
-
-   /** Whether or not this function has a signature that isn't a built-in. */
-   bool has_user_signature();
-
-   /**
-    * List of ir_function_signature for each overloaded function with this name.
-    */
-   struct exec_list signatures;
-
-   /**
-    * is this function a subroutine type declaration
-    * e.g. subroutine void type1(float arg1);
-    */
-   bool is_subroutine;
-
-   /**
-    * is this function associated to a subroutine type
-    * e.g. subroutine (type1, type2) function_name { function_body };
-    * would have num_subroutine_types 2,
-    * and pointers to the type1 and type2 types.
-    */
-   int num_subroutine_types;
-   const struct glsl_type **subroutine_types;
-
-   int subroutine_index;
-};
-
-inline const char *ir_function_signature::function_name() const
-{
-   return this->_function->name;
-}
-/*@}*/
-
-
-/**
- * IR instruction representing high-level if-statements
- */
-class ir_if : public ir_instruction {
-public:
-   ir_if(ir_rvalue *condition)
-      : ir_instruction(ir_type_if), condition(condition)
-   {
-   }
-
-   virtual ir_if *clone(void *mem_ctx, struct hash_table *ht) const;
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   ir_rvalue *condition;
-   /** List of ir_instruction for the body of the then branch */
-   exec_list then_instructions;
-   /** List of ir_instruction for the body of the else branch */
-   exec_list else_instructions;
-};
-
-
-/**
- * IR instruction representing a high-level loop structure.
- */
-class ir_loop : public ir_instruction {
-public:
-   ir_loop();
-
-   virtual ir_loop *clone(void *mem_ctx, struct hash_table *ht) const;
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   /** List of ir_instruction that make up the body of the loop. */
-   exec_list body_instructions;
-};
-
-
-class ir_assignment : public ir_instruction {
-public:
-   ir_assignment(ir_rvalue *lhs, ir_rvalue *rhs, ir_rvalue *condition = NULL);
-
-   /**
-    * Construct an assignment with an explicit write mask
-    *
-    * \note
-    * Since a write mask is supplied, the LHS must already be a bare
-    * \c ir_dereference. There cannot be any swizzles in the LHS.
-    */
-   ir_assignment(ir_dereference *lhs, ir_rvalue *rhs, ir_rvalue *condition,
-                 unsigned write_mask);
-
-   virtual ir_assignment *clone(void *mem_ctx, struct hash_table *ht) const;
-
-   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   /**
-    * Get a whole variable written by an assignment
-    *
-    * If the LHS of the assignment writes a whole variable, the variable is
-    * returned. Otherwise \c NULL is returned. Examples of whole-variable
-    * assignment are:
-    *
-    *  - Assigning to a scalar
-    *  - Assigning to all components of a vector
-    *  - Whole array (or matrix) assignment
-    *  - Whole structure assignment
-    */
-   ir_variable *whole_variable_written();
-
-   /**
-    * Set the LHS of an assignment
-    */
-   void set_lhs(ir_rvalue *lhs);
-
-   /**
-    * Left-hand side of the assignment.
-    *
-    * This should be treated as read only. If you need to set the LHS of an
-    * assignment, use \c ir_assignment::set_lhs.
-    */
-   ir_dereference *lhs;
-
-   /**
-    * Value being assigned
-    */
-   ir_rvalue *rhs;
-
-   /**
-    * Optional condition for the assignment.
-    */
-   ir_rvalue *condition;
-
-
-   /**
-    * Component mask written
-    *
-    * For non-vector types in the LHS, this field will be zero. For vector
-    * types, a bit will be set for each component that is written. Note that
-    * for \c vec2 and \c vec3 types only the lower bits will ever be set.
-    *
-    * A partially-set write mask means that each enabled channel gets
-    * the value from a consecutive channel of the rhs. For example,
-    * to write just .xyw of gl_FragColor with color:
-    *
-    * (assign (constant bool (1)) (xyw)
-    *     (var_ref gl_FragColor)
-    *     (swiz xyw (var_ref color)))
-    */
-   unsigned write_mask:4;
-};
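Reviewer aside: a sketch of building the ".xyw" assignment from the comment above with the explicit write-mask constructor; mem_ctx, var and color are assumed to be a ralloc context and two vec4 ir_variables:

   /* rhs carries three components (color.xyw); write mask 0xb = .xyw */
   ir_rvalue *rhs =
      new(mem_ctx) ir_swizzle(new(mem_ctx) ir_dereference_variable(color),
                              0, 1, 3, 0, 3);
   ir_assignment *assign =
      new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(var),
                                 rhs, NULL, 0xb);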
-
-/* Update ir_expression::get_num_operands() and operator_strs when
- * updating this list.
- */
-enum ir_expression_operation {
-   ir_unop_bit_not,
-   ir_unop_logic_not,
-   ir_unop_neg,
-   ir_unop_abs,
-   ir_unop_sign,
-   ir_unop_rcp,
-   ir_unop_rsq,
-   ir_unop_sqrt,
-   ir_unop_exp, /**< Exponential (base e) on gentype */
-   ir_unop_log, /**< Natural log on gentype */
-   ir_unop_exp2,
-   ir_unop_log2,
-   ir_unop_f2i, /**< Float-to-integer conversion. */
-   ir_unop_f2u, /**< Float-to-unsigned conversion. */
-   ir_unop_i2f, /**< Integer-to-float conversion. */
-   ir_unop_f2b, /**< Float-to-boolean conversion */
-   ir_unop_b2f, /**< Boolean-to-float conversion */
-   ir_unop_i2b, /**< int-to-boolean conversion */
-   ir_unop_b2i, /**< Boolean-to-int conversion */
-   ir_unop_u2f, /**< Unsigned-to-float conversion. */
-   ir_unop_i2u, /**< Integer-to-unsigned conversion. */
-   ir_unop_u2i, /**< Unsigned-to-integer conversion. */
-   ir_unop_d2f, /**< Double-to-float conversion. */
-   ir_unop_f2d, /**< Float-to-double conversion. */
-   ir_unop_d2i, /**< Double-to-integer conversion. */
-   ir_unop_i2d, /**< Integer-to-double conversion. */
-   ir_unop_d2u, /**< Double-to-unsigned conversion. */
-   ir_unop_u2d, /**< Unsigned-to-double conversion. */
-   ir_unop_d2b, /**< Double-to-boolean conversion. */
-   ir_unop_bitcast_i2f, /**< Bit-identical int-to-float "conversion" */
-   ir_unop_bitcast_f2i, /**< Bit-identical float-to-int "conversion" */
-   ir_unop_bitcast_u2f, /**< Bit-identical uint-to-float "conversion" */
-   ir_unop_bitcast_f2u, /**< Bit-identical float-to-uint "conversion" */
-
-   /**
-    * \name Unary floating-point rounding operations.
-    */
-   /*@{*/
-   ir_unop_trunc,
-   ir_unop_ceil,
-   ir_unop_floor,
-   ir_unop_fract,
-   ir_unop_round_even,
-   /*@}*/
-
-   /**
-    * \name Trigonometric operations.
-    */
-   /*@{*/
-   ir_unop_sin,
-   ir_unop_cos,
-   /*@}*/
-
-   /**
-    * \name Partial derivatives.
-    */
-   /*@{*/
-   ir_unop_dFdx,
-   ir_unop_dFdx_coarse,
-   ir_unop_dFdx_fine,
-   ir_unop_dFdy,
-   ir_unop_dFdy_coarse,
-   ir_unop_dFdy_fine,
-   /*@}*/
-
-   /**
-    * \name Floating point pack and unpack operations.
-    */
-   /*@{*/
-   ir_unop_pack_snorm_2x16,
-   ir_unop_pack_snorm_4x8,
-   ir_unop_pack_unorm_2x16,
-   ir_unop_pack_unorm_4x8,
-   ir_unop_pack_half_2x16,
-   ir_unop_unpack_snorm_2x16,
-   ir_unop_unpack_snorm_4x8,
-   ir_unop_unpack_unorm_2x16,
-   ir_unop_unpack_unorm_4x8,
-   ir_unop_unpack_half_2x16,
-   /*@}*/
-
-   /**
-    * \name Lowered floating point unpacking operations.
-    *
-    * \see lower_packing_builtins_visitor::split_unpack_half_2x16
-    */
-   /*@{*/
-   ir_unop_unpack_half_2x16_split_x,
-   ir_unop_unpack_half_2x16_split_y,
-   /*@}*/
-
-   /**
-    * \name Bit operations, part of ARB_gpu_shader5.
-    */
-   /*@{*/
-   ir_unop_bitfield_reverse,
-   ir_unop_bit_count,
-   ir_unop_find_msb,
-   ir_unop_find_lsb,
-   /*@}*/
-
-   ir_unop_saturate,
-
-   /**
-    * \name Double packing, part of ARB_gpu_shader_fp64.
-    */
-   /*@{*/
-   ir_unop_pack_double_2x32,
-   ir_unop_unpack_double_2x32,
-   /*@}*/
-
-   ir_unop_frexp_sig,
-   ir_unop_frexp_exp,
-
-   ir_unop_noise,
-
-   ir_unop_subroutine_to_int,
-   /**
-    * Interpolate fs input at centroid
-    *
-    * operand0 is the fs input.
-    */
-   ir_unop_interpolate_at_centroid,
-
-   /**
-    * Ask the driver for the total size of a buffer block.
-    *
-    * operand0 is the ir_constant buffer block index in the linked shader.
-    */
-   ir_unop_get_buffer_size,
-
-   /**
-    * Calculate length of an unsized array inside a buffer block.
-    * This opcode is going to be replaced in a lowering pass inside
-    * the linker.
-    *
-    * operand0 is the unsized array's ir_rvalue for the calculation
-    * of its length.
-    */
-   ir_unop_ssbo_unsized_array_length,
-
-   /**
-    * A sentinel marking the last of the unary operations.
-    */
-   ir_last_unop = ir_unop_ssbo_unsized_array_length,
-
-   ir_binop_add,
-   ir_binop_sub,
-   ir_binop_mul, /**< Floating-point or low 32-bit integer multiply. */
-   ir_binop_imul_high, /**< Calculates the high 32-bits of a 64-bit multiply. */
-   ir_binop_div,
-
-   /**
-    * Returns the carry resulting from the addition of the two arguments.
-    */
-   /*@{*/
-   ir_binop_carry,
-   /*@}*/
-
-   /**
-    * Returns the borrow resulting from the subtraction of the second argument
-    * from the first argument.
-    */
-   /*@{*/
-   ir_binop_borrow,
-   /*@}*/
-
-   /**
-    * Takes one of two combinations of arguments:
-    *
-    * - mod(vecN, vecN)
-    * - mod(vecN, float)
-    *
-    * Does not take integer types.
-    */
-   ir_binop_mod,
-
-   /**
-    * \name Binary comparison operators which return a boolean vector.
-    * The type of both operands must be equal.
-    */
-   /*@{*/
-   ir_binop_less,
-   ir_binop_greater,
-   ir_binop_lequal,
-   ir_binop_gequal,
-   ir_binop_equal,
-   ir_binop_nequal,
-   /**
-    * Returns single boolean for whether all components of operands[0]
-    * equal the components of operands[1].
-    */
-   ir_binop_all_equal,
-   /**
-    * Returns single boolean for whether any component of operands[0]
-    * is not equal to the corresponding component of operands[1].
-    */
-   ir_binop_any_nequal,
-   /*@}*/
-
-   /**
-    * \name Bit-wise binary operations.
-    */
-   /*@{*/
-   ir_binop_lshift,
-   ir_binop_rshift,
-   ir_binop_bit_and,
-   ir_binop_bit_xor,
-   ir_binop_bit_or,
-   /*@}*/
-
-   ir_binop_logic_and,
-   ir_binop_logic_xor,
-   ir_binop_logic_or,
-
-   ir_binop_dot,
-   ir_binop_min,
-   ir_binop_max,
-
-   ir_binop_pow,
-
-   /**
-    * \name Lowered floating point packing operations.
-    *
-    * \see lower_packing_builtins_visitor::split_pack_half_2x16
-    */
-   /*@{*/
-   ir_binop_pack_half_2x16_split,
-   /*@}*/
-
-   /**
-    * Load a value the size of a given GLSL type from a uniform block.
-    *
-    * operand0 is the ir_constant uniform block index in the linked shader.
-    * operand1 is a byte offset within the uniform block.
-    */
-   ir_binop_ubo_load,
-
-   /**
-    * \name Multiplies a number by two to a power, part of ARB_gpu_shader5.
-    */
-   /*@{*/
-   ir_binop_ldexp,
-   /*@}*/
-
-   /**
-    * Extract a scalar from a vector
-    *
-    * operand0 is the vector
-    * operand1 is the index of the field to read from operand0
-    */
-   ir_binop_vector_extract,
-
-   /**
-    * Interpolate fs input at offset
-    *
-    * operand0 is the fs input
-    * operand1 is the offset from the pixel center
-    */
-   ir_binop_interpolate_at_offset,
-
-   /**
-    * Interpolate fs input at sample position
-    *
-    * operand0 is the fs input
-    * operand1 is the sample ID
-    */
-   ir_binop_interpolate_at_sample,
-
-   /**
-    * A sentinel marking the last of the binary operations.
-    */
-   ir_last_binop = ir_binop_interpolate_at_sample,
-
-   /**
-    * \name Fused floating-point multiply-add, part of ARB_gpu_shader5.
-    */
-   /*@{*/
-   ir_triop_fma,
-   /*@}*/
-
-   ir_triop_lrp,
-
-   /**
-    * \name Conditional Select
-    *
-    * A vector conditional select instruction (like ?:, but operating per-
-    * component on vectors).
-    *
-    * \see lower_instructions_visitor::ldexp_to_arith
-    */
-   /*@{*/
-   ir_triop_csel,
-   /*@}*/
-
-   ir_triop_bitfield_extract,
-
-   /**
-    * Generate a value with one field of a vector changed
-    *
-    * operand0 is the vector
-    * operand1 is the value to write into the vector result
-    * operand2 is the index in operand0 to be modified
-    */
-   ir_triop_vector_insert,
-
-   /**
-    * A sentinel marking the last of the ternary operations.
-    */
-   ir_last_triop = ir_triop_vector_insert,
-
-   ir_quadop_bitfield_insert,
-
-   ir_quadop_vector,
-
-   /**
-    * A sentinel marking the last of the quaternary operations.
-    */
-   ir_last_quadop = ir_quadop_vector,
-
-   /**
-    * A sentinel marking the last of all operations.
-    */
-   ir_last_opcode = ir_quadop_vector
-};
-
-class ir_expression : public ir_rvalue {
-public:
-   ir_expression(int op, const struct glsl_type *type,
-                 ir_rvalue *op0, ir_rvalue *op1 = NULL,
-                 ir_rvalue *op2 = NULL, ir_rvalue *op3 = NULL);
-
-   /**
-    * Constructor for unary operation expressions
-    */
-   ir_expression(int op, ir_rvalue *);
-
-   /**
-    * Constructor for binary operation expressions
-    */
-   ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1);
-
-   /**
-    * Constructor for ternary operation expressions
-    */
-   ir_expression(int op, ir_rvalue *op0, ir_rvalue *op1, ir_rvalue *op2);
-
-   virtual bool equals(const ir_instruction *ir,
-                       enum ir_node_type ignore = ir_type_unset) const;
-
-   virtual ir_expression *clone(void *mem_ctx, struct hash_table *ht) const;
-
-   /**
-    * Attempt to constant-fold the expression
-    *
-    * The "variable_context" hash table links ir_variable * to ir_constant *
-    * that represent the variables' values. \c NULL represents an empty
-    * context.
-    *
-    * If the expression cannot be constant folded, this method will return
-    * \c NULL.
-    */
-   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
-
-   /**
-    * Determine the number of operands used by an expression
-    */
-   static unsigned int get_num_operands(ir_expression_operation);
-
-   /**
-    * Determine the number of operands used by an expression
-    */
-   unsigned int get_num_operands() const
-   {
-      return (this->operation == ir_quadop_vector)
-         ? this->type->vector_elements : get_num_operands(operation);
-   }
-
-   /**
-    * Return whether the expression operates on vectors horizontally.
-    */
-   bool is_horizontal() const
-   {
-      return operation == ir_binop_all_equal ||
-             operation == ir_binop_any_nequal ||
-             operation == ir_binop_dot ||
-             operation == ir_binop_vector_extract ||
-             operation == ir_triop_vector_insert ||
-             operation == ir_quadop_vector;
-   }
-
-   /**
-    * Return a string representing this expression's operator.
-    */
-   const char *operator_string();
-
-   /**
-    * Return a string representing this expression's operator.
-    */
-   static const char *operator_string(ir_expression_operation);
-
-
-   /**
-    * Do a reverse-lookup to translate the given string into an operator.
-    */
-   static ir_expression_operation get_operator(const char *);
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   virtual ir_variable *variable_referenced() const;
-
-   ir_expression_operation operation;
-   ir_rvalue *operands[4];
-};
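Reviewer aside: for reference, the two-operand constructor infers the result type from its operands, and constant_expression_value() folds when possible. A made-up sketch:

   ir_expression *sum =
      new(mem_ctx) ir_expression(ir_binop_add,
                                 new(mem_ctx) ir_constant(2.0f),
                                 new(mem_ctx) ir_constant(3.0f));
   ir_constant *five = sum->constant_expression_value(); /* folds to 5.0 */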
-
-
-/**
- * HIR instruction representing a high-level function call, containing a list
- * of parameters and returning a value in the supplied temporary.
- */
-class ir_call : public ir_instruction {
-public:
-   ir_call(ir_function_signature *callee,
-           ir_dereference_variable *return_deref,
-           exec_list *actual_parameters)
-      : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(NULL), array_idx(NULL)
-   {
-      assert(callee->return_type != NULL);
-      actual_parameters->move_nodes_to(& this->actual_parameters);
-      this->use_builtin = callee->is_builtin();
-   }
-
-   ir_call(ir_function_signature *callee,
-           ir_dereference_variable *return_deref,
-           exec_list *actual_parameters,
-           ir_variable *var, ir_rvalue *array_idx)
-      : ir_instruction(ir_type_call), return_deref(return_deref), callee(callee), sub_var(var), array_idx(array_idx)
-   {
-      assert(callee->return_type != NULL);
-      actual_parameters->move_nodes_to(& this->actual_parameters);
-      this->use_builtin = callee->is_builtin();
-   }
-
-   virtual ir_call *clone(void *mem_ctx, struct hash_table *ht) const;
-
-   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   /**
-    * Get the name of the function being called.
-    */
-   const char *callee_name() const
-   {
-      return callee->function_name();
-   }
-
-   /**
-    * Generates an inline version of the function before @ir,
-    * storing the return value in return_deref.
-    */
-   void generate_inline(ir_instruction *ir);
-
-   /**
-    * Storage for the function's return value.
-    * This must be NULL if the return type is void.
-    */
-   ir_dereference_variable *return_deref;
-
-   /**
-    * The specific function signature being called.
-    */
-   ir_function_signature *callee;
-
-   /* List of ir_rvalue of parameters passed in this call. */
-   exec_list actual_parameters;
-
-   /** Should this call only bind to a built-in function? */
-   bool use_builtin;
-
-   /*
-    * ARB_shader_subroutine support -
-    * the subroutine uniform variable and array index
-    * rvalue to be used in the lowering pass later.
-    */
-   ir_variable *sub_var;
-   ir_rvalue *array_idx;
-};
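Reviewer aside: a sketch of the first constructor; sqrt_sig, result and x are hypothetical (a signature and two ir_variables):

   exec_list params;
   params.push_tail(new(mem_ctx) ir_dereference_variable(x));
   ir_call *call =
      new(mem_ctx) ir_call(sqrt_sig,
                           new(mem_ctx) ir_dereference_variable(result),
                           &params);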
-
-
-/**
- * \name Jump-like IR instructions.
- *
- * These include \c break, \c continue, \c return, and \c discard.
- */
-/*@{*/
-class ir_jump : public ir_instruction {
-protected:
-   ir_jump(enum ir_node_type t)
-      : ir_instruction(t)
-   {
-   }
-};
-
-class ir_return : public ir_jump {
-public:
-   ir_return()
-      : ir_jump(ir_type_return), value(NULL)
-   {
-   }
-
-   ir_return(ir_rvalue *value)
-      : ir_jump(ir_type_return), value(value)
-   {
-   }
-
-   virtual ir_return *clone(void *mem_ctx, struct hash_table *) const;
-
-   ir_rvalue *get_value() const
-   {
-      return value;
-   }
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   ir_rvalue *value;
-};
-
-
-/**
- * Jump instructions used inside loops
- *
- * These include \c break and \c continue. The \c break within a loop is
- * different from the \c break within a switch-statement.
- *
- * \sa ir_switch_jump
- */
-class ir_loop_jump : public ir_jump {
-public:
-   enum jump_mode {
-      jump_break,
-      jump_continue
-   };
-
-   ir_loop_jump(jump_mode mode)
-      : ir_jump(ir_type_loop_jump)
-   {
-      this->mode = mode;
-   }
-
-   virtual ir_loop_jump *clone(void *mem_ctx, struct hash_table *) const;
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   bool is_break() const
-   {
-      return mode == jump_break;
-   }
-
-   bool is_continue() const
-   {
-      return mode == jump_continue;
-   }
-
-   /** Mode selector for the jump instruction. */
-   enum jump_mode mode;
-};
-
-/**
- * IR instruction representing discard statements.
- */
-class ir_discard : public ir_jump {
-public:
-   ir_discard()
-      : ir_jump(ir_type_discard)
-   {
-      this->condition = NULL;
-   }
-
-   ir_discard(ir_rvalue *cond)
-      : ir_jump(ir_type_discard)
-   {
-      this->condition = cond;
-   }
-
-   virtual ir_discard *clone(void *mem_ctx, struct hash_table *ht) const;
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   ir_rvalue *condition;
-};
-/*@}*/
-
-
-/**
- * Texture sampling opcodes used in ir_texture
- */
-enum ir_texture_opcode {
-   ir_tex, /**< Regular texture look-up */
-   ir_txb, /**< Texture look-up with LOD bias */
-   ir_txl, /**< Texture look-up with explicit LOD */
-   ir_txd, /**< Texture look-up with partial derivatives */
-   ir_txf, /**< Texel fetch with explicit LOD */
-   ir_txf_ms, /**< Multisample texture fetch */
-   ir_txs, /**< Texture size */
-   ir_lod, /**< Texture lod query */
-   ir_tg4, /**< Texture gather */
-   ir_query_levels, /**< Texture levels query */
-   ir_texture_samples, /**< Texture samples query */
-   ir_samples_identical, /**< Query whether all samples are definitely identical. */
-};
-
-
-/**
- * IR instruction to sample a texture
- *
- * The specific form of the IR instruction depends on the \c mode value
- * selected from \c ir_texture_opcodes. In the printed IR, these will
- * appear as:
- *
- *                                    Texel offset (0 or an expression)
- *                                    | Projection divisor
- *                                    | |  Shadow comparitor
- *                                    | |  |
- *                                    v v  v
- * (tex <type> <sampler> <coordinate> 0 1 ( ))
- * (txb <type> <sampler> <coordinate> 0 1 ( ) <bias>)
- * (txl <type> <sampler> <coordinate> 0 1 ( ) <lod>)
- * (txd <type> <sampler> <coordinate> 0 1 ( ) (dPdx dPdy))
- * (txf <type> <sampler> <coordinate> 0 <lod>)
- * (txf_ms <type> <sampler> <coordinate> <sample_index>)
- * (txs <type> <sampler> <lod>)
- * (lod <type> <sampler> <coordinate>)
- * (tg4 <type> <sampler> <coordinate> <offset> <component>)
- * (query_levels <type> <sampler>)
- * (samples_identical <type> <sampler> <coordinate>)
- */
-class ir_texture : public ir_rvalue {
-public:
-   ir_texture(enum ir_texture_opcode op)
-      : ir_rvalue(ir_type_texture),
-        op(op), sampler(NULL), coordinate(NULL), projector(NULL),
-        shadow_comparitor(NULL), offset(NULL)
-   {
-      memset(&lod_info, 0, sizeof(lod_info));
-   }
-
-   virtual ir_texture *clone(void *mem_ctx, struct hash_table *) const;
-
-   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   virtual bool equals(const ir_instruction *ir,
-                       enum ir_node_type ignore = ir_type_unset) const;
-
-   /**
-    * Return a string representing the ir_texture_opcode.
-    */
-   const char *opcode_string();
-
-   /** Set the sampler and type. */
-   void set_sampler(ir_dereference *sampler, const glsl_type *type);
-
-   /**
-    * Do a reverse-lookup to translate a string into an ir_texture_opcode.
-    */
-   static ir_texture_opcode get_opcode(const char *);
-
-   enum ir_texture_opcode op;
-
-   /** Sampler to use for the texture access.
*/ - ir_dereference *sampler; - - /** Texture coordinate to sample */ - ir_rvalue *coordinate; - - /** - * Value used for projective divide. - * - * If there is no projective divide (the common case), this will be - * \c NULL. Optimization passes should check for this to point to a constant - * of 1.0 and replace that with \c NULL. - */ - ir_rvalue *projector; - - /** - * Coordinate used for comparison on shadow look-ups. - * - * If there is no shadow comparison, this will be \c NULL. For the - * \c ir_txf opcode, this *must* be \c NULL. - */ - ir_rvalue *shadow_comparitor; - - /** Texel offset. */ - ir_rvalue *offset; - - union { - ir_rvalue *lod; /**< Floating point LOD */ - ir_rvalue *bias; /**< Floating point LOD bias */ - ir_rvalue *sample_index; /**< MSAA sample index */ - ir_rvalue *component; /**< Gather component selector */ - struct { - ir_rvalue *dPdx; /**< Partial derivative of coordinate wrt X */ - ir_rvalue *dPdy; /**< Partial derivative of coordinate wrt Y */ - } grad; - } lod_info; -}; - - -struct ir_swizzle_mask { - unsigned x:2; - unsigned y:2; - unsigned z:2; - unsigned w:2; - - /** - * Number of components in the swizzle. - */ - unsigned num_components:3; - - /** - * Does the swizzle contain duplicate components? - * - * L-value swizzles cannot contain duplicate components. - */ - unsigned has_duplicates:1; -}; - - -class ir_swizzle : public ir_rvalue { -public: - ir_swizzle(ir_rvalue *, unsigned x, unsigned y, unsigned z, unsigned w, - unsigned count); - - ir_swizzle(ir_rvalue *val, const unsigned *components, unsigned count); - - ir_swizzle(ir_rvalue *val, ir_swizzle_mask mask); - - virtual ir_swizzle *clone(void *mem_ctx, struct hash_table *) const; - - virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL); - - /** - * Construct an ir_swizzle from the textual representation. Can fail. - */ - static ir_swizzle *create(ir_rvalue *, const char *, unsigned vector_length); - - virtual void accept(ir_visitor *v) - { - v->visit(this); - } - - virtual ir_visitor_status accept(ir_hierarchical_visitor *); - - virtual bool equals(const ir_instruction *ir, - enum ir_node_type ignore = ir_type_unset) const; - - bool is_lvalue() const - { - return val->is_lvalue() && !mask.has_duplicates; - } - - /** - * Get the variable that is ultimately referenced by an r-value - */ - virtual ir_variable *variable_referenced() const; - - ir_rvalue *val; - ir_swizzle_mask mask; - -private: - /** - * Initialize the mask component of a swizzle - * - * This is used by the \c ir_swizzle constructors. 
-    */
-   void init_mask(const unsigned *components, unsigned count);
-};
-
-
-class ir_dereference : public ir_rvalue {
-public:
-   virtual ir_dereference *clone(void *mem_ctx, struct hash_table *) const = 0;
-
-   bool is_lvalue() const;
-
-   /**
-    * Get the variable that is ultimately referenced by an r-value
-    */
-   virtual ir_variable *variable_referenced() const = 0;
-
-protected:
-   ir_dereference(enum ir_node_type t)
-      : ir_rvalue(t)
-   {
-   }
-};
-
-
-class ir_dereference_variable : public ir_dereference {
-public:
-   ir_dereference_variable(ir_variable *var);
-
-   virtual ir_dereference_variable *clone(void *mem_ctx,
-                                          struct hash_table *) const;
-
-   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
-
-   virtual bool equals(const ir_instruction *ir,
-                       enum ir_node_type ignore = ir_type_unset) const;
-
-   /**
-    * Get the variable that is ultimately referenced by an r-value
-    */
-   virtual ir_variable *variable_referenced() const
-   {
-      return this->var;
-   }
-
-   virtual ir_variable *whole_variable_referenced()
-   {
-      /* ir_dereference_variable objects always dereference the entire
-       * variable. However, if this dereference is dereferenced by anything
-       * else, the complete dereference chain is not a whole-variable
-       * dereference. This method should only be called on the top most
-       * ir_rvalue in a dereference chain.
-       */
-      return this->var;
-   }
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   /**
-    * Object being dereferenced.
-    */
-   ir_variable *var;
-};
-
-
-class ir_dereference_array : public ir_dereference {
-public:
-   ir_dereference_array(ir_rvalue *value, ir_rvalue *array_index);
-
-   ir_dereference_array(ir_variable *var, ir_rvalue *array_index);
-
-   virtual ir_dereference_array *clone(void *mem_ctx,
-                                       struct hash_table *) const;
-
-   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
-
-   virtual bool equals(const ir_instruction *ir,
-                       enum ir_node_type ignore = ir_type_unset) const;
-
-   /**
-    * Get the variable that is ultimately referenced by an r-value
-    */
-   virtual ir_variable *variable_referenced() const
-   {
-      return this->array->variable_referenced();
-   }
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   ir_rvalue *array;
-   ir_rvalue *array_index;
-
-private:
-   void set_array(ir_rvalue *value);
-};
-
-
-class ir_dereference_record : public ir_dereference {
-public:
-   ir_dereference_record(ir_rvalue *value, const char *field);
-
-   ir_dereference_record(ir_variable *var, const char *field);
-
-   virtual ir_dereference_record *clone(void *mem_ctx,
-                                        struct hash_table *) const;
-
-   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
-
-   /**
-    * Get the variable that is ultimately referenced by an r-value
-    */
-   virtual ir_variable *variable_referenced() const
-   {
-      return this->record->variable_referenced();
-   }
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   ir_rvalue *record;
-   const char *field;
-};
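Reviewer aside: these three classes nest to describe l-values such as "s.field[i]", and variable_referenced() walks back to the root. A sketch with hypothetical variables s and i:

   ir_dereference *deref =
      new(mem_ctx) ir_dereference_array(
         new(mem_ctx) ir_dereference_record(s, "field"),
         new(mem_ctx) ir_dereference_variable(i));
   ir_variable *root = deref->variable_referenced(); /* yields s */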
-
-
-/**
- * Data stored in an ir_constant
- */
-union ir_constant_data {
-      unsigned u[16];
-      int i[16];
-      float f[16];
-      bool b[16];
-      double d[16];
-};
-
-
-class ir_constant : public ir_rvalue {
-public:
-   ir_constant(const struct glsl_type *type, const ir_constant_data *data);
-   ir_constant(bool b, unsigned vector_elements=1);
-   ir_constant(unsigned int u, unsigned vector_elements=1);
-   ir_constant(int i, unsigned vector_elements=1);
-   ir_constant(float f, unsigned vector_elements=1);
-   ir_constant(double d, unsigned vector_elements=1);
-
-   /**
-    * Construct an ir_constant from a list of ir_constant values
-    */
-   ir_constant(const struct glsl_type *type, exec_list *values);
-
-   /**
-    * Construct an ir_constant from a scalar component of another ir_constant
-    *
-    * The new \c ir_constant inherits the type of the component from the
-    * source constant.
-    *
-    * \note
-    * In the case of a matrix constant, the new constant is a scalar, \b not
-    * a vector.
-    */
-   ir_constant(const ir_constant *c, unsigned i);
-
-   /**
-    * Return a new ir_constant of the specified type containing all zeros.
-    */
-   static ir_constant *zero(void *mem_ctx, const glsl_type *type);
-
-   virtual ir_constant *clone(void *mem_ctx, struct hash_table *) const;
-
-   virtual ir_constant *constant_expression_value(struct hash_table *variable_context = NULL);
-
-   virtual void accept(ir_visitor *v)
-   {
-      v->visit(this);
-   }
-
-   virtual ir_visitor_status accept(ir_hierarchical_visitor *);
-
-   virtual bool equals(const ir_instruction *ir,
-                       enum ir_node_type ignore = ir_type_unset) const;
-
-   /**
-    * Get a particular component of a constant as a specific type
-    *
-    * This is useful, for example, to get a value from an integer constant
-    * as a float or bool. This appears frequently when constructors are
-    * called with all constant parameters.
-    */
-   /*@{*/
-   bool get_bool_component(unsigned i) const;
-   float get_float_component(unsigned i) const;
-   double get_double_component(unsigned i) const;
-   int get_int_component(unsigned i) const;
-   unsigned get_uint_component(unsigned i) const;
-   /*@}*/
-
-   ir_constant *get_array_element(unsigned i) const;
-
-   ir_constant *get_record_field(const char *name);
-
-   /**
-    * Copy the values from another constant at a given offset.
-    *
-    * The offset is ignored for array or struct copies; it's only for
-    * scalars or vectors into vectors or matrices.
-    *
-    * With identical types on both sides and zero offset it's clone()
-    * without creating a new object.
-    */
-
-   void copy_offset(ir_constant *src, int offset);
-
-   /**
-    * Copy the values from another constant at a given offset and
-    * following an assign-like mask.
-    *
-    * The mask is ignored for scalars.
-    *
-    * Note that this function only handles what assign can handle,
-    * i.e. at most a vector as source and a column of a matrix as
-    * destination.
-    */
-
-   void copy_masked_offset(ir_constant *src, int offset, unsigned int mask);
-
-   /**
-    * Determine whether a constant has the same value as another constant
-    *
-    * \sa ir_constant::is_zero, ir_constant::is_one,
-    * ir_constant::is_negative_one
-    */
-   bool has_value(const ir_constant *) const;
-
-   /**
-    * Return true if this ir_constant represents the given value.
-    *
-    * For vectors, this checks that each component is the given value.
-    */
-   virtual bool is_value(float f, int i) const;
-   virtual bool is_zero() const;
-   virtual bool is_one() const;
-   virtual bool is_negative_one() const;
-
-   /**
-    * Return true for constants that could be stored as 16-bit unsigned values.
-    *
-    * Note that this will return true even for signed integer ir_constants, as
-    * long as the value is non-negative and fits in 16-bits.
-    */
-   virtual bool is_uint16_constant() const;
-
-   /**
-    * Value of the constant.
- * - * The field used to back the values supplied by the constant is determined - * by the type associated with the \c ir_instruction. Constants may be - * scalars, vectors, or matrices. - */ - union ir_constant_data value; - - /* Array elements */ - ir_constant **array_elements; - - /* Structure fields */ - exec_list components; - -private: - /** - * Parameterless constructor only used by the clone method - */ - ir_constant(void); -}; - -/** - * IR instruction to emit a vertex in a geometry shader. - */ -class ir_emit_vertex : public ir_instruction { -public: - ir_emit_vertex(ir_rvalue *stream) - : ir_instruction(ir_type_emit_vertex), - stream(stream) - { - assert(stream); - } - - virtual void accept(ir_visitor *v) - { - v->visit(this); - } - - virtual ir_emit_vertex *clone(void *mem_ctx, struct hash_table *ht) const - { - return new(mem_ctx) ir_emit_vertex(this->stream->clone(mem_ctx, ht)); - } - - virtual ir_visitor_status accept(ir_hierarchical_visitor *); - - int stream_id() const - { - return stream->as_constant()->value.i[0]; - } - - ir_rvalue *stream; -}; - -/** - * IR instruction to complete the current primitive and start a new one in a - * geometry shader. - */ -class ir_end_primitive : public ir_instruction { -public: - ir_end_primitive(ir_rvalue *stream) - : ir_instruction(ir_type_end_primitive), - stream(stream) - { - assert(stream); - } - - virtual void accept(ir_visitor *v) - { - v->visit(this); - } - - virtual ir_end_primitive *clone(void *mem_ctx, struct hash_table *ht) const - { - return new(mem_ctx) ir_end_primitive(this->stream->clone(mem_ctx, ht)); - } - - virtual ir_visitor_status accept(ir_hierarchical_visitor *); - - int stream_id() const - { - return stream->as_constant()->value.i[0]; - } - - ir_rvalue *stream; -}; - -/** - * IR instruction for tessellation control and compute shader barrier. - */ -class ir_barrier : public ir_instruction { -public: - ir_barrier() - : ir_instruction(ir_type_barrier) - { - } - - virtual void accept(ir_visitor *v) - { - v->visit(this); - } - - virtual ir_barrier *clone(void *mem_ctx, struct hash_table *) const - { - return new(mem_ctx) ir_barrier(); - } - - virtual ir_visitor_status accept(ir_hierarchical_visitor *); -}; - -/*@}*/ - -/** - * Apply a visitor to each IR node in a list - */ -void -visit_exec_list(exec_list *list, ir_visitor *visitor); - -/** - * Validate invariants on each IR node in a list - */ -void validate_ir_tree(exec_list *instructions); - -struct _mesa_glsl_parse_state; -struct gl_shader_program; - -/** - * Detect whether an unlinked shader contains static recursion - * - * If the list of instructions is determined to contain static recursion, - * \c _mesa_glsl_error will be called to emit error messages for each function - * that is in the recursion cycle. - */ -void -detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, - exec_list *instructions); - -/** - * Detect whether a linked shader contains static recursion - * - * If the list of instructions is determined to contain static recursion, - * \c link_error_printf will be called to emit error messages for each function - * that is in the recursion cycle. In addition, - * \c gl_shader_program::LinkStatus will be set to false. 
- */
-void
-detect_recursion_linked(struct gl_shader_program *prog,
-                        exec_list *instructions);
-
-/**
- * Make a clone of each IR instruction in a list
- *
- * \param in   List of IR instructions that are to be cloned
- * \param out  List to hold the cloned instructions
- */
-void
-clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in);
-
-extern void
-_mesa_glsl_initialize_variables(exec_list *instructions,
-                                struct _mesa_glsl_parse_state *state);
-
-extern void
-_mesa_glsl_initialize_derived_variables(gl_shader *shader);
-
-extern void
-_mesa_glsl_initialize_functions(_mesa_glsl_parse_state *state);
-
-extern void
-_mesa_glsl_initialize_builtin_functions();
-
-extern ir_function_signature *
-_mesa_glsl_find_builtin_function(_mesa_glsl_parse_state *state,
-                                 const char *name, exec_list *actual_parameters);
-
-extern ir_function *
-_mesa_glsl_find_builtin_function_by_name(const char *name);
-
-extern gl_shader *
-_mesa_glsl_get_builtin_function_shader(void);
-
-extern ir_function_signature *
-_mesa_get_main_function_signature(gl_shader *sh);
-
-extern void
-_mesa_glsl_release_functions(void);
-
-extern void
-_mesa_glsl_release_builtin_functions(void);
-
-extern void
-reparent_ir(exec_list *list, void *mem_ctx);
-
-struct glsl_symbol_table;
-
-extern void
-import_prototypes(const exec_list *source, exec_list *dest,
-                  struct glsl_symbol_table *symbols, void *mem_ctx);
-
-extern bool
-ir_has_call(ir_instruction *ir);
-
-extern void
-do_set_program_inouts(exec_list *instructions, struct gl_program *prog,
-                      gl_shader_stage shader_stage);
-
-extern char *
-prototype_string(const glsl_type *return_type, const char *name,
-                 exec_list *parameters);
-
-const char *
-mode_string(const ir_variable *var);
-
-/**
- * Built-in / reserved GL variable names start with "gl_"
- */
-static inline bool
-is_gl_identifier(const char *s)
-{
-   return s && s[0] == 'g' && s[1] == 'l' && s[2] == '_';
-}
-
-extern "C" {
-#endif /* __cplusplus */
-
-extern void _mesa_print_ir(FILE *f, struct exec_list *instructions,
-                           struct _mesa_glsl_parse_state *state);
-
-extern void
-fprint_ir(FILE *f, const void *instruction);
-
-#ifdef __cplusplus
-} /* extern "C" */
-#endif
-
-unsigned
-vertices_per_prim(GLenum prim);
-
-#endif /* IR_H */
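Reviewer aside: typical use of the cloning and validation entry points declared above; instructions here stands in for some shader's exec_list:

   exec_list copy;
   clone_ir_list(mem_ctx, &copy, instructions); /* deep copy */
   validate_ir_tree(&copy);                     /* check IR invariants */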
diff --git a/src/glsl/ir_basic_block.cpp b/src/glsl/ir_basic_block.cpp
deleted file mode 100644
index 15481aa47f6..00000000000
--- a/src/glsl/ir_basic_block.cpp
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file ir_basic_block.cpp
- *
- * Basic block analysis of instruction streams.
- */
-
-#include "ir.h"
-#include "ir_basic_block.h"
-
-/**
- * Calls a user function for every basic block in the instruction stream.
- *
- * Basic block analysis is pretty easy in our IR thanks to the lack of
- * unstructured control flow. We've got:
- *
- * ir_loop (for () {}, while () {}, do {} while ())
- * ir_loop_jump (break, continue)
- * ir_if () {}
- * ir_return
- * ir_call()
- *
- * Note that the basic blocks returned by this don't encompass all
- * operations performed by the program -- for example, if conditions
- * don't get returned, nor do the assignments that will be generated
- * for ir_call parameters.
- */
-void call_for_basic_blocks(exec_list *instructions,
-                           void (*callback)(ir_instruction *first,
-                                            ir_instruction *last,
-                                            void *data),
-                           void *data)
-{
-   ir_instruction *leader = NULL;
-   ir_instruction *last = NULL;
-
-   foreach_in_list(ir_instruction, ir, instructions) {
-      ir_if *ir_if;
-      ir_loop *ir_loop;
-      ir_function *ir_function;
-
-      if (!leader)
-         leader = ir;
-
-      if ((ir_if = ir->as_if())) {
-         callback(leader, ir, data);
-         leader = NULL;
-
-         call_for_basic_blocks(&ir_if->then_instructions, callback, data);
-         call_for_basic_blocks(&ir_if->else_instructions, callback, data);
-      } else if ((ir_loop = ir->as_loop())) {
-         callback(leader, ir, data);
-         leader = NULL;
-         call_for_basic_blocks(&ir_loop->body_instructions, callback, data);
-      } else if (ir->as_jump() || ir->as_call()) {
-         callback(leader, ir, data);
-         leader = NULL;
-      } else if ((ir_function = ir->as_function())) {
-         /* A function definition doesn't interrupt our basic block
-          * since execution doesn't go into it. We should process the
-          * bodies of its signatures for BBs, though.
-          *
-          * Note that we miss an opportunity for producing more
-          * maximal BBs between the instructions that precede main()
-          * and the body of main(). Perhaps those instructions ought
-          * to live inside of main().
-          */
-         foreach_in_list(ir_function_signature, ir_sig, &ir_function->signatures) {
-            call_for_basic_blocks(&ir_sig->body, callback, data);
-         }
-      }
-      last = ir;
-   }
-   if (leader) {
-      callback(leader, last, data);
-   }
-}
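Reviewer aside: a minimal callback for the function above, counting basic blocks (names are made up):

   static void
   count_block(ir_instruction *first, ir_instruction *last, void *data)
   {
      (void) first;
      (void) last;
      ++*(unsigned *) data;
   }

   unsigned n = 0;
   call_for_basic_blocks(instructions, count_block, &n);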
diff --git a/src/glsl/ir_basic_block.h b/src/glsl/ir_basic_block.h
deleted file mode 100644
index dbd678b5c4f..00000000000
--- a/src/glsl/ir_basic_block.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-void call_for_basic_blocks(exec_list *instructions,
-                           void (*callback)(ir_instruction *first,
-                                            ir_instruction *last,
-                                            void *data),
-                           void *data);
diff --git a/src/glsl/ir_builder.cpp b/src/glsl/ir_builder.cpp
deleted file mode 100644
index c9cf1240dfe..00000000000
--- a/src/glsl/ir_builder.cpp
+++ /dev/null
@@ -1,612 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */ - -#include "ir_builder.h" -#include "program/prog_instruction.h" - -using namespace ir_builder; - -namespace ir_builder { - -void -ir_factory::emit(ir_instruction *ir) -{ - instructions->push_tail(ir); -} - -ir_variable * -ir_factory::make_temp(const glsl_type *type, const char *name) -{ - ir_variable *var; - - var = new(mem_ctx) ir_variable(type, name, ir_var_temporary); - emit(var); - - return var; -} - -ir_assignment * -assign(deref lhs, operand rhs, operand condition, int writemask) -{ - void *mem_ctx = ralloc_parent(lhs.val); - - ir_assignment *assign = new(mem_ctx) ir_assignment(lhs.val, - rhs.val, - condition.val, - writemask); - - return assign; -} - -ir_assignment * -assign(deref lhs, operand rhs) -{ - return assign(lhs, rhs, (1 << lhs.val->type->vector_elements) - 1); -} - -ir_assignment * -assign(deref lhs, operand rhs, int writemask) -{ - return assign(lhs, rhs, (ir_rvalue *) NULL, writemask); -} - -ir_assignment * -assign(deref lhs, operand rhs, operand condition) -{ - return assign(lhs, rhs, condition, (1 << lhs.val->type->vector_elements) - 1); -} - -ir_return * -ret(operand retval) -{ - void *mem_ctx = ralloc_parent(retval.val); - return new(mem_ctx) ir_return(retval.val); -} - -ir_swizzle * -swizzle(operand a, int swizzle, int components) -{ - void *mem_ctx = ralloc_parent(a.val); - - return new(mem_ctx) ir_swizzle(a.val, - GET_SWZ(swizzle, 0), - GET_SWZ(swizzle, 1), - GET_SWZ(swizzle, 2), - GET_SWZ(swizzle, 3), - components); -} - -ir_swizzle * -swizzle_for_size(operand a, unsigned components) -{ - void *mem_ctx = ralloc_parent(a.val); - - if (a.val->type->vector_elements < components) - components = a.val->type->vector_elements; - - unsigned s[4] = { 0, 1, 2, 3 }; - for (int i = components; i < 4; i++) - s[i] = components - 1; - - return new(mem_ctx) ir_swizzle(a.val, s, components); -} - -ir_swizzle * -swizzle_xxxx(operand a) -{ - return swizzle(a, SWIZZLE_XXXX, 4); -} - -ir_swizzle * -swizzle_yyyy(operand a) -{ - return swizzle(a, SWIZZLE_YYYY, 4); -} - -ir_swizzle * -swizzle_zzzz(operand a) -{ - return swizzle(a, SWIZZLE_ZZZZ, 4); -} - -ir_swizzle * -swizzle_wwww(operand a) -{ - return swizzle(a, SWIZZLE_WWWW, 4); -} - -ir_swizzle * -swizzle_x(operand a) -{ - return swizzle(a, SWIZZLE_XXXX, 1); -} - -ir_swizzle * -swizzle_y(operand a) -{ - return swizzle(a, SWIZZLE_YYYY, 1); -} - -ir_swizzle * -swizzle_z(operand a) -{ - return swizzle(a, SWIZZLE_ZZZZ, 1); -} - -ir_swizzle * -swizzle_w(operand a) -{ - return swizzle(a, SWIZZLE_WWWW, 1); -} - -ir_swizzle * -swizzle_xy(operand a) -{ - return swizzle(a, SWIZZLE_XYZW, 2); -} - -ir_swizzle * -swizzle_xyz(operand a) -{ - return swizzle(a, SWIZZLE_XYZW, 3); -} - -ir_swizzle * -swizzle_xyzw(operand a) -{ - return swizzle(a, SWIZZLE_XYZW, 4); -} - -ir_expression * -expr(ir_expression_operation op, operand a) -{ - void *mem_ctx = ralloc_parent(a.val); - - return new(mem_ctx) ir_expression(op, a.val); -} - -ir_expression * -expr(ir_expression_operation op, operand a, operand b) -{ - void *mem_ctx = ralloc_parent(a.val); - - return new(mem_ctx) ir_expression(op, a.val, b.val); -} - -ir_expression * -expr(ir_expression_operation op, operand a, operand b, operand c) -{ - void *mem_ctx = ralloc_parent(a.val); - - return new(mem_ctx) ir_expression(op, a.val, b.val, c.val); -} - -ir_expression *add(operand a, operand b) -{ - return expr(ir_binop_add, a, b); -} - -ir_expression *sub(operand a, operand b) -{ - return expr(ir_binop_sub, a, b); -} - -ir_expression *min2(operand a, operand b) -{ - return expr(ir_binop_min, a, b); -} - 
-ir_expression *max2(operand a, operand b) -{ - return expr(ir_binop_max, a, b); -} - -ir_expression *mul(operand a, operand b) -{ - return expr(ir_binop_mul, a, b); -} - -ir_expression *imul_high(operand a, operand b) -{ - return expr(ir_binop_imul_high, a, b); -} - -ir_expression *div(operand a, operand b) -{ - return expr(ir_binop_div, a, b); -} - -ir_expression *carry(operand a, operand b) -{ - return expr(ir_binop_carry, a, b); -} - -ir_expression *borrow(operand a, operand b) -{ - return expr(ir_binop_borrow, a, b); -} - -ir_expression *trunc(operand a) -{ - return expr(ir_unop_trunc, a); -} - -ir_expression *round_even(operand a) -{ - return expr(ir_unop_round_even, a); -} - -ir_expression *fract(operand a) -{ - return expr(ir_unop_fract, a); -} - -/* dot for vectors, mul for scalars */ -ir_expression *dot(operand a, operand b) -{ - assert(a.val->type == b.val->type); - - if (a.val->type->vector_elements == 1) - return expr(ir_binop_mul, a, b); - - return expr(ir_binop_dot, a, b); -} - -ir_expression* -clamp(operand a, operand b, operand c) -{ - return expr(ir_binop_min, expr(ir_binop_max, a, b), c); -} - -ir_expression * -saturate(operand a) -{ - return expr(ir_unop_saturate, a); -} - -ir_expression * -abs(operand a) -{ - return expr(ir_unop_abs, a); -} - -ir_expression * -neg(operand a) -{ - return expr(ir_unop_neg, a); -} - -ir_expression * -sin(operand a) -{ - return expr(ir_unop_sin, a); -} - -ir_expression * -cos(operand a) -{ - return expr(ir_unop_cos, a); -} - -ir_expression * -exp(operand a) -{ - return expr(ir_unop_exp, a); -} - -ir_expression * -rsq(operand a) -{ - return expr(ir_unop_rsq, a); -} - -ir_expression * -sqrt(operand a) -{ - return expr(ir_unop_sqrt, a); -} - -ir_expression * -log(operand a) -{ - return expr(ir_unop_log, a); -} - -ir_expression * -sign(operand a) -{ - return expr(ir_unop_sign, a); -} - -ir_expression * -subr_to_int(operand a) -{ - return expr(ir_unop_subroutine_to_int, a); -} - -ir_expression* -equal(operand a, operand b) -{ - return expr(ir_binop_equal, a, b); -} - -ir_expression* -nequal(operand a, operand b) -{ - return expr(ir_binop_nequal, a, b); -} - -ir_expression* -less(operand a, operand b) -{ - return expr(ir_binop_less, a, b); -} - -ir_expression* -greater(operand a, operand b) -{ - return expr(ir_binop_greater, a, b); -} - -ir_expression* -lequal(operand a, operand b) -{ - return expr(ir_binop_lequal, a, b); -} - -ir_expression* -gequal(operand a, operand b) -{ - return expr(ir_binop_gequal, a, b); -} - -ir_expression* -logic_not(operand a) -{ - return expr(ir_unop_logic_not, a); -} - -ir_expression* -logic_and(operand a, operand b) -{ - return expr(ir_binop_logic_and, a, b); -} - -ir_expression* -logic_or(operand a, operand b) -{ - return expr(ir_binop_logic_or, a, b); -} - -ir_expression* -bit_not(operand a) -{ - return expr(ir_unop_bit_not, a); -} - -ir_expression* -bit_and(operand a, operand b) -{ - return expr(ir_binop_bit_and, a, b); -} - -ir_expression* -bit_or(operand a, operand b) -{ - return expr(ir_binop_bit_or, a, b); -} - -ir_expression* -lshift(operand a, operand b) -{ - return expr(ir_binop_lshift, a, b); -} - -ir_expression* -rshift(operand a, operand b) -{ - return expr(ir_binop_rshift, a, b); -} - -ir_expression* -f2i(operand a) -{ - return expr(ir_unop_f2i, a); -} - -ir_expression* -bitcast_f2i(operand a) -{ - return expr(ir_unop_bitcast_f2i, a); -} - -ir_expression* -i2f(operand a) -{ - return expr(ir_unop_i2f, a); -} - -ir_expression* -bitcast_i2f(operand a) -{ - return expr(ir_unop_bitcast_i2f, a); -} - 
-ir_expression* -i2u(operand a) -{ - return expr(ir_unop_i2u, a); -} - -ir_expression* -u2i(operand a) -{ - return expr(ir_unop_u2i, a); -} - -ir_expression* -f2u(operand a) -{ - return expr(ir_unop_f2u, a); -} - -ir_expression* -bitcast_f2u(operand a) -{ - return expr(ir_unop_bitcast_f2u, a); -} - -ir_expression* -u2f(operand a) -{ - return expr(ir_unop_u2f, a); -} - -ir_expression* -bitcast_u2f(operand a) -{ - return expr(ir_unop_bitcast_u2f, a); -} - -ir_expression* -i2b(operand a) -{ - return expr(ir_unop_i2b, a); -} - -ir_expression* -b2i(operand a) -{ - return expr(ir_unop_b2i, a); -} - -ir_expression * -f2b(operand a) -{ - return expr(ir_unop_f2b, a); -} - -ir_expression * -b2f(operand a) -{ - return expr(ir_unop_b2f, a); -} - -ir_expression * -interpolate_at_centroid(operand a) -{ - return expr(ir_unop_interpolate_at_centroid, a); -} - -ir_expression * -interpolate_at_offset(operand a, operand b) -{ - return expr(ir_binop_interpolate_at_offset, a, b); -} - -ir_expression * -interpolate_at_sample(operand a, operand b) -{ - return expr(ir_binop_interpolate_at_sample, a, b); -} - -ir_expression * -f2d(operand a) -{ - return expr(ir_unop_f2d, a); -} - -ir_expression * -i2d(operand a) -{ - return expr(ir_unop_i2d, a); -} - -ir_expression * -u2d(operand a) -{ - return expr(ir_unop_u2d, a); -} - -ir_expression * -fma(operand a, operand b, operand c) -{ - return expr(ir_triop_fma, a, b, c); -} - -ir_expression * -lrp(operand x, operand y, operand a) -{ - return expr(ir_triop_lrp, x, y, a); -} - -ir_expression * -csel(operand a, operand b, operand c) -{ - return expr(ir_triop_csel, a, b, c); -} - -ir_expression * -bitfield_extract(operand a, operand b, operand c) -{ - return expr(ir_triop_bitfield_extract, a, b, c); -} - -ir_expression * -bitfield_insert(operand a, operand b, operand c, operand d) -{ - void *mem_ctx = ralloc_parent(a.val); - return new(mem_ctx) ir_expression(ir_quadop_bitfield_insert, - a.val->type, a.val, b.val, c.val, d.val); -} - -ir_if* -if_tree(operand condition, - ir_instruction *then_branch) -{ - assert(then_branch != NULL); - - void *mem_ctx = ralloc_parent(condition.val); - - ir_if *result = new(mem_ctx) ir_if(condition.val); - result->then_instructions.push_tail(then_branch); - return result; -} - -ir_if* -if_tree(operand condition, - ir_instruction *then_branch, - ir_instruction *else_branch) -{ - assert(then_branch != NULL); - assert(else_branch != NULL); - - void *mem_ctx = ralloc_parent(condition.val); - - ir_if *result = new(mem_ctx) ir_if(condition.val); - result->then_instructions.push_tail(then_branch); - result->else_instructions.push_tail(else_branch); - return result; -} - -} /* namespace ir_builder */ diff --git a/src/glsl/ir_builder.h b/src/glsl/ir_builder.h deleted file mode 100644 index b483ebf6269..00000000000 --- a/src/glsl/ir_builder.h +++ /dev/null @@ -1,230 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -#include "ir.h" - -namespace ir_builder { - -#ifndef WRITEMASK_X -enum writemask { - WRITEMASK_X = 0x1, - WRITEMASK_Y = 0x2, - WRITEMASK_Z = 0x4, - WRITEMASK_W = 0x8, -}; -#endif - -/** - * This little class exists to let the helper expression generators - * take either an ir_rvalue * or an ir_variable * to be automatically - * dereferenced, while still providing compile-time type checking. - * - * You don't have to explicitly call the constructor -- C++ will see - * that you passed an ir_variable, and silently call the - * operand(ir_variable *var) constructor behind your back. - */ -class operand { -public: - operand(ir_rvalue *val) - : val(val) - { - } - - operand(ir_variable *var) - { - void *mem_ctx = ralloc_parent(var); - val = new(mem_ctx) ir_dereference_variable(var); - } - - ir_rvalue *val; -}; - -/** Automatic generator for ir_dereference_variable on assignment LHS. - * - * \sa operand - */ -class deref { -public: - deref(ir_dereference *val) - : val(val) - { - } - - deref(ir_variable *var) - { - void *mem_ctx = ralloc_parent(var); - val = new(mem_ctx) ir_dereference_variable(var); - } - - - ir_dereference *val; -}; - -class ir_factory { -public: - ir_factory(exec_list *instructions = NULL, void *mem_ctx = NULL) - : instructions(instructions), - mem_ctx(mem_ctx) - { - return; - } - - void emit(ir_instruction *ir); - ir_variable *make_temp(const glsl_type *type, const char *name); - - ir_constant* - constant(float f) - { - return new(mem_ctx) ir_constant(f); - } - - ir_constant* - constant(int i) - { - return new(mem_ctx) ir_constant(i); - } - - ir_constant* - constant(unsigned u) - { - return new(mem_ctx) ir_constant(u); - } - - ir_constant* - constant(bool b) - { - return new(mem_ctx) ir_constant(b); - } - - exec_list *instructions; - void *mem_ctx; -}; - -ir_assignment *assign(deref lhs, operand rhs); -ir_assignment *assign(deref lhs, operand rhs, int writemask); -ir_assignment *assign(deref lhs, operand rhs, operand condition); -ir_assignment *assign(deref lhs, operand rhs, operand condition, int writemask); - -ir_return *ret(operand retval); - -ir_expression *expr(ir_expression_operation op, operand a); -ir_expression *expr(ir_expression_operation op, operand a, operand b); -ir_expression *expr(ir_expression_operation op, operand a, operand b, operand c); -ir_expression *add(operand a, operand b); -ir_expression *sub(operand a, operand b); -ir_expression *mul(operand a, operand b); -ir_expression *imul_high(operand a, operand b); -ir_expression *div(operand a, operand b); -ir_expression *carry(operand a, operand b); -ir_expression *borrow(operand a, operand b); -ir_expression *trunc(operand a); -ir_expression *round_even(operand a); -ir_expression *fract(operand a); -ir_expression *dot(operand a, operand b); -ir_expression *clamp(operand a, operand b, operand c); -ir_expression *saturate(operand a); -ir_expression *abs(operand a); -ir_expression *neg(operand a); -ir_expression *sin(operand a); -ir_expression *cos(operand a); -ir_expression *exp(operand a); -ir_expression *rsq(operand 
a); -ir_expression *sqrt(operand a); -ir_expression *log(operand a); -ir_expression *sign(operand a); - -ir_expression *subr_to_int(operand a); -ir_expression *equal(operand a, operand b); -ir_expression *nequal(operand a, operand b); -ir_expression *less(operand a, operand b); -ir_expression *greater(operand a, operand b); -ir_expression *lequal(operand a, operand b); -ir_expression *gequal(operand a, operand b); - -ir_expression *logic_not(operand a); -ir_expression *logic_and(operand a, operand b); -ir_expression *logic_or(operand a, operand b); - -ir_expression *bit_not(operand a); -ir_expression *bit_or(operand a, operand b); -ir_expression *bit_and(operand a, operand b); -ir_expression *lshift(operand a, operand b); -ir_expression *rshift(operand a, operand b); - -ir_expression *f2i(operand a); -ir_expression *bitcast_f2i(operand a); -ir_expression *i2f(operand a); -ir_expression *bitcast_i2f(operand a); -ir_expression *f2u(operand a); -ir_expression *bitcast_f2u(operand a); -ir_expression *u2f(operand a); -ir_expression *bitcast_u2f(operand a); -ir_expression *i2u(operand a); -ir_expression *u2i(operand a); -ir_expression *b2i(operand a); -ir_expression *i2b(operand a); -ir_expression *f2b(operand a); -ir_expression *b2f(operand a); - -ir_expression *f2d(operand a); -ir_expression *i2d(operand a); -ir_expression *u2d(operand a); - -ir_expression *min2(operand a, operand b); -ir_expression *max2(operand a, operand b); - -ir_expression *interpolate_at_centroid(operand a); -ir_expression *interpolate_at_offset(operand a, operand b); -ir_expression *interpolate_at_sample(operand a, operand b); - -ir_expression *fma(operand a, operand b, operand c); -ir_expression *lrp(operand x, operand y, operand a); -ir_expression *csel(operand a, operand b, operand c); -ir_expression *bitfield_extract(operand a, operand b, operand c); -ir_expression *bitfield_insert(operand a, operand b, operand c, operand d); - -ir_swizzle *swizzle(operand a, int swizzle, int components); -/** - * Swizzle away later components, but preserve the ordering. 
- */
-ir_swizzle *swizzle_for_size(operand a, unsigned components);
-
-ir_swizzle *swizzle_xxxx(operand a);
-ir_swizzle *swizzle_yyyy(operand a);
-ir_swizzle *swizzle_zzzz(operand a);
-ir_swizzle *swizzle_wwww(operand a);
-ir_swizzle *swizzle_x(operand a);
-ir_swizzle *swizzle_y(operand a);
-ir_swizzle *swizzle_z(operand a);
-ir_swizzle *swizzle_w(operand a);
-ir_swizzle *swizzle_xy(operand a);
-ir_swizzle *swizzle_xyz(operand a);
-ir_swizzle *swizzle_xyzw(operand a);
-
-ir_if *if_tree(operand condition,
-               ir_instruction *then_branch);
-ir_if *if_tree(operand condition,
-               ir_instruction *then_branch,
-               ir_instruction *else_branch);
-
-} /* namespace ir_builder */
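The operand and deref wrapper classes near the top of this header are what let the helper prototypes above accept either an ir_rvalue * or a bare ir_variable *: C++ selects the converting constructor, which builds the ir_dereference_variable implicitly. A minimal usage sketch, assuming a function signature sig under construction and its ralloc context mem_ctx (both hypothetical, not part of this patch):

   using namespace ir_builder;

   ir_factory body(&sig->body, mem_ctx);

   /* make_temp() allocates the variable and emits its declaration into the
    * factory's instruction list.
    */
   ir_variable *t = body.make_temp(glsl_type::vec4_type, "tmp");

   /* 't' is an ir_variable *, so both assign() and mul() wrap it in a fresh
    * ir_dereference_variable behind the scenes.
    */
   body.emit(assign(t, mul(t, body.constant(2.0f))));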
diff --git a/src/glsl/ir_clone.cpp b/src/glsl/ir_clone.cpp
deleted file mode 100644
index 0965b0d3719..00000000000
--- a/src/glsl/ir_clone.cpp
+++ /dev/null
@@ -1,440 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <string.h>
-#include "main/compiler.h"
-#include "ir.h"
-#include "compiler/glsl_types.h"
-#include "program/hash_table.h"
-
-ir_rvalue *
-ir_rvalue::clone(void *mem_ctx, struct hash_table *) const
-{
-   /* The only possible instantiation is the generic error value.
-    */
-   return error_value(mem_ctx);
-}
-
-/**
- * Duplicate an IR variable
- */
-ir_variable *
-ir_variable::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_variable *var = new(mem_ctx) ir_variable(this->type, this->name,
-                                               (ir_variable_mode) this->data.mode);
-
-   var->data.max_array_access = this->data.max_array_access;
-   if (this->is_interface_instance()) {
-      var->u.max_ifc_array_access =
-         rzalloc_array(var, unsigned, this->interface_type->length);
-      memcpy(var->u.max_ifc_array_access, this->u.max_ifc_array_access,
-             this->interface_type->length * sizeof(unsigned));
-   }
-
-   memcpy(&var->data, &this->data, sizeof(var->data));
-
-   if (this->get_state_slots()) {
-      ir_state_slot *s = var->allocate_state_slots(this->get_num_state_slots());
-      memcpy(s, this->get_state_slots(),
-             sizeof(s[0]) * var->get_num_state_slots());
-   }
-
-   if (this->constant_value)
-      var->constant_value = this->constant_value->clone(mem_ctx, ht);
-
-   if (this->constant_initializer)
-      var->constant_initializer =
-         this->constant_initializer->clone(mem_ctx, ht);
-
-   var->interface_type = this->interface_type;
-
-   if (ht) {
-      hash_table_insert(ht, var, (void *)const_cast<ir_variable *>(this));
-   }
-
-   return var;
-}
-
-ir_swizzle *
-ir_swizzle::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   return new(mem_ctx) ir_swizzle(this->val->clone(mem_ctx, ht), this->mask);
-}
-
-ir_return *
-ir_return::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_rvalue *new_value = NULL;
-
-   if (this->value)
-      new_value = this->value->clone(mem_ctx, ht);
-
-   return new(mem_ctx) ir_return(new_value);
}
-
-ir_discard *
-ir_discard::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_rvalue *new_condition = NULL;
-
-   if (this->condition != NULL)
-      new_condition = this->condition->clone(mem_ctx, ht);
-
-   return new(mem_ctx) ir_discard(new_condition);
-}
-
-ir_loop_jump *
-ir_loop_jump::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   (void)ht;
-
-   return new(mem_ctx) ir_loop_jump(this->mode);
-}
-
-ir_if *
-ir_if::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_if *new_if = new(mem_ctx) ir_if(this->condition->clone(mem_ctx, ht));
-
-   foreach_in_list(ir_instruction, ir, &this->then_instructions) {
-      new_if->then_instructions.push_tail(ir->clone(mem_ctx, ht));
-   }
-
-   foreach_in_list(ir_instruction, ir, &this->else_instructions) {
-      new_if->else_instructions.push_tail(ir->clone(mem_ctx, ht));
-   }
-
-   return new_if;
-}
-
-ir_loop *
-ir_loop::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_loop *new_loop = new(mem_ctx) ir_loop();
-
-   foreach_in_list(ir_instruction, ir, &this->body_instructions) {
-      new_loop->body_instructions.push_tail(ir->clone(mem_ctx, ht));
-   }
-
-   return new_loop;
-}
-
-ir_call *
-ir_call::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_dereference_variable *new_return_ref = NULL;
-   if (this->return_deref != NULL)
-      new_return_ref = this->return_deref->clone(mem_ctx, ht);
-
-   exec_list new_parameters;
-
-   foreach_in_list(ir_instruction, ir, &this->actual_parameters) {
-      new_parameters.push_tail(ir->clone(mem_ctx, ht));
-   }
-
-   return new(mem_ctx) ir_call(this->callee, new_return_ref, &new_parameters);
-}
-
-ir_expression *
-ir_expression::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_rvalue *op[ARRAY_SIZE(this->operands)] = { NULL, };
-   unsigned int i;
-
-   for (i = 0; i < get_num_operands(); i++) {
-      op[i] = this->operands[i]->clone(mem_ctx, ht);
-   }
-
-   return new(mem_ctx) ir_expression(this->operation, this->type,
-                                     op[0], op[1], op[2], op[3]);
-}
-
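The hash table threaded through these clone() methods is an original-to-copy remap: ir_variable::clone() above records each variable as it is copied, and ir_dereference_variable::clone() below consults the table so that references inside the cloned tree resolve to the cloned declarations. A minimal sketch of the pattern, assuming mem_ctx and ht are set up as in clone_ir_list() further down (orig_var is a hypothetical name):

   /* Cloning the declaration records orig_var -> copy in ht. */
   ir_variable *copy = orig_var->clone(mem_ctx, ht);

   /* Cloning a reference looks the copy back up; if the variable was
    * declared outside the cloned region, the find fails and the original
    * pointer is kept.
    */
   ir_variable *mapped = (ir_variable *) hash_table_find(ht, orig_var);
   if (mapped == NULL)
      mapped = orig_var;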
-ir_dereference_variable *
-ir_dereference_variable::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_variable *new_var;
-
-   if (ht) {
-      new_var = (ir_variable *)hash_table_find(ht, this->var);
-      if (!new_var)
-         new_var = this->var;
-   } else {
-      new_var = this->var;
-   }
-
-   return new(mem_ctx) ir_dereference_variable(new_var);
-}
-
-ir_dereference_array *
-ir_dereference_array::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   return new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, ht),
-                                            this->array_index->clone(mem_ctx,
-                                                                     ht));
-}
-
-ir_dereference_record *
-ir_dereference_record::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   return new(mem_ctx) ir_dereference_record(this->record->clone(mem_ctx, ht),
-                                             this->field);
-}
-
-ir_texture *
-ir_texture::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_texture *new_tex = new(mem_ctx) ir_texture(this->op);
-   new_tex->type = this->type;
-
-   new_tex->sampler = this->sampler->clone(mem_ctx, ht);
-   if (this->coordinate)
-      new_tex->coordinate = this->coordinate->clone(mem_ctx, ht);
-   if (this->projector)
-      new_tex->projector = this->projector->clone(mem_ctx, ht);
-   if (this->shadow_comparitor) {
-      new_tex->shadow_comparitor = this->shadow_comparitor->clone(mem_ctx, ht);
-   }
-
-   if (this->offset != NULL)
-      new_tex->offset = this->offset->clone(mem_ctx, ht);
-
-   switch (this->op) {
-   case ir_tex:
-   case ir_lod:
-   case ir_query_levels:
-   case ir_texture_samples:
-   case ir_samples_identical:
-      break;
-   case ir_txb:
-      new_tex->lod_info.bias = this->lod_info.bias->clone(mem_ctx, ht);
-      break;
-   case ir_txl:
-   case ir_txf:
-   case ir_txs:
-      new_tex->lod_info.lod = this->lod_info.lod->clone(mem_ctx, ht);
-      break;
-   case ir_txf_ms:
-      new_tex->lod_info.sample_index = this->lod_info.sample_index->clone(mem_ctx, ht);
-      break;
-   case ir_txd:
-      new_tex->lod_info.grad.dPdx = this->lod_info.grad.dPdx->clone(mem_ctx, ht);
-      new_tex->lod_info.grad.dPdy = this->lod_info.grad.dPdy->clone(mem_ctx, ht);
-      break;
-   case ir_tg4:
-      new_tex->lod_info.component = this->lod_info.component->clone(mem_ctx, ht);
-      break;
-   }
-
-   return new_tex;
-}
-
-ir_assignment *
-ir_assignment::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_rvalue *new_condition = NULL;
-
-   if (this->condition)
-      new_condition = this->condition->clone(mem_ctx, ht);
-
-   ir_assignment *cloned =
-      new(mem_ctx) ir_assignment(this->lhs->clone(mem_ctx, ht),
-                                 this->rhs->clone(mem_ctx, ht),
-                                 new_condition);
-   cloned->write_mask = this->write_mask;
-   return cloned;
-}
-
-ir_function *
-ir_function::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_function *copy = new(mem_ctx) ir_function(this->name);
-
-   copy->is_subroutine = this->is_subroutine;
-   copy->subroutine_index = this->subroutine_index;
-   copy->num_subroutine_types = this->num_subroutine_types;
-   copy->subroutine_types = ralloc_array(mem_ctx, const struct glsl_type *, copy->num_subroutine_types);
-   for (int i = 0; i < copy->num_subroutine_types; i++)
-      copy->subroutine_types[i] = this->subroutine_types[i];
-
-   foreach_in_list(const ir_function_signature, sig, &this->signatures) {
-      ir_function_signature *sig_copy = sig->clone(mem_ctx, ht);
-      copy->add_signature(sig_copy);
-
-      if (ht != NULL)
-         hash_table_insert(ht, sig_copy,
-                           (void *)const_cast<ir_function_signature *>(sig));
-   }
-
-   return copy;
-}
-
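Note that ir_function::clone() above registers each cloned signature in the same remap table, keyed by the original, but deliberately leaves ir_call::callee pointers untouched: a call cloned before its callee has nothing to look up yet. The retargeting is deferred to the fixup pass run from clone_ir_list() below; in isolation, the lookup side of that contract looks like this (call and ht are hypothetical stand-ins):

   ir_function_signature *sig =
      (ir_function_signature *) hash_table_find(ht, call->callee);
   if (sig != NULL)
      call->callee = sig;   /* retarget to the clone, else leave as-is */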
-ir_function_signature *
-ir_function_signature::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_function_signature *copy = this->clone_prototype(mem_ctx, ht);
-
-   copy->is_defined = this->is_defined;
-
-   /* Clone the instruction list.
-    */
-   foreach_in_list(const ir_instruction, inst, &this->body) {
-      ir_instruction *const inst_copy = inst->clone(mem_ctx, ht);
-      copy->body.push_tail(inst_copy);
-   }
-
-   return copy;
-}
-
-ir_function_signature *
-ir_function_signature::clone_prototype(void *mem_ctx, struct hash_table *ht) const
-{
-   ir_function_signature *copy =
-      new(mem_ctx) ir_function_signature(this->return_type);
-
-   copy->is_defined = false;
-   copy->builtin_avail = this->builtin_avail;
-   copy->origin = this;
-
-   /* Clone the parameter list, but NOT the body.
-    */
-   foreach_in_list(const ir_variable, param, &this->parameters) {
-      assert(const_cast<ir_variable *>(param)->as_variable() != NULL);
-
-      ir_variable *const param_copy = param->clone(mem_ctx, ht);
-      copy->parameters.push_tail(param_copy);
-   }
-
-   return copy;
-}
-
-ir_constant *
-ir_constant::clone(void *mem_ctx, struct hash_table *ht) const
-{
-   (void)ht;
-
-   switch (this->type->base_type) {
-   case GLSL_TYPE_UINT:
-   case GLSL_TYPE_INT:
-   case GLSL_TYPE_FLOAT:
-   case GLSL_TYPE_DOUBLE:
-   case GLSL_TYPE_BOOL:
-      return new(mem_ctx) ir_constant(this->type, &this->value);
-
-   case GLSL_TYPE_STRUCT: {
-      ir_constant *c = new(mem_ctx) ir_constant;
-
-      c->type = this->type;
-      for (exec_node *node = this->components.head
-              ; !node->is_tail_sentinel()
-              ; node = node->next) {
-         ir_constant *const orig = (ir_constant *) node;
-
-         c->components.push_tail(orig->clone(mem_ctx, NULL));
-      }
-
-      return c;
-   }
-
-   case GLSL_TYPE_ARRAY: {
-      ir_constant *c = new(mem_ctx) ir_constant;
-
-      c->type = this->type;
-      c->array_elements = ralloc_array(c, ir_constant *, this->type->length);
-      for (unsigned i = 0; i < this->type->length; i++) {
-         c->array_elements[i] = this->array_elements[i]->clone(mem_ctx, NULL);
-      }
-      return c;
-   }
-
-   case GLSL_TYPE_SAMPLER:
-   case GLSL_TYPE_IMAGE:
-   case GLSL_TYPE_ATOMIC_UINT:
-   case GLSL_TYPE_VOID:
-   case GLSL_TYPE_ERROR:
-   case GLSL_TYPE_SUBROUTINE:
-   case GLSL_TYPE_INTERFACE:
-      assert(!"Should not get here.");
-      break;
-   }
-
-   return NULL;
-}
-
-
-class fixup_ir_call_visitor : public ir_hierarchical_visitor {
-public:
-   fixup_ir_call_visitor(struct hash_table *ht)
-   {
-      this->ht = ht;
-   }
-
-   virtual ir_visitor_status visit_enter(ir_call *ir)
-   {
-      /* Try to find the function signature referenced by the ir_call in the
-       * table.  If it is found, replace it with the value from the table.
-       */
-      ir_function_signature *sig =
-         (ir_function_signature *) hash_table_find(this->ht, ir->callee);
-      if (sig != NULL)
-         ir->callee = sig;
-
-      /* Since this may be used before function call parameters are flattened,
-       * the children also need to be processed.
-       */
-      return visit_continue;
-   }
-
-private:
-   struct hash_table *ht;
-};
-
-
-static void
-fixup_function_calls(struct hash_table *ht, exec_list *instructions)
-{
-   fixup_ir_call_visitor v(ht);
-   v.run(instructions);
-}
-
-
-void
-clone_ir_list(void *mem_ctx, exec_list *out, const exec_list *in)
-{
-   struct hash_table *ht =
-      hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare);
-
-   foreach_in_list(const ir_instruction, original, in) {
-      ir_instruction *copy = original->clone(mem_ctx, ht);
-
-      out->push_tail(copy);
-   }
-
-   /* Make a pass over the cloned tree to fix up ir_call nodes to point to the
-    * cloned ir_function_signature nodes.  This cannot be done automatically
-    * during cloning because the ir_call might be a forward reference (i.e.,
-    * the function signature that it references may not have been cloned yet).
-    */
-   fixup_function_calls(ht, out);
-
-   hash_table_dtor(ht);
-}
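clone_ir_list() is the whole-stream entry point: one pass deep-copies, a second pass repairs forward call references. A sketch of a call site, with shader_ir and mem_ctx as hypothetical stand-ins for a real instruction list and its memory context:

   exec_list copies;
   clone_ir_list(mem_ctx, &copies, &shader_ir);

   /* 'copies' now holds an independent deep copy: variable dereferences
    * point at cloned variables and ir_call nodes at cloned signatures.
    */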
diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
deleted file mode 100644
index fbbf7794da6..00000000000
--- a/src/glsl/ir_constant_expression.cpp
+++ /dev/null
@@ -1,2092 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file ir_constant_expression.cpp
- * Evaluate and process constant valued expressions
- *
- * In GLSL, constant valued expressions are used in several places.  These
- * must be processed and evaluated very early in the compilation process.
- *
- *    * Sizes of arrays
- *    * Initializers for uniforms
- *    * Initializers for \c const variables
- */
-
-#include <math.h>
-#include "main/core.h" /* for MAX2, MIN2, CLAMP */
-#include "util/rounding.h" /* for _mesa_roundeven */
-#include "util/half_float.h"
-#include "ir.h"
-#include "compiler/glsl_types.h"
-#include "program/hash_table.h"
-
-static float
-dot_f(ir_constant *op0, ir_constant *op1)
-{
-   assert(op0->type->is_float() && op1->type->is_float());
-
-   float result = 0;
-   for (unsigned c = 0; c < op0->type->components(); c++)
-      result += op0->value.f[c] * op1->value.f[c];
-
-   return result;
-}
-
-static double
-dot_d(ir_constant *op0, ir_constant *op1)
-{
-   assert(op0->type->is_double() && op1->type->is_double());
-
-   double result = 0;
-   for (unsigned c = 0; c < op0->type->components(); c++)
-      result += op0->value.d[c] * op1->value.d[c];
-
-   return result;
-}
-
-/* This method is the only one supported by gcc.  Unions in particular
- * are iffy, and read-through-converted-pointer is killed by strict
- * aliasing.  OTOH, the compiler sees through the memcpy, so the
- * resulting asm is reasonable.
- */
-static float
-bitcast_u2f(unsigned int u)
-{
-   assert(sizeof(float) == sizeof(unsigned int));
-   float f;
-   memcpy(&f, &u, sizeof(f));
-   return f;
-}
-
-static unsigned int
-bitcast_f2u(float f)
-{
-   assert(sizeof(float) == sizeof(unsigned int));
-   unsigned int u;
-   memcpy(&u, &f, sizeof(f));
-   return u;
-}
-
-/**
- * Evaluate one component of a floating-point 4x8 packing function.
- */
-typedef uint8_t
-(*pack_1x8_func_t)(float);
-
-/**
- * Evaluate one component of a floating-point 2x16 packing function.
- */
-typedef uint16_t
-(*pack_1x16_func_t)(float);
-
-/**
- * Evaluate one component of a floating-point 4x8 unpacking function.
- */ -typedef float -(*unpack_1x8_func_t)(uint8_t); - -/** - * Evaluate one component of a floating-point 2x16 unpacking function. - */ -typedef float -(*unpack_1x16_func_t)(uint16_t); - -/** - * Evaluate a 2x16 floating-point packing function. - */ -static uint32_t -pack_2x16(pack_1x16_func_t pack_1x16, - float x, float y) -{ - /* From section 8.4 of the GLSL ES 3.00 spec: - * - * packSnorm2x16 - * ------------- - * The first component of the vector will be written to the least - * significant bits of the output; the last component will be written to - * the most significant bits. - * - * The specifications for the other packing functions contain similar - * language. - */ - uint32_t u = 0; - u |= ((uint32_t) pack_1x16(x) << 0); - u |= ((uint32_t) pack_1x16(y) << 16); - return u; -} - -/** - * Evaluate a 4x8 floating-point packing function. - */ -static uint32_t -pack_4x8(pack_1x8_func_t pack_1x8, - float x, float y, float z, float w) -{ - /* From section 8.4 of the GLSL 4.30 spec: - * - * packSnorm4x8 - * ------------ - * The first component of the vector will be written to the least - * significant bits of the output; the last component will be written to - * the most significant bits. - * - * The specifications for the other packing functions contain similar - * language. - */ - uint32_t u = 0; - u |= ((uint32_t) pack_1x8(x) << 0); - u |= ((uint32_t) pack_1x8(y) << 8); - u |= ((uint32_t) pack_1x8(z) << 16); - u |= ((uint32_t) pack_1x8(w) << 24); - return u; -} - -/** - * Evaluate a 2x16 floating-point unpacking function. - */ -static void -unpack_2x16(unpack_1x16_func_t unpack_1x16, - uint32_t u, - float *x, float *y) -{ - /* From section 8.4 of the GLSL ES 3.00 spec: - * - * unpackSnorm2x16 - * --------------- - * The first component of the returned vector will be extracted from - * the least significant bits of the input; the last component will be - * extracted from the most significant bits. - * - * The specifications for the other unpacking functions contain similar - * language. - */ - *x = unpack_1x16((uint16_t) (u & 0xffff)); - *y = unpack_1x16((uint16_t) (u >> 16)); -} - -/** - * Evaluate a 4x8 floating-point unpacking function. - */ -static void -unpack_4x8(unpack_1x8_func_t unpack_1x8, uint32_t u, - float *x, float *y, float *z, float *w) -{ - /* From section 8.4 of the GLSL 4.30 spec: - * - * unpackSnorm4x8 - * -------------- - * The first component of the returned vector will be extracted from - * the least significant bits of the input; the last component will be - * extracted from the most significant bits. - * - * The specifications for the other unpacking functions contain similar - * language. - */ - *x = unpack_1x8((uint8_t) (u & 0xff)); - *y = unpack_1x8((uint8_t) (u >> 8)); - *z = unpack_1x8((uint8_t) (u >> 16)); - *w = unpack_1x8((uint8_t) (u >> 24)); -} - -/** - * Evaluate one component of packSnorm4x8. - */ -static uint8_t -pack_snorm_1x8(float x) -{ - /* From section 8.4 of the GLSL 4.30 spec: - * - * packSnorm4x8 - * ------------ - * The conversion for component c of v to fixed point is done as - * follows: - * - * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) - */ - return (uint8_t) - _mesa_lroundevenf(CLAMP(x, -1.0f, +1.0f) * 127.0f); -} - -/** - * Evaluate one component of packSnorm2x16. 
- */
-static uint16_t
-pack_snorm_1x16(float x)
-{
-   /* From section 8.4 of the GLSL ES 3.00 spec:
-    *
-    *    packSnorm2x16
-    *    -------------
-    *    The conversion for component c of v to fixed point is done as
-    *    follows:
-    *
-    *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
-    */
-   return (uint16_t)
-          _mesa_lroundevenf(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
-}
-
-/**
- * Evaluate one component of unpackSnorm4x8.
- */
-static float
-unpack_snorm_1x8(uint8_t u)
-{
-   /* From section 8.4 of the GLSL 4.30 spec:
-    *
-    *    unpackSnorm4x8
-    *    --------------
-    *    The conversion for unpacked fixed-point value f to floating point is
-    *    done as follows:
-    *
-    *       unpackSnorm4x8: clamp(f / 127.0, -1, +1)
-    */
-   return CLAMP((int8_t) u / 127.0f, -1.0f, +1.0f);
-}
-
-/**
- * Evaluate one component of unpackSnorm2x16.
- */
-static float
-unpack_snorm_1x16(uint16_t u)
-{
-   /* From section 8.4 of the GLSL ES 3.00 spec:
-    *
-    *    unpackSnorm2x16
-    *    ---------------
-    *    The conversion for unpacked fixed-point value f to floating point is
-    *    done as follows:
-    *
-    *       unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
-    */
-   return CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
-}
-
-/**
- * Evaluate one component of packUnorm4x8.
- */
-static uint8_t
-pack_unorm_1x8(float x)
-{
-   /* From section 8.4 of the GLSL 4.30 spec:
-    *
-    *    packUnorm4x8
-    *    ------------
-    *    The conversion for component c of v to fixed point is done as
-    *    follows:
-    *
-    *       packUnorm4x8: round(clamp(c, 0, +1) * 255.0)
-    */
-   return (uint8_t) (int) _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 255.0f);
-}
-
-/**
- * Evaluate one component of packUnorm2x16.
- */
-static uint16_t
-pack_unorm_1x16(float x)
-{
-   /* From section 8.4 of the GLSL ES 3.00 spec:
-    *
-    *    packUnorm2x16
-    *    -------------
-    *    The conversion for component c of v to fixed point is done as
-    *    follows:
-    *
-    *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
-    */
-   return (uint16_t) (int)
-          _mesa_roundevenf(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
-}
-
-/**
- * Evaluate one component of unpackUnorm4x8.
- */
-static float
-unpack_unorm_1x8(uint8_t u)
-{
-   /* From section 8.4 of the GLSL 4.30 spec:
-    *
-    *    unpackUnorm4x8
-    *    --------------
-    *    The conversion for unpacked fixed-point value f to floating point is
-    *    done as follows:
-    *
-    *       unpackUnorm4x8: f / 255.0
-    */
-   return (float) u / 255.0f;
-}
-
-/**
- * Evaluate one component of unpackUnorm2x16.
- */
-static float
-unpack_unorm_1x16(uint16_t u)
-{
-   /* From section 8.4 of the GLSL ES 3.00 spec:
-    *
-    *    unpackUnorm2x16
-    *    ---------------
-    *    The conversion for unpacked fixed-point value f to floating point is
-    *    done as follows:
-    *
-    *       unpackUnorm2x16: f / 65535.0
-    */
-   return (float) u / 65535.0f;
-}
-
-/**
- * Evaluate one component of packHalf2x16.
- */
-static uint16_t
-pack_half_1x16(float x)
-{
-   return _mesa_float_to_half(x);
-}
-
-/**
- * Evaluate one component of unpackHalf2x16.
- */
-static float
-unpack_half_1x16(uint16_t u)
-{
-   return _mesa_half_to_float(u);
-}
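Taken together, these helpers let the constant folder evaluate the GLSL packing built-ins at compile time. A small worked example of the snorm round trip defined above (the values are chosen purely for illustration):

   /* packSnorm2x16(vec2(1.0, -0.5)):
    *   pack_snorm_1x16(1.0)  = round(clamp(1.0, -1, +1) * 32767.0) = 32767, i.e. 0x7fff
    *   pack_snorm_1x16(-0.5) = round(-0.5 * 32767.0) = -16384, i.e. 0xc000 as uint16_t
    * pack_2x16() places the first component in the least significant half:
    *   packed = 0xc0007fff
    */
   uint32_t packed = pack_2x16(pack_snorm_1x16, 1.0f, -0.5f);

   /* unpackSnorm2x16(0xc0007fff) recovers approximately (1.0, -0.5):
    *   unpack_snorm_1x16(0x7fff) = clamp( 32767 / 32767.0, -1, +1) = 1.0
    *   unpack_snorm_1x16(0xc000) = clamp(-16384 / 32767.0, -1, +1), about -0.50002
    */
   float x, y;
   unpack_2x16(unpack_snorm_1x16, packed, &x, &y);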
-
-/**
- * Get the constant that is ultimately referenced by an r-value, in a constant
- * expression evaluation context.
- *
- * The offset is used when the reference is to a specific column of a matrix.
- */
-static bool
-constant_referenced(const ir_dereference *deref,
-                    struct hash_table *variable_context,
-                    ir_constant *&store, int &offset)
-{
-   store = NULL;
-   offset = 0;
-
-   if (variable_context == NULL)
-      return false;
-
-   switch (deref->ir_type) {
-   case ir_type_dereference_array: {
-      const ir_dereference_array *const da =
-         (const ir_dereference_array *) deref;
-
-      ir_constant *const index_c =
-         da->array_index->constant_expression_value(variable_context);
-
-      if (!index_c || !index_c->type->is_scalar() || !index_c->type->is_integer())
-         break;
-
-      const int index = index_c->type->base_type == GLSL_TYPE_INT ?
-         index_c->get_int_component(0) :
-         index_c->get_uint_component(0);
-
-      ir_constant *substore;
-      int suboffset;
-
-      const ir_dereference *const deref = da->array->as_dereference();
-      if (!deref)
-         break;
-
-      if (!constant_referenced(deref, variable_context, substore, suboffset))
-         break;
-
-      const glsl_type *const vt = da->array->type;
-      if (vt->is_array()) {
-         store = substore->get_array_element(index);
-         offset = 0;
-      } else if (vt->is_matrix()) {
-         store = substore;
-         offset = index * vt->vector_elements;
-      } else if (vt->is_vector()) {
-         store = substore;
-         offset = suboffset + index;
-      }
-
-      break;
-   }
-
-   case ir_type_dereference_record: {
-      const ir_dereference_record *const dr =
-         (const ir_dereference_record *) deref;
-
-      const ir_dereference *const deref = dr->record->as_dereference();
-      if (!deref)
-         break;
-
-      ir_constant *substore;
-      int suboffset;
-
-      if (!constant_referenced(deref, variable_context, substore, suboffset))
-         break;
-
-      /* Since we're dropping it on the floor...
-       */
-      assert(suboffset == 0);
-
-      store = substore->get_record_field(dr->field);
-      break;
-   }
-
-   case ir_type_dereference_variable: {
-      const ir_dereference_variable *const dv =
-         (const ir_dereference_variable *) deref;
-
-      store = (ir_constant *) hash_table_find(variable_context, dv->var);
-      break;
-   }
-
-   default:
-      assert(!"Should not get here.");
-      break;
-   }
-
-   return store != NULL;
-}
-
-
-ir_constant *
-ir_rvalue::constant_expression_value(struct hash_table *)
-{
-   assert(this->type->is_error());
-   return NULL;
-}
-
-ir_constant *
-ir_expression::constant_expression_value(struct hash_table *variable_context)
-{
-   if (this->type->is_error())
-      return NULL;
-
-   ir_constant *op[ARRAY_SIZE(this->operands)] = { NULL, };
-   ir_constant_data data;
-
-   memset(&data, 0, sizeof(data));
-
-   for (unsigned operand = 0; operand < this->get_num_operands(); operand++) {
-      op[operand] = this->operands[operand]->constant_expression_value(variable_context);
-      if (!op[operand])
-         return NULL;
-   }
-
-   if (op[1] != NULL)
-      switch (this->operation) {
-      case ir_binop_lshift:
-      case ir_binop_rshift:
-      case ir_binop_ldexp:
-      case ir_binop_interpolate_at_offset:
-      case ir_binop_interpolate_at_sample:
-      case ir_binop_vector_extract:
-      case ir_triop_csel:
-      case ir_triop_bitfield_extract:
-         break;
-
-      default:
-         assert(op[0]->type->base_type == op[1]->type->base_type);
-         break;
-      }
-
-   bool op0_scalar = op[0]->type->is_scalar();
-   bool op1_scalar = op[1] != NULL && op[1]->type->is_scalar();
-
-   /* When iterating over a vector or matrix's components, we want to increase
-    * the loop counter.  However, for scalars, we want to stay at 0.
-    */
-   unsigned c0_inc = op0_scalar ? 0 : 1;
-   unsigned c1_inc = op1_scalar ?
0 : 1; - unsigned components; - if (op1_scalar || !op[1]) { - components = op[0]->type->components(); - } else { - components = op[1]->type->components(); - } - - void *ctx = ralloc_parent(this); - - /* Handle array operations here, rather than below. */ - if (op[0]->type->is_array()) { - assert(op[1] != NULL && op[1]->type->is_array()); - switch (this->operation) { - case ir_binop_all_equal: - return new(ctx) ir_constant(op[0]->has_value(op[1])); - case ir_binop_any_nequal: - return new(ctx) ir_constant(!op[0]->has_value(op[1])); - default: - break; - } - return NULL; - } - - switch (this->operation) { - case ir_unop_bit_not: - switch (op[0]->type->base_type) { - case GLSL_TYPE_INT: - for (unsigned c = 0; c < components; c++) - data.i[c] = ~ op[0]->value.i[c]; - break; - case GLSL_TYPE_UINT: - for (unsigned c = 0; c < components; c++) - data.u[c] = ~ op[0]->value.u[c]; - break; - default: - assert(0); - } - break; - - case ir_unop_logic_not: - assert(op[0]->type->base_type == GLSL_TYPE_BOOL); - for (unsigned c = 0; c < op[0]->type->components(); c++) - data.b[c] = !op[0]->value.b[c]; - break; - - case ir_unop_f2i: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.i[c] = (int) op[0]->value.f[c]; - } - break; - case ir_unop_f2u: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.i[c] = (unsigned) op[0]->value.f[c]; - } - break; - case ir_unop_i2f: - assert(op[0]->type->base_type == GLSL_TYPE_INT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = (float) op[0]->value.i[c]; - } - break; - case ir_unop_u2f: - assert(op[0]->type->base_type == GLSL_TYPE_UINT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = (float) op[0]->value.u[c]; - } - break; - case ir_unop_b2f: - assert(op[0]->type->base_type == GLSL_TYPE_BOOL); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = op[0]->value.b[c] ? 1.0F : 0.0F; - } - break; - case ir_unop_f2b: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.b[c] = op[0]->value.f[c] != 0.0F ? true : false; - } - break; - case ir_unop_b2i: - assert(op[0]->type->base_type == GLSL_TYPE_BOOL); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.u[c] = op[0]->value.b[c] ? 1 : 0; - } - break; - case ir_unop_i2b: - assert(op[0]->type->is_integer()); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.b[c] = op[0]->value.u[c] ? 
true : false; - } - break; - case ir_unop_u2i: - assert(op[0]->type->base_type == GLSL_TYPE_UINT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.i[c] = op[0]->value.u[c]; - } - break; - case ir_unop_i2u: - assert(op[0]->type->base_type == GLSL_TYPE_INT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.u[c] = op[0]->value.i[c]; - } - break; - case ir_unop_bitcast_i2f: - assert(op[0]->type->base_type == GLSL_TYPE_INT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = bitcast_u2f(op[0]->value.i[c]); - } - break; - case ir_unop_bitcast_f2i: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.i[c] = bitcast_f2u(op[0]->value.f[c]); - } - break; - case ir_unop_bitcast_u2f: - assert(op[0]->type->base_type == GLSL_TYPE_UINT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = bitcast_u2f(op[0]->value.u[c]); - } - break; - case ir_unop_bitcast_f2u: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.u[c] = bitcast_f2u(op[0]->value.f[c]); - } - break; - case ir_unop_d2f: - assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = op[0]->value.d[c]; - } - break; - case ir_unop_f2d: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.d[c] = op[0]->value.f[c]; - } - break; - case ir_unop_d2i: - assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.i[c] = op[0]->value.d[c]; - } - break; - case ir_unop_i2d: - assert(op[0]->type->base_type == GLSL_TYPE_INT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.d[c] = op[0]->value.i[c]; - } - break; - case ir_unop_d2u: - assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.u[c] = op[0]->value.d[c]; - } - break; - case ir_unop_u2d: - assert(op[0]->type->base_type == GLSL_TYPE_UINT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.d[c] = op[0]->value.u[c]; - } - break; - case ir_unop_d2b: - assert(op[0]->type->base_type == GLSL_TYPE_DOUBLE); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.b[c] = op[0]->value.d[c] != 0.0; - } - break; - case ir_unop_trunc: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[c] = trunc(op[0]->value.d[c]); - else - data.f[c] = truncf(op[0]->value.f[c]); - } - break; - - case ir_unop_round_even: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[c] = _mesa_roundeven(op[0]->value.d[c]); - else - data.f[c] = _mesa_roundevenf(op[0]->value.f[c]); - } - break; - - case ir_unop_ceil: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[c] = ceil(op[0]->value.d[c]); - else - data.f[c] = ceilf(op[0]->value.f[c]); - } - break; - - case ir_unop_floor: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[c] = floor(op[0]->value.d[c]); - else - data.f[c] = floorf(op[0]->value.f[c]); - } - break; - - case ir_unop_fract: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - switch (this->type->base_type) { - case 
GLSL_TYPE_UINT: - data.u[c] = 0; - break; - case GLSL_TYPE_INT: - data.i[c] = 0; - break; - case GLSL_TYPE_FLOAT: - data.f[c] = op[0]->value.f[c] - floor(op[0]->value.f[c]); - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = op[0]->value.d[c] - floor(op[0]->value.d[c]); - break; - default: - assert(0); - } - } - break; - - case ir_unop_sin: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = sinf(op[0]->value.f[c]); - } - break; - - case ir_unop_cos: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = cosf(op[0]->value.f[c]); - } - break; - - case ir_unop_neg: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - switch (this->type->base_type) { - case GLSL_TYPE_UINT: - data.u[c] = -((int) op[0]->value.u[c]); - break; - case GLSL_TYPE_INT: - data.i[c] = -op[0]->value.i[c]; - break; - case GLSL_TYPE_FLOAT: - data.f[c] = -op[0]->value.f[c]; - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = -op[0]->value.d[c]; - break; - default: - assert(0); - } - } - break; - - case ir_unop_abs: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - switch (this->type->base_type) { - case GLSL_TYPE_UINT: - data.u[c] = op[0]->value.u[c]; - break; - case GLSL_TYPE_INT: - data.i[c] = op[0]->value.i[c]; - if (data.i[c] < 0) - data.i[c] = -data.i[c]; - break; - case GLSL_TYPE_FLOAT: - data.f[c] = fabs(op[0]->value.f[c]); - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = fabs(op[0]->value.d[c]); - break; - default: - assert(0); - } - } - break; - - case ir_unop_sign: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - switch (this->type->base_type) { - case GLSL_TYPE_UINT: - data.u[c] = op[0]->value.i[c] > 0; - break; - case GLSL_TYPE_INT: - data.i[c] = (op[0]->value.i[c] > 0) - (op[0]->value.i[c] < 0); - break; - case GLSL_TYPE_FLOAT: - data.f[c] = float((op[0]->value.f[c] > 0)-(op[0]->value.f[c] < 0)); - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = double((op[0]->value.d[c] > 0)-(op[0]->value.d[c] < 0)); - break; - default: - assert(0); - } - } - break; - - case ir_unop_rcp: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - switch (this->type->base_type) { - case GLSL_TYPE_UINT: - if (op[0]->value.u[c] != 0.0) - data.u[c] = 1 / op[0]->value.u[c]; - break; - case GLSL_TYPE_INT: - if (op[0]->value.i[c] != 0.0) - data.i[c] = 1 / op[0]->value.i[c]; - break; - case GLSL_TYPE_FLOAT: - if (op[0]->value.f[c] != 0.0) - data.f[c] = 1.0F / op[0]->value.f[c]; - break; - case GLSL_TYPE_DOUBLE: - if (op[0]->value.d[c] != 0.0) - data.d[c] = 1.0 / op[0]->value.d[c]; - break; - default: - assert(0); - } - } - break; - - case ir_unop_rsq: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[c] = 1.0 / sqrt(op[0]->value.d[c]); - else - data.f[c] = 1.0F / sqrtf(op[0]->value.f[c]); - } - break; - - case ir_unop_sqrt: - for (unsigned c = 0; c < op[0]->type->components(); c++) { - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[c] = sqrt(op[0]->value.d[c]); - else - data.f[c] = sqrtf(op[0]->value.f[c]); - } - break; - - case ir_unop_exp: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = expf(op[0]->value.f[c]); - } - break; - - case ir_unop_exp2: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = exp2f(op[0]->value.f[c]); - 
} - break; - - case ir_unop_log: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = logf(op[0]->value.f[c]); - } - break; - - case ir_unop_log2: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = log2f(op[0]->value.f[c]); - } - break; - - case ir_unop_dFdx: - case ir_unop_dFdx_coarse: - case ir_unop_dFdx_fine: - case ir_unop_dFdy: - case ir_unop_dFdy_coarse: - case ir_unop_dFdy_fine: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = 0.0; - } - break; - - case ir_unop_pack_snorm_2x16: - assert(op[0]->type == glsl_type::vec2_type); - data.u[0] = pack_2x16(pack_snorm_1x16, - op[0]->value.f[0], - op[0]->value.f[1]); - break; - case ir_unop_pack_snorm_4x8: - assert(op[0]->type == glsl_type::vec4_type); - data.u[0] = pack_4x8(pack_snorm_1x8, - op[0]->value.f[0], - op[0]->value.f[1], - op[0]->value.f[2], - op[0]->value.f[3]); - break; - case ir_unop_unpack_snorm_2x16: - assert(op[0]->type == glsl_type::uint_type); - unpack_2x16(unpack_snorm_1x16, - op[0]->value.u[0], - &data.f[0], &data.f[1]); - break; - case ir_unop_unpack_snorm_4x8: - assert(op[0]->type == glsl_type::uint_type); - unpack_4x8(unpack_snorm_1x8, - op[0]->value.u[0], - &data.f[0], &data.f[1], &data.f[2], &data.f[3]); - break; - case ir_unop_pack_unorm_2x16: - assert(op[0]->type == glsl_type::vec2_type); - data.u[0] = pack_2x16(pack_unorm_1x16, - op[0]->value.f[0], - op[0]->value.f[1]); - break; - case ir_unop_pack_unorm_4x8: - assert(op[0]->type == glsl_type::vec4_type); - data.u[0] = pack_4x8(pack_unorm_1x8, - op[0]->value.f[0], - op[0]->value.f[1], - op[0]->value.f[2], - op[0]->value.f[3]); - break; - case ir_unop_unpack_unorm_2x16: - assert(op[0]->type == glsl_type::uint_type); - unpack_2x16(unpack_unorm_1x16, - op[0]->value.u[0], - &data.f[0], &data.f[1]); - break; - case ir_unop_unpack_unorm_4x8: - assert(op[0]->type == glsl_type::uint_type); - unpack_4x8(unpack_unorm_1x8, - op[0]->value.u[0], - &data.f[0], &data.f[1], &data.f[2], &data.f[3]); - break; - case ir_unop_pack_half_2x16: - assert(op[0]->type == glsl_type::vec2_type); - data.u[0] = pack_2x16(pack_half_1x16, - op[0]->value.f[0], - op[0]->value.f[1]); - break; - case ir_unop_unpack_half_2x16: - assert(op[0]->type == glsl_type::uint_type); - unpack_2x16(unpack_half_1x16, - op[0]->value.u[0], - &data.f[0], &data.f[1]); - break; - case ir_binop_pow: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - data.f[c] = powf(op[0]->value.f[c], op[1]->value.f[c]); - } - break; - - case ir_binop_dot: - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[0] = dot_d(op[0], op[1]); - else - data.f[0] = dot_f(op[0], op[1]); - break; - - case ir_binop_min: - assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.u[c] = MIN2(op[0]->value.u[c0], op[1]->value.u[c1]); - break; - case GLSL_TYPE_INT: - data.i[c] = MIN2(op[0]->value.i[c0], op[1]->value.i[c1]); - break; - case GLSL_TYPE_FLOAT: - data.f[c] = MIN2(op[0]->value.f[c0], op[1]->value.f[c1]); - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = MIN2(op[0]->value.d[c0], op[1]->value.d[c1]); - break; - default: - assert(0); - } - } - - break; - case ir_binop_max: - 
assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.u[c] = MAX2(op[0]->value.u[c0], op[1]->value.u[c1]); - break; - case GLSL_TYPE_INT: - data.i[c] = MAX2(op[0]->value.i[c0], op[1]->value.i[c1]); - break; - case GLSL_TYPE_FLOAT: - data.f[c] = MAX2(op[0]->value.f[c0], op[1]->value.f[c1]); - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = MAX2(op[0]->value.d[c0], op[1]->value.d[c1]); - break; - default: - assert(0); - } - } - break; - - case ir_binop_add: - assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.u[c] = op[0]->value.u[c0] + op[1]->value.u[c1]; - break; - case GLSL_TYPE_INT: - data.i[c] = op[0]->value.i[c0] + op[1]->value.i[c1]; - break; - case GLSL_TYPE_FLOAT: - data.f[c] = op[0]->value.f[c0] + op[1]->value.f[c1]; - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = op[0]->value.d[c0] + op[1]->value.d[c1]; - break; - default: - assert(0); - } - } - - break; - case ir_binop_sub: - assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.u[c] = op[0]->value.u[c0] - op[1]->value.u[c1]; - break; - case GLSL_TYPE_INT: - data.i[c] = op[0]->value.i[c0] - op[1]->value.i[c1]; - break; - case GLSL_TYPE_FLOAT: - data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1]; - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1]; - break; - default: - assert(0); - } - } - - break; - case ir_binop_mul: - /* Check for equal types, or unequal types involving scalars */ - if ((op[0]->type == op[1]->type && !op[0]->type->is_matrix()) - || op0_scalar || op1_scalar) { - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.u[c] = op[0]->value.u[c0] * op[1]->value.u[c1]; - break; - case GLSL_TYPE_INT: - data.i[c] = op[0]->value.i[c0] * op[1]->value.i[c1]; - break; - case GLSL_TYPE_FLOAT: - data.f[c] = op[0]->value.f[c0] * op[1]->value.f[c1]; - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = op[0]->value.d[c0] * op[1]->value.d[c1]; - break; - default: - assert(0); - } - } - } else { - assert(op[0]->type->is_matrix() || op[1]->type->is_matrix()); - - /* Multiply an N-by-M matrix with an M-by-P matrix. Since either - * matrix can be a GLSL vector, either N or P can be 1. - * - * For vec*mat, the vector is treated as a row vector. This - * means the vector is a 1-row x M-column matrix. - * - * For mat*vec, the vector is treated as a column vector. Since - * matrix_columns is 1 for vectors, this just works. - */ - const unsigned n = op[0]->type->is_vector() - ? 
1 : op[0]->type->vector_elements; - const unsigned m = op[1]->type->vector_elements; - const unsigned p = op[1]->type->matrix_columns; - for (unsigned j = 0; j < p; j++) { - for (unsigned i = 0; i < n; i++) { - for (unsigned k = 0; k < m; k++) { - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[i+n*j] += op[0]->value.d[i+n*k]*op[1]->value.d[k+m*j]; - else - data.f[i+n*j] += op[0]->value.f[i+n*k]*op[1]->value.f[k+m*j]; - } - } - } - } - - break; - case ir_binop_div: - /* FINISHME: Emit warning when division-by-zero is detected. */ - assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - if (op[1]->value.u[c1] == 0) { - data.u[c] = 0; - } else { - data.u[c] = op[0]->value.u[c0] / op[1]->value.u[c1]; - } - break; - case GLSL_TYPE_INT: - if (op[1]->value.i[c1] == 0) { - data.i[c] = 0; - } else { - data.i[c] = op[0]->value.i[c0] / op[1]->value.i[c1]; - } - break; - case GLSL_TYPE_FLOAT: - data.f[c] = op[0]->value.f[c0] / op[1]->value.f[c1]; - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = op[0]->value.d[c0] / op[1]->value.d[c1]; - break; - default: - assert(0); - } - } - - break; - case ir_binop_mod: - /* FINISHME: Emit warning when division-by-zero is detected. */ - assert(op[0]->type == op[1]->type || op0_scalar || op1_scalar); - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - if (op[1]->value.u[c1] == 0) { - data.u[c] = 0; - } else { - data.u[c] = op[0]->value.u[c0] % op[1]->value.u[c1]; - } - break; - case GLSL_TYPE_INT: - if (op[1]->value.i[c1] == 0) { - data.i[c] = 0; - } else { - data.i[c] = op[0]->value.i[c0] % op[1]->value.i[c1]; - } - break; - case GLSL_TYPE_FLOAT: - /* We don't use fmod because it rounds toward zero; GLSL specifies - * the use of floor. - */ - data.f[c] = op[0]->value.f[c0] - op[1]->value.f[c1] - * floorf(op[0]->value.f[c0] / op[1]->value.f[c1]); - break; - case GLSL_TYPE_DOUBLE: - /* We don't use fmod because it rounds toward zero; GLSL specifies - * the use of floor. 
- */ - data.d[c] = op[0]->value.d[c0] - op[1]->value.d[c1] - * floor(op[0]->value.d[c0] / op[1]->value.d[c1]); - break; - default: - assert(0); - } - } - - break; - - case ir_binop_logic_and: - assert(op[0]->type->base_type == GLSL_TYPE_BOOL); - for (unsigned c = 0; c < op[0]->type->components(); c++) - data.b[c] = op[0]->value.b[c] && op[1]->value.b[c]; - break; - case ir_binop_logic_xor: - assert(op[0]->type->base_type == GLSL_TYPE_BOOL); - for (unsigned c = 0; c < op[0]->type->components(); c++) - data.b[c] = op[0]->value.b[c] ^ op[1]->value.b[c]; - break; - case ir_binop_logic_or: - assert(op[0]->type->base_type == GLSL_TYPE_BOOL); - for (unsigned c = 0; c < op[0]->type->components(); c++) - data.b[c] = op[0]->value.b[c] || op[1]->value.b[c]; - break; - - case ir_binop_less: - assert(op[0]->type == op[1]->type); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.b[c] = op[0]->value.u[c] < op[1]->value.u[c]; - break; - case GLSL_TYPE_INT: - data.b[c] = op[0]->value.i[c] < op[1]->value.i[c]; - break; - case GLSL_TYPE_FLOAT: - data.b[c] = op[0]->value.f[c] < op[1]->value.f[c]; - break; - case GLSL_TYPE_DOUBLE: - data.b[c] = op[0]->value.d[c] < op[1]->value.d[c]; - break; - default: - assert(0); - } - } - break; - case ir_binop_greater: - assert(op[0]->type == op[1]->type); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.b[c] = op[0]->value.u[c] > op[1]->value.u[c]; - break; - case GLSL_TYPE_INT: - data.b[c] = op[0]->value.i[c] > op[1]->value.i[c]; - break; - case GLSL_TYPE_FLOAT: - data.b[c] = op[0]->value.f[c] > op[1]->value.f[c]; - break; - case GLSL_TYPE_DOUBLE: - data.b[c] = op[0]->value.d[c] > op[1]->value.d[c]; - break; - default: - assert(0); - } - } - break; - case ir_binop_lequal: - assert(op[0]->type == op[1]->type); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.b[c] = op[0]->value.u[c] <= op[1]->value.u[c]; - break; - case GLSL_TYPE_INT: - data.b[c] = op[0]->value.i[c] <= op[1]->value.i[c]; - break; - case GLSL_TYPE_FLOAT: - data.b[c] = op[0]->value.f[c] <= op[1]->value.f[c]; - break; - case GLSL_TYPE_DOUBLE: - data.b[c] = op[0]->value.d[c] <= op[1]->value.d[c]; - break; - default: - assert(0); - } - } - break; - case ir_binop_gequal: - assert(op[0]->type == op[1]->type); - for (unsigned c = 0; c < op[0]->type->components(); c++) { - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.b[c] = op[0]->value.u[c] >= op[1]->value.u[c]; - break; - case GLSL_TYPE_INT: - data.b[c] = op[0]->value.i[c] >= op[1]->value.i[c]; - break; - case GLSL_TYPE_FLOAT: - data.b[c] = op[0]->value.f[c] >= op[1]->value.f[c]; - break; - case GLSL_TYPE_DOUBLE: - data.b[c] = op[0]->value.d[c] >= op[1]->value.d[c]; - break; - default: - assert(0); - } - } - break; - case ir_binop_equal: - assert(op[0]->type == op[1]->type); - for (unsigned c = 0; c < components; c++) { - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.b[c] = op[0]->value.u[c] == op[1]->value.u[c]; - break; - case GLSL_TYPE_INT: - data.b[c] = op[0]->value.i[c] == op[1]->value.i[c]; - break; - case GLSL_TYPE_FLOAT: - data.b[c] = op[0]->value.f[c] == op[1]->value.f[c]; - break; - case GLSL_TYPE_BOOL: - data.b[c] = op[0]->value.b[c] == op[1]->value.b[c]; - break; - case GLSL_TYPE_DOUBLE: - data.b[c] = op[0]->value.d[c] == op[1]->value.d[c]; - break; - default: - assert(0); - } - } - 
break; - case ir_binop_nequal: - assert(op[0]->type == op[1]->type); - for (unsigned c = 0; c < components; c++) { - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.b[c] = op[0]->value.u[c] != op[1]->value.u[c]; - break; - case GLSL_TYPE_INT: - data.b[c] = op[0]->value.i[c] != op[1]->value.i[c]; - break; - case GLSL_TYPE_FLOAT: - data.b[c] = op[0]->value.f[c] != op[1]->value.f[c]; - break; - case GLSL_TYPE_BOOL: - data.b[c] = op[0]->value.b[c] != op[1]->value.b[c]; - break; - case GLSL_TYPE_DOUBLE: - data.b[c] = op[0]->value.d[c] != op[1]->value.d[c]; - break; - default: - assert(0); - } - } - break; - case ir_binop_all_equal: - data.b[0] = op[0]->has_value(op[1]); - break; - case ir_binop_any_nequal: - data.b[0] = !op[0]->has_value(op[1]); - break; - - case ir_binop_lshift: - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - if (op[0]->type->base_type == GLSL_TYPE_INT && - op[1]->type->base_type == GLSL_TYPE_INT) { - data.i[c] = op[0]->value.i[c0] << op[1]->value.i[c1]; - - } else if (op[0]->type->base_type == GLSL_TYPE_INT && - op[1]->type->base_type == GLSL_TYPE_UINT) { - data.i[c] = op[0]->value.i[c0] << op[1]->value.u[c1]; - - } else if (op[0]->type->base_type == GLSL_TYPE_UINT && - op[1]->type->base_type == GLSL_TYPE_INT) { - data.u[c] = op[0]->value.u[c0] << op[1]->value.i[c1]; - - } else if (op[0]->type->base_type == GLSL_TYPE_UINT && - op[1]->type->base_type == GLSL_TYPE_UINT) { - data.u[c] = op[0]->value.u[c0] << op[1]->value.u[c1]; - } - } - break; - - case ir_binop_rshift: - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - if (op[0]->type->base_type == GLSL_TYPE_INT && - op[1]->type->base_type == GLSL_TYPE_INT) { - data.i[c] = op[0]->value.i[c0] >> op[1]->value.i[c1]; - - } else if (op[0]->type->base_type == GLSL_TYPE_INT && - op[1]->type->base_type == GLSL_TYPE_UINT) { - data.i[c] = op[0]->value.i[c0] >> op[1]->value.u[c1]; - - } else if (op[0]->type->base_type == GLSL_TYPE_UINT && - op[1]->type->base_type == GLSL_TYPE_INT) { - data.u[c] = op[0]->value.u[c0] >> op[1]->value.i[c1]; - - } else if (op[0]->type->base_type == GLSL_TYPE_UINT && - op[1]->type->base_type == GLSL_TYPE_UINT) { - data.u[c] = op[0]->value.u[c0] >> op[1]->value.u[c1]; - } - } - break; - - case ir_binop_bit_and: - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - switch (op[0]->type->base_type) { - case GLSL_TYPE_INT: - data.i[c] = op[0]->value.i[c0] & op[1]->value.i[c1]; - break; - case GLSL_TYPE_UINT: - data.u[c] = op[0]->value.u[c0] & op[1]->value.u[c1]; - break; - default: - assert(0); - } - } - break; - - case ir_binop_bit_or: - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - switch (op[0]->type->base_type) { - case GLSL_TYPE_INT: - data.i[c] = op[0]->value.i[c0] | op[1]->value.i[c1]; - break; - case GLSL_TYPE_UINT: - data.u[c] = op[0]->value.u[c0] | op[1]->value.u[c1]; - break; - default: - assert(0); - } - } - break; - - case ir_binop_vector_extract: { - const int c = CLAMP(op[1]->value.i[0], 0, - (int) op[0]->type->vector_elements - 1); - - switch (op[0]->type->base_type) { - case GLSL_TYPE_UINT: - data.u[0] = op[0]->value.u[c]; - break; - case GLSL_TYPE_INT: - data.i[0] = op[0]->value.i[c]; - break; - case GLSL_TYPE_FLOAT: - data.f[0] = op[0]->value.f[c]; - break; - case GLSL_TYPE_DOUBLE: - data.d[0] = op[0]->value.d[c]; - break; - case GLSL_TYPE_BOOL: - data.b[0] = op[0]->value.b[c]; - break; - 
default: - assert(0); - } - break; - } - - case ir_binop_bit_xor: - for (unsigned c = 0, c0 = 0, c1 = 0; - c < components; - c0 += c0_inc, c1 += c1_inc, c++) { - - switch (op[0]->type->base_type) { - case GLSL_TYPE_INT: - data.i[c] = op[0]->value.i[c0] ^ op[1]->value.i[c1]; - break; - case GLSL_TYPE_UINT: - data.u[c] = op[0]->value.u[c0] ^ op[1]->value.u[c1]; - break; - default: - assert(0); - } - } - break; - - case ir_unop_bitfield_reverse: - /* http://graphics.stanford.edu/~seander/bithacks.html#BitReverseObvious */ - for (unsigned c = 0; c < components; c++) { - unsigned int v = op[0]->value.u[c]; // input bits to be reversed - unsigned int r = v; // r will be reversed bits of v; first get LSB of v - int s = sizeof(v) * CHAR_BIT - 1; // extra shift needed at end - - for (v >>= 1; v; v >>= 1) { - r <<= 1; - r |= v & 1; - s--; - } - r <<= s; // shift when v's highest bits are zero - - data.u[c] = r; - } - break; - - case ir_unop_bit_count: - for (unsigned c = 0; c < components; c++) { - unsigned count = 0; - unsigned v = op[0]->value.u[c]; - - for (; v; count++) { - v &= v - 1; - } - data.u[c] = count; - } - break; - - case ir_unop_find_msb: - for (unsigned c = 0; c < components; c++) { - int v = op[0]->value.i[c]; - - if (v == 0 || (op[0]->type->base_type == GLSL_TYPE_INT && v == -1)) - data.i[c] = -1; - else { - int count = 0; - unsigned top_bit = op[0]->type->base_type == GLSL_TYPE_UINT - ? 0 : v & (1u << 31); - - while (((v & (1u << 31)) == top_bit) && count != 32) { - count++; - v <<= 1; - } - - data.i[c] = 31 - count; - } - } - break; - - case ir_unop_find_lsb: - for (unsigned c = 0; c < components; c++) { - if (op[0]->value.i[c] == 0) - data.i[c] = -1; - else { - unsigned pos = 0; - unsigned v = op[0]->value.u[c]; - - for (; !(v & 1); v >>= 1) { - pos++; - } - data.u[c] = pos; - } - } - break; - - case ir_unop_saturate: - for (unsigned c = 0; c < components; c++) { - data.f[c] = CLAMP(op[0]->value.f[c], 0.0f, 1.0f); - } - break; - case ir_unop_pack_double_2x32: { - /* XXX needs to be checked on big-endian */ - uint64_t temp; - temp = (uint64_t)op[0]->value.u[0] | ((uint64_t)op[0]->value.u[1] << 32); - data.d[0] = *(double *)&temp; - - break; - } - case ir_unop_unpack_double_2x32: - /* XXX needs to be checked on big-endian */ - data.u[0] = *(uint32_t *)&op[0]->value.d[0]; - data.u[1] = *((uint32_t *)&op[0]->value.d[0] + 1); - break; - - case ir_triop_bitfield_extract: { - for (unsigned c = 0; c < components; c++) { - int offset = op[1]->value.i[c]; - int bits = op[2]->value.i[c]; - - if (bits == 0) - data.u[c] = 0; - else if (offset < 0 || bits < 0) - data.u[c] = 0; /* Undefined, per spec. */ - else if (offset + bits > 32) - data.u[c] = 0; /* Undefined, per spec. */ - else { - if (op[0]->type->base_type == GLSL_TYPE_INT) { - /* int so that the right shift will sign-extend. */ - int value = op[0]->value.i[c]; - value <<= 32 - bits - offset; - value >>= 32 - bits; - data.i[c] = value; - } else { - unsigned value = op[0]->value.u[c]; - value <<= 32 - bits - offset; - value >>= 32 - bits; - data.u[c] = value; - } - } - } - break; - } - - case ir_binop_ldexp: - for (unsigned c = 0; c < components; c++) { - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) { - data.d[c] = ldexp(op[0]->value.d[c], op[1]->value.i[c]); - /* Flush subnormal values to zero. */ - if (!isnormal(data.d[c])) - data.d[c] = copysign(0.0, op[0]->value.d[c]); - } else { - data.f[c] = ldexpf(op[0]->value.f[c], op[1]->value.i[c]); - /* Flush subnormal values to zero. 
*/ - if (!isnormal(data.f[c])) - data.f[c] = copysignf(0.0f, op[0]->value.f[c]); - } - } - break; - - case ir_triop_fma: - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT || - op[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(op[1]->type->base_type == GLSL_TYPE_FLOAT || - op[1]->type->base_type == GLSL_TYPE_DOUBLE); - assert(op[2]->type->base_type == GLSL_TYPE_FLOAT || - op[2]->type->base_type == GLSL_TYPE_DOUBLE); - - for (unsigned c = 0; c < components; c++) { - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[c] = op[0]->value.d[c] * op[1]->value.d[c] - + op[2]->value.d[c]; - else - data.f[c] = op[0]->value.f[c] * op[1]->value.f[c] - + op[2]->value.f[c]; - } - break; - - case ir_triop_lrp: { - assert(op[0]->type->base_type == GLSL_TYPE_FLOAT || - op[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(op[1]->type->base_type == GLSL_TYPE_FLOAT || - op[1]->type->base_type == GLSL_TYPE_DOUBLE); - assert(op[2]->type->base_type == GLSL_TYPE_FLOAT || - op[2]->type->base_type == GLSL_TYPE_DOUBLE); - - unsigned c2_inc = op[2]->type->is_scalar() ? 0 : 1; - for (unsigned c = 0, c2 = 0; c < components; c2 += c2_inc, c++) { - if (op[0]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[c] = op[0]->value.d[c] * (1.0 - op[2]->value.d[c2]) + - (op[1]->value.d[c] * op[2]->value.d[c2]); - else - data.f[c] = op[0]->value.f[c] * (1.0f - op[2]->value.f[c2]) + - (op[1]->value.f[c] * op[2]->value.f[c2]); - } - break; - } - - case ir_triop_csel: - for (unsigned c = 0; c < components; c++) { - if (op[1]->type->base_type == GLSL_TYPE_DOUBLE) - data.d[c] = op[0]->value.b[c] ? op[1]->value.d[c] - : op[2]->value.d[c]; - else - data.u[c] = op[0]->value.b[c] ? op[1]->value.u[c] - : op[2]->value.u[c]; - } - break; - - case ir_triop_vector_insert: { - const unsigned idx = op[2]->value.u[0]; - - memcpy(&data, &op[0]->value, sizeof(data)); - - switch (this->type->base_type) { - case GLSL_TYPE_INT: - data.i[idx] = op[1]->value.i[0]; - break; - case GLSL_TYPE_UINT: - data.u[idx] = op[1]->value.u[0]; - break; - case GLSL_TYPE_FLOAT: - data.f[idx] = op[1]->value.f[0]; - break; - case GLSL_TYPE_BOOL: - data.b[idx] = op[1]->value.b[0]; - break; - case GLSL_TYPE_DOUBLE: - data.d[idx] = op[1]->value.d[0]; - break; - default: - assert(!"Should not get here."); - break; - } - break; - } - - case ir_quadop_bitfield_insert: { - for (unsigned c = 0; c < components; c++) { - int offset = op[2]->value.i[c]; - int bits = op[3]->value.i[c]; - - if (bits == 0) - data.u[c] = op[0]->value.u[c]; - else if (offset < 0 || bits < 0) - data.u[c] = 0; /* Undefined, per spec. */ - else if (offset + bits > 32) - data.u[c] = 0; /* Undefined, per spec. */ - else { - unsigned insert_mask = ((1ull << bits) - 1) << offset; - - unsigned insert = op[1]->value.u[c]; - insert <<= offset; - insert &= insert_mask; - - unsigned base = op[0]->value.u[c]; - base &= ~insert_mask; - - data.u[c] = base | insert; - } - } - break; - } - - case ir_quadop_vector: - for (unsigned c = 0; c < this->type->vector_elements; c++) { - switch (this->type->base_type) { - case GLSL_TYPE_INT: - data.i[c] = op[c]->value.i[0]; - break; - case GLSL_TYPE_UINT: - data.u[c] = op[c]->value.u[0]; - break; - case GLSL_TYPE_FLOAT: - data.f[c] = op[c]->value.f[0]; - break; - case GLSL_TYPE_DOUBLE: - data.d[c] = op[c]->value.d[0]; - break; - default: - assert(0); - } - } - break; - - default: - /* FINISHME: Should handle all expression types. 
*/ - return NULL; - } - - return new(ctx) ir_constant(this->type, &data); -} - - -ir_constant * -ir_texture::constant_expression_value(struct hash_table *) -{ - /* texture lookups aren't constant expressions */ - return NULL; -} - - -ir_constant * -ir_swizzle::constant_expression_value(struct hash_table *variable_context) -{ - ir_constant *v = this->val->constant_expression_value(variable_context); - - if (v != NULL) { - ir_constant_data data = { { 0 } }; - - const unsigned swiz_idx[4] = { - this->mask.x, this->mask.y, this->mask.z, this->mask.w - }; - - for (unsigned i = 0; i < this->mask.num_components; i++) { - switch (v->type->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: data.u[i] = v->value.u[swiz_idx[i]]; break; - case GLSL_TYPE_FLOAT: data.f[i] = v->value.f[swiz_idx[i]]; break; - case GLSL_TYPE_BOOL: data.b[i] = v->value.b[swiz_idx[i]]; break; - case GLSL_TYPE_DOUBLE:data.d[i] = v->value.d[swiz_idx[i]]; break; - default: assert(!"Should not get here."); break; - } - } - - void *ctx = ralloc_parent(this); - return new(ctx) ir_constant(this->type, &data); - } - return NULL; -} - - -ir_constant * -ir_dereference_variable::constant_expression_value(struct hash_table *variable_context) -{ - assert(var); - - /* Give priority to the context hashtable, if it exists */ - if (variable_context) { - ir_constant *value = (ir_constant *)hash_table_find(variable_context, var); - if(value) - return value; - } - - /* The constant_value of a uniform variable is its initializer, - * not the lifetime constant value of the uniform. - */ - if (var->data.mode == ir_var_uniform) - return NULL; - - if (!var->constant_value) - return NULL; - - return var->constant_value->clone(ralloc_parent(var), NULL); -} - - -ir_constant * -ir_dereference_array::constant_expression_value(struct hash_table *variable_context) -{ - ir_constant *array = this->array->constant_expression_value(variable_context); - ir_constant *idx = this->array_index->constant_expression_value(variable_context); - - if ((array != NULL) && (idx != NULL)) { - void *ctx = ralloc_parent(this); - if (array->type->is_matrix()) { - /* Array access of a matrix results in a vector. - */ - const unsigned column = idx->value.u[0]; - - const glsl_type *const column_type = array->type->column_type(); - - /* Offset in the constant matrix to the first element of the column - * to be extracted. - */ - const unsigned mat_idx = column * column_type->vector_elements; - - ir_constant_data data = { { 0 } }; - - switch (column_type->base_type) { - case GLSL_TYPE_UINT: - case GLSL_TYPE_INT: - for (unsigned i = 0; i < column_type->vector_elements; i++) - data.u[i] = array->value.u[mat_idx + i]; - - break; - - case GLSL_TYPE_FLOAT: - for (unsigned i = 0; i < column_type->vector_elements; i++) - data.f[i] = array->value.f[mat_idx + i]; - - break; - - case GLSL_TYPE_DOUBLE: - for (unsigned i = 0; i < column_type->vector_elements; i++) - data.d[i] = array->value.d[mat_idx + i]; - - break; - - default: - assert(!"Should not get here."); - break; - } - - return new(ctx) ir_constant(column_type, &data); - } else if (array->type->is_vector()) { - const unsigned component = idx->value.u[0]; - - return new(ctx) ir_constant(array, component); - } else { - const unsigned index = idx->value.u[0]; - return array->get_array_element(index)->clone(ctx, NULL); - } - } - return NULL; -} - - -ir_constant * -ir_dereference_record::constant_expression_value(struct hash_table *) -{ - ir_constant *v = this->record->constant_expression_value(); - - return (v != NULL) ? 
v->get_record_field(this->field) : NULL; -} - - -ir_constant * -ir_assignment::constant_expression_value(struct hash_table *) -{ - /* FINISHME: Handle CEs involving assignment (return RHS) */ - return NULL; -} - - -ir_constant * -ir_constant::constant_expression_value(struct hash_table *) -{ - return this; -} - - -ir_constant * -ir_call::constant_expression_value(struct hash_table *variable_context) -{ - return this->callee->constant_expression_value(&this->actual_parameters, variable_context); -} - - -bool ir_function_signature::constant_expression_evaluate_expression_list(const struct exec_list &body, - struct hash_table *variable_context, - ir_constant **result) -{ - foreach_in_list(ir_instruction, inst, &body) { - switch(inst->ir_type) { - - /* (declare () type symbol) */ - case ir_type_variable: { - ir_variable *var = inst->as_variable(); - hash_table_insert(variable_context, ir_constant::zero(this, var->type), var); - break; - } - - /* (assign [condition] (write-mask) (ref) (value)) */ - case ir_type_assignment: { - ir_assignment *asg = inst->as_assignment(); - if (asg->condition) { - ir_constant *cond = asg->condition->constant_expression_value(variable_context); - if (!cond) - return false; - if (!cond->get_bool_component(0)) - break; - } - - ir_constant *store = NULL; - int offset = 0; - - if (!constant_referenced(asg->lhs, variable_context, store, offset)) - return false; - - ir_constant *value = asg->rhs->constant_expression_value(variable_context); - - if (!value) - return false; - - store->copy_masked_offset(value, offset, asg->write_mask); - break; - } - - /* (return (expression)) */ - case ir_type_return: - assert (result); - *result = inst->as_return()->value->constant_expression_value(variable_context); - return *result != NULL; - - /* (call name (ref) (params))*/ - case ir_type_call: { - ir_call *call = inst->as_call(); - - /* Just say no to void functions in constant expressions. We - * don't need them at that point. - */ - - if (!call->return_deref) - return false; - - ir_constant *store = NULL; - int offset = 0; - - if (!constant_referenced(call->return_deref, variable_context, - store, offset)) - return false; - - ir_constant *value = call->constant_expression_value(variable_context); - - if(!value) - return false; - - store->copy_offset(value, offset); - break; - } - - /* (if condition (then-instructions) (else-instructions)) */ - case ir_type_if: { - ir_if *iif = inst->as_if(); - - ir_constant *cond = iif->condition->constant_expression_value(variable_context); - if (!cond || !cond->type->is_boolean()) - return false; - - exec_list &branch = cond->get_bool_component(0) ? iif->then_instructions : iif->else_instructions; - - *result = NULL; - if (!constant_expression_evaluate_expression_list(branch, variable_context, result)) - return false; - - /* If there was a return in the branch chosen, drop out now. */ - if (*result) - return true; - - break; - } - - /* Every other expression type, we drop out. */ - default: - return false; - } - } - - /* Reaching the end of the block is not an error condition */ - if (result) - *result = NULL; - - return true; -} - -ir_constant * -ir_function_signature::constant_expression_value(exec_list *actual_parameters, struct hash_table *variable_context) -{ - const glsl_type *type = this->return_type; - if (type == glsl_type::void_type) - return NULL; - - /* From the GLSL 1.20 spec, page 23: - * "Function calls to user-defined functions (non-built-in functions) - * cannot be used to form constant expressions." 
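To make the rule quoted above concrete: the function gates on the callee being a built-in, and a call folds only if every actual parameter folds first. A much-simplified model of that gate (std::optional stands in for "ir_constant or NULL"; not this patch's IR classes):

   #include <optional>
   #include <vector>

   /* A call is a constant expression only if the callee is built-in
    * and every argument folded to a constant.
    */
   static bool call_is_constant(bool callee_is_builtin,
                                const std::vector<std::optional<double>> &args)
   {
      if (!callee_is_builtin)
         return false; /* user functions never form constant expressions */

      for (const auto &arg : args)
         if (!arg)
            return false; /* one non-constant argument poisons the call */

      return true;
   }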
- */ - if (!this->is_builtin()) - return NULL; - - /* - * Of the builtin functions, only the texture lookups and the noise - * ones must not be used in constant expressions. They all include - * specific opcodes so they don't need to be special-cased at this - * point. - */ - - /* Initialize the table of dereferencable names with the function - * parameters. Verify their const-ness on the way. - * - * We expect the correctness of the number of parameters to have - * been checked earlier. - */ - hash_table *deref_hash = hash_table_ctor(8, hash_table_pointer_hash, - hash_table_pointer_compare); - - /* If "origin" is non-NULL, then the function body is there. So we - * have to use the variable objects from the object with the body, - * but the parameter instanciation on the current object. - */ - const exec_node *parameter_info = origin ? origin->parameters.head : parameters.head; - - foreach_in_list(ir_rvalue, n, actual_parameters) { - ir_constant *constant = n->constant_expression_value(variable_context); - if (constant == NULL) { - hash_table_dtor(deref_hash); - return NULL; - } - - - ir_variable *var = (ir_variable *)parameter_info; - hash_table_insert(deref_hash, constant, var); - - parameter_info = parameter_info->next; - } - - ir_constant *result = NULL; - - /* Now run the builtin function until something non-constant - * happens or we get the result. - */ - if (constant_expression_evaluate_expression_list(origin ? origin->body : body, deref_hash, &result) && result) - result = result->clone(ralloc_parent(this), NULL); - - hash_table_dtor(deref_hash); - - return result; -} diff --git a/src/glsl/ir_equals.cpp b/src/glsl/ir_equals.cpp deleted file mode 100644 index b86f4ea16bb..00000000000 --- a/src/glsl/ir_equals.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "ir.h" - -/** - * Helper for checking equality when one instruction might be NULL, since you - * can't access a's vtable in that case. - */ -static bool -possibly_null_equals(const ir_instruction *a, const ir_instruction *b, - enum ir_node_type ignore) -{ - if (!a || !b) - return !a && !b; - - return a->equals(b, ignore); -} - -/** - * The base equality function: Return not equal for anything we don't know - * about. 
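The possibly_null_equals() helper above generalizes to any node type compared through a virtual equals(); a minimal template rendering of the same null-safe contract (illustrative, not part of the patch):

   /* Null-safe structural equality in the style of possibly_null_equals():
    * two nulls compare equal, a null and a non-null never do.
    */
   template <typename Node>
   static bool equals_or_both_null(const Node *a, const Node *b)
   {
      if (!a || !b)
         return !a && !b;

      return a->equals(b);
   }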
- */ -bool -ir_instruction::equals(const ir_instruction *, enum ir_node_type) const -{ - return false; -} - -bool -ir_constant::equals(const ir_instruction *ir, enum ir_node_type) const -{ - const ir_constant *other = ir->as_constant(); - if (!other) - return false; - - if (type != other->type) - return false; - - for (unsigned i = 0; i < type->components(); i++) { - if (type->base_type == GLSL_TYPE_DOUBLE) { - if (value.d[i] != other->value.d[i]) - return false; - } else { - if (value.u[i] != other->value.u[i]) - return false; - } - } - - return true; -} - -bool -ir_dereference_variable::equals(const ir_instruction *ir, - enum ir_node_type) const -{ - const ir_dereference_variable *other = ir->as_dereference_variable(); - if (!other) - return false; - - return var == other->var; -} - -bool -ir_dereference_array::equals(const ir_instruction *ir, - enum ir_node_type ignore) const -{ - const ir_dereference_array *other = ir->as_dereference_array(); - if (!other) - return false; - - if (type != other->type) - return false; - - if (!array->equals(other->array, ignore)) - return false; - - if (!array_index->equals(other->array_index, ignore)) - return false; - - return true; -} - -bool -ir_swizzle::equals(const ir_instruction *ir, - enum ir_node_type ignore) const -{ - const ir_swizzle *other = ir->as_swizzle(); - if (!other) - return false; - - if (type != other->type) - return false; - - if (ignore != ir_type_swizzle) { - if (mask.x != other->mask.x || - mask.y != other->mask.y || - mask.z != other->mask.z || - mask.w != other->mask.w) { - return false; - } - } - - return val->equals(other->val, ignore); -} - -bool -ir_texture::equals(const ir_instruction *ir, enum ir_node_type ignore) const -{ - const ir_texture *other = ir->as_texture(); - if (!other) - return false; - - if (type != other->type) - return false; - - if (op != other->op) - return false; - - if (!possibly_null_equals(coordinate, other->coordinate, ignore)) - return false; - - if (!possibly_null_equals(projector, other->projector, ignore)) - return false; - - if (!possibly_null_equals(shadow_comparitor, other->shadow_comparitor, ignore)) - return false; - - if (!possibly_null_equals(offset, other->offset, ignore)) - return false; - - if (!sampler->equals(other->sampler, ignore)) - return false; - - switch (op) { - case ir_tex: - case ir_lod: - case ir_query_levels: - case ir_texture_samples: - case ir_samples_identical: - break; - case ir_txb: - if (!lod_info.bias->equals(other->lod_info.bias, ignore)) - return false; - break; - case ir_txl: - case ir_txf: - case ir_txs: - if (!lod_info.lod->equals(other->lod_info.lod, ignore)) - return false; - break; - case ir_txd: - if (!lod_info.grad.dPdx->equals(other->lod_info.grad.dPdx, ignore) || - !lod_info.grad.dPdy->equals(other->lod_info.grad.dPdy, ignore)) - return false; - break; - case ir_txf_ms: - if (!lod_info.sample_index->equals(other->lod_info.sample_index, ignore)) - return false; - break; - case ir_tg4: - if (!lod_info.component->equals(other->lod_info.component, ignore)) - return false; - break; - default: - assert(!"Unrecognized texture op"); - } - - return true; -} - -bool -ir_expression::equals(const ir_instruction *ir, enum ir_node_type ignore) const -{ - const ir_expression *other = ir->as_expression(); - if (!other) - return false; - - if (type != other->type) - return false; - - if (operation != other->operation) - return false; - - for (unsigned i = 0; i < get_num_operands(); i++) { - if (!operands[i]->equals(other->operands[i], ignore)) - return false; - } - - 
return true; -} diff --git a/src/glsl/ir_expression_flattening.cpp b/src/glsl/ir_expression_flattening.cpp deleted file mode 100644 index c13ae811d78..00000000000 --- a/src/glsl/ir_expression_flattening.cpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_expression_flattening.cpp - * - * Takes the leaves of expression trees and makes them dereferences of - * assignments of the leaves to temporaries, according to a predicate. - * - * This is used for breaking down matrix operations, where it's easier to - * create a temporary and work on each of its vector components individually. - */ - -#include "ir.h" -#include "ir_rvalue_visitor.h" -#include "ir_expression_flattening.h" - -class ir_expression_flattening_visitor : public ir_rvalue_visitor { -public: - ir_expression_flattening_visitor(bool (*predicate)(ir_instruction *ir)) - { - this->predicate = predicate; - } - - virtual ~ir_expression_flattening_visitor() - { - /* empty */ - } - - void handle_rvalue(ir_rvalue **rvalue); - bool (*predicate)(ir_instruction *ir); -}; - -void -do_expression_flattening(exec_list *instructions, - bool (*predicate)(ir_instruction *ir)) -{ - ir_expression_flattening_visitor v(predicate); - - foreach_in_list(ir_instruction, ir, instructions) { - ir->accept(&v); - } -} - -void -ir_expression_flattening_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - ir_variable *var; - ir_assignment *assign; - ir_rvalue *ir = *rvalue; - - if (!ir || !this->predicate(ir)) - return; - - void *ctx = ralloc_parent(ir); - - var = new(ctx) ir_variable(ir->type, "flattening_tmp", ir_var_temporary); - base_ir->insert_before(var); - - assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(var), - ir, - NULL); - base_ir->insert_before(assign); - - *rvalue = new(ctx) ir_dereference_variable(var); -} diff --git a/src/glsl/ir_expression_flattening.h b/src/glsl/ir_expression_flattening.h deleted file mode 100644 index 2eda1590001..00000000000 --- a/src/glsl/ir_expression_flattening.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or 
sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - - -/** - * \file ir_expression_flattening.h - * - * Takes the leaves of expression trees and makes them dereferences of - * assignments of the leaves to temporaries, according to a predicate. - * - * This is used for automatic function inlining, where we want to take - * an expression containing a call and move the call out to its own - * assignment so that we can inline it at the appropriate place in the - * instruction stream. - */ - -void do_expression_flattening(exec_list *instructions, - bool (*predicate)(ir_instruction *ir)); diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp deleted file mode 100644 index 0b4cb4bd30d..00000000000 --- a/src/glsl/ir_function.cpp +++ /dev/null @@ -1,404 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "compiler/glsl_types.h" -#include "ir.h" -#include "glsl_parser_extras.h" -#include "main/errors.h" - -typedef enum { - PARAMETER_LIST_NO_MATCH, - PARAMETER_LIST_EXACT_MATCH, - PARAMETER_LIST_INEXACT_MATCH /*< Match requires implicit conversion. */ -} parameter_list_match_t; - -/** - * \brief Check if two parameter lists match. - * - * \param list_a Parameters of the function definition. - * \param list_b Actual parameters passed to the function. - * \see matching_signature() - */ -static parameter_list_match_t -parameter_lists_match(_mesa_glsl_parse_state *state, - const exec_list *list_a, const exec_list *list_b) -{ - const exec_node *node_a = list_a->head; - const exec_node *node_b = list_b->head; - - /* This is set to true if there is an inexact match requiring an implicit - * conversion. 
*/ - bool inexact_match = false; - - for (/* empty */ - ; !node_a->is_tail_sentinel() - ; node_a = node_a->next, node_b = node_b->next) { - /* If all of the parameters from the other parameter list have been - * exhausted, the lists have different length and, by definition, - * do not match. - */ - if (node_b->is_tail_sentinel()) - return PARAMETER_LIST_NO_MATCH; - - - const ir_variable *const param = (ir_variable *) node_a; - const ir_rvalue *const actual = (ir_rvalue *) node_b; - - if (param->type == actual->type) - continue; - - /* Try to find an implicit conversion from actual to param. */ - inexact_match = true; - switch ((enum ir_variable_mode)(param->data.mode)) { - case ir_var_auto: - case ir_var_uniform: - case ir_var_shader_storage: - case ir_var_temporary: - /* These are all error conditions. It is invalid for a parameter to - * a function to be declared as auto (not in, out, or inout) or - * as uniform. - */ - assert(0); - return PARAMETER_LIST_NO_MATCH; - - case ir_var_const_in: - case ir_var_function_in: - if (!actual->type->can_implicitly_convert_to(param->type, state)) - return PARAMETER_LIST_NO_MATCH; - break; - - case ir_var_function_out: - if (!param->type->can_implicitly_convert_to(actual->type, state)) - return PARAMETER_LIST_NO_MATCH; - break; - - case ir_var_function_inout: - /* Since there are no bi-directional automatic conversions (e.g., - * there is int -> float but no float -> int), inout parameters must - * be exact matches. - */ - return PARAMETER_LIST_NO_MATCH; - - default: - assert(false); - return PARAMETER_LIST_NO_MATCH; - } - } - - /* If all of the parameters from the other parameter list have been - * exhausted, the lists have different length and, by definition, do not - * match. - */ - if (!node_b->is_tail_sentinel()) - return PARAMETER_LIST_NO_MATCH; - - if (inexact_match) - return PARAMETER_LIST_INEXACT_MATCH; - else - return PARAMETER_LIST_EXACT_MATCH; -} - - -/* Classes of parameter match, sorted (mostly) best matches first. - * See is_better_parameter_match() below for the exceptions. - * */ -typedef enum { - PARAMETER_EXACT_MATCH, - PARAMETER_FLOAT_TO_DOUBLE, - PARAMETER_INT_TO_FLOAT, - PARAMETER_INT_TO_DOUBLE, - PARAMETER_OTHER_CONVERSION, -} parameter_match_t; - - -static parameter_match_t -get_parameter_match_type(const ir_variable *param, - const ir_rvalue *actual) -{ - const glsl_type *from_type; - const glsl_type *to_type; - - if (param->data.mode == ir_var_function_out) { - from_type = param->type; - to_type = actual->type; - } else { - from_type = actual->type; - to_type = param->type; - } - - if (from_type == to_type) - return PARAMETER_EXACT_MATCH; - - if (to_type->base_type == GLSL_TYPE_DOUBLE) { - if (from_type->base_type == GLSL_TYPE_FLOAT) - return PARAMETER_FLOAT_TO_DOUBLE; - return PARAMETER_INT_TO_DOUBLE; - } - - if (to_type->base_type == GLSL_TYPE_FLOAT) - return PARAMETER_INT_TO_FLOAT; - - /* int -> uint and any other oddball conversions */ - return PARAMETER_OTHER_CONVERSION; -} - - -static bool -is_better_parameter_match(parameter_match_t a_match, - parameter_match_t b_match) -{ - /* From section 6.1 of the GLSL 4.00 spec (and the ARB_gpu_shader5 spec): - * - * 1. An exact match is better than a match involving any implicit - * conversion. - * - * 2. A match involving an implicit conversion from float to double - * is better than match involving any other implicit conversion. - * - * [XXX: Not in GLSL 4.0: Only in ARB_gpu_shader5: - * 3. 
A match involving an implicit conversion from either int or uint - * to float is better than a match involving an implicit conversion - * from either int or uint to double.] - * - * If none of the rules above apply to a particular pair of conversions, - * neither conversion is considered better than the other. - * - * -- - * - * Notably, the int->uint conversion is *not* considered to be better - * or worse than int/uint->float or int/uint->double. - */ - - if (a_match >= PARAMETER_INT_TO_FLOAT && b_match == PARAMETER_OTHER_CONVERSION) - return false; - - return a_match < b_match; -} - - -static bool -is_best_inexact_overload(const exec_list *actual_parameters, - ir_function_signature **matches, - int num_matches, - ir_function_signature *sig) -{ - /* From section 6.1 of the GLSL 4.00 spec (and the ARB_gpu_shader5 spec): - * - * "A function definition A is considered a better - * match than function definition B if: - * - * * for at least one function argument, the conversion for that argument - * in A is better than the corresponding conversion in B; and - * - * * there is no function argument for which the conversion in B is better - * than the corresponding conversion in A. - * - * If a single function definition is considered a better match than every - * other matching function definition, it will be used. Otherwise, a - * semantic error occurs and the shader will fail to compile." - */ - for (ir_function_signature **other = matches; - other < matches + num_matches; other++) { - if (*other == sig) - continue; - - const exec_node *node_a = sig->parameters.head; - const exec_node *node_b = (*other)->parameters.head; - const exec_node *node_p = actual_parameters->head; - - bool better_for_some_parameter = false; - - for (/* empty */ - ; !node_a->is_tail_sentinel() - ; node_a = node_a->next, - node_b = node_b->next, - node_p = node_p->next) { - parameter_match_t a_match = get_parameter_match_type( - (const ir_variable *)node_a, - (const ir_rvalue *)node_p); - parameter_match_t b_match = get_parameter_match_type( - (const ir_variable *)node_b, - (const ir_rvalue *)node_p); - - if (is_better_parameter_match(a_match, b_match)) - better_for_some_parameter = true; - - if (is_better_parameter_match(b_match, a_match)) - return false; /* B is better for this parameter */ - } - - if (!better_for_some_parameter) - return false; /* A must be better than B for some parameter */ - - } - - return true; -} - - -static ir_function_signature * -choose_best_inexact_overload(_mesa_glsl_parse_state *state, - const exec_list *actual_parameters, - ir_function_signature **matches, - int num_matches) -{ - if (num_matches == 0) - return NULL; - - if (num_matches == 1) - return *matches; - - /* Without GLSL 4.0 / ARB_gpu_shader5, there is no overload resolution - * among multiple inexact matches. Note that state may be NULL here if - * called from the linker; in that case we assume everything supported in - * any GLSL version is available. 
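A compact model of the ranking that is_better_parameter_match() encodes, including the carve-out that int<->uint conversions are not ordered against the int/uint-to-float/double conversions (names are illustrative):

   enum match_rank {
      EXACT,           /* types identical */
      FLOAT_TO_DOUBLE, /* best implicit conversion */
      INT_TO_FLOAT,
      INT_TO_DOUBLE,
      OTHER            /* int <-> uint and everything else */
   };

   /* Lower rank wins, except that OTHER is not comparable with the
    * int/uint-to-float/double ranks, per the spec text quoted above.
    */
   static bool is_better_match(match_rank a, match_rank b)
   {
      if (a >= INT_TO_FLOAT && b == OTHER)
         return false;

      return a < b;
   }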
*/ - if (!state || state->is_version(400, 0) || state->ARB_gpu_shader5_enable) { - for (ir_function_signature **sig = matches; sig < matches + num_matches; sig++) { - if (is_best_inexact_overload(actual_parameters, matches, num_matches, *sig)) - return *sig; - } - } - - return NULL; /* no best candidate */ -} - - -ir_function_signature * -ir_function::matching_signature(_mesa_glsl_parse_state *state, - const exec_list *actual_parameters, - bool allow_builtins) -{ - bool is_exact; - return matching_signature(state, actual_parameters, allow_builtins, - &is_exact); -} - -ir_function_signature * -ir_function::matching_signature(_mesa_glsl_parse_state *state, - const exec_list *actual_parameters, - bool allow_builtins, - bool *is_exact) -{ - ir_function_signature **inexact_matches = NULL; - ir_function_signature **inexact_matches_temp; - ir_function_signature *match = NULL; - int num_inexact_matches = 0; - - /* From page 42 (page 49 of the PDF) of the GLSL 1.20 spec: - * - * "If an exact match is found, the other signatures are ignored, and - * the exact match is used. Otherwise, if no exact match is found, then - * the implicit conversions in Section 4.1.10 "Implicit Conversions" will - * be applied to the calling arguments if this can make their types match - * a signature. In this case, it is a semantic error if there are - * multiple ways to apply these conversions to the actual arguments of a - * call such that the call can be made to match multiple signatures." - */ - foreach_in_list(ir_function_signature, sig, &this->signatures) { - /* Skip over any built-ins that aren't available in this shader. */ - if (sig->is_builtin() && (!allow_builtins || - !sig->is_builtin_available(state))) - continue; - - switch (parameter_lists_match(state, & sig->parameters, actual_parameters)) { - case PARAMETER_LIST_EXACT_MATCH: - *is_exact = true; - free(inexact_matches); - return sig; - case PARAMETER_LIST_INEXACT_MATCH: - inexact_matches_temp = (ir_function_signature **) - realloc(inexact_matches, - sizeof(*inexact_matches) * - (num_inexact_matches + 1)); - if (inexact_matches_temp == NULL) { - _mesa_error_no_memory(__func__); - free(inexact_matches); - return NULL; - } - inexact_matches = inexact_matches_temp; - inexact_matches[num_inexact_matches++] = sig; - continue; - case PARAMETER_LIST_NO_MATCH: - continue; - default: - assert(false); - return NULL; - } - } - - /* There is no exact match (we would have returned it by now). If there - * are multiple inexact matches, the call is ambiguous, which is an error. - * - * FINISHME: Report a decent error. Returning NULL will likely result in - * FINISHME: a "no matching signature" error; it should report that the - * FINISHME: call is ambiguous. But reporting errors from here is hard. - */ - *is_exact = false; - - match = choose_best_inexact_overload(state, actual_parameters, - inexact_matches, num_inexact_matches); - - free(inexact_matches); - return match; -} - - -static bool -parameter_lists_match_exact(const exec_list *list_a, const exec_list *list_b) -{ - const exec_node *node_a = list_a->head; - const exec_node *node_b = list_b->head; - - for (/* empty */ - ; !node_a->is_tail_sentinel() && !node_b->is_tail_sentinel() - ; node_a = node_a->next, node_b = node_b->next) { - ir_variable *a = (ir_variable *) node_a; - ir_variable *b = (ir_variable *) node_b; - - /* If the types of the parameters do not match, the parameters lists - * are different. 
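The exact-match walk in parameter_lists_match_exact() below pairs its element comparison with a subtle length check: iteration stops when either list runs out, and the lists match only if both ran out together. Sketched over plain vectors (illustrative only):

   #include <string>
   #include <vector>

   /* Lockstep exact comparison, mirroring the tail-sentinel check the
    * real function performs after its loop.
    */
   static bool exact_match(const std::vector<std::string> &a,
                           const std::vector<std::string> &b)
   {
      size_t i = 0;
      for (; i < a.size() && i < b.size(); i++)
         if (a[i] != b[i])
            return false;

      return i == a.size() && i == b.size();
   }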
- */ - if (a->type != b->type) - return false; - } - - /* Unless both lists are exhausted, they differ in length and, by - * definition, do not match. - */ - return (node_a->is_tail_sentinel() == node_b->is_tail_sentinel()); -} - -ir_function_signature * -ir_function::exact_matching_signature(_mesa_glsl_parse_state *state, - const exec_list *actual_parameters) -{ - foreach_in_list(ir_function_signature, sig, &this->signatures) { - /* Skip over any built-ins that aren't available in this shader. */ - if (sig->is_builtin() && !sig->is_builtin_available(state)) - continue; - - if (parameter_lists_match_exact(&sig->parameters, actual_parameters)) - return sig; - } - return NULL; -} diff --git a/src/glsl/ir_function_can_inline.cpp b/src/glsl/ir_function_can_inline.cpp deleted file mode 100644 index 3b1d15f80fc..00000000000 --- a/src/glsl/ir_function_can_inline.cpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_function_can_inline.cpp - * - * Determines if we can inline a function call using ir_function_inlining.cpp. - * - * The primary restriction is that we can't return from the function other - * than as the last instruction. In lower_jumps.cpp, we can lower return - * statements not at the end of the function to other control flow in order to - * deal with this restriction. - */ - -#include "ir.h" - -class ir_function_can_inline_visitor : public ir_hierarchical_visitor { -public: - ir_function_can_inline_visitor() - { - this->num_returns = 0; - } - - virtual ir_visitor_status visit_enter(ir_return *); - - int num_returns; -}; - -ir_visitor_status -ir_function_can_inline_visitor::visit_enter(ir_return *ir) -{ - (void) ir; - this->num_returns++; - return visit_continue; -} - -bool -can_inline(ir_call *call) -{ - ir_function_can_inline_visitor v; - const ir_function_signature *callee = call->callee; - if (!callee->is_defined) - return false; - - v.run((exec_list *) &callee->body); - - /* If the function is empty (no last instruction) or does not end with a - * return statement, we need to count the implicit return. 
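The rule can_inline() enforces above: a body is inlinable only when it contains exactly one return, where a body that does not end in an explicit return contributes one implicit trailing return. A toy model over a flat instruction stream (hypothetical opcodes, not the real IR):

   #include <vector>

   enum opcode { OP_ASSIGN, OP_RETURN, OP_CALL };

   /* Inlinable iff exactly one return, counting the implicit return at
    * the end of a body whose last instruction is not an explicit one.
    */
   static bool can_inline_body(const std::vector<opcode> &body)
   {
      int returns = 0;
      for (opcode op : body)
         if (op == OP_RETURN)
            returns++;

      if (body.empty() || body.back() != OP_RETURN)
         returns++; /* implicit return at the end of the function */

      return returns == 1;
   }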
- */ - ir_instruction *last = (ir_instruction *)callee->body.get_tail(); - if (last == NULL || !last->as_return()) - v.num_returns++; - - return v.num_returns == 1; -} diff --git a/src/glsl/ir_function_detect_recursion.cpp b/src/glsl/ir_function_detect_recursion.cpp deleted file mode 100644 index b2334d2e403..00000000000 --- a/src/glsl/ir_function_detect_recursion.cpp +++ /dev/null @@ -1,358 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_function_detect_recursion.cpp - * Determine whether a shader contains static recursion. - * - * Consider the (possibly disjoint) graph of function calls in a shader. If a - * program contains recursion, this graph will contain a cycle. If a function - * is part of a cycle, it will have a caller and it will have a callee (it - * calls another function). - * - * To detect recursion, the function call graph is constructed. The graph is - * repeatedly reduced by removing any function that either has no callees - * (leaf functions) or has no caller. Eventually the only functions that - * remain will be the functions in the cycles. - * - * The GLSL spec is a bit wishy-washy about recursion. - * - * From page 39 (page 45 of the PDF) of the GLSL 1.10 spec: - * - * "Behavior is undefined if recursion is used. Recursion means having any - * function appearing more than once at any one time in the run-time stack - * of function calls. That is, a function may not call itself either - * directly or indirectly. Compilers may give diagnostic messages when - * this is detectable at compile time, but not all such cases can be - * detected at compile time." - * - * From page 79 (page 85 of the PDF): - * - * "22) Should recursion be supported? - * - * DISCUSSION: Probably not necessary, but another example of limiting - * the language based on how it would directly map to hardware. One - * thought is that recursion would benefit ray tracing shaders. On the - * other hand, many recursion operations can also be implemented with the - * user managing the recursion through arrays. RenderMan doesn't support - * recursion. This could be added at a later date, if it proved to be - * necessary. - * - * RESOLVED on September 10, 2002: Implementations are not required to - * support recursion. - * - * CLOSED on September 10, 2002." 
- * - * From page 79 (page 85 of the PDF): - * - * "56) Is it an error for an implementation to support recursion if the - * specification says recursion is not supported? - * - * ADDED on September 10, 2002. - * - * DISCUSSION: This issues is related to Issue (22). If we say that - * recursion (or some other piece of functionality) is not supported, is - * it an error for an implementation to support it? Perhaps the - * specification should remain silent on these kind of things so that they - * could be gracefully added later as an extension or as part of the - * standard. - * - * RESOLUTION: Languages, in general, have programs that are not - * well-formed in ways a compiler cannot detect. Portability is only - * ensured for well-formed programs. Detecting recursion is an example of - * this. The language will say a well-formed program may not recurse, but - * compilers are not forced to detect that recursion may happen. - * - * CLOSED: November 29, 2002." - * - * In GLSL 1.10 the behavior of recursion is undefined. Compilers don't have - * to reject shaders (at compile-time or link-time) that contain recursion. - * Instead they could work, or crash, or kill a kitten. - * - * From page 44 (page 50 of the PDF) of the GLSL 1.20 spec: - * - * "Recursion is not allowed, not even statically. Static recursion is - * present if the static function call graph of the program contains - * cycles." - * - * This langauge clears things up a bit, but it still leaves a lot of - * questions unanswered. - * - * - Is the error generated at compile-time or link-time? - * - * - Is it an error to have a recursive function that is never statically - * called by main or any function called directly or indirectly by main? - * Technically speaking, such a function is not in the "static function - * call graph of the program" at all. - * - * \bug - * If a shader has multiple cycles, this algorithm may erroneously complain - * about functions that aren't in any cycle, but are in the part of the call - * tree that connects them. For example, if the call graph consists of a - * cycle between A and B, and a cycle between D and E, and B also calls C - * which calls D, then this algorithm will report C as a function which "has - * static recursion" even though it is not part of any cycle. - * - * A better algorithm for cycle detection that doesn't have this drawback can - * be found here: - * - * http://en.wikipedia.org/wiki/Tarjan%E2%80%99s_strongly_connected_components_algorithm - * - * \author Ian Romanick - */ -#include "main/core.h" -#include "ir.h" -#include "glsl_parser_extras.h" -#include "linker.h" -#include "program/hash_table.h" -#include "program.h" - -namespace { - -struct call_node : public exec_node { - class function *func; -}; - -class function { -public: - function(ir_function_signature *sig) - : sig(sig) - { - /* empty */ - } - - DECLARE_RALLOC_CXX_OPERATORS(function) - - ir_function_signature *sig; - - /** List of functions called by this function. */ - exec_list callees; - - /** List of functions that call this function. 
*/ - exec_list callers; -}; - -class has_recursion_visitor : public ir_hierarchical_visitor { -public: - has_recursion_visitor() - : current(NULL) - { - progress = false; - this->mem_ctx = ralloc_context(NULL); - this->function_hash = hash_table_ctor(0, hash_table_pointer_hash, - hash_table_pointer_compare); - } - - ~has_recursion_visitor() - { - hash_table_dtor(this->function_hash); - ralloc_free(this->mem_ctx); - } - - function *get_function(ir_function_signature *sig) - { - function *f = (function *) hash_table_find(this->function_hash, sig); - if (f == NULL) { - f = new(mem_ctx) function(sig); - hash_table_insert(this->function_hash, f, sig); - } - - return f; - } - - virtual ir_visitor_status visit_enter(ir_function_signature *sig) - { - this->current = this->get_function(sig); - return visit_continue; - } - - virtual ir_visitor_status visit_leave(ir_function_signature *sig) - { - (void) sig; - this->current = NULL; - return visit_continue; - } - - virtual ir_visitor_status visit_enter(ir_call *call) - { - /* At global scope this->current will be NULL. Since there is no way to - * call global scope, it can never be part of a cycle. Don't bother - * adding calls from global scope to the graph. - */ - if (this->current == NULL) - return visit_continue; - - function *const target = this->get_function(call->callee); - - /* Create a link from the caller to the callee. - */ - call_node *node = new(mem_ctx) call_node; - node->func = target; - this->current->callees.push_tail(node); - - /* Create a link from the callee to the caller. - */ - node = new(mem_ctx) call_node; - node->func = this->current; - target->callers.push_tail(node); - return visit_continue; - } - - function *current; - struct hash_table *function_hash; - void *mem_ctx; - bool progress; -}; - -} /* anonymous namespace */ - -static void -destroy_links(exec_list *list, function *f) -{ - foreach_in_list_safe(call_node, node, list) { - /* If this is the right function, remove it. Note that the loop cannot - * terminate now. There can be multiple links to a function if it is - * either called multiple times or calls multiple times. 
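For orientation, the whole pass reduces to a degree-peeling argument: repeatedly delete functions with no callers or no callees; anything that survives sits on, or between, call-graph cycles (hence the \bug note above). A self-contained sketch with std containers standing in for the exec_list/hash_table plumbing:

   #include <map>
   #include <set>
   #include <string>

   /* Peel nodes with no callers or no callees until a fixed point;
    * the survivors are what this pass reports as statically recursive.
    */
   static std::set<std::string>
   find_recursive(std::map<std::string, std::set<std::string>> callees)
   {
      /* Build the reverse (caller) edges once. */
      std::map<std::string, std::set<std::string>> callers;
      for (auto &[fn, outs] : callees)
         for (const std::string &c : outs)
            callers[c].insert(fn);

      bool progress = true;
      while (progress) {
         progress = false;
         for (auto it = callees.begin(); it != callees.end();) {
            const std::string &fn = it->first;
            if (it->second.empty() || callers[fn].empty()) {
               for (const std::string &c : it->second)
                  callers[c].erase(fn);
               for (const std::string &caller : callers[fn])
                  callees[caller].erase(fn);
               callers.erase(fn);
               it = callees.erase(it);
               progress = true;
            } else {
               ++it;
            }
         }
      }

      std::set<std::string> cyclic;
      for (auto &[fn, outs] : callees)
         cyclic.insert(fn);
      return cyclic;
   }

Note that a self-call keeps both edge sets non-empty, so directly recursive functions are never peeled, matching the behavior of remove_unlinked_functions() below.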
- */ - if (node->func == f) - node->remove(); - } -} - - -/** - * Remove a function if it has either no in or no out links - */ -static void -remove_unlinked_functions(const void *key, void *data, void *closure) -{ - has_recursion_visitor *visitor = (has_recursion_visitor *) closure; - function *f = (function *) data; - - if (f->callers.is_empty() || f->callees.is_empty()) { - while (!f->callers.is_empty()) { - struct call_node *n = (struct call_node *) f->callers.pop_head(); - destroy_links(& n->func->callees, f); - } - - while (!f->callees.is_empty()) { - struct call_node *n = (struct call_node *) f->callees.pop_head(); - destroy_links(& n->func->callers, f); - } - - hash_table_remove(visitor->function_hash, key); - visitor->progress = true; - } -} - - -static void -emit_errors_unlinked(const void *key, void *data, void *closure) -{ - struct _mesa_glsl_parse_state *state = - (struct _mesa_glsl_parse_state *) closure; - function *f = (function *) data; - YYLTYPE loc; - - (void) key; - - char *proto = prototype_string(f->sig->return_type, - f->sig->function_name(), - &f->sig->parameters); - - memset(&loc, 0, sizeof(loc)); - _mesa_glsl_error(&loc, state, - "function `%s' has static recursion", - proto); - ralloc_free(proto); -} - - -static void -emit_errors_linked(const void *key, void *data, void *closure) -{ - struct gl_shader_program *prog = - (struct gl_shader_program *) closure; - function *f = (function *) data; - - (void) key; - - char *proto = prototype_string(f->sig->return_type, - f->sig->function_name(), - &f->sig->parameters); - - linker_error(prog, "function `%s' has static recursion.\n", proto); - ralloc_free(proto); -} - - -void -detect_recursion_unlinked(struct _mesa_glsl_parse_state *state, - exec_list *instructions) -{ - has_recursion_visitor v; - - /* Collect all of the information about which functions call which other - * functions. - */ - v.run(instructions); - - /* Remove from the set all of the functions that either have no caller or - * call no other functions. Repeat until no functions are removed. - */ - do { - v.progress = false; - hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v); - } while (v.progress); - - - /* At this point any functions still in the hash must be part of a cycle. - */ - hash_table_call_foreach(v.function_hash, emit_errors_unlinked, state); -} - - -void -detect_recursion_linked(struct gl_shader_program *prog, - exec_list *instructions) -{ - has_recursion_visitor v; - - /* Collect all of the information about which functions call which other - * functions. - */ - v.run(instructions); - - /* Remove from the set all of the functions that either have no caller or - * call no other functions. Repeat until no functions are removed. - */ - do { - v.progress = false; - hash_table_call_foreach(v.function_hash, remove_unlinked_functions, & v); - } while (v.progress); - - - /* At this point any functions still in the hash must be part of a cycle. 
- */ - hash_table_call_foreach(v.function_hash, emit_errors_linked, prog); -} diff --git a/src/glsl/ir_function_inlining.h b/src/glsl/ir_function_inlining.h deleted file mode 100644 index 6db011bbcae..00000000000 --- a/src/glsl/ir_function_inlining.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_function_inlining.h - * - * Replaces calls to functions with the body of the function. - */ - -bool can_inline(ir_call *call); diff --git a/src/glsl/ir_hierarchical_visitor.cpp b/src/glsl/ir_hierarchical_visitor.cpp deleted file mode 100644 index 1d23a776643..00000000000 --- a/src/glsl/ir_hierarchical_visitor.cpp +++ /dev/null @@ -1,383 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
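The file that follows is several hundred lines of near-identical enter/leave boilerplate; the protocol it implements is summarized by this toy sketch (simplified node type, not the real IR): interior nodes get visit_enter()/visit_leave() bracketing their children, and any method can cut traversal short via the status codes.

   #include <vector>

   enum visitor_status { CONTINUE, CONTINUE_WITH_PARENT, STOP };

   struct visitor;

   struct node {
      std::vector<node> children;
      visitor_status accept(visitor &v);
   };

   struct visitor {
      virtual ~visitor() {}
      virtual visitor_status visit_enter(node &) { return CONTINUE; }
      virtual visitor_status visit_leave(node &) { return CONTINUE; }
   };

   /* Navigation lives in accept(): enter, then children, then leave.
    * CONTINUE_WITH_PARENT skips the remaining children only; STOP
    * unwinds the whole traversal.
    */
   visitor_status node::accept(visitor &v)
   {
      visitor_status s = v.visit_enter(*this);
      if (s != CONTINUE)
         return s == STOP ? STOP : CONTINUE;

      for (node &child : children)
         if (child.accept(v) == STOP)
            return STOP;

      return v.visit_leave(*this);
   }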
- */ - -#include "ir.h" -#include "ir_hierarchical_visitor.h" - -ir_hierarchical_visitor::ir_hierarchical_visitor() -{ - this->base_ir = NULL; - this->callback_enter = NULL; - this->callback_leave = NULL; - this->data_enter = NULL; - this->data_leave = NULL; - this->in_assignee = false; -} - -ir_visitor_status -ir_hierarchical_visitor::visit(ir_rvalue *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit(ir_variable *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit(ir_constant *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit(ir_loop_jump *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit(ir_dereference_variable *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit(ir_barrier *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_loop *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_loop *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_function_signature *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_function_signature *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_function *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_function *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_expression *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_expression *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_texture *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_texture *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_swizzle *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status 
-ir_hierarchical_visitor::visit_leave(ir_swizzle *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_dereference_array *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_dereference_array *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_dereference_record *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_dereference_record *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_assignment *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_assignment *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_call *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_call *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_return *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_return *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_discard *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_discard *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_if *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_if *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_emit_vertex *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_emit_vertex *ir) -{ - if (this->callback_leave != NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_enter(ir_end_primitive *ir) -{ - if (this->callback_enter != NULL) - this->callback_enter(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_hierarchical_visitor::visit_leave(ir_end_primitive *ir) -{ - if (this->callback_leave 
!= NULL) - this->callback_leave(ir, this->data_leave); - - return visit_continue; -} - -void -ir_hierarchical_visitor::run(exec_list *instructions) -{ - visit_list_elements(this, instructions); -} - - -void -visit_tree(ir_instruction *ir, - void (*callback_enter)(class ir_instruction *ir, void *data), - void *data_enter, - void (*callback_leave)(class ir_instruction *ir, void *data), - void *data_leave) -{ - ir_hierarchical_visitor v; - - v.callback_enter = callback_enter; - v.callback_leave = callback_leave; - v.data_enter = data_enter; - v.data_leave = data_leave; - - ir->accept(&v); -} diff --git a/src/glsl/ir_hierarchical_visitor.h b/src/glsl/ir_hierarchical_visitor.h deleted file mode 100644 index 28517b6e4f4..00000000000 --- a/src/glsl/ir_hierarchical_visitor.h +++ /dev/null @@ -1,209 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once -#ifndef IR_HIERARCHICAL_VISITOR_H -#define IR_HIERARCHICAL_VISITOR_H - -/** - * Enumeration values returned by visit methods to guide processing - */ -enum ir_visitor_status { - visit_continue, /**< Continue visiting as normal. */ - visit_continue_with_parent, /**< Don't visit siblings, continue w/parent. */ - visit_stop /**< Stop visiting immediately. */ -}; - - -#ifdef __cplusplus -/** - * Base class of hierarchical visitors of IR instruction trees - * - * Hierarchical visitors differ from traditional visitors in a couple of - * important ways. Rather than having a single \c visit method for each - * subclass in the composite, there are three kinds of visit methods. - * Leaf-node classes have a traditional \c visit method. Internal-node - * classes have a \c visit_enter method, which is invoked just before - * processing child nodes, and a \c visit_leave method which is invoked just - * after processing child nodes. - * - * In addition, each visit method and the \c accept methods in the composite - * have a return value which guides the navigation. Any of the visit methods - * can choose to continue visiting the tree as normal (by returning \c - * visit_continue), terminate visiting any further nodes immediately (by - * returning \c visit_stop), or stop visiting sibling nodes (by returning \c - * visit_continue_with_parent). - * - * These two changes combine to allow nagivation of children to be implemented - * in the composite's \c accept method. 
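The visit_tree() helper defined earlier in this hunk wraps the same machinery in C-style callbacks: pass a function pointer plus a context pointer, and a NULL callback is simply skipped by the default methods. A small usage sketch, assuming the surrounding Mesa headers; the counting callback itself is illustrative:

   #include "ir.h"

   /* Illustrative callback: count every node visited. */
   static void count_node(ir_instruction *ir, void *data)
   {
      (void) ir;
      (*(unsigned *) data)++;
   }

   static unsigned count_tree_nodes(ir_instruction *root)
   {
      unsigned n = 0;
      visit_tree(root, count_node, &n, /* callback_leave */ NULL, NULL);
      return n;
   }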
The \c accept method for a leaf-node - * class will simply call the \c visit method, as usual, and pass its return - * value on. The \c accept method for internal-node classes will call the \c - * visit_enter method, call the \c accept method of each child node, and, - * finally, call the \c visit_leave method. If any of these return a value - * other that \c visit_continue, the correct action must be taken. - * - * The final benefit is that the hierarchical visitor base class need not be - * abstract. Default implementations of every \c visit, \c visit_enter, and - * \c visit_leave method can be provided. By default each of these methods - * simply returns \c visit_continue. This allows a significant reduction in - * derived class code. - * - * For more information about hierarchical visitors, see: - * - * http://c2.com/cgi/wiki?HierarchicalVisitorPattern - * http://c2.com/cgi/wiki?HierarchicalVisitorDiscussion - */ - -class ir_hierarchical_visitor { -public: - ir_hierarchical_visitor(); - - /** - * \name Visit methods for leaf-node classes - */ - /*@{*/ - virtual ir_visitor_status visit(class ir_rvalue *); - virtual ir_visitor_status visit(class ir_variable *); - virtual ir_visitor_status visit(class ir_constant *); - virtual ir_visitor_status visit(class ir_loop_jump *); - virtual ir_visitor_status visit(class ir_barrier *); - - /** - * ir_dereference_variable isn't technically a leaf, but it is treated as a - * leaf here for a couple reasons. By not automatically visiting the one - * child ir_variable node from the ir_dereference_variable, ir_variable - * nodes can always be handled as variable declarations. Code that used - * non-hierarchical visitors had to set an "in a dereference" flag to - * determine how to handle an ir_variable. By forcing the visitor to - * handle the ir_variable within the ir_dereference_variable visitor, this - * kludge can be avoided. - * - * In addition, I can envision no use for having separate enter and leave - * methods. Anything that could be done in the enter and leave methods - * that couldn't just be done in the visit method. 
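To make the traversal contract above concrete, here is a minimal, hypothetical sketch of a derived visitor (not part of this patch; it assumes ir.h and this header are included). It counts assignments, and returns visit_continue_with_parent so the LHS/RHS subtrees of each counted assignment are skipped; every other node type falls through to the base class defaults, which simply return visit_continue:

   class assignment_counter : public ir_hierarchical_visitor {
   public:
      assignment_counter() : count(0) {}

      virtual ir_visitor_status visit_enter(ir_assignment *)
      {
         /* Count the assignment, then skip its children entirely. */
         count++;
         return visit_continue_with_parent;
      }

      unsigned count;
   };

   /* Hypothetical usage; shader_instructions is a stand-in exec_list. */
   assignment_counter counter;
   counter.run(shader_instructions);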
- */ - virtual ir_visitor_status visit(class ir_dereference_variable *); - /*@}*/ - - /** - * \name Visit methods for internal-node classes - */ - /*@{*/ - virtual ir_visitor_status visit_enter(class ir_loop *); - virtual ir_visitor_status visit_leave(class ir_loop *); - virtual ir_visitor_status visit_enter(class ir_function_signature *); - virtual ir_visitor_status visit_leave(class ir_function_signature *); - virtual ir_visitor_status visit_enter(class ir_function *); - virtual ir_visitor_status visit_leave(class ir_function *); - virtual ir_visitor_status visit_enter(class ir_expression *); - virtual ir_visitor_status visit_leave(class ir_expression *); - virtual ir_visitor_status visit_enter(class ir_texture *); - virtual ir_visitor_status visit_leave(class ir_texture *); - virtual ir_visitor_status visit_enter(class ir_swizzle *); - virtual ir_visitor_status visit_leave(class ir_swizzle *); - virtual ir_visitor_status visit_enter(class ir_dereference_array *); - virtual ir_visitor_status visit_leave(class ir_dereference_array *); - virtual ir_visitor_status visit_enter(class ir_dereference_record *); - virtual ir_visitor_status visit_leave(class ir_dereference_record *); - virtual ir_visitor_status visit_enter(class ir_assignment *); - virtual ir_visitor_status visit_leave(class ir_assignment *); - virtual ir_visitor_status visit_enter(class ir_call *); - virtual ir_visitor_status visit_leave(class ir_call *); - virtual ir_visitor_status visit_enter(class ir_return *); - virtual ir_visitor_status visit_leave(class ir_return *); - virtual ir_visitor_status visit_enter(class ir_discard *); - virtual ir_visitor_status visit_leave(class ir_discard *); - virtual ir_visitor_status visit_enter(class ir_if *); - virtual ir_visitor_status visit_leave(class ir_if *); - virtual ir_visitor_status visit_enter(class ir_emit_vertex *); - virtual ir_visitor_status visit_leave(class ir_emit_vertex *); - virtual ir_visitor_status visit_enter(class ir_end_primitive *); - virtual ir_visitor_status visit_leave(class ir_end_primitive *); - /*@}*/ - - - /** - * Utility function to process a linked list of instructions with a visitor - */ - void run(struct exec_list *instructions); - - /* Some visitors may need to insert new variable declarations and - * assignments for portions of a subtree, which means they need a - * pointer to the current instruction in the stream, not just their - * node in the tree rooted at that instruction. - * - * This is implemented by visit_list_elements -- if the visitor is - * not called by it, nothing good will happen. - */ - class ir_instruction *base_ir; - - /** - * Callback function that is invoked on entry to each node visited. - * - * \warning - * Visitor classes derived from \c ir_hierarchical_visitor \b may \b not - * invoke this function. This can be used, for example, to cause the - * callback to be invoked on every node type except one. - */ - void (*callback_enter)(class ir_instruction *ir, void *data); - - /** - * Callback function that is invoked on exit of each node visited. - * - * \warning - * Visitor classes derived from \c ir_hierarchical_visitor \b may \b not - * invoke this function. This can be used, for example, to cause the - * callback to be invoked on every node type except one. 
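The callback hooks above also allow traversals with no subclass at all, via visit_tree(). A hypothetical debugging sketch (print_node is an illustrative name, not part of this patch; ir_instruction::fprint is defined in ir_print_visitor.cpp later in this series):

   static void
   print_node(ir_instruction *ir, void *data)
   {
      /* Dump each node on entry; 'data' carries the FILE pointer. */
      ir->fprint((FILE *) data);
      fprintf((FILE *) data, "\n");
   }

   /* callback_leave and data_leave default to NULL. */
   visit_tree(some_instruction, print_node, stderr);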
- */ - void (*callback_leave)(class ir_instruction *ir, void *data); - - /** - * Extra data parameter passed to the per-node callback_enter function - */ - void *data_enter; - - /** - * Extra data parameter passed to the per-node callback_leave function - */ - void *data_leave; - - /** - * Currently in the LHS of an assignment? - * - * This is set and cleared by the \c ir_assignment::accept method. - */ - bool in_assignee; -}; - -void visit_tree(ir_instruction *ir, - void (*callback_enter)(class ir_instruction *ir, void *data), - void *data_enter, - void (*callback_leave)(class ir_instruction *ir, void *data) = NULL, - void *data_leave = NULL); - -ir_visitor_status visit_list_elements(ir_hierarchical_visitor *v, exec_list *l, - bool statement_list = true); -#endif /* __cplusplus */ - -#endif /* IR_HIERARCHICAL_VISITOR_H */ diff --git a/src/glsl/ir_hv_accept.cpp b/src/glsl/ir_hv_accept.cpp deleted file mode 100644 index 213992af28c..00000000000 --- a/src/glsl/ir_hv_accept.cpp +++ /dev/null @@ -1,439 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "ir.h" - -/** - * \file ir_hv_accept.cpp - * Implementations of all hierarchical visitor accept methods for IR - * instructions. - */ - -/** - * Process a list of nodes using a hierarchical visitor. - * - * If statement_list is true (the default), this is a list of statements, so - * v->base_ir will be set to point to each statement just before iterating - * over it, and restored after iteration is complete. If statement_list is - * false, this is a list that appears inside a statement (e.g. a parameter - * list), so v->base_ir will be left alone. - * - * \warning - * This function will operate correctly if a node being processed is removed - * from the list. However, if nodes are added to the list after the node being - * processed, some of the added nodes may not be processed.
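The removal guarantee documented here is what lets a pass delete the very node it is visiting. A minimal, hypothetical sketch (assuming exec_node::remove() from Mesa's list utilities; strip_barriers is an illustrative name, not part of this patch):

   class strip_barriers : public ir_hierarchical_visitor {
   public:
      virtual ir_visitor_status visit(ir_barrier *ir)
      {
         /* Safe: visit_list_elements() iterates with foreach_in_list_safe. */
         ir->remove();
         return visit_continue;
      }
   };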
- */ -ir_visitor_status -visit_list_elements(ir_hierarchical_visitor *v, exec_list *l, - bool statement_list) -{ - ir_instruction *prev_base_ir = v->base_ir; - - foreach_in_list_safe(ir_instruction, ir, l) { - if (statement_list) - v->base_ir = ir; - ir_visitor_status s = ir->accept(v); - - if (s != visit_continue) - return s; - } - if (statement_list) - v->base_ir = prev_base_ir; - - return visit_continue; -} - - -ir_visitor_status -ir_rvalue::accept(ir_hierarchical_visitor *v) -{ - return v->visit(this); -} - - -ir_visitor_status -ir_variable::accept(ir_hierarchical_visitor *v) -{ - return v->visit(this); -} - - -ir_visitor_status -ir_loop::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = visit_list_elements(v, &this->body_instructions); - if (s == visit_stop) - return s; - - return v->visit_leave(this); -} - - -ir_visitor_status -ir_loop_jump::accept(ir_hierarchical_visitor *v) -{ - return v->visit(this); -} - - -ir_visitor_status -ir_function_signature::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = visit_list_elements(v, &this->parameters); - if (s == visit_stop) - return s; - - s = visit_list_elements(v, &this->body); - return (s == visit_stop) ? s : v->visit_leave(this); -} - - -ir_visitor_status -ir_function::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = visit_list_elements(v, &this->signatures, false); - return (s == visit_stop) ? s : v->visit_leave(this); -} - - -ir_visitor_status -ir_expression::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - for (unsigned i = 0; i < this->get_num_operands(); i++) { - switch (this->operands[i]->accept(v)) { - case visit_continue: - break; - - case visit_continue_with_parent: - // I wish for Java's labeled break-statement here. - goto done; - - case visit_stop: - return s; - } - } - -done: - return v->visit_leave(this); -} - -ir_visitor_status -ir_texture::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = this->sampler->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - if (this->coordinate) { - s = this->coordinate->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - } - - if (this->projector) { - s = this->projector->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - } - - if (this->shadow_comparitor) { - s = this->shadow_comparitor->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - } - - if (this->offset) { - s = this->offset->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? 
visit_continue : s; - } - - switch (this->op) { - case ir_tex: - case ir_lod: - case ir_query_levels: - case ir_texture_samples: - case ir_samples_identical: - break; - case ir_txb: - s = this->lod_info.bias->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - break; - case ir_txl: - case ir_txf: - case ir_txs: - s = this->lod_info.lod->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - break; - case ir_txf_ms: - s = this->lod_info.sample_index->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - break; - case ir_txd: - s = this->lod_info.grad.dPdx->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = this->lod_info.grad.dPdy->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - break; - case ir_tg4: - s = this->lod_info.component->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - break; - } - - return (s == visit_stop) ? s : v->visit_leave(this); -} - - -ir_visitor_status -ir_swizzle::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = this->val->accept(v); - return (s == visit_stop) ? s : v->visit_leave(this); -} - - -ir_visitor_status -ir_dereference_variable::accept(ir_hierarchical_visitor *v) -{ - return v->visit(this); -} - - -ir_visitor_status -ir_dereference_array::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - /* The array index is not the target of the assignment, so clear the - * 'in_assignee' flag. Restore it after returning from the array index. - */ - const bool was_in_assignee = v->in_assignee; - v->in_assignee = false; - s = this->array_index->accept(v); - v->in_assignee = was_in_assignee; - - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = this->array->accept(v); - return (s == visit_stop) ? s : v->visit_leave(this); -} - - -ir_visitor_status -ir_dereference_record::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = this->record->accept(v); - return (s == visit_stop) ? s : v->visit_leave(this); -} - - -ir_visitor_status -ir_assignment::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - v->in_assignee = true; - s = this->lhs->accept(v); - v->in_assignee = false; - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = this->rhs->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - if (this->condition) - s = this->condition->accept(v); - - return (s == visit_stop) ? s : v->visit_leave(this); -} - - -ir_visitor_status -ir_constant::accept(ir_hierarchical_visitor *v) -{ - return v->visit(this); -} - - -ir_visitor_status -ir_call::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? 
visit_continue : s; - - if (this->return_deref != NULL) { - v->in_assignee = true; - s = this->return_deref->accept(v); - v->in_assignee = false; - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - } - - s = visit_list_elements(v, &this->actual_parameters, false); - if (s == visit_stop) - return s; - - return v->visit_leave(this); -} - - -ir_visitor_status -ir_return::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - ir_rvalue *val = this->get_value(); - if (val) { - s = val->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - } - - return v->visit_leave(this); -} - - -ir_visitor_status -ir_discard::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - if (this->condition != NULL) { - s = this->condition->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - } - - return v->visit_leave(this); -} - - -ir_visitor_status -ir_if::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = this->condition->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - if (s != visit_continue_with_parent) { - s = visit_list_elements(v, &this->then_instructions); - if (s == visit_stop) - return s; - } - - if (s != visit_continue_with_parent) { - s = visit_list_elements(v, &this->else_instructions); - if (s == visit_stop) - return s; - } - - return v->visit_leave(this); -} - -ir_visitor_status -ir_emit_vertex::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = this->stream->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - return (s == visit_stop) ? s : v->visit_leave(this); -} - - -ir_visitor_status -ir_end_primitive::accept(ir_hierarchical_visitor *v) -{ - ir_visitor_status s = v->visit_enter(this); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - s = this->stream->accept(v); - if (s != visit_continue) - return (s == visit_continue_with_parent) ? visit_continue : s; - - return (s == visit_stop) ? 
s : v->visit_leave(this); -} - -ir_visitor_status -ir_barrier::accept(ir_hierarchical_visitor *v) -{ - return v->visit(this); -} diff --git a/src/glsl/ir_import_prototypes.cpp b/src/glsl/ir_import_prototypes.cpp deleted file mode 100644 index b0429fbc3af..00000000000 --- a/src/glsl/ir_import_prototypes.cpp +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_import_prototypes.cpp - * Import function prototypes from one IR tree into another. - * - * \author Ian Romanick - */ -#include "ir.h" -#include "glsl_symbol_table.h" - -namespace { - -/** - * Visitor used to import function prototypes - * - * Normally the \c clone method of either \c ir_function or - * \c ir_function_signature could be used. However, we don't want a complete - * clone of the \c ir_function_signature. We want everything \b except the - * body of the function. - */ -class import_prototype_visitor : public ir_hierarchical_visitor { -public: - /** - */ - import_prototype_visitor(exec_list *list, glsl_symbol_table *symbols, - void *mem_ctx) - { - this->mem_ctx = mem_ctx; - this->list = list; - this->symbols = symbols; - this->function = NULL; - } - - virtual ir_visitor_status visit_enter(ir_function *ir) - { - assert(this->function == NULL); - - this->function = this->symbols->get_function(ir->name); - if (!this->function) { - this->function = new(this->mem_ctx) ir_function(ir->name); - - list->push_tail(this->function); - - /* Add the new function to the symbol table. - */ - this->symbols->add_function(this->function); - } - return visit_continue; - } - - virtual ir_visitor_status visit_leave(ir_function *ir) - { - (void) ir; - assert(this->function != NULL); - - this->function = NULL; - return visit_continue; - } - - ir_visitor_status visit_enter(ir_function_signature *ir) - { - assert(this->function != NULL); - - ir_function_signature *copy = ir->clone_prototype(mem_ctx, NULL); - - this->function->add_signature(copy); - - /* Do not process child nodes of the ir_function_signature. There can - * never be any nodes inside the ir_function_signature that we care - * about. Instead continue with the next sibling. 
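This pruning is what keeps the pass cheap: function bodies are never walked. A hypothetical call through the import_prototypes() entry point defined just below (builtins, state, and mem_ctx are stand-in names, not part of this patch):

   exec_list prototypes;
   import_prototypes(&builtins, &prototypes, state->symbols, mem_ctx);
   /* 'prototypes' now holds ir_function nodes whose signatures carry no bodies. */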
- */ - return visit_continue_with_parent; - } - -private: - exec_list *list; - ir_function *function; - glsl_symbol_table *symbols; - void *mem_ctx; -}; - -} /* anonymous namespace */ - -/** - * Import function prototypes from one IR tree into another - * - * \param source Source instruction stream containing functions whose - * prototypes are to be imported - * \param dest Destination instruction stream where new \c ir_function and - * \c ir_function_signature nodes will be stored - * \param symbols Symbol table where new functions will be stored - * \param mem_ctx ralloc memory context used for new allocations - */ -void -import_prototypes(const exec_list *source, exec_list *dest, - glsl_symbol_table *symbols, void *mem_ctx) -{ - import_prototype_visitor v(dest, symbols, mem_ctx); - - /* Making source be const is just extra documentation. - */ - v.run(const_cast<exec_list *>(source)); -} diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h deleted file mode 100644 index be86f547f77..00000000000 --- a/src/glsl/ir_optimization.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - - -/** - * \file ir_optimization.h - * - * Prototypes for optimization passes to be called by the compiler and drivers.
- */ - -/* Operations for lower_instructions() */ -#define SUB_TO_ADD_NEG 0x01 -#define DIV_TO_MUL_RCP 0x02 -#define EXP_TO_EXP2 0x04 -#define POW_TO_EXP2 0x08 -#define LOG_TO_LOG2 0x10 -#define MOD_TO_FLOOR 0x20 -#define INT_DIV_TO_MUL_RCP 0x40 -#define LDEXP_TO_ARITH 0x80 -#define CARRY_TO_ARITH 0x100 -#define BORROW_TO_ARITH 0x200 -#define SAT_TO_CLAMP 0x400 -#define DOPS_TO_DFRAC 0x800 -#define DFREXP_DLDEXP_TO_ARITH 0x1000 - -/** - * \see class lower_packing_builtins_visitor - */ -enum lower_packing_builtins_op { - LOWER_PACK_UNPACK_NONE = 0x0000, - - LOWER_PACK_SNORM_2x16 = 0x0001, - LOWER_UNPACK_SNORM_2x16 = 0x0002, - - LOWER_PACK_UNORM_2x16 = 0x0004, - LOWER_UNPACK_UNORM_2x16 = 0x0008, - - LOWER_PACK_HALF_2x16 = 0x0010, - LOWER_UNPACK_HALF_2x16 = 0x0020, - - LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040, - LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080, - - LOWER_PACK_SNORM_4x8 = 0x0100, - LOWER_UNPACK_SNORM_4x8 = 0x0200, - - LOWER_PACK_UNORM_4x8 = 0x0400, - LOWER_UNPACK_UNORM_4x8 = 0x0800, - - LOWER_PACK_USE_BFI = 0x1000, - LOWER_PACK_USE_BFE = 0x2000, -}; - -bool do_common_optimization(exec_list *ir, bool linked, - bool uniform_locations_assigned, - const struct gl_shader_compiler_options *options, - bool native_integers); - -bool do_rebalance_tree(exec_list *instructions); -bool do_algebraic(exec_list *instructions, bool native_integers, - const struct gl_shader_compiler_options *options); -bool opt_conditional_discard(exec_list *instructions); -bool do_constant_folding(exec_list *instructions); -bool do_constant_variable(exec_list *instructions); -bool do_constant_variable_unlinked(exec_list *instructions); -bool do_copy_propagation(exec_list *instructions); -bool do_copy_propagation_elements(exec_list *instructions); -bool do_constant_propagation(exec_list *instructions); -void do_dead_builtin_varyings(struct gl_context *ctx, - gl_shader *producer, gl_shader *consumer, - unsigned num_tfeedback_decls, - class tfeedback_decl *tfeedback_decls); -bool do_dead_code(exec_list *instructions, bool uniform_locations_assigned); -bool do_dead_code_local(exec_list *instructions); -bool do_dead_code_unlinked(exec_list *instructions); -bool do_dead_functions(exec_list *instructions); -bool opt_flip_matrices(exec_list *instructions); -bool do_function_inlining(exec_list *instructions); -bool do_lower_jumps(exec_list *instructions, bool pull_out_jumps = true, bool lower_sub_return = true, bool lower_main_return = false, bool lower_continue = false, bool lower_break = false); -bool do_lower_texture_projection(exec_list *instructions); -bool do_if_simplification(exec_list *instructions); -bool opt_flatten_nested_if_blocks(exec_list *instructions); -bool do_discard_simplification(exec_list *instructions); -bool lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth = 0); -bool do_mat_op_to_vec(exec_list *instructions); -bool do_minmax_prune(exec_list *instructions); -bool do_noop_swizzle(exec_list *instructions); -bool do_structure_splitting(exec_list *instructions); -bool do_swizzle_swizzle(exec_list *instructions); -bool do_vectorize(exec_list *instructions); -bool do_tree_grafting(exec_list *instructions); -bool do_vec_index_to_cond_assign(exec_list *instructions); -bool do_vec_index_to_swizzle(exec_list *instructions); -bool lower_discard(exec_list *instructions); -void lower_discard_flow(exec_list *instructions); -bool lower_instructions(exec_list *instructions, unsigned what_to_lower); -bool lower_noise(exec_list *instructions); -bool lower_variable_index_to_cond_assign(gl_shader_stage 
stage, - exec_list *instructions, bool lower_input, bool lower_output, - bool lower_temp, bool lower_uniform); -bool lower_quadop_vector(exec_list *instructions, bool dont_lower_swz); -bool lower_const_arrays_to_uniforms(exec_list *instructions); -bool lower_clip_distance(gl_shader *shader); -void lower_output_reads(unsigned stage, exec_list *instructions); -bool lower_packing_builtins(exec_list *instructions, int op_mask); -void lower_shared_reference(struct gl_shader *shader, unsigned *shared_size); -void lower_ubo_reference(struct gl_shader *shader); -void lower_packed_varyings(void *mem_ctx, - unsigned locations_used, ir_variable_mode mode, - unsigned gs_input_vertices, gl_shader *shader); -bool lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index); -bool lower_vector_derefs(gl_shader *shader); -void lower_named_interface_blocks(void *mem_ctx, gl_shader *shader); -bool optimize_redundant_jumps(exec_list *instructions); -bool optimize_split_arrays(exec_list *instructions, bool linked); -bool lower_offset_arrays(exec_list *instructions); -void optimize_dead_builtin_variables(exec_list *instructions, - enum ir_variable_mode other); -bool lower_tess_level(gl_shader *shader); - -bool lower_vertex_id(gl_shader *shader); - -bool lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state); - -ir_rvalue * -compare_index_block(exec_list *instructions, ir_variable *index, - unsigned base, unsigned components, void *mem_ctx); diff --git a/src/glsl/ir_print_visitor.cpp b/src/glsl/ir_print_visitor.cpp deleted file mode 100644 index 960b23fe0ed..00000000000 --- a/src/glsl/ir_print_visitor.cpp +++ /dev/null @@ -1,604 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
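As an aside before the next file: the lower_instructions() flags declared in ir_optimization.h above form a plain bitmask. A hypothetical driver-side sketch (shader_ir and options are stand-in names, not part of this patch), using the usual run-until-no-progress idiom:

   unsigned what_to_lower = SUB_TO_ADD_NEG | DIV_TO_MUL_RCP | LOG_TO_LOG2;

   bool progress;
   do {
      progress = lower_instructions(shader_ir, what_to_lower);
      /* Clean up whatever the lowering exposed. */
      progress = do_common_optimization(shader_ir, true, false, options, true)
                 || progress;
   } while (progress);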
- */ - -#include "ir_print_visitor.h" -#include "compiler/glsl_types.h" -#include "glsl_parser_extras.h" -#include "main/macros.h" -#include "util/hash_table.h" - -static void print_type(FILE *f, const glsl_type *t); - -void -ir_instruction::print(void) const -{ - this->fprint(stdout); -} - -void -ir_instruction::fprint(FILE *f) const -{ - ir_instruction *deconsted = const_cast<ir_instruction *>(this); - - ir_print_visitor v(f); - deconsted->accept(&v); -} - -extern "C" { -void -_mesa_print_ir(FILE *f, exec_list *instructions, - struct _mesa_glsl_parse_state *state) -{ - if (state) { - for (unsigned i = 0; i < state->num_user_structures; i++) { - const glsl_type *const s = state->user_structures[i]; - - fprintf(f, "(structure (%s) (%s@%p) (%u) (\n", - s->name, s->name, (void *) s, s->length); - - for (unsigned j = 0; j < s->length; j++) { - fprintf(f, "\t(("); - print_type(f, s->fields.structure[j].type); - fprintf(f, ")(%s))\n", s->fields.structure[j].name); - } - - fprintf(f, ")\n"); - } - } - - fprintf(f, "(\n"); - foreach_in_list(ir_instruction, ir, instructions) { - ir->fprint(f); - if (ir->ir_type != ir_type_function) - fprintf(f, "\n"); - } - fprintf(f, ")\n"); -} - -void -fprint_ir(FILE *f, const void *instruction) -{ - const ir_instruction *ir = (const ir_instruction *)instruction; - ir->fprint(f); -} - -} /* extern "C" */ - -ir_print_visitor::ir_print_visitor(FILE *f) - : f(f) -{ - indentation = 0; - printable_names = - _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); - symbols = _mesa_symbol_table_ctor(); - mem_ctx = ralloc_context(NULL); -} - -ir_print_visitor::~ir_print_visitor() -{ - _mesa_hash_table_destroy(printable_names, NULL); - _mesa_symbol_table_dtor(symbols); - ralloc_free(mem_ctx); -} - -void ir_print_visitor::indent(void) -{ - for (int i = 0; i < indentation; i++) - fprintf(f, " "); -} - -const char * -ir_print_visitor::unique_name(ir_variable *var) -{ - /* var->name can be NULL in function prototypes when a type is given for a - * parameter but no name is given. In that case, just return an empty - * string. Don't worry about tracking the generated name in the printable - * names hash because this is the only scope where it can ever appear. - */ - if (var->name == NULL) { - static unsigned arg = 1; - return ralloc_asprintf(this->mem_ctx, "parameter@%u", arg++); - } - - /* Do we already have a name for this variable?
*/ - struct hash_entry * entry = - _mesa_hash_table_search(this->printable_names, var); - - if (entry != NULL) { - return (const char *) entry->data; - } - - /* If there's no conflict, just use the original name */ - const char* name = NULL; - if (_mesa_symbol_table_find_symbol(this->symbols, -1, var->name) == NULL) { - name = var->name; - } else { - static unsigned i = 1; - name = ralloc_asprintf(this->mem_ctx, "%s@%u", var->name, ++i); - } - _mesa_hash_table_insert(this->printable_names, var, (void *) name); - _mesa_symbol_table_add_symbol(this->symbols, -1, name, var); - return name; -} - -static void -print_type(FILE *f, const glsl_type *t) -{ - if (t->base_type == GLSL_TYPE_ARRAY) { - fprintf(f, "(array "); - print_type(f, t->fields.array); - fprintf(f, " %u)", t->length); - } else if ((t->base_type == GLSL_TYPE_STRUCT) - && !is_gl_identifier(t->name)) { - fprintf(f, "%s@%p", t->name, (void *) t); - } else { - fprintf(f, "%s", t->name); - } -} - -void ir_print_visitor::visit(ir_rvalue *) -{ - fprintf(f, "error"); -} - -void ir_print_visitor::visit(ir_variable *ir) -{ - fprintf(f, "(declare "); - - char loc[256] = {0}; - if (ir->data.location != -1) - snprintf(loc, sizeof(loc), "location=%i ", ir->data.location); - - const char *const cent = (ir->data.centroid) ? "centroid " : ""; - const char *const samp = (ir->data.sample) ? "sample " : ""; - const char *const patc = (ir->data.patch) ? "patch " : ""; - const char *const inv = (ir->data.invariant) ? "invariant " : ""; - const char *const mode[] = { "", "uniform ", "shader_storage ", - "shader_shared ", "shader_in ", "shader_out ", - "in ", "out ", "inout ", - "const_in ", "sys ", "temporary " }; - STATIC_ASSERT(ARRAY_SIZE(mode) == ir_var_mode_count); - const char *const stream [] = {"", "stream1 ", "stream2 ", "stream3 "}; - const char *const interp[] = { "", "smooth", "flat", "noperspective" }; - STATIC_ASSERT(ARRAY_SIZE(interp) == INTERP_QUALIFIER_COUNT); - - fprintf(f, "(%s%s%s%s%s%s%s%s) ", - loc, cent, samp, patc, inv, mode[ir->data.mode], - stream[ir->data.stream], - interp[ir->data.interpolation]); - - print_type(f, ir->type); - fprintf(f, " %s)", unique_name(ir)); -} - - -void ir_print_visitor::visit(ir_function_signature *ir) -{ - _mesa_symbol_table_push_scope(symbols); - fprintf(f, "(signature "); - indentation++; - - print_type(f, ir->return_type); - fprintf(f, "\n"); - indent(); - - fprintf(f, "(parameters\n"); - indentation++; - - foreach_in_list(ir_variable, inst, &ir->parameters) { - indent(); - inst->accept(this); - fprintf(f, "\n"); - } - indentation--; - - indent(); - fprintf(f, ")\n"); - - indent(); - - fprintf(f, "(\n"); - indentation++; - - foreach_in_list(ir_instruction, inst, &ir->body) { - indent(); - inst->accept(this); - fprintf(f, "\n"); - } - indentation--; - indent(); - fprintf(f, "))\n"); - indentation--; - _mesa_symbol_table_pop_scope(symbols); -} - - -void ir_print_visitor::visit(ir_function *ir) -{ - fprintf(f, "(%s function %s\n", ir->is_subroutine ? 
"subroutine" : "", ir->name); - indentation++; - foreach_in_list(ir_function_signature, sig, &ir->signatures) { - indent(); - sig->accept(this); - fprintf(f, "\n"); - } - indentation--; - indent(); - fprintf(f, ")\n\n"); -} - - -void ir_print_visitor::visit(ir_expression *ir) -{ - fprintf(f, "(expression "); - - print_type(f, ir->type); - - fprintf(f, " %s ", ir->operator_string()); - - for (unsigned i = 0; i < ir->get_num_operands(); i++) { - ir->operands[i]->accept(this); - } - - fprintf(f, ") "); -} - - -void ir_print_visitor::visit(ir_texture *ir) -{ - fprintf(f, "(%s ", ir->opcode_string()); - - if (ir->op == ir_samples_identical) { - ir->sampler->accept(this); - fprintf(f, " "); - ir->coordinate->accept(this); - fprintf(f, ")"); - return; - } - - print_type(f, ir->type); - fprintf(f, " "); - - ir->sampler->accept(this); - fprintf(f, " "); - - if (ir->op != ir_txs && ir->op != ir_query_levels && - ir->op != ir_texture_samples) { - ir->coordinate->accept(this); - - fprintf(f, " "); - - if (ir->offset != NULL) { - ir->offset->accept(this); - } else { - fprintf(f, "0"); - } - - fprintf(f, " "); - } - - if (ir->op != ir_txf && ir->op != ir_txf_ms && - ir->op != ir_txs && ir->op != ir_tg4 && - ir->op != ir_query_levels && ir->op != ir_texture_samples) { - if (ir->projector) - ir->projector->accept(this); - else - fprintf(f, "1"); - - if (ir->shadow_comparitor) { - fprintf(f, " "); - ir->shadow_comparitor->accept(this); - } else { - fprintf(f, " ()"); - } - } - - fprintf(f, " "); - switch (ir->op) - { - case ir_tex: - case ir_lod: - case ir_query_levels: - case ir_texture_samples: - break; - case ir_txb: - ir->lod_info.bias->accept(this); - break; - case ir_txl: - case ir_txf: - case ir_txs: - ir->lod_info.lod->accept(this); - break; - case ir_txf_ms: - ir->lod_info.sample_index->accept(this); - break; - case ir_txd: - fprintf(f, "("); - ir->lod_info.grad.dPdx->accept(this); - fprintf(f, " "); - ir->lod_info.grad.dPdy->accept(this); - fprintf(f, ")"); - break; - case ir_tg4: - ir->lod_info.component->accept(this); - break; - case ir_samples_identical: - unreachable(!"ir_samples_identical was already handled"); - }; - fprintf(f, ")"); -} - - -void ir_print_visitor::visit(ir_swizzle *ir) -{ - const unsigned swiz[4] = { - ir->mask.x, - ir->mask.y, - ir->mask.z, - ir->mask.w, - }; - - fprintf(f, "(swiz "); - for (unsigned i = 0; i < ir->mask.num_components; i++) { - fprintf(f, "%c", "xyzw"[swiz[i]]); - } - fprintf(f, " "); - ir->val->accept(this); - fprintf(f, ")"); -} - - -void ir_print_visitor::visit(ir_dereference_variable *ir) -{ - ir_variable *var = ir->variable_referenced(); - fprintf(f, "(var_ref %s) ", unique_name(var)); -} - - -void ir_print_visitor::visit(ir_dereference_array *ir) -{ - fprintf(f, "(array_ref "); - ir->array->accept(this); - ir->array_index->accept(this); - fprintf(f, ") "); -} - - -void ir_print_visitor::visit(ir_dereference_record *ir) -{ - fprintf(f, "(record_ref "); - ir->record->accept(this); - fprintf(f, " %s) ", ir->field); -} - - -void ir_print_visitor::visit(ir_assignment *ir) -{ - fprintf(f, "(assign "); - - if (ir->condition) - ir->condition->accept(this); - - char mask[5]; - unsigned j = 0; - - for (unsigned i = 0; i < 4; i++) { - if ((ir->write_mask & (1 << i)) != 0) { - mask[j] = "xyzw"[i]; - j++; - } - } - mask[j] = '\0'; - - fprintf(f, " (%s) ", mask); - - ir->lhs->accept(this); - - fprintf(f, " "); - - ir->rhs->accept(this); - fprintf(f, ") "); -} - - -void ir_print_visitor::visit(ir_constant *ir) -{ - fprintf(f, "(constant "); - print_type(f, 
ir->type); - fprintf(f, " ("); - - if (ir->type->is_array()) { - for (unsigned i = 0; i < ir->type->length; i++) - ir->get_array_element(i)->accept(this); - } else if (ir->type->is_record()) { - ir_constant *value = (ir_constant *) ir->components.get_head(); - for (unsigned i = 0; i < ir->type->length; i++) { - fprintf(f, "(%s ", ir->type->fields.structure[i].name); - value->accept(this); - fprintf(f, ")"); - - value = (ir_constant *) value->next; - } - } else { - for (unsigned i = 0; i < ir->type->components(); i++) { - if (i != 0) - fprintf(f, " "); - switch (ir->type->base_type) { - case GLSL_TYPE_UINT: fprintf(f, "%u", ir->value.u[i]); break; - case GLSL_TYPE_INT: fprintf(f, "%d", ir->value.i[i]); break; - case GLSL_TYPE_FLOAT: - if (ir->value.f[i] == 0.0f) - /* 0.0 == -0.0, so print with %f to get the proper sign. */ - fprintf(f, "%f", ir->value.f[i]); - else if (fabs(ir->value.f[i]) < 0.000001f) - fprintf(f, "%a", ir->value.f[i]); - else if (fabs(ir->value.f[i]) > 1000000.0f) - fprintf(f, "%e", ir->value.f[i]); - else - fprintf(f, "%f", ir->value.f[i]); - break; - case GLSL_TYPE_BOOL: fprintf(f, "%d", ir->value.b[i]); break; - case GLSL_TYPE_DOUBLE: - if (ir->value.d[i] == 0.0) - /* 0.0 == -0.0, so print with %f to get the proper sign. */ - fprintf(f, "%.1f", ir->value.d[i]); - else if (fabs(ir->value.d[i]) < 0.000001) - fprintf(f, "%a", ir->value.d[i]); - else if (fabs(ir->value.d[i]) > 1000000.0) - fprintf(f, "%e", ir->value.d[i]); - else - fprintf(f, "%f", ir->value.d[i]); - break; - default: assert(0); - } - } - } - fprintf(f, ")) "); -} - - -void -ir_print_visitor::visit(ir_call *ir) -{ - fprintf(f, "(call %s ", ir->callee_name()); - if (ir->return_deref) - ir->return_deref->accept(this); - fprintf(f, " ("); - foreach_in_list(ir_rvalue, param, &ir->actual_parameters) { - param->accept(this); - } - fprintf(f, "))\n"); -} - - -void -ir_print_visitor::visit(ir_return *ir) -{ - fprintf(f, "(return"); - - ir_rvalue *const value = ir->get_value(); - if (value) { - fprintf(f, " "); - value->accept(this); - } - - fprintf(f, ")"); -} - - -void -ir_print_visitor::visit(ir_discard *ir) -{ - fprintf(f, "(discard "); - - if (ir->condition != NULL) { - fprintf(f, " "); - ir->condition->accept(this); - } - - fprintf(f, ")"); -} - - -void -ir_print_visitor::visit(ir_if *ir) -{ - fprintf(f, "(if "); - ir->condition->accept(this); - - fprintf(f, "(\n"); - indentation++; - - foreach_in_list(ir_instruction, inst, &ir->then_instructions) { - indent(); - inst->accept(this); - fprintf(f, "\n"); - } - - indentation--; - indent(); - fprintf(f, ")\n"); - - indent(); - if (!ir->else_instructions.is_empty()) { - fprintf(f, "(\n"); - indentation++; - - foreach_in_list(ir_instruction, inst, &ir->else_instructions) { - indent(); - inst->accept(this); - fprintf(f, "\n"); - } - indentation--; - indent(); - fprintf(f, "))\n"); - } else { - fprintf(f, "())\n"); - } -} - - -void -ir_print_visitor::visit(ir_loop *ir) -{ - fprintf(f, "(loop (\n"); - indentation++; - - foreach_in_list(ir_instruction, inst, &ir->body_instructions) { - indent(); - inst->accept(this); - fprintf(f, "\n"); - } - indentation--; - indent(); - fprintf(f, "))\n"); -} - - -void -ir_print_visitor::visit(ir_loop_jump *ir) -{ - fprintf(f, "%s", ir->is_break() ? 
"break" : "continue"); -} - -void -ir_print_visitor::visit(ir_emit_vertex *ir) -{ - fprintf(f, "(emit-vertex "); - ir->stream->accept(this); - fprintf(f, ")\n"); -} - -void -ir_print_visitor::visit(ir_end_primitive *ir) -{ - fprintf(f, "(end-primitive "); - ir->stream->accept(this); - fprintf(f, ")\n"); -} - -void -ir_print_visitor::visit(ir_barrier *) -{ - fprintf(f, "(barrier)\n"); -} diff --git a/src/glsl/ir_print_visitor.h b/src/glsl/ir_print_visitor.h deleted file mode 100644 index 965e63ade8b..00000000000 --- a/src/glsl/ir_print_visitor.h +++ /dev/null @@ -1,96 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once -#ifndef IR_PRINT_VISITOR_H -#define IR_PRINT_VISITOR_H - -#include "ir.h" -#include "ir_visitor.h" - -extern "C" { -#include "program/symbol_table.h" -} - -/** - * Abstract base class of visitors of IR instruction trees - */ -class ir_print_visitor : public ir_visitor { -public: - ir_print_visitor(FILE *f); - virtual ~ir_print_visitor(); - - void indent(void); - - /** - * \name Visit methods - * - * As typical for the visitor pattern, there must be one \c visit method for - * each concrete subclass of \c ir_instruction. Virtual base classes within - * the hierarchy should not have \c visit methods. - */ - /*@{*/ - virtual void visit(ir_rvalue *); - virtual void visit(ir_variable *); - virtual void visit(ir_function_signature *); - virtual void visit(ir_function *); - virtual void visit(ir_expression *); - virtual void visit(ir_texture *); - virtual void visit(ir_swizzle *); - virtual void visit(ir_dereference_variable *); - virtual void visit(ir_dereference_array *); - virtual void visit(ir_dereference_record *); - virtual void visit(ir_assignment *); - virtual void visit(ir_constant *); - virtual void visit(ir_call *); - virtual void visit(ir_return *); - virtual void visit(ir_discard *); - virtual void visit(ir_if *); - virtual void visit(ir_loop *); - virtual void visit(ir_loop_jump *); - virtual void visit(ir_emit_vertex *); - virtual void visit(ir_end_primitive *); - virtual void visit(ir_barrier *); - /*@}*/ - -private: - /** - * Fetch/generate a unique name for ir_variable. - * - * GLSL IR permits multiple ir_variables to share the same name. This works - * fine until we try to print it, when we really need a unique one. 
- */ - const char *unique_name(ir_variable *var); - - /** A mapping from ir_variable * -> unique printable names. */ - hash_table *printable_names; - _mesa_symbol_table *symbols; - - void *mem_ctx; - FILE *f; - - int indentation; -}; - -#endif /* IR_PRINT_VISITOR_H */ diff --git a/src/glsl/ir_reader.cpp b/src/glsl/ir_reader.cpp deleted file mode 100644 index 15315aac522..00000000000 --- a/src/glsl/ir_reader.cpp +++ /dev/null @@ -1,1167 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "ir_reader.h" -#include "glsl_parser_extras.h" -#include "compiler/glsl_types.h" -#include "s_expression.h" - -static const bool debug = false; - -namespace { - -class ir_reader { -public: - ir_reader(_mesa_glsl_parse_state *); - - void read(exec_list *instructions, const char *src, bool scan_for_protos); - -private: - void *mem_ctx; - _mesa_glsl_parse_state *state; - - void ir_read_error(s_expression *, const char *fmt, ...); - - const glsl_type *read_type(s_expression *); - - void scan_for_prototypes(exec_list *, s_expression *); - ir_function *read_function(s_expression *, bool skip_body); - void read_function_sig(ir_function *, s_expression *, bool skip_body); - - void read_instructions(exec_list *, s_expression *, ir_loop *); - ir_instruction *read_instruction(s_expression *, ir_loop *); - ir_variable *read_declaration(s_expression *); - ir_if *read_if(s_expression *, ir_loop *); - ir_loop *read_loop(s_expression *); - ir_call *read_call(s_expression *); - ir_return *read_return(s_expression *); - ir_rvalue *read_rvalue(s_expression *); - ir_assignment *read_assignment(s_expression *); - ir_expression *read_expression(s_expression *); - ir_swizzle *read_swizzle(s_expression *); - ir_constant *read_constant(s_expression *); - ir_texture *read_texture(s_expression *); - ir_emit_vertex *read_emit_vertex(s_expression *); - ir_end_primitive *read_end_primitive(s_expression *); - ir_barrier *read_barrier(s_expression *); - - ir_dereference *read_dereference(s_expression *); - ir_dereference_variable *read_var_ref(s_expression *); -}; - -} /* anonymous namespace */ - -ir_reader::ir_reader(_mesa_glsl_parse_state *state) : state(state) -{ - this->mem_ctx = state; -} - -void -_mesa_glsl_read_ir(_mesa_glsl_parse_state *state, exec_list *instructions, - const char *src, bool scan_for_protos) -{ - ir_reader r(state); - r.read(instructions, src, scan_for_protos); -} - -void 
-ir_reader::read(exec_list *instructions, const char *src, bool scan_for_protos) -{ - void *sx_mem_ctx = ralloc_context(NULL); - s_expression *expr = s_expression::read_expression(sx_mem_ctx, src); - if (expr == NULL) { - ir_read_error(NULL, "couldn't parse S-Expression."); - return; - } - - if (scan_for_protos) { - scan_for_prototypes(instructions, expr); - if (state->error) - return; - } - - read_instructions(instructions, expr, NULL); - ralloc_free(sx_mem_ctx); - - if (debug) - validate_ir_tree(instructions); -} - -void -ir_reader::ir_read_error(s_expression *expr, const char *fmt, ...) -{ - va_list ap; - - state->error = true; - - if (state->current_function != NULL) - ralloc_asprintf_append(&state->info_log, "In function %s:\n", - state->current_function->function_name()); - ralloc_strcat(&state->info_log, "error: "); - - va_start(ap, fmt); - ralloc_vasprintf_append(&state->info_log, fmt, ap); - va_end(ap); - ralloc_strcat(&state->info_log, "\n"); - - if (expr != NULL) { - ralloc_strcat(&state->info_log, "...in this context:\n "); - expr->print(); - ralloc_strcat(&state->info_log, "\n\n"); - } -} - -const glsl_type * -ir_reader::read_type(s_expression *expr) -{ - s_expression *s_base_type; - s_int *s_size; - - s_pattern pat[] = { "array", s_base_type, s_size }; - if (MATCH(expr, pat)) { - const glsl_type *base_type = read_type(s_base_type); - if (base_type == NULL) { - ir_read_error(NULL, "when reading base type of array type"); - return NULL; - } - - return glsl_type::get_array_instance(base_type, s_size->value()); - } - - s_symbol *type_sym = SX_AS_SYMBOL(expr); - if (type_sym == NULL) { - ir_read_error(expr, "expected <type>"); - return NULL; - } - - const glsl_type *type = state->symbols->get_type(type_sym->value()); - if (type == NULL) - ir_read_error(expr, "invalid type: %s", type_sym->value()); - - return type; -} - - -void -ir_reader::scan_for_prototypes(exec_list *instructions, s_expression *expr) -{ - s_list *list = SX_AS_LIST(expr); - if (list == NULL) { - ir_read_error(expr, "Expected (<instruction> ...); found an atom."); - return; - } - - foreach_in_list(s_list, sub, &list->subexpressions) { - if (!sub->is_list()) - continue; // not a (function ...); ignore it. - - s_symbol *tag = SX_AS_SYMBOL(sub->subexpressions.get_head()); - if (tag == NULL || strcmp(tag->value(), "function") != 0) - continue; // not a (function ...); ignore it. - - ir_function *f = read_function(sub, true); - if (f == NULL) - return; - instructions->push_tail(f); - } -} - -ir_function * -ir_reader::read_function(s_expression *expr, bool skip_body) -{ - bool added = false; - s_symbol *name; - - s_pattern pat[] = { "function", name }; - if (!PARTIAL_MATCH(expr, pat)) { - ir_read_error(expr, "Expected (function <name> (signature ...) ...)"); - return NULL; - } - - ir_function *f = state->symbols->get_function(name->value()); - if (f == NULL) { - f = new(mem_ctx) ir_function(name->value()); - added = state->symbols->add_function(f); - assert(added); - } - - /* Skip over "function" tag and function name (which are guaranteed to be - * present by the above PARTIAL_MATCH call). - */ - exec_node *node = ((s_list *) expr)->subexpressions.head->next->next; - for (/* nothing */; !node->is_tail_sentinel(); node = node->next) { - s_expression *s_sig = (s_expression *) node; - read_function_sig(f, s_sig, skip_body); - } - return added ?
f : NULL; -} - -static bool -always_available(const _mesa_glsl_parse_state *) -{ - return true; -} - -void -ir_reader::read_function_sig(ir_function *f, s_expression *expr, bool skip_body) -{ - s_expression *type_expr; - s_list *paramlist; - s_list *body_list; - - s_pattern pat[] = { "signature", type_expr, paramlist, body_list }; - if (!MATCH(expr, pat)) { - ir_read_error(expr, "Expected (signature <type> (parameters ...) " - "(<instruction> ...))"); - return; - } - - const glsl_type *return_type = read_type(type_expr); - if (return_type == NULL) - return; - - s_symbol *paramtag = SX_AS_SYMBOL(paramlist->subexpressions.get_head()); - if (paramtag == NULL || strcmp(paramtag->value(), "parameters") != 0) { - ir_read_error(paramlist, "Expected (parameters ...)"); - return; - } - - // Read the parameters list into a temporary place. - exec_list hir_parameters; - state->symbols->push_scope(); - - /* Skip over the "parameters" tag. */ - exec_node *node = paramlist->subexpressions.head->next; - for (/* nothing */; !node->is_tail_sentinel(); node = node->next) { - ir_variable *var = read_declaration((s_expression *) node); - if (var == NULL) - return; - - hir_parameters.push_tail(var); - } - - ir_function_signature *sig = - f->exact_matching_signature(state, &hir_parameters); - if (sig == NULL && skip_body) { - /* If scanning for prototypes, generate a new signature. */ - /* ir_reader doesn't know what languages support a given built-in, so - * just say that they're always available. For now, other mechanisms - * guarantee the right built-ins are available. - */ - sig = new(mem_ctx) ir_function_signature(return_type, always_available); - f->add_signature(sig); - } else if (sig != NULL) { - const char *badvar = sig->qualifiers_match(&hir_parameters); - if (badvar != NULL) { - ir_read_error(expr, "function `%s' parameter `%s' qualifiers " - "don't match prototype", f->name, badvar); - return; - } - - if (sig->return_type != return_type) { - ir_read_error(expr, "function `%s' return type doesn't " - "match prototype", f->name); - return; - } - } else { - /* No prototype for this body exists - skip it. */ - state->symbols->pop_scope(); - return; - } - assert(sig != NULL); - - sig->replace_parameters(&hir_parameters); - - if (!skip_body && !body_list->subexpressions.is_empty()) { - if (sig->is_defined) { - ir_read_error(expr, "function %s redefined", f->name); - return; - } - state->current_function = sig; - read_instructions(&sig->body, body_list, NULL); - state->current_function = NULL; - sig->is_defined = true; - } - - state->symbols->pop_scope(); -} - -void -ir_reader::read_instructions(exec_list *instructions, s_expression *expr, - ir_loop *loop_ctx) -{ - // Read in a list of instructions - s_list *list = SX_AS_LIST(expr); - if (list == NULL) { - ir_read_error(expr, "Expected (<instruction> ...); found an atom."); - return; - } - - foreach_in_list(s_expression, sub, &list->subexpressions) { - ir_instruction *ir = read_instruction(sub, loop_ctx); - if (ir != NULL) { - /* Global variable declarations should be moved to the top, before - * any functions that might use them. Functions are added to the - * instruction stream when scanning for prototypes, so without this - * hack, they always appear before variable declarations.
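For reference, a hypothetical sketch of the S-expression input the reader accepts, fed through the _mesa_glsl_read_ir() entry point declared above (state and instructions are stand-in names; the (declare ...) and (assign ...) forms match the patterns handled below):

   const char *src =
      "((declare (shader_out) float f)\n"
      " (assign (x) (var_ref f) (constant float (1.000000))))";
   _mesa_glsl_read_ir(state, instructions, src, false);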
- */ - if (state->current_function == NULL && ir->as_variable() != NULL) - instructions->push_head(ir); - else - instructions->push_tail(ir); - } - } -} - - -ir_instruction * -ir_reader::read_instruction(s_expression *expr, ir_loop *loop_ctx) -{ - s_symbol *symbol = SX_AS_SYMBOL(expr); - if (symbol != NULL) { - if (strcmp(symbol->value(), "break") == 0 && loop_ctx != NULL) - return new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break); - if (strcmp(symbol->value(), "continue") == 0 && loop_ctx != NULL) - return new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue); - } - - s_list *list = SX_AS_LIST(expr); - if (list == NULL || list->subexpressions.is_empty()) { - ir_read_error(expr, "Invalid instruction.\n"); - return NULL; - } - - s_symbol *tag = SX_AS_SYMBOL(list->subexpressions.get_head()); - if (tag == NULL) { - ir_read_error(expr, "expected instruction tag"); - return NULL; - } - - ir_instruction *inst = NULL; - if (strcmp(tag->value(), "declare") == 0) { - inst = read_declaration(list); - } else if (strcmp(tag->value(), "assign") == 0) { - inst = read_assignment(list); - } else if (strcmp(tag->value(), "if") == 0) { - inst = read_if(list, loop_ctx); - } else if (strcmp(tag->value(), "loop") == 0) { - inst = read_loop(list); - } else if (strcmp(tag->value(), "call") == 0) { - inst = read_call(list); - } else if (strcmp(tag->value(), "return") == 0) { - inst = read_return(list); - } else if (strcmp(tag->value(), "function") == 0) { - inst = read_function(list, false); - } else if (strcmp(tag->value(), "emit-vertex") == 0) { - inst = read_emit_vertex(list); - } else if (strcmp(tag->value(), "end-primitive") == 0) { - inst = read_end_primitive(list); - } else if (strcmp(tag->value(), "barrier") == 0) { - inst = read_barrier(list); - } else { - inst = read_rvalue(list); - if (inst == NULL) - ir_read_error(NULL, "when reading instruction"); - } - return inst; -} - -ir_variable * -ir_reader::read_declaration(s_expression *expr) -{ - s_list *s_quals; - s_expression *s_type; - s_symbol *s_name; - - s_pattern pat[] = { "declare", s_quals, s_type, s_name }; - if (!MATCH(expr, pat)) { - ir_read_error(expr, "expected (declare (<qualifiers>) <type> <name>)"); - return NULL; - } - - const glsl_type *type = read_type(s_type); - if (type == NULL) - return NULL; - - ir_variable *var = new(mem_ctx) ir_variable(type, s_name->value(), - ir_var_auto); - - foreach_in_list(s_symbol, qualifier, &s_quals->subexpressions) { - if (!qualifier->is_symbol()) { - ir_read_error(expr, "qualifier list must contain only symbols"); - return NULL; - } - - // FINISHME: Check for duplicate/conflicting qualifiers.
-      if (strcmp(qualifier->value(), "centroid") == 0) {
-         var->data.centroid = 1;
-      } else if (strcmp(qualifier->value(), "sample") == 0) {
-         var->data.sample = 1;
-      } else if (strcmp(qualifier->value(), "patch") == 0) {
-         var->data.patch = 1;
-      } else if (strcmp(qualifier->value(), "invariant") == 0) {
-         var->data.invariant = 1;
-      } else if (strcmp(qualifier->value(), "uniform") == 0) {
-         var->data.mode = ir_var_uniform;
-      } else if (strcmp(qualifier->value(), "shader_storage") == 0) {
-         var->data.mode = ir_var_shader_storage;
-      } else if (strcmp(qualifier->value(), "auto") == 0) {
-         var->data.mode = ir_var_auto;
-      } else if (strcmp(qualifier->value(), "in") == 0) {
-         var->data.mode = ir_var_function_in;
-      } else if (strcmp(qualifier->value(), "shader_in") == 0) {
-         var->data.mode = ir_var_shader_in;
-      } else if (strcmp(qualifier->value(), "const_in") == 0) {
-         var->data.mode = ir_var_const_in;
-      } else if (strcmp(qualifier->value(), "out") == 0) {
-         var->data.mode = ir_var_function_out;
-      } else if (strcmp(qualifier->value(), "shader_out") == 0) {
-         var->data.mode = ir_var_shader_out;
-      } else if (strcmp(qualifier->value(), "inout") == 0) {
-         var->data.mode = ir_var_function_inout;
-      } else if (strcmp(qualifier->value(), "temporary") == 0) {
-         var->data.mode = ir_var_temporary;
-      } else if (strcmp(qualifier->value(), "stream1") == 0) {
-         var->data.stream = 1;
-      } else if (strcmp(qualifier->value(), "stream2") == 0) {
-         var->data.stream = 2;
-      } else if (strcmp(qualifier->value(), "stream3") == 0) {
-         var->data.stream = 3;
-      } else if (strcmp(qualifier->value(), "smooth") == 0) {
-         var->data.interpolation = INTERP_QUALIFIER_SMOOTH;
-      } else if (strcmp(qualifier->value(), "flat") == 0) {
-         var->data.interpolation = INTERP_QUALIFIER_FLAT;
-      } else if (strcmp(qualifier->value(), "noperspective") == 0) {
-         var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE;
-      } else {
-         ir_read_error(expr, "unknown qualifier: %s", qualifier->value());
-         return NULL;
-      }
-   }
-
-   // Add the variable to the symbol table
-   state->symbols->add_variable(var);
-
-   return var;
-}
-
-
-ir_if *
-ir_reader::read_if(s_expression *expr, ir_loop *loop_ctx)
-{
-   s_expression *s_cond;
-   s_expression *s_then;
-   s_expression *s_else;
-
-   s_pattern pat[] = { "if", s_cond, s_then, s_else };
-   if (!MATCH(expr, pat)) {
-      ir_read_error(expr, "expected (if <condition> (<then> ...) "
(...))"); - return NULL; - } - - ir_rvalue *condition = read_rvalue(s_cond); - if (condition == NULL) { - ir_read_error(NULL, "when reading condition of (if ...)"); - return NULL; - } - - ir_if *iff = new(mem_ctx) ir_if(condition); - - read_instructions(&iff->then_instructions, s_then, loop_ctx); - read_instructions(&iff->else_instructions, s_else, loop_ctx); - if (state->error) { - delete iff; - iff = NULL; - } - return iff; -} - - -ir_loop * -ir_reader::read_loop(s_expression *expr) -{ - s_expression *s_body; - - s_pattern loop_pat[] = { "loop", s_body }; - if (!MATCH(expr, loop_pat)) { - ir_read_error(expr, "expected (loop )"); - return NULL; - } - - ir_loop *loop = new(mem_ctx) ir_loop; - - read_instructions(&loop->body_instructions, s_body, loop); - if (state->error) { - delete loop; - loop = NULL; - } - return loop; -} - - -ir_return * -ir_reader::read_return(s_expression *expr) -{ - s_expression *s_retval; - - s_pattern return_value_pat[] = { "return", s_retval}; - s_pattern return_void_pat[] = { "return" }; - if (MATCH(expr, return_value_pat)) { - ir_rvalue *retval = read_rvalue(s_retval); - if (retval == NULL) { - ir_read_error(NULL, "when reading return value"); - return NULL; - } - return new(mem_ctx) ir_return(retval); - } else if (MATCH(expr, return_void_pat)) { - return new(mem_ctx) ir_return; - } else { - ir_read_error(expr, "expected (return ) or (return)"); - return NULL; - } -} - - -ir_rvalue * -ir_reader::read_rvalue(s_expression *expr) -{ - s_list *list = SX_AS_LIST(expr); - if (list == NULL || list->subexpressions.is_empty()) - return NULL; - - s_symbol *tag = SX_AS_SYMBOL(list->subexpressions.get_head()); - if (tag == NULL) { - ir_read_error(expr, "expected rvalue tag"); - return NULL; - } - - ir_rvalue *rvalue = read_dereference(list); - if (rvalue != NULL || state->error) - return rvalue; - else if (strcmp(tag->value(), "swiz") == 0) { - rvalue = read_swizzle(list); - } else if (strcmp(tag->value(), "expression") == 0) { - rvalue = read_expression(list); - } else if (strcmp(tag->value(), "constant") == 0) { - rvalue = read_constant(list); - } else { - rvalue = read_texture(list); - if (rvalue == NULL && !state->error) - ir_read_error(expr, "unrecognized rvalue tag: %s", tag->value()); - } - - return rvalue; -} - -ir_assignment * -ir_reader::read_assignment(s_expression *expr) -{ - s_expression *cond_expr = NULL; - s_expression *lhs_expr, *rhs_expr; - s_list *mask_list; - - s_pattern pat4[] = { "assign", mask_list, lhs_expr, rhs_expr }; - s_pattern pat5[] = { "assign", cond_expr, mask_list, lhs_expr, rhs_expr }; - if (!MATCH(expr, pat4) && !MATCH(expr, pat5)) { - ir_read_error(expr, "expected (assign [] () " - " )"); - return NULL; - } - - ir_rvalue *condition = NULL; - if (cond_expr != NULL) { - condition = read_rvalue(cond_expr); - if (condition == NULL) { - ir_read_error(NULL, "when reading condition of assignment"); - return NULL; - } - } - - unsigned mask = 0; - - s_symbol *mask_symbol; - s_pattern mask_pat[] = { mask_symbol }; - if (MATCH(mask_list, mask_pat)) { - const char *mask_str = mask_symbol->value(); - unsigned mask_length = strlen(mask_str); - if (mask_length > 4) { - ir_read_error(expr, "invalid write mask: %s", mask_str); - return NULL; - } - - const unsigned idx_map[] = { 3, 0, 1, 2 }; /* w=bit 3, x=0, y=1, z=2 */ - - for (unsigned i = 0; i < mask_length; i++) { - if (mask_str[i] < 'w' || mask_str[i] > 'z') { - ir_read_error(expr, "write mask contains invalid character: %c", - mask_str[i]); - return NULL; - } - mask |= 1 << idx_map[mask_str[i] - 
-      }
-   } else if (!mask_list->subexpressions.is_empty()) {
-      ir_read_error(mask_list, "expected () or (<write mask>)");
-      return NULL;
-   }
-
-   ir_dereference *lhs = read_dereference(lhs_expr);
-   if (lhs == NULL) {
-      ir_read_error(NULL, "when reading left-hand side of assignment");
-      return NULL;
-   }
-
-   ir_rvalue *rhs = read_rvalue(rhs_expr);
-   if (rhs == NULL) {
-      ir_read_error(NULL, "when reading right-hand side of assignment");
-      return NULL;
-   }
-
-   if (mask == 0 && (lhs->type->is_vector() || lhs->type->is_scalar())) {
-      ir_read_error(expr, "non-zero write mask required.");
-      return NULL;
-   }
-
-   return new(mem_ctx) ir_assignment(lhs, rhs, condition, mask);
-}
-
-ir_call *
-ir_reader::read_call(s_expression *expr)
-{
-   s_symbol *name;
-   s_list *params;
-   s_list *s_return = NULL;
-
-   ir_dereference_variable *return_deref = NULL;
-
-   s_pattern void_pat[] = { "call", name, params };
-   s_pattern non_void_pat[] = { "call", name, s_return, params };
-   if (MATCH(expr, non_void_pat)) {
-      return_deref = read_var_ref(s_return);
-      if (return_deref == NULL) {
-         ir_read_error(s_return, "when reading a call's return storage");
-         return NULL;
-      }
-   } else if (!MATCH(expr, void_pat)) {
-      ir_read_error(expr, "expected (call <name> [<return>] (<param> ...))");
-      return NULL;
-   }
-
-   exec_list parameters;
-
-   foreach_in_list(s_expression, e, &params->subexpressions) {
-      ir_rvalue *param = read_rvalue(e);
-      if (param == NULL) {
-         ir_read_error(e, "when reading parameter to function call");
-         return NULL;
-      }
-      parameters.push_tail(param);
-   }
-
-   ir_function *f = state->symbols->get_function(name->value());
-   if (f == NULL) {
-      ir_read_error(expr, "found call to undefined function %s",
-                    name->value());
-      return NULL;
-   }
-
-   ir_function_signature *callee =
-      f->matching_signature(state, &parameters, true);
-   if (callee == NULL) {
-      ir_read_error(expr, "couldn't find matching signature for function "
-                    "%s", name->value());
-      return NULL;
-   }
-
-   if (callee->return_type == glsl_type::void_type && return_deref) {
-      ir_read_error(expr, "call has return value storage but void type");
-      return NULL;
-   } else if (callee->return_type != glsl_type::void_type && !return_deref) {
-      ir_read_error(expr, "call has non-void type but no return value storage");
-      return NULL;
-   }
-
-   return new(mem_ctx) ir_call(callee, return_deref, &parameters);
-}
-
-ir_expression *
-ir_reader::read_expression(s_expression *expr)
-{
-   s_expression *s_type;
-   s_symbol *s_op;
-   s_expression *s_arg[4] = {NULL};
-
-   s_pattern pat[] = { "expression", s_type, s_op, s_arg[0] };
-   if (!PARTIAL_MATCH(expr, pat)) {
-      ir_read_error(expr, "expected (expression <type> <operator> "
-                          "<operand> [<operand>] [<operand>] [<operand>])");
-      return NULL;
-   }
-   s_arg[1] = (s_expression *) s_arg[0]->next; // may be tail sentinel
-   s_arg[2] = (s_expression *) s_arg[1]->next; // may be tail sentinel or NULL
-   if (s_arg[2])
-      s_arg[3] = (s_expression *) s_arg[2]->next; // may be tail sentinel or NULL
-
-   const glsl_type *type = read_type(s_type);
-   if (type == NULL)
-      return NULL;
-
-   /* Read the operator */
-   ir_expression_operation op = ir_expression::get_operator(s_op->value());
-   if (op == (ir_expression_operation) -1) {
-      ir_read_error(expr, "invalid operator: %s", s_op->value());
-      return NULL;
-   }
-
-   /* Skip "expression" <type> <operation> by subtracting 3.
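-    * For example, (expression float neg (var_ref x)) is a four-element
-    * s-expression; dropping the tag, type, and operator leaves one operand.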
-    */
-   int num_operands = (int) ((s_list *) expr)->subexpressions.length() - 3;
-
-   int expected_operands = ir_expression::get_num_operands(op);
-   if (num_operands != expected_operands) {
-      ir_read_error(expr, "found %d expression operands, expected %d",
-                    num_operands, expected_operands);
-      return NULL;
-   }
-
-   ir_rvalue *arg[4] = {NULL};
-   for (int i = 0; i < num_operands; i++) {
-      arg[i] = read_rvalue(s_arg[i]);
-      if (arg[i] == NULL) {
-         ir_read_error(NULL, "when reading operand #%d of %s", i, s_op->value());
-         return NULL;
-      }
-   }
-
-   return new(mem_ctx) ir_expression(op, type, arg[0], arg[1], arg[2], arg[3]);
-}
-
-ir_swizzle *
-ir_reader::read_swizzle(s_expression *expr)
-{
-   s_symbol *swiz;
-   s_expression *sub;
-
-   s_pattern pat[] = { "swiz", swiz, sub };
-   if (!MATCH(expr, pat)) {
-      ir_read_error(expr, "expected (swiz <swizzle> <rvalue>)");
-      return NULL;
-   }
-
-   if (strlen(swiz->value()) > 4) {
-      ir_read_error(expr, "expected a valid swizzle; found %s", swiz->value());
-      return NULL;
-   }
-
-   ir_rvalue *rvalue = read_rvalue(sub);
-   if (rvalue == NULL)
-      return NULL;
-
-   ir_swizzle *ir = ir_swizzle::create(rvalue, swiz->value(),
-                                       rvalue->type->vector_elements);
-   if (ir == NULL)
-      ir_read_error(expr, "invalid swizzle");
-
-   return ir;
-}
-
-ir_constant *
-ir_reader::read_constant(s_expression *expr)
-{
-   s_expression *type_expr;
-   s_list *values;
-
-   s_pattern pat[] = { "constant", type_expr, values };
-   if (!MATCH(expr, pat)) {
-      ir_read_error(expr, "expected (constant <type> (...))");
-      return NULL;
-   }
-
-   const glsl_type *type = read_type(type_expr);
-   if (type == NULL)
-      return NULL;
-
-   if (values == NULL) {
-      ir_read_error(expr, "expected (constant <type> (...))");
-      return NULL;
-   }
-
-   if (type->is_array()) {
-      unsigned elements_supplied = 0;
-      exec_list elements;
-      foreach_in_list(s_expression, elt, &values->subexpressions) {
-         ir_constant *ir_elt = read_constant(elt);
-         if (ir_elt == NULL)
-            return NULL;
-         elements.push_tail(ir_elt);
-         elements_supplied++;
-      }
-
-      if (elements_supplied != type->length) {
-         ir_read_error(values, "expected exactly %u array elements, "
-                       "given %u", type->length, elements_supplied);
-         return NULL;
-      }
-      return new(mem_ctx) ir_constant(type, &elements);
-   }
-
-   ir_constant_data data = { { 0 } };
-
-   // Read in list of values (at most 16).
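-   // For example: (constant float (1.0)) or (constant vec4 (0.0 0.0 0.0 1.0));
-   // the number of values supplied must match type->components().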
- unsigned k = 0; - foreach_in_list(s_expression, expr, &values->subexpressions) { - if (k >= 16) { - ir_read_error(values, "expected at most 16 numbers"); - return NULL; - } - - if (type->base_type == GLSL_TYPE_FLOAT) { - s_number *value = SX_AS_NUMBER(expr); - if (value == NULL) { - ir_read_error(values, "expected numbers"); - return NULL; - } - data.f[k] = value->fvalue(); - } else { - s_int *value = SX_AS_INT(expr); - if (value == NULL) { - ir_read_error(values, "expected integers"); - return NULL; - } - - switch (type->base_type) { - case GLSL_TYPE_UINT: { - data.u[k] = value->value(); - break; - } - case GLSL_TYPE_INT: { - data.i[k] = value->value(); - break; - } - case GLSL_TYPE_BOOL: { - data.b[k] = value->value(); - break; - } - default: - ir_read_error(values, "unsupported constant type"); - return NULL; - } - } - ++k; - } - if (k != type->components()) { - ir_read_error(values, "expected %u constant values, found %u", - type->components(), k); - return NULL; - } - - return new(mem_ctx) ir_constant(type, &data); -} - -ir_dereference_variable * -ir_reader::read_var_ref(s_expression *expr) -{ - s_symbol *s_var; - s_pattern var_pat[] = { "var_ref", s_var }; - - if (MATCH(expr, var_pat)) { - ir_variable *var = state->symbols->get_variable(s_var->value()); - if (var == NULL) { - ir_read_error(expr, "undeclared variable: %s", s_var->value()); - return NULL; - } - return new(mem_ctx) ir_dereference_variable(var); - } - return NULL; -} - -ir_dereference * -ir_reader::read_dereference(s_expression *expr) -{ - s_expression *s_subject; - s_expression *s_index; - s_symbol *s_field; - - s_pattern array_pat[] = { "array_ref", s_subject, s_index }; - s_pattern record_pat[] = { "record_ref", s_subject, s_field }; - - ir_dereference_variable *var_ref = read_var_ref(expr); - if (var_ref != NULL) { - return var_ref; - } else if (MATCH(expr, array_pat)) { - ir_rvalue *subject = read_rvalue(s_subject); - if (subject == NULL) { - ir_read_error(NULL, "when reading the subject of an array_ref"); - return NULL; - } - - ir_rvalue *idx = read_rvalue(s_index); - if (idx == NULL) { - ir_read_error(NULL, "when reading the index of an array_ref"); - return NULL; - } - return new(mem_ctx) ir_dereference_array(subject, idx); - } else if (MATCH(expr, record_pat)) { - ir_rvalue *subject = read_rvalue(s_subject); - if (subject == NULL) { - ir_read_error(NULL, "when reading the subject of a record_ref"); - return NULL; - } - return new(mem_ctx) ir_dereference_record(subject, s_field->value()); - } - return NULL; -} - -ir_texture * -ir_reader::read_texture(s_expression *expr) -{ - s_symbol *tag = NULL; - s_expression *s_type = NULL; - s_expression *s_sampler = NULL; - s_expression *s_coord = NULL; - s_expression *s_offset = NULL; - s_expression *s_proj = NULL; - s_list *s_shadow = NULL; - s_expression *s_lod = NULL; - s_expression *s_sample_index = NULL; - s_expression *s_component = NULL; - - ir_texture_opcode op = ir_tex; /* silence warning */ - - s_pattern tex_pattern[] = - { "tex", s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow }; - s_pattern lod_pattern[] = - { "lod", s_type, s_sampler, s_coord }; - s_pattern txf_pattern[] = - { "txf", s_type, s_sampler, s_coord, s_offset, s_lod }; - s_pattern txf_ms_pattern[] = - { "txf_ms", s_type, s_sampler, s_coord, s_sample_index }; - s_pattern txs_pattern[] = - { "txs", s_type, s_sampler, s_lod }; - s_pattern tg4_pattern[] = - { "tg4", s_type, s_sampler, s_coord, s_offset, s_component }; - s_pattern query_levels_pattern[] = - { "query_levels", s_type, s_sampler }; 
- s_pattern texture_samples_pattern[] = - { "samples", s_type, s_sampler }; - s_pattern other_pattern[] = - { tag, s_type, s_sampler, s_coord, s_offset, s_proj, s_shadow, s_lod }; - - if (MATCH(expr, lod_pattern)) { - op = ir_lod; - } else if (MATCH(expr, tex_pattern)) { - op = ir_tex; - } else if (MATCH(expr, txf_pattern)) { - op = ir_txf; - } else if (MATCH(expr, txf_ms_pattern)) { - op = ir_txf_ms; - } else if (MATCH(expr, txs_pattern)) { - op = ir_txs; - } else if (MATCH(expr, tg4_pattern)) { - op = ir_tg4; - } else if (MATCH(expr, query_levels_pattern)) { - op = ir_query_levels; - } else if (MATCH(expr, texture_samples_pattern)) { - op = ir_texture_samples; - } else if (MATCH(expr, other_pattern)) { - op = ir_texture::get_opcode(tag->value()); - if (op == (ir_texture_opcode) -1) - return NULL; - } else { - ir_read_error(NULL, "unexpected texture pattern %s", tag->value()); - return NULL; - } - - ir_texture *tex = new(mem_ctx) ir_texture(op); - - // Read return type - const glsl_type *type = read_type(s_type); - if (type == NULL) { - ir_read_error(NULL, "when reading type in (%s ...)", - tex->opcode_string()); - return NULL; - } - - // Read sampler (must be a deref) - ir_dereference *sampler = read_dereference(s_sampler); - if (sampler == NULL) { - ir_read_error(NULL, "when reading sampler in (%s ...)", - tex->opcode_string()); - return NULL; - } - tex->set_sampler(sampler, type); - - if (op != ir_txs) { - // Read coordinate (any rvalue) - tex->coordinate = read_rvalue(s_coord); - if (tex->coordinate == NULL) { - ir_read_error(NULL, "when reading coordinate in (%s ...)", - tex->opcode_string()); - return NULL; - } - - if (op != ir_txf_ms && op != ir_lod) { - // Read texel offset - either 0 or an rvalue. - s_int *si_offset = SX_AS_INT(s_offset); - if (si_offset == NULL || si_offset->value() != 0) { - tex->offset = read_rvalue(s_offset); - if (tex->offset == NULL) { - ir_read_error(s_offset, "expected 0 or an expression"); - return NULL; - } - } - } - } - - if (op != ir_txf && op != ir_txf_ms && - op != ir_txs && op != ir_lod && op != ir_tg4 && - op != ir_query_levels && op != ir_texture_samples) { - s_int *proj_as_int = SX_AS_INT(s_proj); - if (proj_as_int && proj_as_int->value() == 1) { - tex->projector = NULL; - } else { - tex->projector = read_rvalue(s_proj); - if (tex->projector == NULL) { - ir_read_error(NULL, "when reading projective divide in (%s ..)", - tex->opcode_string()); - return NULL; - } - } - - if (s_shadow->subexpressions.is_empty()) { - tex->shadow_comparitor = NULL; - } else { - tex->shadow_comparitor = read_rvalue(s_shadow); - if (tex->shadow_comparitor == NULL) { - ir_read_error(NULL, "when reading shadow comparitor in (%s ..)", - tex->opcode_string()); - return NULL; - } - } - } - - switch (op) { - case ir_txb: - tex->lod_info.bias = read_rvalue(s_lod); - if (tex->lod_info.bias == NULL) { - ir_read_error(NULL, "when reading LOD bias in (txb ...)"); - return NULL; - } - break; - case ir_txl: - case ir_txf: - case ir_txs: - tex->lod_info.lod = read_rvalue(s_lod); - if (tex->lod_info.lod == NULL) { - ir_read_error(NULL, "when reading LOD in (%s ...)", - tex->opcode_string()); - return NULL; - } - break; - case ir_txf_ms: - tex->lod_info.sample_index = read_rvalue(s_sample_index); - if (tex->lod_info.sample_index == NULL) { - ir_read_error(NULL, "when reading sample_index in (txf_ms ...)"); - return NULL; - } - break; - case ir_txd: { - s_expression *s_dx, *s_dy; - s_pattern dxdy_pat[] = { s_dx, s_dy }; - if (!MATCH(s_lod, dxdy_pat)) { - ir_read_error(s_lod, "expected 
(dPdx dPdy) in (txd ...)"); - return NULL; - } - tex->lod_info.grad.dPdx = read_rvalue(s_dx); - if (tex->lod_info.grad.dPdx == NULL) { - ir_read_error(NULL, "when reading dPdx in (txd ...)"); - return NULL; - } - tex->lod_info.grad.dPdy = read_rvalue(s_dy); - if (tex->lod_info.grad.dPdy == NULL) { - ir_read_error(NULL, "when reading dPdy in (txd ...)"); - return NULL; - } - break; - } - case ir_tg4: - tex->lod_info.component = read_rvalue(s_component); - if (tex->lod_info.component == NULL) { - ir_read_error(NULL, "when reading component in (tg4 ...)"); - return NULL; - } - break; - default: - // tex and lod don't have any extra parameters. - break; - }; - return tex; -} - -ir_emit_vertex * -ir_reader::read_emit_vertex(s_expression *expr) -{ - s_expression *s_stream = NULL; - - s_pattern pat[] = { "emit-vertex", s_stream }; - - if (MATCH(expr, pat)) { - ir_rvalue *stream = read_dereference(s_stream); - if (stream == NULL) { - ir_read_error(NULL, "when reading stream info in emit-vertex"); - return NULL; - } - return new(mem_ctx) ir_emit_vertex(stream); - } - ir_read_error(NULL, "when reading emit-vertex"); - return NULL; -} - -ir_end_primitive * -ir_reader::read_end_primitive(s_expression *expr) -{ - s_expression *s_stream = NULL; - - s_pattern pat[] = { "end-primitive", s_stream }; - - if (MATCH(expr, pat)) { - ir_rvalue *stream = read_dereference(s_stream); - if (stream == NULL) { - ir_read_error(NULL, "when reading stream info in end-primitive"); - return NULL; - } - return new(mem_ctx) ir_end_primitive(stream); - } - ir_read_error(NULL, "when reading end-primitive"); - return NULL; -} - -ir_barrier * -ir_reader::read_barrier(s_expression *expr) -{ - s_pattern pat[] = { "barrier" }; - - if (MATCH(expr, pat)) { - return new(mem_ctx) ir_barrier(); - } - ir_read_error(NULL, "when reading barrier"); - return NULL; -} diff --git a/src/glsl/ir_reader.h b/src/glsl/ir_reader.h deleted file mode 100644 index aef2ca23bd2..00000000000 --- a/src/glsl/ir_reader.h +++ /dev/null @@ -1,34 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#pragma once -#ifndef IR_READER_H -#define IR_READER_H - -#include "ir.h" - -void _mesa_glsl_read_ir(_mesa_glsl_parse_state *state, exec_list *instructions, - const char *src, bool scan_for_prototypes); - -#endif /* IR_READER_H */ diff --git a/src/glsl/ir_rvalue_visitor.cpp b/src/glsl/ir_rvalue_visitor.cpp deleted file mode 100644 index 6ab6cf02176..00000000000 --- a/src/glsl/ir_rvalue_visitor.cpp +++ /dev/null @@ -1,316 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_rvalue_visitor.cpp - * - * Generic class to implement the common pattern we have of wanting to - * visit each ir_rvalue * and possibly change that node to a different - * class. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_rvalue_visitor.h" -#include "compiler/glsl_types.h" - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_expression *ir) -{ - unsigned int operand; - - for (operand = 0; operand < ir->get_num_operands(); operand++) { - handle_rvalue(&ir->operands[operand]); - } - - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_texture *ir) -{ - handle_rvalue(&ir->coordinate); - handle_rvalue(&ir->projector); - handle_rvalue(&ir->shadow_comparitor); - handle_rvalue(&ir->offset); - - switch (ir->op) { - case ir_tex: - case ir_lod: - case ir_query_levels: - case ir_texture_samples: - case ir_samples_identical: - break; - case ir_txb: - handle_rvalue(&ir->lod_info.bias); - break; - case ir_txf: - case ir_txl: - case ir_txs: - handle_rvalue(&ir->lod_info.lod); - break; - case ir_txf_ms: - handle_rvalue(&ir->lod_info.sample_index); - break; - case ir_txd: - handle_rvalue(&ir->lod_info.grad.dPdx); - handle_rvalue(&ir->lod_info.grad.dPdy); - break; - case ir_tg4: - handle_rvalue(&ir->lod_info.component); - break; - } - - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_swizzle *ir) -{ - handle_rvalue(&ir->val); - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_dereference_array *ir) -{ - /* The array index is not the target of the assignment, so clear the - * 'in_assignee' flag. Restore it after returning from the array index. 
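-    * For example, in "a[i] = x;" the index i is evaluated as an ordinary
-    * rvalue even though the surrounding dereference is the assignment target.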
- */ - const bool was_in_assignee = this->in_assignee; - this->in_assignee = false; - handle_rvalue(&ir->array_index); - this->in_assignee = was_in_assignee; - - handle_rvalue(&ir->array); - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_dereference_record *ir) -{ - handle_rvalue(&ir->record); - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_assignment *ir) -{ - handle_rvalue(&ir->rhs); - handle_rvalue(&ir->condition); - - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_call *ir) -{ - foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { - ir_rvalue *new_param = param; - handle_rvalue(&new_param); - - if (new_param != param) { - param->replace_with(new_param); - } - } - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_discard *ir) -{ - handle_rvalue(&ir->condition); - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_return *ir) -{ - handle_rvalue(&ir->value);; - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_if *ir) -{ - handle_rvalue(&ir->condition); - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_emit_vertex *ir) -{ - handle_rvalue(&ir->stream); - return visit_continue; -} - -ir_visitor_status -ir_rvalue_base_visitor::rvalue_visit(ir_end_primitive *ir) -{ - handle_rvalue(&ir->stream); - return visit_continue; -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_expression *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_texture *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_swizzle *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_dereference_array *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_dereference_record *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_assignment *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_call *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_discard *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_return *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_if *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_emit_vertex *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_visitor::visit_leave(ir_end_primitive *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_expression *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_texture *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_swizzle *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_dereference_array *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_dereference_record *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_assignment *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_call *ir) -{ - return 
rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_discard *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_return *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_if *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_emit_vertex *ir) -{ - return rvalue_visit(ir); -} - -ir_visitor_status -ir_rvalue_enter_visitor::visit_enter(ir_end_primitive *ir) -{ - return rvalue_visit(ir); -} diff --git a/src/glsl/ir_rvalue_visitor.h b/src/glsl/ir_rvalue_visitor.h deleted file mode 100644 index 185c72a5ba1..00000000000 --- a/src/glsl/ir_rvalue_visitor.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_rvalue_visitor.h - * - * Generic class to implement the common pattern we have of wanting to - * visit each ir_rvalue * and possibly change that node to a different - * class. Just implement handle_rvalue() and you will be called with - * a pointer to each rvalue in the tree. 
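- *
- * As a minimal sketch (a hypothetical subclass, not part of this file),
- * a pass that counts the rvalues in a shader could look like:
- *
- *    class count_rvalues_visitor : public ir_rvalue_visitor {
- *    public:
- *       count_rvalues_visitor() : count(0) {}
- *
- *       virtual void handle_rvalue(ir_rvalue **rvalue)
- *       {
- *          if (*rvalue != NULL)
- *             count++;
- *       }
- *
- *       unsigned count;
- *    };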
- */ - -class ir_rvalue_base_visitor : public ir_hierarchical_visitor { -public: - ir_visitor_status rvalue_visit(ir_assignment *); - ir_visitor_status rvalue_visit(ir_call *); - ir_visitor_status rvalue_visit(ir_dereference_array *); - ir_visitor_status rvalue_visit(ir_dereference_record *); - ir_visitor_status rvalue_visit(ir_discard *); - ir_visitor_status rvalue_visit(ir_expression *); - ir_visitor_status rvalue_visit(ir_if *); - ir_visitor_status rvalue_visit(ir_return *); - ir_visitor_status rvalue_visit(ir_swizzle *); - ir_visitor_status rvalue_visit(ir_texture *); - ir_visitor_status rvalue_visit(ir_emit_vertex *); - ir_visitor_status rvalue_visit(ir_end_primitive *); - - virtual void handle_rvalue(ir_rvalue **rvalue) = 0; -}; - -class ir_rvalue_visitor : public ir_rvalue_base_visitor { -public: - - virtual ir_visitor_status visit_leave(ir_assignment *); - virtual ir_visitor_status visit_leave(ir_call *); - virtual ir_visitor_status visit_leave(ir_dereference_array *); - virtual ir_visitor_status visit_leave(ir_dereference_record *); - virtual ir_visitor_status visit_leave(ir_discard *); - virtual ir_visitor_status visit_leave(ir_expression *); - virtual ir_visitor_status visit_leave(ir_if *); - virtual ir_visitor_status visit_leave(ir_return *); - virtual ir_visitor_status visit_leave(ir_swizzle *); - virtual ir_visitor_status visit_leave(ir_texture *); - virtual ir_visitor_status visit_leave(ir_emit_vertex *); - virtual ir_visitor_status visit_leave(ir_end_primitive *); -}; - -class ir_rvalue_enter_visitor : public ir_rvalue_base_visitor { -public: - - virtual ir_visitor_status visit_enter(ir_assignment *); - virtual ir_visitor_status visit_enter(ir_call *); - virtual ir_visitor_status visit_enter(ir_dereference_array *); - virtual ir_visitor_status visit_enter(ir_dereference_record *); - virtual ir_visitor_status visit_enter(ir_discard *); - virtual ir_visitor_status visit_enter(ir_expression *); - virtual ir_visitor_status visit_enter(ir_if *); - virtual ir_visitor_status visit_enter(ir_return *); - virtual ir_visitor_status visit_enter(ir_swizzle *); - virtual ir_visitor_status visit_enter(ir_texture *); - virtual ir_visitor_status visit_enter(ir_emit_vertex *); - virtual ir_visitor_status visit_enter(ir_end_primitive *); -}; diff --git a/src/glsl/ir_set_program_inouts.cpp b/src/glsl/ir_set_program_inouts.cpp deleted file mode 100644 index df06923b870..00000000000 --- a/src/glsl/ir_set_program_inouts.cpp +++ /dev/null @@ -1,453 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_set_program_inouts.cpp - * - * Sets the InputsRead and OutputsWritten of Mesa programs. - * - * Additionally, for fragment shaders, sets the InterpQualifier array, the - * IsCentroid and IsSample bitfields, and the UsesDFdy flag. - * - * Mesa programs (gl_program, not gl_shader_program) have a set of - * flags indicating which varyings are read and written. Computing - * which are actually read from some sort of backend code can be - * tricky when variable array indexing involved. So this pass - * provides support for setting InputsRead and OutputsWritten right - * from the GLSL IR. - */ - -#include "main/core.h" /* for struct gl_program */ -#include "ir.h" -#include "ir_visitor.h" -#include "compiler/glsl_types.h" - -namespace { - -class ir_set_program_inouts_visitor : public ir_hierarchical_visitor { -public: - ir_set_program_inouts_visitor(struct gl_program *prog, - gl_shader_stage shader_stage) - { - this->prog = prog; - this->shader_stage = shader_stage; - } - ~ir_set_program_inouts_visitor() - { - } - - virtual ir_visitor_status visit_enter(ir_dereference_array *); - virtual ir_visitor_status visit_enter(ir_function_signature *); - virtual ir_visitor_status visit_enter(ir_expression *); - virtual ir_visitor_status visit_enter(ir_discard *); - virtual ir_visitor_status visit_enter(ir_texture *); - virtual ir_visitor_status visit(ir_dereference_variable *); - -private: - void mark_whole_variable(ir_variable *var); - bool try_mark_partial_variable(ir_variable *var, ir_rvalue *index); - - struct gl_program *prog; - gl_shader_stage shader_stage; -}; - -} /* anonymous namespace */ - -static inline bool -is_shader_inout(ir_variable *var) -{ - return var->data.mode == ir_var_shader_in || - var->data.mode == ir_var_shader_out || - var->data.mode == ir_var_system_value; -} - -static void -mark(struct gl_program *prog, ir_variable *var, int offset, int len, - gl_shader_stage stage) -{ - /* As of GLSL 1.20, varyings can only be floats, floating-point - * vectors or matrices, or arrays of them. For Mesa programs using - * InputsRead/OutputsWritten, everything but matrices uses one - * slot, while matrices use a slot per column. Presumably - * something doing a more clever packing would use something other - * than InputsRead/OutputsWritten. 
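-    * For example, a mat4 varying occupies four slots, so marking it sets
-    * four consecutive bits starting at its location; a vec4 sets just one.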
- */ - - for (int i = 0; i < len; i++) { - int idx = var->data.location + var->data.index + offset + i; - bool is_patch_generic = var->data.patch && - idx != VARYING_SLOT_TESS_LEVEL_INNER && - idx != VARYING_SLOT_TESS_LEVEL_OUTER; - GLbitfield64 bitfield; - - if (is_patch_generic) { - assert(idx >= VARYING_SLOT_PATCH0 && idx < VARYING_SLOT_TESS_MAX); - bitfield = BITFIELD64_BIT(idx - VARYING_SLOT_PATCH0); - } - else { - assert(idx < VARYING_SLOT_MAX); - bitfield = BITFIELD64_BIT(idx); - } - - if (var->data.mode == ir_var_shader_in) { - if (is_patch_generic) - prog->PatchInputsRead |= bitfield; - else - prog->InputsRead |= bitfield; - - /* double inputs read is only for vertex inputs */ - if (stage == MESA_SHADER_VERTEX && - var->type->without_array()->is_dual_slot_double()) - prog->DoubleInputsRead |= bitfield; - - if (stage == MESA_SHADER_FRAGMENT) { - gl_fragment_program *fprog = (gl_fragment_program *) prog; - fprog->InterpQualifier[idx] = - (glsl_interp_qualifier) var->data.interpolation; - if (var->data.centroid) - fprog->IsCentroid |= bitfield; - if (var->data.sample) - fprog->IsSample |= bitfield; - } - } else if (var->data.mode == ir_var_system_value) { - prog->SystemValuesRead |= bitfield; - } else { - assert(var->data.mode == ir_var_shader_out); - if (is_patch_generic) - prog->PatchOutputsWritten |= bitfield; - else - prog->OutputsWritten |= bitfield; - } - } -} - -/** - * Mark an entire variable as used. Caller must ensure that the variable - * represents a shader input or output. - */ -void -ir_set_program_inouts_visitor::mark_whole_variable(ir_variable *var) -{ - const glsl_type *type = var->type; - bool vertex_input = false; - if (this->shader_stage == MESA_SHADER_GEOMETRY && - var->data.mode == ir_var_shader_in && type->is_array()) { - type = type->fields.array; - } - - if (this->shader_stage == MESA_SHADER_TESS_CTRL && - var->data.mode == ir_var_shader_in) { - assert(type->is_array()); - type = type->fields.array; - } - - if (this->shader_stage == MESA_SHADER_TESS_CTRL && - var->data.mode == ir_var_shader_out && !var->data.patch) { - assert(type->is_array()); - type = type->fields.array; - } - - if (this->shader_stage == MESA_SHADER_TESS_EVAL && - var->data.mode == ir_var_shader_in && !var->data.patch) { - assert(type->is_array()); - type = type->fields.array; - } - - if (this->shader_stage == MESA_SHADER_VERTEX && - var->data.mode == ir_var_shader_in) - vertex_input = true; - - mark(this->prog, var, 0, type->count_attribute_slots(vertex_input), - this->shader_stage); -} - -/* Default handler: Mark all the locations in the variable as used. */ -ir_visitor_status -ir_set_program_inouts_visitor::visit(ir_dereference_variable *ir) -{ - if (!is_shader_inout(ir->var)) - return visit_continue; - - mark_whole_variable(ir->var); - - return visit_continue; -} - -/** - * Try to mark a portion of the given variable as used. Caller must ensure - * that the variable represents a shader input or output which can be indexed - * into in array fashion (an array or matrix). For the purpose of geometry - * shader inputs (which are always arrays*), this means that the array element - * must be something that can be indexed into in array fashion. - * - * *Except gl_PrimitiveIDIn, as noted below. - * - * For tessellation control shaders all inputs and non-patch outputs are - * arrays. For tessellation evaluation shaders non-patch inputs are arrays. - * - * If the index can't be interpreted as a constant, or some other problem - * occurs, then nothing will be marked and false will be returned. 
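-    * For example, indexing an array of vec4s with a loop counter that was
-    * not constant-folded yields no constant index, so the caller falls back
-    * to marking the whole variable as used.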
- */ -bool -ir_set_program_inouts_visitor::try_mark_partial_variable(ir_variable *var, - ir_rvalue *index) -{ - const glsl_type *type = var->type; - - if (this->shader_stage == MESA_SHADER_GEOMETRY && - var->data.mode == ir_var_shader_in) { - /* The only geometry shader input that is not an array is - * gl_PrimitiveIDIn, and in that case, this code will never be reached, - * because gl_PrimitiveIDIn can't be indexed into in array fashion. - */ - assert(type->is_array()); - type = type->fields.array; - } - - if (this->shader_stage == MESA_SHADER_TESS_CTRL && - var->data.mode == ir_var_shader_in) { - assert(type->is_array()); - type = type->fields.array; - } - - if (this->shader_stage == MESA_SHADER_TESS_CTRL && - var->data.mode == ir_var_shader_out && !var->data.patch) { - assert(type->is_array()); - type = type->fields.array; - } - - if (this->shader_stage == MESA_SHADER_TESS_EVAL && - var->data.mode == ir_var_shader_in && !var->data.patch) { - assert(type->is_array()); - type = type->fields.array; - } - - /* TODO: implement proper arrays of arrays support - * for now let the caller mark whole variable as used. - */ - if (type->is_array() && type->fields.array->is_array()) - return false; - - /* The code below only handles: - * - * - Indexing into matrices - * - Indexing into arrays of (matrices, vectors, or scalars) - * - * All other possibilities are either prohibited by GLSL (vertex inputs and - * fragment outputs can't be structs) or should have been eliminated by - * lowering passes (do_vec_index_to_swizzle() gets rid of indexing into - * vectors, and lower_packed_varyings() gets rid of structs that occur in - * varyings). - */ - if (!(type->is_matrix() || - (type->is_array() && - (type->fields.array->is_numeric() || - type->fields.array->is_boolean())))) { - assert(!"Unexpected indexing in ir_set_program_inouts"); - - /* For safety in release builds, in case we ever encounter unexpected - * indexing, give up and let the caller mark the whole variable as used. - */ - return false; - } - - ir_constant *index_as_constant = index->as_constant(); - if (!index_as_constant) - return false; - - unsigned elem_width; - unsigned num_elems; - if (type->is_array()) { - num_elems = type->length; - if (type->fields.array->is_matrix()) - elem_width = type->fields.array->matrix_columns; - else - elem_width = 1; - } else { - num_elems = type->matrix_columns; - elem_width = 1; - } - - if (index_as_constant->value.u[0] >= num_elems) { - /* Constant index outside the bounds of the matrix/array. This could - * arise as a result of constant folding of a legal GLSL program. - * - * Even though the spec says that indexing outside the bounds of a - * matrix/array results in undefined behaviour, we don't want to pass - * out-of-range values to mark() (since this could result in slots that - * don't exist being marked as used), so just let the caller mark the - * whole variable as used. 
- */ - return false; - } - - /* double element width for double types that takes two slots */ - if (this->shader_stage != MESA_SHADER_VERTEX || - var->data.mode != ir_var_shader_in) { - if (type->without_array()->is_dual_slot_double()) - elem_width *= 2; - } - - mark(this->prog, var, index_as_constant->value.u[0] * elem_width, - elem_width, this->shader_stage); - return true; -} - -static bool -is_multiple_vertices(gl_shader_stage stage, ir_variable *var) -{ - if (var->data.patch) - return false; - - if (var->data.mode == ir_var_shader_in) - return stage == MESA_SHADER_GEOMETRY || - stage == MESA_SHADER_TESS_CTRL || - stage == MESA_SHADER_TESS_EVAL; - if (var->data.mode == ir_var_shader_out) - return stage == MESA_SHADER_TESS_CTRL; - - return false; -} - -ir_visitor_status -ir_set_program_inouts_visitor::visit_enter(ir_dereference_array *ir) -{ - /* Note: for geometry shader inputs, lower_named_interface_blocks may - * create 2D arrays, so we need to be able to handle those. 2D arrays - * shouldn't be able to crop up for any other reason. - */ - if (ir_dereference_array * const inner_array = - ir->array->as_dereference_array()) { - /* ir => foo[i][j] - * inner_array => foo[i] - */ - if (ir_dereference_variable * const deref_var = - inner_array->array->as_dereference_variable()) { - if (is_multiple_vertices(this->shader_stage, deref_var->var)) { - /* foo is a geometry or tessellation shader input, so i is - * the vertex, and j the part of the input we're accessing. - */ - if (try_mark_partial_variable(deref_var->var, ir->array_index)) - { - /* We've now taken care of foo and j, but i might contain a - * subexpression that accesses shader inputs. So manually - * visit i and then continue with the parent. - */ - inner_array->array_index->accept(this); - return visit_continue_with_parent; - } - } - } - } else if (ir_dereference_variable * const deref_var = - ir->array->as_dereference_variable()) { - /* ir => foo[i], where foo is a variable. */ - if (is_multiple_vertices(this->shader_stage, deref_var->var)) { - /* foo is a geometry or tessellation shader input, so i is - * the vertex, and we're accessing the entire input. - */ - mark_whole_variable(deref_var->var); - /* We've now taken care of foo, but i might contain a subexpression - * that accesses shader inputs. So manually visit i and then - * continue with the parent. - */ - ir->array_index->accept(this); - return visit_continue_with_parent; - } else if (is_shader_inout(deref_var->var)) { - /* foo is a shader input/output, but not a geometry shader input, - * so i is the part of the input we're accessing. - */ - if (try_mark_partial_variable(deref_var->var, ir->array_index)) - return visit_continue_with_parent; - } - } - - /* The expression is something we don't recognize. Just visit its - * subexpressions. - */ - return visit_continue; -} - -ir_visitor_status -ir_set_program_inouts_visitor::visit_enter(ir_function_signature *ir) -{ - /* We don't want to descend into the function parameters and - * consider them as shader inputs or outputs. 
-    */
-   visit_list_elements(this, &ir->body);
-   return visit_continue_with_parent;
-}
-
-ir_visitor_status
-ir_set_program_inouts_visitor::visit_enter(ir_expression *ir)
-{
-   if (this->shader_stage == MESA_SHADER_FRAGMENT &&
-       (ir->operation == ir_unop_dFdy ||
-        ir->operation == ir_unop_dFdy_coarse ||
-        ir->operation == ir_unop_dFdy_fine)) {
-      gl_fragment_program *fprog = (gl_fragment_program *) prog;
-      fprog->UsesDFdy = true;
-   }
-   return visit_continue;
-}
-
-ir_visitor_status
-ir_set_program_inouts_visitor::visit_enter(ir_discard *)
-{
-   /* discards are only allowed in fragment shaders. */
-   assert(this->shader_stage == MESA_SHADER_FRAGMENT);
-
-   gl_fragment_program *fprog = (gl_fragment_program *) prog;
-   fprog->UsesKill = true;
-
-   return visit_continue;
-}
-
-ir_visitor_status
-ir_set_program_inouts_visitor::visit_enter(ir_texture *ir)
-{
-   if (ir->op == ir_tg4)
-      prog->UsesGather = true;
-   return visit_continue;
-}
-
-void
-do_set_program_inouts(exec_list *instructions, struct gl_program *prog,
-                      gl_shader_stage shader_stage)
-{
-   ir_set_program_inouts_visitor v(prog, shader_stage);
-
-   prog->InputsRead = 0;
-   prog->OutputsWritten = 0;
-   prog->PatchInputsRead = 0;
-   prog->PatchOutputsWritten = 0;
-   prog->SystemValuesRead = 0;
-   if (shader_stage == MESA_SHADER_FRAGMENT) {
-      gl_fragment_program *fprog = (gl_fragment_program *) prog;
-      memset(fprog->InterpQualifier, 0, sizeof(fprog->InterpQualifier));
-      fprog->IsCentroid = 0;
-      fprog->IsSample = 0;
-      fprog->UsesDFdy = false;
-      fprog->UsesKill = false;
-   }
-   visit_list_elements(&v, instructions);
-}
diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h
deleted file mode 100644
index 1854279925b..00000000000
--- a/src/glsl/ir_uniform.h
+++ /dev/null
@@ -1,216 +0,0 @@
-/*
- * Copyright © 2011 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#pragma once
-#ifndef IR_UNIFORM_H
-#define IR_UNIFORM_H
-
-
-/* stdbool.h is necessary because this file is included in both C and C++ code.
- */
-#include <stdbool.h>
-
-#include "program/prog_parameter.h" /* For union gl_constant_value. */
-
-/**
- * Used by GL_ARB_explicit_uniform_location extension code in the linker
- * and glUniform* functions to identify inactive explicit uniform locations.
- */
-#define INACTIVE_UNIFORM_EXPLICIT_LOCATION ((gl_uniform_storage *) -1)
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-enum PACKED gl_uniform_driver_format {
-   uniform_native = 0, /**< Store data in the native format.
*/ - uniform_int_float, /**< Store integer data as floats. */ -}; - -struct gl_uniform_driver_storage { - /** - * Number of bytes from one array element to the next. - */ - uint8_t element_stride; - - /** - * Number of bytes from one vector in a matrix to the next. - */ - uint8_t vector_stride; - - /** - * Base format of the stored data. - */ - enum gl_uniform_driver_format format; - - /** - * Pointer to the base of the data. - */ - void *data; -}; - -struct gl_opaque_uniform_index { - /** - * Base opaque uniform index - * - * If \c gl_uniform_storage::base_type is an opaque type, this - * represents its uniform index. If \c - * gl_uniform_storage::array_elements is not zero, the array will - * use opaque uniform indices \c index through \c index + \c - * gl_uniform_storage::array_elements - 1, inclusive. - * - * Note that the index may be different in each shader stage. - */ - uint8_t index; - - /** - * Whether this opaque uniform is used in this shader stage. - */ - bool active; -}; - -struct gl_uniform_storage { - char *name; - /** Type of this uniform data stored. - * - * In the case of an array, it's the type of a single array element. - */ - const struct glsl_type *type; - - /** - * The number of elements in this uniform. - * - * For non-arrays, this is always 0. For arrays, the value is the size of - * the array. - */ - unsigned array_elements; - - /** - * Has this uniform ever been set? - */ - bool initialized; - - struct gl_opaque_uniform_index opaque[MESA_SHADER_STAGES]; - - /** - * Storage used by the driver for the uniform - */ - unsigned num_driver_storage; - struct gl_uniform_driver_storage *driver_storage; - - /** - * Storage used by Mesa for the uniform - * - * This form of the uniform is used by Mesa's implementation of \c - * glGetUniform. It can also be used by drivers to obtain the value of the - * uniform if the \c ::driver_storage interface is not used. - */ - union gl_constant_value *storage; - - /** Fields for GL_ARB_uniform_buffer_object - * @{ - */ - - /** - * GL_UNIFORM_BLOCK_INDEX: index of the uniform block containing - * the uniform, or -1 for the default uniform block. Note that the - * index is into the linked program's UniformBlocks[] array, not - * the linked shader's. - */ - int block_index; - - /** GL_UNIFORM_OFFSET: byte offset within the uniform block, or -1. */ - int offset; - - /** - * GL_UNIFORM_MATRIX_STRIDE: byte stride between columns or rows of - * a matrix. Set to 0 for non-matrices in UBOs, or -1 for uniforms - * in the default uniform block. - */ - int matrix_stride; - - /** - * GL_UNIFORM_ARRAY_STRIDE: byte stride between elements of the - * array. Set to zero for non-arrays in UBOs, or -1 for uniforms - * in the default uniform block. - */ - int array_stride; - - /** GL_UNIFORM_ROW_MAJOR: true iff it's a row-major matrix in a UBO */ - bool row_major; - - /** @} */ - - /** - * This is a compiler-generated uniform that should not be advertised - * via the API. - */ - bool hidden; - - /** - * This is a built-in uniform that should not be modified through any gl API. - */ - bool builtin; - - /** - * This is a shader storage buffer variable, not an uniform. - */ - bool is_shader_storage; - - /** - * Index within gl_shader_program::AtomicBuffers[] of the atomic - * counter buffer this uniform is stored in, or -1 if this is not - * an atomic counter. - */ - int atomic_buffer_index; - - /** - * The 'base location' for this uniform in the uniform remap table. For - * arrays this is the first element in the array. 
- * for subroutines this is in shader subroutine uniform remap table. - */ - unsigned remap_location; - - /** - * The number of compatible subroutines with this subroutine uniform. - */ - unsigned num_compatible_subroutines; - - /** - * A single integer identifying the number of active array elements of - * the top-level shader storage block member (GL_TOP_LEVEL_ARRAY_SIZE). - */ - unsigned top_level_array_size; - - /** - * A single integer identifying the stride between array elements of the - * top-level shader storage block member. (GL_TOP_LEVEL_ARRAY_STRIDE). - */ - unsigned top_level_array_stride; -}; - -#ifdef __cplusplus -} -#endif - -#endif /* IR_UNIFORM_H */ diff --git a/src/glsl/ir_validate.cpp b/src/glsl/ir_validate.cpp deleted file mode 100644 index cad7069bf98..00000000000 --- a/src/glsl/ir_validate.cpp +++ /dev/null @@ -1,930 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_validate.cpp - * - * Attempts to verify that various invariants of the IR tree are true. - * - * In particular, at the moment it makes sure that no single - * ir_instruction node except for ir_variable appears multiple times - * in the ir tree. ir_variable does appear multiple times: Once as a - * declaration in an exec_list, and multiple times as the endpoint of - * a dereference chain. 
- */ - -#include "ir.h" -#include "ir_hierarchical_visitor.h" -#include "util/hash_table.h" -#include "util/set.h" -#include "compiler/glsl_types.h" - -namespace { - -class ir_validate : public ir_hierarchical_visitor { -public: - ir_validate() - { - this->ir_set = _mesa_set_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); - - this->current_function = NULL; - - this->callback_enter = ir_validate::validate_ir; - this->data_enter = ir_set; - } - - ~ir_validate() - { - _mesa_set_destroy(this->ir_set, NULL); - } - - virtual ir_visitor_status visit(ir_variable *v); - virtual ir_visitor_status visit(ir_dereference_variable *ir); - - virtual ir_visitor_status visit_enter(ir_discard *ir); - virtual ir_visitor_status visit_enter(ir_if *ir); - - virtual ir_visitor_status visit_enter(ir_function *ir); - virtual ir_visitor_status visit_leave(ir_function *ir); - virtual ir_visitor_status visit_enter(ir_function_signature *ir); - - virtual ir_visitor_status visit_leave(ir_expression *ir); - virtual ir_visitor_status visit_leave(ir_swizzle *ir); - - virtual ir_visitor_status visit_enter(class ir_dereference_array *); - - virtual ir_visitor_status visit_enter(ir_assignment *ir); - virtual ir_visitor_status visit_enter(ir_call *ir); - - static void validate_ir(ir_instruction *ir, void *data); - - ir_function *current_function; - - struct set *ir_set; -}; - -} /* anonymous namespace */ - -ir_visitor_status -ir_validate::visit(ir_dereference_variable *ir) -{ - if ((ir->var == NULL) || (ir->var->as_variable() == NULL)) { - printf("ir_dereference_variable @ %p does not specify a variable %p\n", - (void *) ir, (void *) ir->var); - abort(); - } - - if (_mesa_set_search(ir_set, ir->var) == NULL) { - printf("ir_dereference_variable @ %p specifies undeclared variable " - "`%s' @ %p\n", - (void *) ir, ir->var->name, (void *) ir->var); - abort(); - } - - this->validate_ir(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_validate::visit_enter(class ir_dereference_array *ir) -{ - if (!ir->array->type->is_array() && !ir->array->type->is_matrix() && - !ir->array->type->is_vector()) { - printf("ir_dereference_array @ %p does not specify an array, a vector " - "or a matrix\n", - (void *) ir); - ir->print(); - printf("\n"); - abort(); - } - - if (!ir->array_index->type->is_scalar()) { - printf("ir_dereference_array @ %p does not have scalar index: %s\n", - (void *) ir, ir->array_index->type->name); - abort(); - } - - if (!ir->array_index->type->is_integer()) { - printf("ir_dereference_array @ %p does not have integer index: %s\n", - (void *) ir, ir->array_index->type->name); - abort(); - } - - return visit_continue; -} - -ir_visitor_status -ir_validate::visit_enter(ir_discard *ir) -{ - if (ir->condition && ir->condition->type != glsl_type::bool_type) { - printf("ir_discard condition %s type instead of bool.\n", - ir->condition->type->name); - ir->print(); - printf("\n"); - abort(); - } - - return visit_continue; -} - -ir_visitor_status -ir_validate::visit_enter(ir_if *ir) -{ - if (ir->condition->type != glsl_type::bool_type) { - printf("ir_if condition %s type instead of bool.\n", - ir->condition->type->name); - ir->print(); - printf("\n"); - abort(); - } - - return visit_continue; -} - - -ir_visitor_status -ir_validate::visit_enter(ir_function *ir) -{ - /* Function definitions cannot be nested. 
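-    * GLSL itself has no nested functions, so hitting this indicates the IR
-    * was constructed incorrectly rather than a malformed shader.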
- */ - if (this->current_function != NULL) { - printf("Function definition nested inside another function " - "definition:\n"); - printf("%s %p inside %s %p\n", - ir->name, (void *) ir, - this->current_function->name, (void *) this->current_function); - abort(); - } - - /* Store the current function hierarchy being traversed. This is used - * by the function signature visitor to ensure that the signatures are - * linked with the correct functions. - */ - this->current_function = ir; - - this->validate_ir(ir, this->data_enter); - - /* Verify that all of the things stored in the list of signatures are, - * in fact, function signatures. - */ - foreach_in_list(ir_instruction, sig, &ir->signatures) { - if (sig->ir_type != ir_type_function_signature) { - printf("Non-signature in signature list of function `%s'\n", - ir->name); - abort(); - } - } - - return visit_continue; -} - -ir_visitor_status -ir_validate::visit_leave(ir_function *ir) -{ - assert(ralloc_parent(ir->name) == ir); - - this->current_function = NULL; - return visit_continue; -} - -ir_visitor_status -ir_validate::visit_enter(ir_function_signature *ir) -{ - if (this->current_function != ir->function()) { - printf("Function signature nested inside wrong function " - "definition:\n"); - printf("%p inside %s %p instead of %s %p\n", - (void *) ir, - this->current_function->name, (void *) this->current_function, - ir->function_name(), (void *) ir->function()); - abort(); - } - - if (ir->return_type == NULL) { - printf("Function signature %p for function %s has NULL return type.\n", - (void *) ir, ir->function_name()); - abort(); - } - - this->validate_ir(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_validate::visit_leave(ir_expression *ir) -{ - switch (ir->operation) { - case ir_unop_bit_not: - assert(ir->operands[0]->type == ir->type); - break; - case ir_unop_logic_not: - assert(ir->type->base_type == GLSL_TYPE_BOOL); - assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); - break; - - case ir_unop_neg: - case ir_unop_abs: - case ir_unop_sign: - case ir_unop_rcp: - case ir_unop_rsq: - case ir_unop_sqrt: - assert(ir->type == ir->operands[0]->type); - break; - - case ir_unop_exp: - case ir_unop_log: - case ir_unop_exp2: - case ir_unop_log2: - case ir_unop_saturate: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); - assert(ir->type == ir->operands[0]->type); - break; - - case ir_unop_f2i: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); - assert(ir->type->base_type == GLSL_TYPE_INT); - break; - case ir_unop_f2u: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); - assert(ir->type->base_type == GLSL_TYPE_UINT); - break; - case ir_unop_i2f: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); - assert(ir->type->base_type == GLSL_TYPE_FLOAT); - break; - case ir_unop_f2b: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); - assert(ir->type->base_type == GLSL_TYPE_BOOL); - break; - case ir_unop_b2f: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); - assert(ir->type->base_type == GLSL_TYPE_FLOAT); - break; - case ir_unop_i2b: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); - assert(ir->type->base_type == GLSL_TYPE_BOOL); - break; - case ir_unop_b2i: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); - assert(ir->type->base_type == GLSL_TYPE_INT); - break; - case ir_unop_u2f: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); - assert(ir->type->base_type == GLSL_TYPE_FLOAT); - break; - case ir_unop_i2u: 
- assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); - assert(ir->type->base_type == GLSL_TYPE_UINT); - break; - case ir_unop_u2i: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); - assert(ir->type->base_type == GLSL_TYPE_INT); - break; - case ir_unop_bitcast_i2f: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); - assert(ir->type->base_type == GLSL_TYPE_FLOAT); - break; - case ir_unop_bitcast_f2i: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); - assert(ir->type->base_type == GLSL_TYPE_INT); - break; - case ir_unop_bitcast_u2f: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); - assert(ir->type->base_type == GLSL_TYPE_FLOAT); - break; - case ir_unop_bitcast_f2u: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); - assert(ir->type->base_type == GLSL_TYPE_UINT); - break; - - case ir_unop_trunc: - case ir_unop_round_even: - case ir_unop_ceil: - case ir_unop_floor: - case ir_unop_fract: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || - ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(ir->operands[0]->type == ir->type); - break; - case ir_unop_sin: - case ir_unop_cos: - case ir_unop_dFdx: - case ir_unop_dFdx_coarse: - case ir_unop_dFdx_fine: - case ir_unop_dFdy: - case ir_unop_dFdy_coarse: - case ir_unop_dFdy_fine: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); - assert(ir->operands[0]->type == ir->type); - break; - - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_unorm_2x16: - case ir_unop_pack_half_2x16: - assert(ir->type == glsl_type::uint_type); - assert(ir->operands[0]->type == glsl_type::vec2_type); - break; - - case ir_unop_pack_snorm_4x8: - case ir_unop_pack_unorm_4x8: - assert(ir->type == glsl_type::uint_type); - assert(ir->operands[0]->type == glsl_type::vec4_type); - break; - - case ir_unop_pack_double_2x32: - assert(ir->type == glsl_type::double_type); - assert(ir->operands[0]->type == glsl_type::uvec2_type); - break; - - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_unorm_2x16: - case ir_unop_unpack_half_2x16: - assert(ir->type == glsl_type::vec2_type); - assert(ir->operands[0]->type == glsl_type::uint_type); - break; - - case ir_unop_unpack_snorm_4x8: - case ir_unop_unpack_unorm_4x8: - assert(ir->type == glsl_type::vec4_type); - assert(ir->operands[0]->type == glsl_type::uint_type); - break; - - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: - assert(ir->type == glsl_type::float_type); - assert(ir->operands[0]->type == glsl_type::uint_type); - break; - - case ir_unop_unpack_double_2x32: - assert(ir->type == glsl_type::uvec2_type); - assert(ir->operands[0]->type == glsl_type::double_type); - break; - - case ir_unop_bitfield_reverse: - assert(ir->operands[0]->type == ir->type); - assert(ir->type->is_integer()); - break; - - case ir_unop_bit_count: - case ir_unop_find_msb: - case ir_unop_find_lsb: - assert(ir->operands[0]->type->vector_elements == ir->type->vector_elements); - assert(ir->operands[0]->type->is_integer()); - assert(ir->type->base_type == GLSL_TYPE_INT); - break; - - case ir_unop_noise: - /* XXX what can we assert here? 
*/ - break; - - case ir_unop_interpolate_at_centroid: - assert(ir->operands[0]->type == ir->type); - assert(ir->operands[0]->type->is_float()); - break; - - case ir_unop_get_buffer_size: - assert(ir->type == glsl_type::int_type); - assert(ir->operands[0]->type == glsl_type::uint_type); - break; - - case ir_unop_ssbo_unsized_array_length: - assert(ir->type == glsl_type::int_type); - assert(ir->operands[0]->type->is_array()); - assert(ir->operands[0]->type->is_unsized_array()); - break; - - case ir_unop_d2f: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(ir->type->base_type == GLSL_TYPE_FLOAT); - break; - case ir_unop_f2d: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT); - assert(ir->type->base_type == GLSL_TYPE_DOUBLE); - break; - case ir_unop_d2i: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(ir->type->base_type == GLSL_TYPE_INT); - break; - case ir_unop_i2d: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_INT); - assert(ir->type->base_type == GLSL_TYPE_DOUBLE); - break; - case ir_unop_d2u: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(ir->type->base_type == GLSL_TYPE_UINT); - break; - case ir_unop_u2d: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_UINT); - assert(ir->type->base_type == GLSL_TYPE_DOUBLE); - break; - case ir_unop_d2b: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(ir->type->base_type == GLSL_TYPE_BOOL); - break; - - case ir_unop_frexp_sig: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || - ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(ir->type->base_type == GLSL_TYPE_DOUBLE); - break; - case ir_unop_frexp_exp: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || - ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(ir->type->base_type == GLSL_TYPE_INT); - break; - case ir_unop_subroutine_to_int: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_SUBROUTINE); - assert(ir->type->base_type == GLSL_TYPE_INT); - break; - case ir_binop_add: - case ir_binop_sub: - case ir_binop_mul: - case ir_binop_div: - case ir_binop_mod: - case ir_binop_min: - case ir_binop_max: - case ir_binop_pow: - assert(ir->operands[0]->type->base_type == - ir->operands[1]->type->base_type); - - if (ir->operands[0]->type->is_scalar()) - assert(ir->operands[1]->type == ir->type); - else if (ir->operands[1]->type->is_scalar()) - assert(ir->operands[0]->type == ir->type); - else if (ir->operands[0]->type->is_vector() && - ir->operands[1]->type->is_vector()) { - assert(ir->operands[0]->type == ir->operands[1]->type); - assert(ir->operands[0]->type == ir->type); - } - break; - - case ir_binop_imul_high: - assert(ir->type == ir->operands[0]->type); - assert(ir->type == ir->operands[1]->type); - assert(ir->type->is_integer()); - break; - - case ir_binop_carry: - case ir_binop_borrow: - assert(ir->type == ir->operands[0]->type); - assert(ir->type == ir->operands[1]->type); - assert(ir->type->base_type == GLSL_TYPE_UINT); - break; - - case ir_binop_less: - case ir_binop_greater: - case ir_binop_lequal: - case ir_binop_gequal: - case ir_binop_equal: - case ir_binop_nequal: - /* The semantics of the IR operators differ from the GLSL <, >, <=, >=, - * ==, and != operators. The IR operators perform a component-wise - * comparison on scalar or vector types and return a boolean scalar or - * vector type of the same size. 
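
To make the comparison semantics above concrete: for vector operands, ir_binop_less yields one boolean per component rather than a single bool. A minimal model in plain C++ (hypothetical vec alias, not Mesa code):

    #include <array>
    #include <cstddef>

    template <typename T, std::size_t N>
    using vec = std::array<T, N>;   /* stand-in for GLSL vecN/ivecN */

    /* Component-wise '<', returning a boolean vector of the same size,
     * as the assertions that follow require. */
    template <typename T, std::size_t N>
    vec<bool, N> less_than(const vec<T, N> &a, const vec<T, N> &b)
    {
       vec<bool, N> out{};
       for (std::size_t i = 0; i < N; i++)
          out[i] = a[i] < b[i];
       return out;
    }
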
- */ - assert(ir->type->base_type == GLSL_TYPE_BOOL); - assert(ir->operands[0]->type == ir->operands[1]->type); - assert(ir->operands[0]->type->is_vector() - || ir->operands[0]->type->is_scalar()); - assert(ir->operands[0]->type->vector_elements - == ir->type->vector_elements); - break; - - case ir_binop_all_equal: - case ir_binop_any_nequal: - /* GLSL == and != operate on scalars, vectors, matrices and arrays, and - * return a scalar boolean. The IR matches that. - */ - assert(ir->type == glsl_type::bool_type); - assert(ir->operands[0]->type == ir->operands[1]->type); - break; - - case ir_binop_lshift: - case ir_binop_rshift: - assert(ir->operands[0]->type->is_integer() && - ir->operands[1]->type->is_integer()); - if (ir->operands[0]->type->is_scalar()) { - assert(ir->operands[1]->type->is_scalar()); - } - if (ir->operands[0]->type->is_vector() && - ir->operands[1]->type->is_vector()) { - assert(ir->operands[0]->type->components() == - ir->operands[1]->type->components()); - } - assert(ir->type == ir->operands[0]->type); - break; - - case ir_binop_bit_and: - case ir_binop_bit_xor: - case ir_binop_bit_or: - assert(ir->operands[0]->type->base_type == - ir->operands[1]->type->base_type); - assert(ir->type->is_integer()); - if (ir->operands[0]->type->is_vector() && - ir->operands[1]->type->is_vector()) { - assert(ir->operands[0]->type->vector_elements == - ir->operands[1]->type->vector_elements); - } - break; - - case ir_binop_logic_and: - case ir_binop_logic_xor: - case ir_binop_logic_or: - assert(ir->type->base_type == GLSL_TYPE_BOOL); - assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); - assert(ir->operands[1]->type->base_type == GLSL_TYPE_BOOL); - break; - - case ir_binop_dot: - assert(ir->type == glsl_type::float_type || - ir->type == glsl_type::double_type); - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || - ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(ir->operands[0]->type->is_vector()); - assert(ir->operands[0]->type == ir->operands[1]->type); - break; - - case ir_binop_pack_half_2x16_split: - assert(ir->type == glsl_type::uint_type); - assert(ir->operands[0]->type == glsl_type::float_type); - assert(ir->operands[1]->type == glsl_type::float_type); - break; - - case ir_binop_ubo_load: - assert(ir->operands[0]->type == glsl_type::uint_type); - - assert(ir->operands[1]->type == glsl_type::uint_type); - break; - - case ir_binop_ldexp: - assert(ir->operands[0]->type == ir->type); - assert(ir->operands[0]->type->is_float() || - ir->operands[0]->type->is_double()); - assert(ir->operands[1]->type->base_type == GLSL_TYPE_INT); - assert(ir->operands[0]->type->components() == - ir->operands[1]->type->components()); - break; - - case ir_binop_vector_extract: - assert(ir->operands[0]->type->is_vector()); - assert(ir->operands[1]->type->is_scalar() - && ir->operands[1]->type->is_integer()); - break; - - case ir_binop_interpolate_at_offset: - assert(ir->operands[0]->type == ir->type); - assert(ir->operands[0]->type->is_float()); - assert(ir->operands[1]->type->components() == 2); - assert(ir->operands[1]->type->is_float()); - break; - - case ir_binop_interpolate_at_sample: - assert(ir->operands[0]->type == ir->type); - assert(ir->operands[0]->type->is_float()); - assert(ir->operands[1]->type == glsl_type::int_type); - break; - - case ir_triop_fma: - assert(ir->type->base_type == GLSL_TYPE_FLOAT || - ir->type->base_type == GLSL_TYPE_DOUBLE); - assert(ir->type == ir->operands[0]->type); - assert(ir->type == ir->operands[1]->type); - assert(ir->type == 
ir->operands[2]->type); - break; - - case ir_triop_lrp: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_FLOAT || - ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE); - assert(ir->operands[0]->type == ir->operands[1]->type); - assert(ir->operands[2]->type == ir->operands[0]->type || - ir->operands[2]->type == glsl_type::float_type || - ir->operands[2]->type == glsl_type::double_type); - break; - - case ir_triop_csel: - assert(ir->operands[0]->type->base_type == GLSL_TYPE_BOOL); - assert(ir->type->vector_elements == ir->operands[0]->type->vector_elements); - assert(ir->type == ir->operands[1]->type); - assert(ir->type == ir->operands[2]->type); - break; - - case ir_triop_bitfield_extract: - assert(ir->type->is_integer()); - assert(ir->operands[0]->type == ir->type); - assert(ir->operands[1]->type == ir->type); - assert(ir->operands[2]->type == ir->type); - break; - - case ir_triop_vector_insert: - assert(ir->operands[0]->type->is_vector()); - assert(ir->operands[1]->type->is_scalar()); - assert(ir->operands[0]->type->base_type == ir->operands[1]->type->base_type); - assert(ir->operands[2]->type->is_scalar() - && ir->operands[2]->type->is_integer()); - assert(ir->type == ir->operands[0]->type); - break; - - case ir_quadop_bitfield_insert: - assert(ir->type->is_integer()); - assert(ir->operands[0]->type == ir->type); - assert(ir->operands[1]->type == ir->type); - assert(ir->operands[2]->type == ir->type); - assert(ir->operands[3]->type == ir->type); - break; - - case ir_quadop_vector: - /* The vector operator collects some number of scalars and generates a - * vector from them. - * - * - All of the operands must be scalar. - * - Number of operands must matche the size of the resulting vector. - * - Base type of the operands must match the base type of the result. - */ - assert(ir->type->is_vector()); - switch (ir->type->vector_elements) { - case 2: - assert(ir->operands[0]->type->is_scalar()); - assert(ir->operands[0]->type->base_type == ir->type->base_type); - assert(ir->operands[1]->type->is_scalar()); - assert(ir->operands[1]->type->base_type == ir->type->base_type); - assert(ir->operands[2] == NULL); - assert(ir->operands[3] == NULL); - break; - case 3: - assert(ir->operands[0]->type->is_scalar()); - assert(ir->operands[0]->type->base_type == ir->type->base_type); - assert(ir->operands[1]->type->is_scalar()); - assert(ir->operands[1]->type->base_type == ir->type->base_type); - assert(ir->operands[2]->type->is_scalar()); - assert(ir->operands[2]->type->base_type == ir->type->base_type); - assert(ir->operands[3] == NULL); - break; - case 4: - assert(ir->operands[0]->type->is_scalar()); - assert(ir->operands[0]->type->base_type == ir->type->base_type); - assert(ir->operands[1]->type->is_scalar()); - assert(ir->operands[1]->type->base_type == ir->type->base_type); - assert(ir->operands[2]->type->is_scalar()); - assert(ir->operands[2]->type->base_type == ir->type->base_type); - assert(ir->operands[3]->type->is_scalar()); - assert(ir->operands[3]->type->base_type == ir->type->base_type); - break; - default: - /* The is_vector assertion above should prevent execution from ever - * getting here. 
- */ - assert(!"Should not get here."); - break; - } - } - - return visit_continue; -} - -ir_visitor_status -ir_validate::visit_leave(ir_swizzle *ir) -{ - unsigned int chans[4] = {ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w}; - - for (unsigned int i = 0; i < ir->type->vector_elements; i++) { - if (chans[i] >= ir->val->type->vector_elements) { - printf("ir_swizzle @ %p specifies a channel not present " - "in the value.\n", (void *) ir); - ir->print(); - abort(); - } - } - - return visit_continue; -} - -ir_visitor_status -ir_validate::visit(ir_variable *ir) -{ - /* An ir_variable is the one thing that can (and will) appear multiple times - * in an IR tree. It is added to the hashtable so that it can be used - * in the ir_dereference_variable handler to ensure that a variable is - * declared before it is dereferenced. - */ - if (ir->name && ir->is_name_ralloced()) - assert(ralloc_parent(ir->name) == ir); - - _mesa_set_add(ir_set, ir); - - /* If a variable is an array, verify that the maximum array index is in - * bounds. There was once an error in AST-to-HIR conversion that set this - * to be out of bounds. - */ - if (ir->type->array_size() > 0) { - if (ir->data.max_array_access >= ir->type->length) { - printf("ir_variable has maximum access out of bounds (%d vs %d)\n", - ir->data.max_array_access, ir->type->length - 1); - ir->print(); - abort(); - } - } - - /* If a variable is an interface block (or an array of interface blocks), - * verify that the maximum array index for each interface member is in - * bounds. - */ - if (ir->is_interface_instance()) { - const glsl_struct_field *fields = - ir->get_interface_type()->fields.structure; - for (unsigned i = 0; i < ir->get_interface_type()->length; i++) { - if (fields[i].type->array_size() > 0) { - const unsigned *const max_ifc_array_access = - ir->get_max_ifc_array_access(); - - assert(max_ifc_array_access != NULL); - - if (max_ifc_array_access[i] >= fields[i].type->length) { - printf("ir_variable has maximum access out of bounds for " - "field %s (%d vs %d)\n", fields[i].name, - max_ifc_array_access[i], fields[i].type->length); - ir->print(); - abort(); - } - } - } - } - - if (ir->constant_initializer != NULL && !ir->data.has_initializer) { - printf("ir_variable didn't have an initializer, but has a constant " - "initializer value.\n"); - ir->print(); - abort(); - } - - if (ir->data.mode == ir_var_uniform - && is_gl_identifier(ir->name) - && ir->get_state_slots() == NULL) { - printf("built-in uniform has no state\n"); - ir->print(); - abort(); - } - - return visit_continue; -} - -ir_visitor_status -ir_validate::visit_enter(ir_assignment *ir) -{ - const ir_dereference *const lhs = ir->lhs; - if (lhs->type->is_scalar() || lhs->type->is_vector()) { - if (ir->write_mask == 0) { - printf("Assignment LHS is %s, but write mask is 0:\n", - lhs->type->is_scalar() ? 
"scalar" : "vector"); - ir->print(); - abort(); - } - - int lhs_components = 0; - for (int i = 0; i < 4; i++) { - if (ir->write_mask & (1 << i)) - lhs_components++; - } - - if (lhs_components != ir->rhs->type->vector_elements) { - printf("Assignment count of LHS write mask channels enabled not\n" - "matching RHS vector size (%d LHS, %d RHS).\n", - lhs_components, ir->rhs->type->vector_elements); - ir->print(); - abort(); - } - } - - this->validate_ir(ir, this->data_enter); - - return visit_continue; -} - -ir_visitor_status -ir_validate::visit_enter(ir_call *ir) -{ - ir_function_signature *const callee = ir->callee; - - if (callee->ir_type != ir_type_function_signature) { - printf("IR called by ir_call is not ir_function_signature!\n"); - abort(); - } - - if (ir->return_deref) { - if (ir->return_deref->type != callee->return_type) { - printf("callee type %s does not match return storage type %s\n", - callee->return_type->name, ir->return_deref->type->name); - abort(); - } - } else if (callee->return_type != glsl_type::void_type) { - printf("ir_call has non-void callee but no return storage\n"); - abort(); - } - - const exec_node *formal_param_node = callee->parameters.head; - const exec_node *actual_param_node = ir->actual_parameters.head; - while (true) { - if (formal_param_node->is_tail_sentinel() - != actual_param_node->is_tail_sentinel()) { - printf("ir_call has the wrong number of parameters:\n"); - goto dump_ir; - } - if (formal_param_node->is_tail_sentinel()) { - break; - } - const ir_variable *formal_param - = (const ir_variable *) formal_param_node; - const ir_rvalue *actual_param - = (const ir_rvalue *) actual_param_node; - if (formal_param->type != actual_param->type) { - printf("ir_call parameter type mismatch:\n"); - goto dump_ir; - } - if (formal_param->data.mode == ir_var_function_out - || formal_param->data.mode == ir_var_function_inout) { - if (!actual_param->is_lvalue()) { - printf("ir_call out/inout parameters must be lvalues:\n"); - goto dump_ir; - } - } - formal_param_node = formal_param_node->next; - actual_param_node = actual_param_node->next; - } - - return visit_continue; - -dump_ir: - ir->print(); - printf("callee:\n"); - callee->print(); - abort(); - return visit_stop; -} - -void -ir_validate::validate_ir(ir_instruction *ir, void *data) -{ - struct set *ir_set = (struct set *) data; - - if (_mesa_set_search(ir_set, ir)) { - printf("Instruction node present twice in ir tree:\n"); - ir->print(); - printf("\n"); - abort(); - } - _mesa_set_add(ir_set, ir); -} - -void -check_node_type(ir_instruction *ir, void *data) -{ - (void) data; - - if (ir->ir_type >= ir_type_max) { - printf("Instruction node with unset type\n"); - ir->print(); printf("\n"); - } - ir_rvalue *value = ir->as_rvalue(); - if (value != NULL) - assert(value->type != glsl_type::error_type); -} - -void -validate_ir_tree(exec_list *instructions) -{ - /* We shouldn't have any reason to validate IR in a release build, - * and it's half composed of assert()s anyway which wouldn't do - * anything. 
- */ -#ifdef DEBUG - ir_validate v; - - v.run(instructions); - - foreach_in_list(ir_instruction, ir, instructions) { - visit_tree(ir, check_node_type, NULL); - } -#endif -} diff --git a/src/glsl/ir_variable_refcount.cpp b/src/glsl/ir_variable_refcount.cpp deleted file mode 100644 index 8306be10b9c..00000000000 --- a/src/glsl/ir_variable_refcount.cpp +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_variable_refcount.cpp - * - * Provides a visitor which produces a list of variables referenced, - * how many times they were referenced and assigned, and whether they - * were defined in the scope. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_variable_refcount.h" -#include "compiler/glsl_types.h" -#include "util/hash_table.h" - -ir_variable_refcount_visitor::ir_variable_refcount_visitor() -{ - this->mem_ctx = ralloc_context(NULL); - this->ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, - _mesa_key_pointer_equal); -} - -static void -free_entry(struct hash_entry *entry) -{ - ir_variable_refcount_entry *ivre = (ir_variable_refcount_entry *) entry->data; - - /* Free assignment list */ - exec_node *n; - while ((n = ivre->assign_list.pop_head()) != NULL) { - struct assignment_entry *assignment_entry = - exec_node_data(struct assignment_entry, n, link); - free(assignment_entry); - } - - delete ivre; -} - -ir_variable_refcount_visitor::~ir_variable_refcount_visitor() -{ - ralloc_free(this->mem_ctx); - _mesa_hash_table_destroy(this->ht, free_entry); -} - -// constructor -ir_variable_refcount_entry::ir_variable_refcount_entry(ir_variable *var) -{ - this->var = var; - assigned_count = 0; - declaration = false; - referenced_count = 0; -} - - -ir_variable_refcount_entry * -ir_variable_refcount_visitor::get_variable_entry(ir_variable *var) -{ - assert(var); - - struct hash_entry *e = _mesa_hash_table_search(this->ht, var); - if (e) - return (ir_variable_refcount_entry *)e->data; - - ir_variable_refcount_entry *entry = new ir_variable_refcount_entry(var); - assert(entry->referenced_count == 0); - _mesa_hash_table_insert(this->ht, var, entry); - - return entry; -} - - -ir_visitor_status -ir_variable_refcount_visitor::visit(ir_variable *ir) -{ - ir_variable_refcount_entry *entry = this->get_variable_entry(ir); - if (entry) - entry->declaration = true; - - return visit_continue; -} - - -ir_visitor_status 
-ir_variable_refcount_visitor::visit(ir_dereference_variable *ir) -{ - ir_variable *const var = ir->variable_referenced(); - ir_variable_refcount_entry *entry = this->get_variable_entry(var); - - if (entry) - entry->referenced_count++; - - return visit_continue; -} - - -ir_visitor_status -ir_variable_refcount_visitor::visit_enter(ir_function_signature *ir) -{ - /* We don't want to descend into the function parameters and - * dead-code eliminate them, so just accept the body here. - */ - visit_list_elements(this, &ir->body); - return visit_continue_with_parent; -} - - -ir_visitor_status -ir_variable_refcount_visitor::visit_leave(ir_assignment *ir) -{ - ir_variable_refcount_entry *entry; - entry = this->get_variable_entry(ir->lhs->variable_referenced()); - if (entry) { - entry->assigned_count++; - - /* Build a list for dead code optimisation. Don't add assignment if it - * was declared out of scope (outside the instruction stream). Also don't - * bother adding any more to the list if there are more references than - * assignments as this means the variable is used and won't be optimised - * out. - */ - assert(entry->referenced_count >= entry->assigned_count); - if (entry->referenced_count == entry->assigned_count) { - struct assignment_entry *assignment_entry = - (struct assignment_entry *)calloc(1, sizeof(*assignment_entry)); - assignment_entry->assign = ir; - entry->assign_list.push_head(&assignment_entry->link); - } - } - - return visit_continue; -} diff --git a/src/glsl/ir_variable_refcount.h b/src/glsl/ir_variable_refcount.h deleted file mode 100644 index 08a11c01495..00000000000 --- a/src/glsl/ir_variable_refcount.h +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file ir_variable_refcount.h - * - * Provides a visitor which produces a list of variables referenced, - * how many times they were referenced and assigned, and whether they - * were defined in the scope. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "compiler/glsl_types.h" - -struct assignment_entry { - exec_node link; - ir_assignment *assign; -}; - -class ir_variable_refcount_entry -{ -public: - ir_variable_refcount_entry(ir_variable *var); - - ir_variable *var; /* The key: the variable's pointer. */ - - /** - * List of assignments to the variable, if any. - * This is intended to be used for dead code optimisation and may - * not be a complete list. 
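
The balance test in visit_leave above (only keep tracking assignments while referenced_count equals assigned_count) doubles as a deadness criterion: every assignment is also counted as a reference, so a variable for which the two counts stay equal is written but never read. A reduced sketch of that bookkeeping (hypothetical struct, not the visitor itself):

    /* Tallies a refcount pass might keep per variable. */
    struct refcount_entry {
       unsigned referenced_count = 0;  /* reads and writes */
       unsigned assigned_count = 0;    /* writes only */

       /* True when every reference is a write, i.e. the value is never
        * consumed and the assignments are candidates for removal. */
       bool never_read() const
       {
          return referenced_count == assigned_count;
       }
    };
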
- */ - exec_list assign_list; - - /** Number of times the variable is referenced, including assignments. */ - unsigned referenced_count; - - /** Number of times the variable is assigned. */ - unsigned assigned_count; - - bool declaration; /* If the variable had a decl in the instruction stream */ -}; - -class ir_variable_refcount_visitor : public ir_hierarchical_visitor { -public: - ir_variable_refcount_visitor(void); - ~ir_variable_refcount_visitor(void); - - virtual ir_visitor_status visit(ir_variable *); - virtual ir_visitor_status visit(ir_dereference_variable *); - - virtual ir_visitor_status visit_enter(ir_function_signature *); - virtual ir_visitor_status visit_leave(ir_assignment *); - - ir_variable_refcount_entry *get_variable_entry(ir_variable *var); - - struct hash_table *ht; - - void *mem_ctx; -}; diff --git a/src/glsl/ir_visitor.h b/src/glsl/ir_visitor.h deleted file mode 100644 index 7c38481cd53..00000000000 --- a/src/glsl/ir_visitor.h +++ /dev/null @@ -1,93 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once -#ifndef IR_VISITOR_H -#define IR_VISITOR_H - -#ifdef __cplusplus -/** - * Abstract base class of visitors of IR instruction trees - */ -class ir_visitor { -public: - virtual ~ir_visitor() - { - /* empty */ - } - - /** - * \name Visit methods - * - * As typical for the visitor pattern, there must be one \c visit method for - * each concrete subclass of \c ir_instruction. Virtual base classes within - * the hierarchy should not have \c visit methods. 
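
For readers new to the pattern behind the visit methods listed below: each concrete node implements accept(), which calls back into the matching visit() overload, giving dispatch on both the visitor and the node type. A toy version (hypothetical two-node hierarchy, unrelated to the ir_* classes):

    #include <cstdio>

    class number; class sum;

    class visitor {             /* one visit() per concrete node */
    public:
       virtual ~visitor() {}
       virtual void visit(number *) = 0;
       virtual void visit(sum *) = 0;
    };

    class expr {
    public:
       virtual ~expr() {}
       virtual void accept(visitor *v) = 0;
    };

    class number : public expr {
    public:
       void accept(visitor *v) override { v->visit(this); }
    };

    class sum : public expr {
    public:
       void accept(visitor *v) override { v->visit(this); }
    };

    class printer : public visitor {
    public:
       void visit(number *) override { printf("number\n"); }
       void visit(sum *) override { printf("sum\n"); }
    };
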
- */ - /*@{*/ - virtual void visit(class ir_rvalue *) { assert(!"unhandled error_type"); } - virtual void visit(class ir_variable *) = 0; - virtual void visit(class ir_function_signature *) = 0; - virtual void visit(class ir_function *) = 0; - virtual void visit(class ir_expression *) = 0; - virtual void visit(class ir_texture *) = 0; - virtual void visit(class ir_swizzle *) = 0; - virtual void visit(class ir_dereference_variable *) = 0; - virtual void visit(class ir_dereference_array *) = 0; - virtual void visit(class ir_dereference_record *) = 0; - virtual void visit(class ir_assignment *) = 0; - virtual void visit(class ir_constant *) = 0; - virtual void visit(class ir_call *) = 0; - virtual void visit(class ir_return *) = 0; - virtual void visit(class ir_discard *) = 0; - virtual void visit(class ir_if *) = 0; - virtual void visit(class ir_loop *) = 0; - virtual void visit(class ir_loop_jump *) = 0; - virtual void visit(class ir_emit_vertex *) = 0; - virtual void visit(class ir_end_primitive *) = 0; - virtual void visit(class ir_barrier *) = 0; - /*@}*/ -}; - -/* NOTE: function calls may never return due to discards inside them - * This is usually not an issue, but if it is, keep it in mind - */ -class ir_control_flow_visitor : public ir_visitor { -public: - virtual void visit(class ir_variable *) {} - virtual void visit(class ir_expression *) {} - virtual void visit(class ir_texture *) {} - virtual void visit(class ir_swizzle *) {} - virtual void visit(class ir_dereference_variable *) {} - virtual void visit(class ir_dereference_array *) {} - virtual void visit(class ir_dereference_record *) {} - virtual void visit(class ir_assignment *) {} - virtual void visit(class ir_constant *) {} - virtual void visit(class ir_call *) {} - virtual void visit(class ir_emit_vertex *) {} - virtual void visit(class ir_end_primitive *) {} - virtual void visit(class ir_barrier *) {} -}; -#endif /* __cplusplus */ - -#endif /* IR_VISITOR_H */ diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp deleted file mode 100644 index 277d4737ab7..00000000000 --- a/src/glsl/link_atomics.cpp +++ /dev/null @@ -1,346 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "glsl_parser_extras.h" -#include "ir.h" -#include "ir_uniform.h" -#include "linker.h" -#include "program/hash_table.h" -#include "main/macros.h" - -namespace { - /* - * Atomic counter as seen by the program. 
- */ - struct active_atomic_counter { - unsigned uniform_loc; - ir_variable *var; - }; - - /* - * Atomic counter buffer referenced by the program. There is a one - * to one correspondence between these and the objects that can be - * queried using glGetActiveAtomicCounterBufferiv(). - */ - struct active_atomic_buffer { - active_atomic_buffer() - : counters(0), num_counters(0), stage_references(), size(0) - {} - - ~active_atomic_buffer() - { - free(counters); - } - - void push_back(unsigned uniform_loc, ir_variable *var) - { - active_atomic_counter *new_counters; - - new_counters = (active_atomic_counter *) - realloc(counters, sizeof(active_atomic_counter) * - (num_counters + 1)); - - if (new_counters == NULL) { - _mesa_error_no_memory(__func__); - return; - } - - counters = new_counters; - counters[num_counters].uniform_loc = uniform_loc; - counters[num_counters].var = var; - num_counters++; - } - - active_atomic_counter *counters; - unsigned num_counters; - unsigned stage_references[MESA_SHADER_STAGES]; - unsigned size; - }; - - int - cmp_actives(const void *a, const void *b) - { - const active_atomic_counter *const first = (active_atomic_counter *) a; - const active_atomic_counter *const second = (active_atomic_counter *) b; - - return int(first->var->data.offset) - int(second->var->data.offset); - } - - bool - check_atomic_counters_overlap(const ir_variable *x, const ir_variable *y) - { - return ((x->data.offset >= y->data.offset && - x->data.offset < y->data.offset + y->type->atomic_size()) || - (y->data.offset >= x->data.offset && - y->data.offset < x->data.offset + x->type->atomic_size())); - } - - void - process_atomic_variable(const glsl_type *t, struct gl_shader_program *prog, - unsigned *uniform_loc, ir_variable *var, - active_atomic_buffer *const buffers, - unsigned *num_buffers, int *offset, - const unsigned shader_stage) - { - /* FIXME: Arrays of arrays get counted separately. For example: - * x1[3][3][2] = 9 counters - * x2[3][2] = 3 counters - * x3[2] = 1 counter - * - * However this code marks all the counters as active even when they - * might not be used. - */ - if (t->is_array() && t->fields.array->is_array()) { - for (unsigned i = 0; i < t->length; i++) { - process_atomic_variable(t->fields.array, prog, uniform_loc, - var, buffers, num_buffers, offset, - shader_stage); - } - } else { - active_atomic_buffer *buf = &buffers[var->data.binding]; - gl_uniform_storage *const storage = - &prog->UniformStorage[*uniform_loc]; - - /* If this is the first time the buffer is used, increment - * the counter of buffers used. 
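
check_atomic_counters_overlap above is the usual half-open interval intersection test. An equivalent formulation on bare offsets and sizes (hypothetical helper, assuming non-zero sizes):

    /* [a_off, a_off + a_size) and [b_off, b_off + b_size) intersect iff
     * each range starts before the other one ends. */
    static bool ranges_overlap(unsigned a_off, unsigned a_size,
                               unsigned b_off, unsigned b_size)
    {
       return a_off < b_off + b_size && b_off < a_off + a_size;
    }
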
- */
-         if (buf->size == 0)
-            (*num_buffers)++;
-
-         buf->push_back(*uniform_loc, var);
-
-         buf->stage_references[shader_stage]++;
-         buf->size = MAX2(buf->size, *offset + t->atomic_size());
-
-         storage->offset = *offset;
-         *offset += t->atomic_size();
-
-         (*uniform_loc)++;
-      }
-   }
-
-   active_atomic_buffer *
-   find_active_atomic_counters(struct gl_context *ctx,
-                               struct gl_shader_program *prog,
-                               unsigned *num_buffers)
-   {
-      active_atomic_buffer *const buffers =
-         new active_atomic_buffer[ctx->Const.MaxAtomicBufferBindings];
-
-      *num_buffers = 0;
-
-      for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
-         struct gl_shader *sh = prog->_LinkedShaders[i];
-         if (sh == NULL)
-            continue;
-
-         foreach_in_list(ir_instruction, node, sh->ir) {
-            ir_variable *var = node->as_variable();
-
-            if (var && var->type->contains_atomic()) {
-               int offset = var->data.offset;
-               unsigned uniform_loc = var->data.location;
-               process_atomic_variable(var->type, prog, &uniform_loc,
-                                       var, buffers, num_buffers, &offset, i);
-            }
-         }
-      }
-
-      for (unsigned i = 0; i < ctx->Const.MaxAtomicBufferBindings; i++) {
-         if (buffers[i].size == 0)
-            continue;
-
-         qsort(buffers[i].counters, buffers[i].num_counters,
-               sizeof(active_atomic_counter),
-               cmp_actives);
-
-         for (unsigned j = 1; j < buffers[i].num_counters; j++) {
-            /* If an overlapping counter is found, it must be a reference to
-             * the same counter from a different shader stage.
-             */
-            if (check_atomic_counters_overlap(buffers[i].counters[j-1].var,
-                                              buffers[i].counters[j].var)
-                && strcmp(buffers[i].counters[j-1].var->name,
-                          buffers[i].counters[j].var->name) != 0) {
-               linker_error(prog, "Atomic counter %s declared at offset %d "
-                            "which is already in use.",
-                            buffers[i].counters[j].var->name,
-                            buffers[i].counters[j].var->data.offset);
-            }
-         }
-      }
-      return buffers;
-   }
-}
-
-void
-link_assign_atomic_counter_resources(struct gl_context *ctx,
-                                     struct gl_shader_program *prog)
-{
-   unsigned num_buffers;
-   unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {};
-   active_atomic_buffer *abs =
-      find_active_atomic_counters(ctx, prog, &num_buffers);
-
-   prog->AtomicBuffers = rzalloc_array(prog, gl_active_atomic_buffer,
-                                       num_buffers);
-   prog->NumAtomicBuffers = num_buffers;
-
-   unsigned i = 0;
-   for (unsigned binding = 0;
-        binding < ctx->Const.MaxAtomicBufferBindings;
-        binding++) {
-
-      /* If the binding was not used, skip.
-       */
-      if (abs[binding].size == 0)
-         continue;
-
-      active_atomic_buffer &ab = abs[binding];
-      gl_active_atomic_buffer &mab = prog->AtomicBuffers[i];
-
-      /* Assign buffer-specific fields. */
-      mab.Binding = binding;
-      mab.MinimumSize = ab.size;
-      mab.Uniforms = rzalloc_array(prog->AtomicBuffers, GLuint,
-                                   ab.num_counters);
-      mab.NumUniforms = ab.num_counters;
-
-      /* Assign counter-specific fields. */
-      for (unsigned j = 0; j < ab.num_counters; j++) {
-         ir_variable *const var = ab.counters[j].var;
-         gl_uniform_storage *const storage =
-            &prog->UniformStorage[ab.counters[j].uniform_loc];
-
-         mab.Uniforms[j] = ab.counters[j].uniform_loc;
-         if (!var->data.explicit_binding)
-            var->data.binding = i;
-
-         storage->atomic_buffer_index = i;
-         storage->offset = var->data.offset;
-         storage->array_stride = (var->type->is_array() ?
-                                  var->type->without_array()->atomic_size() : 0);
-         if (!var->type->is_matrix())
-            storage->matrix_stride = 0;
-      }
-
-      /* Assign stage-specific fields.
*/
-      for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
-         if (ab.stage_references[j]) {
-            mab.StageReferences[j] = GL_TRUE;
-            num_atomic_buffers[j]++;
-         } else {
-            mab.StageReferences[j] = GL_FALSE;
-         }
-      }
-
-      i++;
-   }
-
-   /* Store a list of pointers to atomic buffers per stage and store the
-    * index to the intra-stage buffer list in uniform storage.
-    */
-   for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
-      if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) {
-         prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j];
-         prog->_LinkedShaders[j]->AtomicBuffers =
-            rzalloc_array(prog, gl_active_atomic_buffer *,
-                          num_atomic_buffers[j]);
-
-         unsigned intra_stage_idx = 0;
-         for (unsigned i = 0; i < num_buffers; i++) {
-            struct gl_active_atomic_buffer *atomic_buffer =
-               &prog->AtomicBuffers[i];
-            if (atomic_buffer->StageReferences[j]) {
-               prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] =
-                  atomic_buffer;
-
-               for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) {
-                  prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index =
-                     intra_stage_idx;
-                  prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active =
-                     true;
-               }
-
-               intra_stage_idx++;
-            }
-         }
-      }
-   }
-
-   delete [] abs;
-   assert(i == num_buffers);
-}
-
-void
-link_check_atomic_counter_resources(struct gl_context *ctx,
-                                    struct gl_shader_program *prog)
-{
-   unsigned num_buffers;
-   active_atomic_buffer *const abs =
-      find_active_atomic_counters(ctx, prog, &num_buffers);
-   unsigned atomic_counters[MESA_SHADER_STAGES] = {};
-   unsigned atomic_buffers[MESA_SHADER_STAGES] = {};
-   unsigned total_atomic_counters = 0;
-   unsigned total_atomic_buffers = 0;
-
-   /* Sum the required resources. Note that this counts buffers and
-    * counters referenced by several shader stages multiple times
-    * against the combined limit -- that's the behavior the spec
-    * requires.
-    */
-   for (unsigned i = 0; i < ctx->Const.MaxAtomicBufferBindings; i++) {
-      if (abs[i].size == 0)
-         continue;
-
-      for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
-         const unsigned n = abs[i].stage_references[j];
-
-         if (n) {
-            atomic_counters[j] += n;
-            total_atomic_counters += n;
-            atomic_buffers[j]++;
-            total_atomic_buffers++;
-         }
-      }
-   }
-
-   /* Check that they are within the supported limits.
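
Note the double counting the summing loop above calls out: a counter referenced from two stages contributes twice to the combined total. In isolation (plain arrays instead of the GL context; names hypothetical):

    enum { NUM_STAGES = 6 };   /* same role as MESA_SHADER_STAGES */

    /* per_stage_refs[s] holds the counters stage s references; shared
     * counters are deliberately re-counted per stage, as the spec asks. */
    static unsigned combined_atomic_counters(const unsigned per_stage_refs[NUM_STAGES])
    {
       unsigned total = 0;
       for (unsigned s = 0; s < NUM_STAGES; s++)
          total += per_stage_refs[s];
       return total;
    }
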
*/ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (atomic_counters[i] > ctx->Const.Program[i].MaxAtomicCounters) - linker_error(prog, "Too many %s shader atomic counters", - _mesa_shader_stage_to_string(i)); - - if (atomic_buffers[i] > ctx->Const.Program[i].MaxAtomicBuffers) - linker_error(prog, "Too many %s shader atomic counter buffers", - _mesa_shader_stage_to_string(i)); - } - - if (total_atomic_counters > ctx->Const.MaxCombinedAtomicCounters) - linker_error(prog, "Too many combined atomic counters"); - - if (total_atomic_buffers > ctx->Const.MaxCombinedAtomicBuffers) - linker_error(prog, "Too many combined atomic buffers"); - - delete [] abs; -} diff --git a/src/glsl/link_functions.cpp b/src/glsl/link_functions.cpp deleted file mode 100644 index 537f4dc77ac..00000000000 --- a/src/glsl/link_functions.cpp +++ /dev/null @@ -1,348 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "main/core.h" -#include "glsl_symbol_table.h" -#include "glsl_parser_extras.h" -#include "ir.h" -#include "program.h" -#include "program/hash_table.h" -#include "linker.h" - -static ir_function_signature * -find_matching_signature(const char *name, const exec_list *actual_parameters, - gl_shader **shader_list, unsigned num_shaders, - bool use_builtin); - -namespace { - -class call_link_visitor : public ir_hierarchical_visitor { -public: - call_link_visitor(gl_shader_program *prog, gl_shader *linked, - gl_shader **shader_list, unsigned num_shaders) - { - this->prog = prog; - this->shader_list = shader_list; - this->num_shaders = num_shaders; - this->success = true; - this->linked = linked; - - this->locals = hash_table_ctor(0, hash_table_pointer_hash, - hash_table_pointer_compare); - } - - ~call_link_visitor() - { - hash_table_dtor(this->locals); - } - - virtual ir_visitor_status visit(ir_variable *ir) - { - hash_table_insert(locals, ir, ir); - return visit_continue; - } - - virtual ir_visitor_status visit_enter(ir_call *ir) - { - /* If ir is an ir_call from a function that was imported from another - * shader callee will point to an ir_function_signature in the original - * shader. In this case the function signature MUST NOT BE MODIFIED. - * Doing so will modify the original shader. This may prevent that - * shader from being linkable in other programs. 
- */
-      const ir_function_signature *const callee = ir->callee;
-      assert(callee != NULL);
-      const char *const name = callee->function_name();
-
-      /* Determine if the requested function signature already exists in the
-       * final linked shader. If it does, use it as the target of the call.
-       */
-      ir_function_signature *sig =
-         find_matching_signature(name, &callee->parameters, &linked, 1,
-                                 ir->use_builtin);
-      if (sig != NULL) {
-         ir->callee = sig;
-         return visit_continue;
-      }
-
-      /* Try to find the signature in one of the other shaders that is being
-       * linked. If it's not found there, return an error.
-       */
-      sig = find_matching_signature(name, &ir->actual_parameters, shader_list,
-                                    num_shaders, ir->use_builtin);
-      if (sig == NULL) {
-         /* FINISHME: Log the full signature of unresolved function.
-          */
-         linker_error(this->prog, "unresolved reference to function `%s'\n",
-                      name);
-         this->success = false;
-         return visit_stop;
-      }
-
-      /* Find the prototype information in the linked shader. Generate any
-       * details that may be missing.
-       */
-      ir_function *f = linked->symbols->get_function(name);
-      if (f == NULL) {
-         f = new(linked) ir_function(name);
-
-         /* Add the new function to the linked IR. Put it at the end
-          * so that it comes after any global variable declarations
-          * that it refers to.
-          */
-         linked->symbols->add_function(f);
-         linked->ir->push_tail(f);
-      }
-
-      ir_function_signature *linked_sig =
-         f->exact_matching_signature(NULL, &callee->parameters);
-      if ((linked_sig == NULL)
-          || ((linked_sig != NULL)
-              && (linked_sig->is_builtin() != ir->use_builtin))) {
-         linked_sig = new(linked) ir_function_signature(callee->return_type);
-         f->add_signature(linked_sig);
-      }
-
-      /* At this point linked_sig and callee may be the same. If ir is an
-       * ir_call from linked then linked_sig and callee will be
-       * ir_function_signatures that have no definitions (is_defined is false).
-       */
-      assert(!linked_sig->is_defined);
-      assert(linked_sig->body.is_empty());
-
-      /* Create an in-place clone of the function definition. This multistep
-       * process introduces some complexity here, but it has some advantages.
-       * The parameter list and the function body are cloned separately.
-       * The clone of the parameter list is used to prime the hashtable used
-       * to replace variable references in the cloned body.
-       *
-       * The big advantage is that the ir_function_signature does not change.
-       * This means that we don't have to process the rest of the IR tree to
-       * patch ir_call nodes. In addition, there is no way to remove or
-       * replace a signature stored in a function. One could easily be added,
-       * but this avoids the need.
-       */
-      struct hash_table *ht = hash_table_ctor(0, hash_table_pointer_hash,
-                                              hash_table_pointer_compare);
-      exec_list formal_parameters;
-      foreach_in_list(const ir_instruction, original, &sig->parameters) {
-         assert(const_cast<ir_instruction *>(original)->as_variable());
-
-         ir_instruction *copy = original->clone(linked, ht);
-         formal_parameters.push_tail(copy);
-      }
-
-      linked_sig->replace_parameters(&formal_parameters);
-
-      linked_sig->is_intrinsic = sig->is_intrinsic;
-
-      if (sig->is_defined) {
-         foreach_in_list(const ir_instruction, original, &sig->body) {
-            ir_instruction *copy = original->clone(linked, ht);
-            linked_sig->body.push_tail(copy);
-         }
-
-         linked_sig->is_defined = true;
-      }
-
-      hash_table_dtor(ht);
-
-      /* Patch references inside the function to things outside the function
-       * (i.e., function calls and global variables).
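
The clone-with-remap idiom described above is general: copy the parameters first to populate a pointer map, then clone the body through the same map so references land on the copies. A reduced sketch with std::unordered_map standing in for the ralloc hash table (hypothetical node type, not the ir_instruction::clone machinery):

    #include <unordered_map>
    #include <vector>

    struct var;                      /* hypothetical variable object */
    struct node {
       var *referenced = nullptr;    /* non-null for variable references */
       std::vector<node *> children;
    };

    using remap_table = std::unordered_map<const var *, var *>;

    /* Deep-copy a node; variable references present in the table are
     * redirected to the cloned parameters instead of the originals. */
    static node *clone(const node *n, const remap_table &remap)
    {
       node *copy = new node;
       if (n->referenced) {
          auto it = remap.find(n->referenced);
          copy->referenced = (it != remap.end()) ? it->second : n->referenced;
       }
       for (const node *c : n->children)
          copy->children.push_back(clone(c, remap));
       return copy;
    }
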
- */ - linked_sig->accept(this); - - ir->callee = linked_sig; - - return visit_continue; - } - - virtual ir_visitor_status visit_leave(ir_call *ir) - { - /* Traverse list of function parameters, and for array parameters - * propagate max_array_access. Otherwise arrays that are only referenced - * from inside functions via function parameters will be incorrectly - * optimized. This will lead to incorrect code being generated (or worse). - * Do it when leaving the node so the children would propagate their - * array accesses first. - */ - - const exec_node *formal_param_node = ir->callee->parameters.get_head(); - if (formal_param_node) { - const exec_node *actual_param_node = ir->actual_parameters.get_head(); - while (!actual_param_node->is_tail_sentinel()) { - ir_variable *formal_param = (ir_variable *) formal_param_node; - ir_rvalue *actual_param = (ir_rvalue *) actual_param_node; - - formal_param_node = formal_param_node->get_next(); - actual_param_node = actual_param_node->get_next(); - - if (formal_param->type->is_array()) { - ir_dereference_variable *deref = actual_param->as_dereference_variable(); - if (deref && deref->var && deref->var->type->is_array()) { - deref->var->data.max_array_access = - MAX2(formal_param->data.max_array_access, - deref->var->data.max_array_access); - } - } - } - } - return visit_continue; - } - - virtual ir_visitor_status visit(ir_dereference_variable *ir) - { - if (hash_table_find(locals, ir->var) == NULL) { - /* The non-function variable must be a global, so try to find the - * variable in the shader's symbol table. If the variable is not - * found, then it's a global that *MUST* be defined in the original - * shader. - */ - ir_variable *var = linked->symbols->get_variable(ir->var->name); - if (var == NULL) { - /* Clone the ir_variable that the dereference already has and add - * it to the linked shader. - */ - var = ir->var->clone(linked, NULL); - linked->symbols->add_variable(var); - linked->ir->push_head(var); - } else { - if (var->type->is_array()) { - /* It is possible to have a global array declared in multiple - * shaders without a size. The array is implicitly sized by - * the maximal access to it in *any* shader. Because of this, - * we need to track the maximal access to the array as linking - * pulls more functions in that access the array. - */ - var->data.max_array_access = - MAX2(var->data.max_array_access, - ir->var->data.max_array_access); - - if (var->type->length == 0 && ir->var->type->length != 0) - var->type = ir->var->type; - } - if (var->is_interface_instance()) { - /* Similarly, we need implicit sizes of arrays within interface - * blocks to be sized by the maximal access in *any* shader. - */ - unsigned *const linked_max_ifc_array_access = - var->get_max_ifc_array_access(); - unsigned *const ir_max_ifc_array_access = - ir->var->get_max_ifc_array_access(); - - assert(linked_max_ifc_array_access != NULL); - assert(ir_max_ifc_array_access != NULL); - - for (unsigned i = 0; i < var->get_interface_type()->length; - i++) { - linked_max_ifc_array_access[i] = - MAX2(linked_max_ifc_array_access[i], - ir_max_ifc_array_access[i]); - } - } - } - - ir->var = var; - } - - return visit_continue; - } - - /** Was function linking successful? */ - bool success; - -private: - /** - * Shader program being linked - * - * This is only used for logging error messages. - */ - gl_shader_program *prog; - - /** List of shaders available for linking. */ - gl_shader **shader_list; - - /** Number of shaders available for linking. 
*/ - unsigned num_shaders; - - /** - * Final linked shader - * - * This is used two ways. It is used to find global variables in the - * linked shader that are accessed by the function. It is also used to add - * global variables from the shader where the function originated. - */ - gl_shader *linked; - - /** - * Table of variables local to the function. - */ - hash_table *locals; -}; - -} /* anonymous namespace */ - -/** - * Searches a list of shaders for a particular function definition - */ -ir_function_signature * -find_matching_signature(const char *name, const exec_list *actual_parameters, - gl_shader **shader_list, unsigned num_shaders, - bool use_builtin) -{ - for (unsigned i = 0; i < num_shaders; i++) { - ir_function *const f = shader_list[i]->symbols->get_function(name); - - if (f == NULL) - continue; - - ir_function_signature *sig = - f->matching_signature(NULL, actual_parameters, use_builtin); - - if ((sig == NULL) || - (!sig->is_defined && !sig->is_intrinsic)) - continue; - - /* If this function expects to bind to a built-in function and the - * signature that we found isn't a built-in, keep looking. Also keep - * looking if we expect a non-built-in but found a built-in. - */ - if (use_builtin != sig->is_builtin()) - continue; - - return sig; - } - - return NULL; -} - - -bool -link_function_calls(gl_shader_program *prog, gl_shader *main, - gl_shader **shader_list, unsigned num_shaders) -{ - call_link_visitor v(prog, main, shader_list, num_shaders); - - v.run(main->ir); - return v.success; -} diff --git a/src/glsl/link_interface_blocks.cpp b/src/glsl/link_interface_blocks.cpp deleted file mode 100644 index 64c30fea9a3..00000000000 --- a/src/glsl/link_interface_blocks.cpp +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file link_interface_blocks.cpp - * Linker support for GLSL's interface blocks. - */ - -#include "ir.h" -#include "glsl_symbol_table.h" -#include "linker.h" -#include "main/macros.h" -#include "util/hash_table.h" - - -namespace { - -/** - * Check if two interfaces match, according to intrastage interface matching - * rules. If they do, and the first interface uses an unsized array, it will - * be updated to reflect the array size declared in the second interface. - */ -bool -intrastage_match(ir_variable *a, - ir_variable *b, - struct gl_shader_program *prog) -{ - /* Types must match. 
*/
-   if (a->get_interface_type() != b->get_interface_type()) {
-      /* Exception: if both the interface blocks are implicitly declared,
-       * don't force their types to match.  They might mismatch due to the two
-       * shaders using different GLSL versions, and that's ok.
-       */
-      if (a->data.how_declared != ir_var_declared_implicitly ||
-          b->data.how_declared != ir_var_declared_implicitly)
-         return false;
-   }
-
-   /* Presence/absence of interface names must match. */
-   if (a->is_interface_instance() != b->is_interface_instance())
-      return false;
-
-   /* For uniforms, instance names need not match.  For shader ins/outs,
-    * it's not clear from the spec whether they need to match, but
-    * Mesa's implementation relies on them matching.
-    */
-   if (a->is_interface_instance() && b->data.mode != ir_var_uniform &&
-       b->data.mode != ir_var_shader_storage &&
-       strcmp(a->name, b->name) != 0) {
-      return false;
-   }
-
-   /* If a block is an array then it must match across the shader.
-    * Unsized arrays are also processed and matched against sized arrays.
-    */
-   if (b->type != a->type &&
-       (b->is_interface_instance() || a->is_interface_instance()) &&
-       !validate_intrastage_arrays(prog, b, a))
-      return false;
-
-   return true;
-}
-
-
-/**
- * Check if two interfaces match, according to interstage (in/out) interface
- * matching rules.
- *
- * If \c extra_array_level is true, the consumer interface is required to be
- * an array and the producer interface is required to be a non-array.
- * This is used for tessellation control and geometry shader consumers.
- */
-bool
-interstage_match(ir_variable *producer,
-                 ir_variable *consumer,
-                 bool extra_array_level)
-{
-   /* Unsized arrays should not occur during interstage linking.  They
-    * should have all been assigned a size by link_intrastage_shaders.
-    */
-   assert(!consumer->type->is_unsized_array());
-   assert(!producer->type->is_unsized_array());
-
-   /* Types must match. */
-   if (consumer->get_interface_type() != producer->get_interface_type()) {
-      /* Exception: if both the interface blocks are implicitly declared,
-       * don't force their types to match.  They might mismatch due to the two
-       * shaders using different GLSL versions, and that's ok.
-       */
-      if (consumer->data.how_declared != ir_var_declared_implicitly ||
-          producer->data.how_declared != ir_var_declared_implicitly)
-         return false;
-   }
-
-   /* Ignore the outermost array if the consumer requires an extra array
-    * level (tessellation and geometry shader consumers).
-    */
-   const glsl_type *consumer_instance_type;
-   if (extra_array_level) {
-      consumer_instance_type = consumer->type->fields.array;
-   } else {
-      consumer_instance_type = consumer->type;
-   }
-
-   /* If a block is an array then it must match across shaders.
-    * Since unsized arrays have been ruled out, we can check this by just
-    * making sure the types are equal.
-    */
-   if ((consumer->is_interface_instance() &&
-        consumer_instance_type->is_array()) ||
-       (producer->is_interface_instance() &&
-        producer->type->is_array())) {
-      if (consumer_instance_type != producer->type)
-         return false;
-   }
-
-   return true;
-}
-
-
-/**
- * This class keeps track of a mapping from an interface block name to the
- * necessary information about that interface block to determine whether to
- * generate a link error.
- *
- * Note: this class is expected to be short-lived, so it doesn't make copies
- * of the strings it references; it simply borrows the pointers from the
- * ir_variable class. 
- */
-class interface_block_definitions
-{
-public:
-   interface_block_definitions()
-      : mem_ctx(ralloc_context(NULL)),
-        ht(_mesa_hash_table_create(NULL, _mesa_key_hash_string,
-                                   _mesa_key_string_equal))
-   {
-   }
-
-   ~interface_block_definitions()
-   {
-      ralloc_free(mem_ctx);
-      _mesa_hash_table_destroy(ht, NULL);
-   }
-
-   /**
-    * Look up the interface definition.  Return NULL if none is found.
-    */
-   ir_variable *lookup(ir_variable *var)
-   {
-      if (var->data.explicit_location &&
-          var->data.location >= VARYING_SLOT_VAR0) {
-         char location_str[11];
-         snprintf(location_str, 11, "%d", var->data.location);
-
-         const struct hash_entry *entry =
-            _mesa_hash_table_search(ht, location_str);
-         return entry ? (ir_variable *) entry->data : NULL;
-      } else {
-         const struct hash_entry *entry =
-            _mesa_hash_table_search(ht, var->get_interface_type()->name);
-         return entry ? (ir_variable *) entry->data : NULL;
-      }
-   }
-
-   /**
-    * Add a new interface definition.
-    */
-   void store(ir_variable *var)
-   {
-      if (var->data.explicit_location &&
-          var->data.location >= VARYING_SLOT_VAR0) {
-         /* If an explicit location is given then look up the variable by
-          * location.  We turn the location into a string and use this as the
-          * hash key rather than the name.  Note: We allocate enough space for
-          * a 32-bit unsigned location value, which is overkill but
-          * future-proof.
-          */
-         char location_str[11];
-         snprintf(location_str, 11, "%d", var->data.location);
-         _mesa_hash_table_insert(ht, ralloc_strdup(mem_ctx, location_str), var);
-      } else {
-         _mesa_hash_table_insert(ht, var->get_interface_type()->name, var);
-      }
-   }
-
-private:
-   /**
-    * Ralloc context for data structures allocated by this class.
-    */
-   void *mem_ctx;
-
-   /**
-    * Hash table mapping interface block name to an \c
-    * ir_variable.
-    */
-   hash_table *ht;
-};
-
-
-} /* anonymous namespace */
-
-
-void
-validate_intrastage_interface_blocks(struct gl_shader_program *prog,
-                                     const gl_shader **shader_list,
-                                     unsigned num_shaders)
-{
-   interface_block_definitions in_interfaces;
-   interface_block_definitions out_interfaces;
-   interface_block_definitions uniform_interfaces;
-   interface_block_definitions buffer_interfaces;
-
-   for (unsigned int i = 0; i < num_shaders; i++) {
-      if (shader_list[i] == NULL)
-         continue;
-
-      foreach_in_list(ir_instruction, node, shader_list[i]->ir) {
-         ir_variable *var = node->as_variable();
-         if (!var)
-            continue;
-
-         const glsl_type *iface_type = var->get_interface_type();
-
-         if (iface_type == NULL)
-            continue;
-
-         interface_block_definitions *definitions;
-         switch (var->data.mode) {
-         case ir_var_shader_in:
-            definitions = &in_interfaces;
-            break;
-         case ir_var_shader_out:
-            definitions = &out_interfaces;
-            break;
-         case ir_var_uniform:
-            definitions = &uniform_interfaces;
-            break;
-         case ir_var_shader_storage:
-            definitions = &buffer_interfaces;
-            break;
-         default:
-            /* Only in, out, uniform, and buffer interfaces are legal, so we
-             * should never get here.
-             */
-            assert(!"illegal interface type");
-            continue;
-         }
-
-         ir_variable *prev_def = definitions->lookup(var);
-         if (prev_def == NULL) {
-            /* This is the first time we've seen the interface, so save
-             * it into the appropriate data structure. 
- */
-            definitions->store(var);
-         } else if (!intrastage_match(prev_def, var, prog)) {
-            linker_error(prog, "definitions of interface block `%s' do not"
-                         " match\n", iface_type->name);
-            return;
-         }
-      }
-   }
-}
-
-void
-validate_interstage_inout_blocks(struct gl_shader_program *prog,
-                                 const gl_shader *producer,
-                                 const gl_shader *consumer)
-{
-   interface_block_definitions definitions;
-   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
-   const bool extra_array_level = (producer->Stage == MESA_SHADER_VERTEX &&
-                                   consumer->Stage != MESA_SHADER_FRAGMENT) ||
-                                  consumer->Stage == MESA_SHADER_GEOMETRY;
-
-   /* Record the consumer's input interfaces in the definitions table. */
-   foreach_in_list(ir_instruction, node, consumer->ir) {
-      ir_variable *var = node->as_variable();
-      if (!var || !var->get_interface_type() || var->data.mode != ir_var_shader_in)
-         continue;
-
-      definitions.store(var);
-   }
-
-   /* Verify that the producer's output interfaces match. */
-   foreach_in_list(ir_instruction, node, producer->ir) {
-      ir_variable *var = node->as_variable();
-      if (!var || !var->get_interface_type() || var->data.mode != ir_var_shader_out)
-         continue;
-
-      ir_variable *consumer_def = definitions.lookup(var);
-
-      /* The consumer doesn't use this output block.  Ignore it. */
-      if (consumer_def == NULL)
-         continue;
-
-      if (!interstage_match(var, consumer_def, extra_array_level)) {
-         linker_error(prog, "definitions of interface block `%s' do not "
-                      "match\n", var->get_interface_type()->name);
-         return;
-      }
-   }
-}
-
-
-void
-validate_interstage_uniform_blocks(struct gl_shader_program *prog,
-                                   gl_shader **stages, int num_stages)
-{
-   interface_block_definitions definitions;
-
-   for (int i = 0; i < num_stages; i++) {
-      if (stages[i] == NULL)
-         continue;
-
-      const gl_shader *stage = stages[i];
-      foreach_in_list(ir_instruction, node, stage->ir) {
-         ir_variable *var = node->as_variable();
-         if (!var || !var->get_interface_type() ||
-             (var->data.mode != ir_var_uniform &&
-              var->data.mode != ir_var_shader_storage))
-            continue;
-
-         ir_variable *old_def = definitions.lookup(var);
-         if (old_def == NULL) {
-            definitions.store(var);
-         } else {
-            /* Interstage uniform matching rules are the same as intrastage
-             * uniform matching rules (for uniforms, it is as though all
-             * shaders are in the same shader stage).
-             */
-            if (!intrastage_match(old_def, var, prog)) {
-               linker_error(prog, "definitions of interface block `%s' do not "
-                            "match\n", var->get_interface_type()->name);
-               return;
-            }
-         }
-      }
-   }
-}
diff --git a/src/glsl/link_uniform_block_active_visitor.cpp b/src/glsl/link_uniform_block_active_visitor.cpp
deleted file mode 100644
index 54fea700b53..00000000000
--- a/src/glsl/link_uniform_block_active_visitor.cpp
+++ /dev/null
@@ -1,296 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software. 
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "link_uniform_block_active_visitor.h"
-#include "program.h"
-
-static link_uniform_block_active *
-process_block(void *mem_ctx, struct hash_table *ht, ir_variable *var)
-{
-   const hash_entry *const existing_block =
-      _mesa_hash_table_search(ht, var->get_interface_type()->name);
-
-   const glsl_type *const block_type = var->is_interface_instance()
-      ? var->type : var->get_interface_type();
-
-
-   /* If a block with this block-name has not previously been seen, add it.
-    * If a block with this block-name has been seen, it must be identical to
-    * the block currently being examined.
-    */
-   if (existing_block == NULL) {
-      link_uniform_block_active *const b =
-         rzalloc(mem_ctx, struct link_uniform_block_active);
-
-      b->type = block_type;
-      b->has_instance_name = var->is_interface_instance();
-      b->is_shader_storage = var->data.mode == ir_var_shader_storage;
-
-      if (var->data.explicit_binding) {
-         b->has_binding = true;
-         b->binding = var->data.binding;
-      } else {
-         b->has_binding = false;
-         b->binding = 0;
-      }
-
-      _mesa_hash_table_insert(ht, var->get_interface_type()->name, (void *) b);
-      return b;
-   } else {
-      link_uniform_block_active *const b =
-         (link_uniform_block_active *) existing_block->data;
-
-      if (b->type != block_type
-          || b->has_instance_name != var->is_interface_instance())
-         return NULL;
-      else
-         return b;
-   }
-
-   assert(!"Should not get here.");
-   return NULL;
-}
-
-/* For arrays of arrays this function will give us a middle ground between
- * detecting inactive uniform blocks and structuring them in a way that makes
- * it easy to calculate the offset for indirect indexing.
- *
- * For example, given the shader:
- *
- *   uniform ArraysOfArraysBlock
- *   {
- *      vec4 a;
- *   } i[3][4][5];
- *
- *   void main()
- *   {
- *      vec4 b = i[0][1][1].a;
- *      gl_Position = i[2][2][3].a + b;
- *   }
- *
- * There are only 2 active blocks above, but for the sake of indirect
- * indexing, and to avoid overcomplicating the code, we end up with a count
- * of 8: each dimension has 2 distinct indices, so we count 2*2*2 = 8.
- */
-static struct uniform_block_array_elements **
-process_arrays(void *mem_ctx, ir_dereference_array *ir,
-               struct link_uniform_block_active *block)
-{
-   if (ir) {
-      struct uniform_block_array_elements **ub_array_ptr =
-         process_arrays(mem_ctx, ir->array->as_dereference_array(), block);
-      if (*ub_array_ptr == NULL) {
-         *ub_array_ptr = rzalloc(mem_ctx, struct uniform_block_array_elements);
-         (*ub_array_ptr)->ir = ir;
-      }
-
-      struct uniform_block_array_elements *ub_array = *ub_array_ptr;
-      ir_constant *c = ir->array_index->as_constant();
-      if (c) {
-         /* Index is a constant, so mark just that element used,
-          * if not already. 
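-          * (In the ArraysOfArraysBlock example above, i[2][2][3].a records
-          * index 2 for the outer dimension, 2 for the middle dimension and
-          * 3 for the innermost one.)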
- */
-         const unsigned idx = c->get_uint_component(0);
-
-         unsigned i;
-         for (i = 0; i < ub_array->num_array_elements; i++) {
-            if (ub_array->array_elements[i] == idx)
-               break;
-         }
-
-         assert(i <= ub_array->num_array_elements);
-
-         if (i == ub_array->num_array_elements) {
-            ub_array->array_elements = reralloc(mem_ctx,
-                                                ub_array->array_elements,
-                                                unsigned,
-                                                ub_array->num_array_elements + 1);
-
-            ub_array->array_elements[ub_array->num_array_elements] = idx;
-
-            ub_array->num_array_elements++;
-         }
-      } else {
-         /* The array index is not a constant,
-          * so mark the entire array used.
-          */
-         assert(ir->array->type->is_array());
-         if (ub_array->num_array_elements < ir->array->type->length) {
-            ub_array->num_array_elements = ir->array->type->length;
-            ub_array->array_elements = reralloc(mem_ctx,
-                                                ub_array->array_elements,
-                                                unsigned,
-                                                ub_array->num_array_elements);
-
-            for (unsigned i = 0; i < ub_array->num_array_elements; i++) {
-               ub_array->array_elements[i] = i;
-            }
-         }
-      }
-      return &ub_array->array;
-   } else {
-      return &block->array;
-   }
-}
-
-ir_visitor_status
-link_uniform_block_active_visitor::visit(ir_variable *var)
-{
-   if (!var->is_in_buffer_block())
-      return visit_continue;
-
-   /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec says:
-    *
-    *     "All members of a named uniform block declared with a shared or
-    *     std140 layout qualifier are considered active, even if they are not
-    *     referenced in any shader in the program. The uniform block itself is
-    *     also considered active, even if no member of the block is
-    *     referenced."
-    */
-   if (var->get_interface_type()->interface_packing ==
-       GLSL_INTERFACE_PACKING_PACKED)
-      return visit_continue;
-
-   /* Process the block.  Bail if there was an error.
-    */
-   link_uniform_block_active *const b =
-      process_block(this->mem_ctx, this->ht, var);
-   if (b == NULL) {
-      linker_error(this->prog,
-                   "uniform block `%s' has mismatching definitions",
-                   var->get_interface_type()->name);
-      this->success = false;
-      return visit_stop;
-   }
-
-   assert(b->array == NULL);
-   assert(b->type != NULL);
-   assert(!b->type->is_array() || b->has_instance_name);
-
-   /* For uniform block arrays declared with a shared or std140 layout
-    * qualifier, mark all of their instances as used.
-    */
-   const glsl_type *type = b->type;
-   struct uniform_block_array_elements **ub_array = &b->array;
-   while (type->is_array()) {
-      assert(b->type->length > 0);
-
-      *ub_array = rzalloc(this->mem_ctx, struct uniform_block_array_elements);
-      (*ub_array)->num_array_elements = type->length;
-      (*ub_array)->array_elements = reralloc(this->mem_ctx,
-                                             (*ub_array)->array_elements,
-                                             unsigned,
-                                             (*ub_array)->num_array_elements);
-
-      for (unsigned i = 0; i < (*ub_array)->num_array_elements; i++) {
-         (*ub_array)->array_elements[i] = i;
-      }
-      ub_array = &(*ub_array)->array;
-      type = type->fields.array;
-   }
-
-   return visit_continue;
-}
-
-ir_visitor_status
-link_uniform_block_active_visitor::visit_enter(ir_dereference_array *ir)
-{
-   /* cycle through arrays of arrays */
-   ir_dereference_array *base_ir = ir;
-   while (base_ir->array->ir_type == ir_type_dereference_array)
-      base_ir = base_ir->array->as_dereference_array();
-
-   ir_dereference_variable *const d =
-      base_ir->array->as_dereference_variable();
-   ir_variable *const var = (d == NULL) ? NULL : d->var;
-
-   /* If the r-value being dereferenced is not a variable (e.g., a field of a
-    * structure) or is not a uniform block instance, continue.
-    *
-    * WARNING: It is not enough for the variable to be part of a uniform block. 
- * It must represent the entire block.  Arrays (or matrices) inside blocks
- * that lack an instance name are handled by the ir_dereference_variable
- * function.
- */
-   if (var == NULL
-       || !var->is_in_buffer_block()
-       || !var->is_interface_instance())
-      return visit_continue;
-
-   /* Process the block.  Bail if there was an error.
-    */
-   link_uniform_block_active *const b =
-      process_block(this->mem_ctx, this->ht, var);
-   if (b == NULL) {
-      linker_error(prog,
-                   "uniform block `%s' has mismatching definitions",
-                   var->get_interface_type()->name);
-      this->success = false;
-      return visit_stop;
-   }
-
-   /* Block arrays must be declared with an instance name.
-    */
-   assert(b->has_instance_name);
-   assert(b->type != NULL);
-
-   /* If the block array was declared with a shared or
-    * std140 layout qualifier, all of its instances have already been marked
-    * as used in link_uniform_block_active_visitor::visit(ir_variable *).
-    */
-   if (var->get_interface_type()->interface_packing ==
-       GLSL_INTERFACE_PACKING_PACKED) {
-      b->var = var;
-      process_arrays(this->mem_ctx, ir, b);
-   }
-
-   return visit_continue_with_parent;
-}
-
-ir_visitor_status
-link_uniform_block_active_visitor::visit(ir_dereference_variable *ir)
-{
-   ir_variable *var = ir->var;
-
-   if (!var->is_in_buffer_block())
-      return visit_continue;
-
-   assert(!var->is_interface_instance() || !var->type->is_array());
-
-   /* Process the block.  Bail if there was an error.
-    */
-   link_uniform_block_active *const b =
-      process_block(this->mem_ctx, this->ht, var);
-   if (b == NULL) {
-      linker_error(this->prog,
-                   "uniform block `%s' has mismatching definitions",
-                   var->get_interface_type()->name);
-      this->success = false;
-      return visit_stop;
-   }
-
-   assert(b->array == NULL);
-   assert(b->type != NULL);
-
-   return visit_continue;
-}
diff --git a/src/glsl/link_uniform_block_active_visitor.h b/src/glsl/link_uniform_block_active_visitor.h
deleted file mode 100644
index afb52c14a37..00000000000
--- a/src/glsl/link_uniform_block_active_visitor.h
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE. 
- */ - -#pragma once -#ifndef LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H -#define LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H - -#include "ir.h" -#include "util/hash_table.h" - -struct uniform_block_array_elements { - unsigned *array_elements; - unsigned num_array_elements; - - ir_dereference_array *ir; - - struct uniform_block_array_elements *array; -}; - -struct link_uniform_block_active { - const glsl_type *type; - ir_variable *var; - - struct uniform_block_array_elements *array; - - unsigned binding; - - bool has_instance_name; - bool has_binding; - bool is_shader_storage; -}; - -class link_uniform_block_active_visitor : public ir_hierarchical_visitor { -public: - link_uniform_block_active_visitor(void *mem_ctx, struct hash_table *ht, - struct gl_shader_program *prog) - : success(true), prog(prog), ht(ht), mem_ctx(mem_ctx) - { - /* empty */ - } - - virtual ir_visitor_status visit_enter(ir_dereference_array *); - virtual ir_visitor_status visit(ir_dereference_variable *); - virtual ir_visitor_status visit(ir_variable *); - - bool success; - -private: - struct gl_shader_program *prog; - struct hash_table *ht; - void *mem_ctx; -}; - -#endif /* LINK_UNIFORM_BLOCK_ACTIVE_VISITOR_H */ diff --git a/src/glsl/link_uniform_blocks.cpp b/src/glsl/link_uniform_blocks.cpp deleted file mode 100644 index 7d755765852..00000000000 --- a/src/glsl/link_uniform_blocks.cpp +++ /dev/null @@ -1,472 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include "main/core.h" -#include "ir.h" -#include "linker.h" -#include "ir_uniform.h" -#include "link_uniform_block_active_visitor.h" -#include "util/hash_table.h" -#include "program.h" - -namespace { - -class ubo_visitor : public program_resource_visitor { -public: - ubo_visitor(void *mem_ctx, gl_uniform_buffer_variable *variables, - unsigned num_variables) - : index(0), offset(0), buffer_size(0), variables(variables), - num_variables(num_variables), mem_ctx(mem_ctx), is_array_instance(false) - { - /* empty */ - } - - void process(const glsl_type *type, const char *name) - { - this->offset = 0; - this->buffer_size = 0; - this->is_array_instance = strchr(name, ']') != NULL; - this->program_resource_visitor::process(type, name); - } - - unsigned index; - unsigned offset; - unsigned buffer_size; - gl_uniform_buffer_variable *variables; - unsigned num_variables; - void *mem_ctx; - bool is_array_instance; - -private: - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major) - { - (void) type; - (void) name; - (void) row_major; - assert(!"Should not get here."); - } - - virtual void enter_record(const glsl_type *type, const char *, - bool row_major, const unsigned packing) { - assert(type->is_record()); - if (packing == GLSL_INTERFACE_PACKING_STD430) - this->offset = glsl_align( - this->offset, type->std430_base_alignment(row_major)); - else - this->offset = glsl_align( - this->offset, type->std140_base_alignment(row_major)); - } - - virtual void leave_record(const glsl_type *type, const char *, - bool row_major, const unsigned packing) { - assert(type->is_record()); - - /* If this is the last field of a structure, apply rule #9. The - * GL_ARB_uniform_buffer_object spec says: - * - * "The structure may have padding at the end; the base offset of - * the member following the sub-structure is rounded up to the next - * multiple of the base alignment of the structure." - */ - if (packing == GLSL_INTERFACE_PACKING_STD430) - this->offset = glsl_align( - this->offset, type->std430_base_alignment(row_major)); - else - this->offset = glsl_align( - this->offset, type->std140_base_alignment(row_major)); - } - - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major, const glsl_type *, - const unsigned packing, - bool last_field) - { - assert(this->index < this->num_variables); - - gl_uniform_buffer_variable *v = &this->variables[this->index++]; - - v->Name = ralloc_strdup(mem_ctx, name); - v->Type = type; - v->RowMajor = type->without_array()->is_matrix() && row_major; - - if (this->is_array_instance) { - v->IndexName = ralloc_strdup(mem_ctx, name); - - char *open_bracket = strchr(v->IndexName, '['); - assert(open_bracket != NULL); - - char *close_bracket = strchr(open_bracket, '.') - 1; - assert(close_bracket != NULL); - - /* Length of the tail without the ']' but with the NUL. - */ - unsigned len = strlen(close_bracket + 1) + 1; - - memmove(open_bracket, close_bracket + 1, len); - } else { - v->IndexName = v->Name; - } - - unsigned alignment = 0; - unsigned size = 0; - - /* From ARB_program_interface_query: - * - * "If the final member of an active shader storage block is array - * with no declared size, the minimum buffer size is computed - * assuming the array was declared as an array with one element." - * - * For that reason, we use the base type of the unsized array to calculate - * its size. We don't need to check if the unsized array is the last member - * of a shader storage block (that check was already done by the parser). 
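-    *
-    * Illustration only (hypothetical block, not from the original sources):
-    * given
-    *
-    *    buffer B { vec4 head; float tail[]; };
-    *
-    * the minimum size reported for B is computed as if "tail" had been
-    * declared "float tail[1]".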
- */ - const glsl_type *type_for_size = type; - if (type->is_unsized_array()) { - assert(last_field); - type_for_size = type->without_array(); - } - - if (packing == GLSL_INTERFACE_PACKING_STD430) { - alignment = type->std430_base_alignment(v->RowMajor); - size = type_for_size->std430_size(v->RowMajor); - } else { - alignment = type->std140_base_alignment(v->RowMajor); - size = type_for_size->std140_size(v->RowMajor); - } - - this->offset = glsl_align(this->offset, alignment); - v->Offset = this->offset; - - this->offset += size; - - /* From the GL_ARB_uniform_buffer_object spec: - * - * "For uniform blocks laid out according to [std140] rules, the - * minimum buffer object size returned by the - * UNIFORM_BLOCK_DATA_SIZE query is derived by taking the offset of - * the last basic machine unit consumed by the last uniform of the - * uniform block (including any end-of-array or end-of-structure - * padding), adding one, and rounding up to the next multiple of - * the base alignment required for a vec4." - */ - this->buffer_size = glsl_align(this->offset, 16); - } -}; - -class count_block_size : public program_resource_visitor { -public: - count_block_size() : num_active_uniforms(0) - { - /* empty */ - } - - unsigned num_active_uniforms; - -private: - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major) - { - (void) type; - (void) name; - (void) row_major; - this->num_active_uniforms++; - } -}; - -} /* anonymous namespace */ - -struct block { - const glsl_type *type; - bool has_instance_name; -}; - -static void -process_block_array(struct uniform_block_array_elements *ub_array, char **name, - size_t name_length, gl_uniform_block *blocks, - ubo_visitor *parcel, gl_uniform_buffer_variable *variables, - const struct link_uniform_block_active *const b, - unsigned *block_index, unsigned *binding_offset, - struct gl_context *ctx, struct gl_shader_program *prog) -{ - if (ub_array) { - for (unsigned j = 0; j < ub_array->num_array_elements; j++) { - size_t new_length = name_length; - - /* Append the subscript to the current variable name */ - ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", - ub_array->array_elements[j]); - - process_block_array(ub_array->array, name, new_length, blocks, - parcel, variables, b, block_index, - binding_offset, ctx, prog); - } - } else { - unsigned i = *block_index; - const glsl_type *type = b->type->without_array(); - - blocks[i].Name = ralloc_strdup(blocks, *name); - blocks[i].Uniforms = &variables[(*parcel).index]; - - /* The GL_ARB_shading_language_420pack spec says: - * - * "If the binding identifier is used with a uniform block - * instanced as an array then the first element of the array - * takes the specified block binding and each subsequent - * element takes the next consecutive uniform block binding - * point." - */ - blocks[i].Binding = (b->has_binding) ? 
b->binding + *binding_offset : 0;
-
-      blocks[i].UniformBufferSize = 0;
-      blocks[i]._Packing = gl_uniform_block_packing(type->interface_packing);
-
-      parcel->process(type, blocks[i].Name);
-
-      blocks[i].UniformBufferSize = parcel->buffer_size;
-
-      /* Check that the SSBO size does not exceed the maximum supported
-       * SSBO size.
-       */
-      if (b->is_shader_storage &&
-          parcel->buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
-         linker_error(prog, "shader storage block `%s' has size %d, "
-                      "which is larger than the maximum allowed (%d)",
-                      b->type->name,
-                      parcel->buffer_size,
-                      ctx->Const.MaxShaderStorageBlockSize);
-      }
-      blocks[i].NumUniforms =
-         (unsigned)(ptrdiff_t)(&variables[parcel->index] - blocks[i].Uniforms);
-      blocks[i].IsShaderStorage = b->is_shader_storage;
-
-      *block_index = *block_index + 1;
-      *binding_offset = *binding_offset + 1;
-   }
-}
-
-/* This function resizes the array types of the block so that later we can use
- * this new size to correctly calculate the offset for indirect indexing.
- */
-static const glsl_type *
-resize_block_array(const glsl_type *type,
-                   struct uniform_block_array_elements *ub_array)
-{
-   if (type->is_array()) {
-      struct uniform_block_array_elements *child_array =
-         type->fields.array->is_array() ? ub_array->array : NULL;
-      const glsl_type *new_child_type =
-         resize_block_array(type->fields.array, child_array);
-
-      const glsl_type *new_type =
-         glsl_type::get_array_instance(new_child_type,
-                                       ub_array->num_array_elements);
-      ub_array->ir->array->type = new_type;
-      return new_type;
-   } else {
-      return type;
-   }
-}
-
-unsigned
-link_uniform_blocks(void *mem_ctx,
-                    struct gl_context *ctx,
-                    struct gl_shader_program *prog,
-                    struct gl_shader **shader_list,
-                    unsigned num_shaders,
-                    struct gl_uniform_block **blocks_ret)
-{
-   /* This hash table will track all of the uniform blocks that have been
-    * encountered.  Since blocks with the same block-name must be the same,
-    * the hash is organized by block-name.
-    */
-   struct hash_table *block_hash =
-      _mesa_hash_table_create(mem_ctx, _mesa_key_hash_string,
-                              _mesa_key_string_equal);
-
-   if (block_hash == NULL) {
-      _mesa_error_no_memory(__func__);
-      linker_error(prog, "out of memory\n");
-      return 0;
-   }
-
-   /* Determine which uniform blocks are active.
-    */
-   link_uniform_block_active_visitor v(mem_ctx, block_hash, prog);
-   for (unsigned i = 0; i < num_shaders; i++) {
-      visit_list_elements(&v, shader_list[i]->ir);
-   }
-
-   /* Count the number of active uniform blocks.  Count the total number of
-    * active slots in those uniform blocks. 
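-    * A block declared as an array (or array of arrays) contributes one
-    * block per element, via the arrays_of_arrays_size() use below, and
-    * every element exposes the same set of member slots.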
- */
-   unsigned num_blocks = 0;
-   unsigned num_variables = 0;
-   count_block_size block_size;
-   struct hash_entry *entry;
-
-   hash_table_foreach (block_hash, entry) {
-      struct link_uniform_block_active *const b =
-         (struct link_uniform_block_active *) entry->data;
-
-      assert((b->array != NULL) == b->type->is_array());
-
-      if (b->array != NULL &&
-          (b->type->without_array()->interface_packing ==
-           GLSL_INTERFACE_PACKING_PACKED)) {
-         b->type = resize_block_array(b->type, b->array);
-         b->var->type = b->type;
-      }
-
-      block_size.num_active_uniforms = 0;
-      block_size.process(b->type->without_array(), "");
-
-      if (b->array != NULL) {
-         unsigned aoa_size = b->type->arrays_of_arrays_size();
-         num_blocks += aoa_size;
-         num_variables += aoa_size * block_size.num_active_uniforms;
-      } else {
-         num_blocks++;
-         num_variables += block_size.num_active_uniforms;
-      }
-   }
-
-   if (num_blocks == 0) {
-      assert(num_variables == 0);
-      _mesa_hash_table_destroy(block_hash, NULL);
-      return 0;
-   }
-
-   assert(num_variables != 0);
-
-   /* Allocate storage to hold all of the information related to uniform
-    * blocks that can be queried through the API.
-    */
-   gl_uniform_block *blocks =
-      ralloc_array(mem_ctx, gl_uniform_block, num_blocks);
-   gl_uniform_buffer_variable *variables =
-      ralloc_array(blocks, gl_uniform_buffer_variable, num_variables);
-
-   /* Add each variable from each uniform block to the API tracking
-    * structures.
-    */
-   unsigned i = 0;
-   ubo_visitor parcel(blocks, variables, num_variables);
-
-   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD140)
-                 == unsigned(ubo_packing_std140));
-   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_SHARED)
-                 == unsigned(ubo_packing_shared));
-   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_PACKED)
-                 == unsigned(ubo_packing_packed));
-   STATIC_ASSERT(unsigned(GLSL_INTERFACE_PACKING_STD430)
-                 == unsigned(ubo_packing_std430));
-
-   hash_table_foreach (block_hash, entry) {
-      const struct link_uniform_block_active *const b =
-         (const struct link_uniform_block_active *) entry->data;
-      const glsl_type *block_type = b->type;
-
-      if (b->array != NULL) {
-         unsigned binding_offset = 0;
-         char *name = ralloc_strdup(NULL, block_type->without_array()->name);
-         size_t name_length = strlen(name);
-
-         assert(b->has_instance_name);
-         process_block_array(b->array, &name, name_length, blocks, &parcel,
-                             variables, b, &i, &binding_offset, ctx, prog);
-         ralloc_free(name);
-      } else {
-         blocks[i].Name = ralloc_strdup(blocks, block_type->name);
-         blocks[i].Uniforms = &variables[parcel.index];
-         blocks[i].Binding = (b->has_binding) ? b->binding : 0;
-         blocks[i].UniformBufferSize = 0;
-         blocks[i]._Packing =
-            gl_uniform_block_packing(block_type->interface_packing);
-
-         parcel.process(block_type,
-                        b->has_instance_name ? 
block_type->name : "");
-
-         blocks[i].UniformBufferSize = parcel.buffer_size;
-
-         /* Check that the SSBO size does not exceed the maximum supported
-          * SSBO size.
-          */
-         if (b->is_shader_storage &&
-             parcel.buffer_size > ctx->Const.MaxShaderStorageBlockSize) {
-            linker_error(prog, "shader storage block `%s' has size %d, "
-                         "which is larger than the maximum allowed (%d)",
-                         block_type->name,
-                         parcel.buffer_size,
-                         ctx->Const.MaxShaderStorageBlockSize);
-         }
-         blocks[i].NumUniforms =
-            (unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
-
-         blocks[i].IsShaderStorage = b->is_shader_storage;
-
-         i++;
-      }
-   }
-
-   assert(parcel.index == num_variables);
-
-   _mesa_hash_table_destroy(block_hash, NULL);
-
-   *blocks_ret = blocks;
-   return num_blocks;
-}
-
-bool
-link_uniform_blocks_are_compatible(const gl_uniform_block *a,
-                                   const gl_uniform_block *b)
-{
-   assert(strcmp(a->Name, b->Name) == 0);
-
-   /* Page 35 (page 42 of the PDF) in section 4.3.7 of the GLSL 1.50 spec says:
-    *
-    *     "Matched block names within an interface (as defined above) must
-    *     match in terms of having the same number of declarations with the
-    *     same sequence of types and the same sequence of member names, as
-    *     well as having the same member-wise layout qualification....if a
-    *     matching block is declared as an array, then the array sizes must
-    *     also match... Any mismatch will generate a link error."
-    *
-    * Arrays are not yet supported, so there is no check for that.
-    */
-   if (a->NumUniforms != b->NumUniforms)
-      return false;
-
-   if (a->_Packing != b->_Packing)
-      return false;
-
-   for (unsigned i = 0; i < a->NumUniforms; i++) {
-      if (strcmp(a->Uniforms[i].Name, b->Uniforms[i].Name) != 0)
-         return false;
-
-      if (a->Uniforms[i].Type != b->Uniforms[i].Type)
-         return false;
-
-      if (a->Uniforms[i].RowMajor != b->Uniforms[i].RowMajor)
-         return false;
-   }
-
-   return true;
-}
diff --git a/src/glsl/link_uniform_initializers.cpp b/src/glsl/link_uniform_initializers.cpp
deleted file mode 100644
index 58d21e5125e..00000000000
--- a/src/glsl/link_uniform_initializers.cpp
+++ /dev/null
@@ -1,355 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "main/core.h"
-#include "ir.h"
-#include "linker.h"
-#include "ir_uniform.h"
-
-/* These functions are put in a "private" namespace instead of being marked
- * static so that the unit tests can access them. 
See - * http://code.google.com/p/googletest/wiki/AdvancedGuide#Testing_Private_Code - */ -namespace linker { - -gl_uniform_storage * -get_storage(gl_uniform_storage *storage, unsigned num_storage, - const char *name) -{ - for (unsigned int i = 0; i < num_storage; i++) { - if (strcmp(name, storage[i].name) == 0) - return &storage[i]; - } - - return NULL; -} - -static unsigned -get_uniform_block_index(const gl_shader_program *shProg, - const char *uniformBlockName) -{ - for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { - if (!strcmp(shProg->BufferInterfaceBlocks[i].Name, uniformBlockName)) - return i; - } - - return GL_INVALID_INDEX; -} - -void -copy_constant_to_storage(union gl_constant_value *storage, - const ir_constant *val, - const enum glsl_base_type base_type, - const unsigned int elements, - unsigned int boolean_true) -{ - for (unsigned int i = 0; i < elements; i++) { - switch (base_type) { - case GLSL_TYPE_UINT: - storage[i].u = val->value.u[i]; - break; - case GLSL_TYPE_INT: - case GLSL_TYPE_SAMPLER: - storage[i].i = val->value.i[i]; - break; - case GLSL_TYPE_FLOAT: - storage[i].f = val->value.f[i]; - break; - case GLSL_TYPE_DOUBLE: - /* XXX need to check on big-endian */ - storage[i * 2].u = *(uint32_t *)&val->value.d[i]; - storage[i * 2 + 1].u = *(((uint32_t *)&val->value.d[i]) + 1); - break; - case GLSL_TYPE_BOOL: - storage[i].b = val->value.b[i] ? boolean_true : 0; - break; - case GLSL_TYPE_ARRAY: - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_IMAGE: - case GLSL_TYPE_ATOMIC_UINT: - case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_VOID: - case GLSL_TYPE_SUBROUTINE: - case GLSL_TYPE_ERROR: - /* All other types should have already been filtered by other - * paths in the caller. - */ - assert(!"Should not get here."); - break; - } - } -} - -/** - * Initialize an opaque uniform from the value of an explicit binding - * qualifier specified in the shader. Atomic counters are different because - * they have no storage and should be handled elsewhere. - */ -void -set_opaque_binding(void *mem_ctx, gl_shader_program *prog, - const glsl_type *type, const char *name, int *binding) -{ - - if (type->is_array() && type->fields.array->is_array()) { - const glsl_type *const element_type = type->fields.array; - - for (unsigned int i = 0; i < type->length; i++) { - const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i); - - set_opaque_binding(mem_ctx, prog, element_type, - element_name, binding); - } - } else { - struct gl_uniform_storage *const storage = - get_storage(prog->UniformStorage, prog->NumUniformStorage, name); - - if (storage == NULL) { - assert(storage != NULL); - return; - } - - const unsigned elements = MAX2(storage->array_elements, 1); - - /* Section 4.4.4 (Opaque-Uniform Layout Qualifiers) of the GLSL 4.20 spec - * says: - * - * "If the binding identifier is used with an array, the first element - * of the array takes the specified unit and each subsequent element - * takes the next consecutive unit." 
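-    *
-    * Illustration only (hypothetical declaration, not from the original
-    * sources): with
-    *
-    *    layout(binding = 2) uniform sampler2D s[3];
-    *
-    * the loop below assigns units 2, 3 and 4 to s[0], s[1] and s[2].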
- */ - for (unsigned int i = 0; i < elements; i++) { - storage->storage[i].i = (*binding)++; - } - - for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { - gl_shader *shader = prog->_LinkedShaders[sh]; - - if (shader) { - if (storage->type->base_type == GLSL_TYPE_SAMPLER && - storage->opaque[sh].active) { - for (unsigned i = 0; i < elements; i++) { - const unsigned index = storage->opaque[sh].index + i; - shader->SamplerUnits[index] = storage->storage[i].i; - } - - } else if (storage->type->base_type == GLSL_TYPE_IMAGE && - storage->opaque[sh].active) { - for (unsigned i = 0; i < elements; i++) { - const unsigned index = storage->opaque[sh].index + i; - shader->ImageUnits[index] = storage->storage[i].i; - } - } - } - } - - storage->initialized = true; - } -} - -void -set_block_binding(gl_shader_program *prog, const char *block_name, int binding) -{ - const unsigned block_index = get_uniform_block_index(prog, block_name); - - if (block_index == GL_INVALID_INDEX) { - assert(block_index != GL_INVALID_INDEX); - return; - } - - /* This is a field of a UBO. val is the binding index. */ - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - int stage_index = prog->InterfaceBlockStageIndex[i][block_index]; - - if (stage_index != -1) { - struct gl_shader *sh = prog->_LinkedShaders[i]; - sh->BufferInterfaceBlocks[stage_index].Binding = binding; - } - } -} - -void -set_uniform_initializer(void *mem_ctx, gl_shader_program *prog, - const char *name, const glsl_type *type, - ir_constant *val, unsigned int boolean_true) -{ - const glsl_type *t_without_array = type->without_array(); - if (type->is_record()) { - ir_constant *field_constant; - - field_constant = (ir_constant *)val->components.get_head(); - - for (unsigned int i = 0; i < type->length; i++) { - const glsl_type *field_type = type->fields.structure[i].type; - const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name, - type->fields.structure[i].name); - set_uniform_initializer(mem_ctx, prog, field_name, - field_type, field_constant, boolean_true); - field_constant = (ir_constant *)field_constant->next; - } - return; - } else if (t_without_array->is_record() || - (type->is_array() && type->fields.array->is_array())) { - const glsl_type *const element_type = type->fields.array; - - for (unsigned int i = 0; i < type->length; i++) { - const char *element_name = ralloc_asprintf(mem_ctx, "%s[%d]", name, i); - - set_uniform_initializer(mem_ctx, prog, element_name, - element_type, val->array_elements[i], - boolean_true); - } - return; - } - - struct gl_uniform_storage *const storage = - get_storage(prog->UniformStorage, - prog->NumUniformStorage, - name); - if (storage == NULL) { - assert(storage != NULL); - return; - } - - if (val->type->is_array()) { - const enum glsl_base_type base_type = - val->array_elements[0]->type->base_type; - const unsigned int elements = val->array_elements[0]->type->components(); - unsigned int idx = 0; - unsigned dmul = (base_type == GLSL_TYPE_DOUBLE) ? 
2 : 1; - - assert(val->type->length >= storage->array_elements); - for (unsigned int i = 0; i < storage->array_elements; i++) { - copy_constant_to_storage(& storage->storage[idx], - val->array_elements[i], - base_type, - elements, - boolean_true); - - idx += elements * dmul; - } - } else { - copy_constant_to_storage(storage->storage, - val, - val->type->base_type, - val->type->components(), - boolean_true); - - if (storage->type->is_sampler()) { - for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { - gl_shader *shader = prog->_LinkedShaders[sh]; - - if (shader && storage->opaque[sh].active) { - unsigned index = storage->opaque[sh].index; - - shader->SamplerUnits[index] = storage->storage[0].i; - } - } - } - } - - storage->initialized = true; -} -} - -void -link_set_uniform_initializers(struct gl_shader_program *prog, - unsigned int boolean_true) -{ - void *mem_ctx = NULL; - - for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_shader *shader = prog->_LinkedShaders[i]; - - if (shader == NULL) - continue; - - foreach_in_list(ir_instruction, node, shader->ir) { - ir_variable *const var = node->as_variable(); - - if (!var || (var->data.mode != ir_var_uniform && - var->data.mode != ir_var_shader_storage)) - continue; - - if (!mem_ctx) - mem_ctx = ralloc_context(NULL); - - if (var->data.explicit_binding) { - const glsl_type *const type = var->type; - - if (type->without_array()->is_sampler() || - type->without_array()->is_image()) { - int binding = var->data.binding; - linker::set_opaque_binding(mem_ctx, prog, var->type, - var->name, &binding); - } else if (var->is_in_buffer_block()) { - const glsl_type *const iface_type = var->get_interface_type(); - - /* If the variable is an array and it is an interface instance, - * we need to set the binding for each array element. Just - * checking that the variable is an array is not sufficient. - * The variable could be an array element of a uniform block - * that lacks an instance name. For example: - * - * uniform U { - * float f[4]; - * }; - * - * In this case "f" would pass is_in_buffer_block (above) and - * type->is_array(), but it will fail is_interface_instance(). - */ - if (var->is_interface_instance() && var->type->is_array()) { - for (unsigned i = 0; i < var->type->length; i++) { - const char *name = - ralloc_asprintf(mem_ctx, "%s[%u]", iface_type->name, i); - - /* Section 4.4.3 (Uniform Block Layout Qualifiers) of the - * GLSL 4.20 spec says: - * - * "If the binding identifier is used with a uniform - * block instanced as an array then the first element - * of the array takes the specified block binding and - * each subsequent element takes the next consecutive - * uniform block binding point." - */ - linker::set_block_binding(prog, name, - var->data.binding + i); - } - } else { - linker::set_block_binding(prog, iface_type->name, - var->data.binding); - } - } else if (type->contains_atomic()) { - /* we don't actually need to do anything. 
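- * Atomic counters have no backing storage here; as noted in the comment
- * on set_opaque_binding above, their bindings are handled elsewhere.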
*/ - } else { - assert(!"Explicit binding not on a sampler, UBO or atomic."); - } - } else if (var->constant_initializer) { - linker::set_uniform_initializer(mem_ctx, prog, var->name, - var->type, var->constant_initializer, - boolean_true); - } - } - } - - ralloc_free(mem_ctx); -} diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp deleted file mode 100644 index 33b2d4c8646..00000000000 --- a/src/glsl/link_uniforms.cpp +++ /dev/null @@ -1,1330 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "main/core.h" -#include "ir.h" -#include "linker.h" -#include "ir_uniform.h" -#include "glsl_symbol_table.h" -#include "program/hash_table.h" -#include "program.h" -#include "util/hash_table.h" - -/** - * \file link_uniforms.cpp - * Assign locations for GLSL uniforms. - * - * \author Ian Romanick - */ - -/** - * Used by linker to indicate uniforms that have no location set. - */ -#define UNMAPPED_UNIFORM_LOC ~0u - -/** - * Count the backing storage requirements for a type - */ -static unsigned -values_for_type(const glsl_type *type) -{ - if (type->is_sampler()) { - return 1; - } else if (type->is_array() && type->fields.array->is_sampler()) { - return type->array_size(); - } else { - return type->component_slots(); - } -} - -void -program_resource_visitor::process(const glsl_type *type, const char *name) -{ - assert(type->without_array()->is_record() - || type->without_array()->is_interface()); - - unsigned record_array_count = 1; - char *name_copy = ralloc_strdup(NULL, name); - unsigned packing = type->interface_packing; - - recursion(type, &name_copy, strlen(name), false, NULL, packing, false, - record_array_count); - ralloc_free(name_copy); -} - -void -program_resource_visitor::process(ir_variable *var) -{ - unsigned record_array_count = 1; - const glsl_type *t = var->type; - const glsl_type *t_without_array = var->type->without_array(); - const bool row_major = - var->data.matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; - - const unsigned packing = var->get_interface_type() ? - var->get_interface_type()->interface_packing : - var->type->interface_packing; - - /* false is always passed for the row_major parameter to the other - * processing functions because no information is available to do - * otherwise. See the warning in linker.h. - */ - - /* Only strdup the name if we actually will need to modify it. 
*/ - if (var->data.from_named_ifc_block_array) { - /* lower_named_interface_blocks created this variable by lowering an - * interface block array to an array variable. For example if the - * original source code was: - * - * out Blk { vec4 bar } foo[3]; - * - * Then the variable is now: - * - * out vec4 bar[3]; - * - * We need to visit each array element using the names constructed like - * so: - * - * Blk[0].bar - * Blk[1].bar - * Blk[2].bar - */ - assert(t->is_array()); - const glsl_type *ifc_type = var->get_interface_type(); - char *name = ralloc_strdup(NULL, ifc_type->name); - size_t name_length = strlen(name); - for (unsigned i = 0; i < t->length; i++) { - size_t new_length = name_length; - ralloc_asprintf_rewrite_tail(&name, &new_length, "[%u].%s", i, - var->name); - /* Note: row_major is only meaningful for uniform blocks, and - * lowering is only applied to non-uniform interface blocks, so we - * can safely pass false for row_major. - */ - recursion(var->type, &name, new_length, row_major, NULL, packing, - false, record_array_count); - } - ralloc_free(name); - } else if (var->data.from_named_ifc_block_nonarray) { - /* lower_named_interface_blocks created this variable by lowering a - * named interface block (non-array) to an ordinary variable. For - * example if the original source code was: - * - * out Blk { vec4 bar } foo; - * - * Then the variable is now: - * - * out vec4 bar; - * - * We need to visit this variable using the name: - * - * Blk.bar - */ - const glsl_type *ifc_type = var->get_interface_type(); - char *name = ralloc_asprintf(NULL, "%s.%s", ifc_type->name, var->name); - /* Note: row_major is only meaningful for uniform blocks, and lowering - * is only applied to non-uniform interface blocks, so we can safely - * pass false for row_major. - */ - recursion(var->type, &name, strlen(name), row_major, NULL, packing, - false, record_array_count); - ralloc_free(name); - } else if (t_without_array->is_record() || - (t->is_array() && t->fields.array->is_array())) { - char *name = ralloc_strdup(NULL, var->name); - recursion(var->type, &name, strlen(name), row_major, NULL, packing, - false, record_array_count); - ralloc_free(name); - } else if (t_without_array->is_interface()) { - char *name = ralloc_strdup(NULL, t_without_array->name); - recursion(var->type, &name, strlen(name), row_major, NULL, packing, - false, record_array_count); - ralloc_free(name); - } else { - this->set_record_array_count(record_array_count); - this->visit_field(t, var->name, row_major, NULL, packing, false); - } -} - -void -program_resource_visitor::recursion(const glsl_type *t, char **name, - size_t name_length, bool row_major, - const glsl_type *record_type, - const unsigned packing, - bool last_field, - unsigned record_array_count) -{ - /* Records need to have each field processed individually. - * - * Arrays of records need to have each array element processed - * individually, then each field of the resulting array elements processed - * individually. - */ - if (t->is_record() || t->is_interface()) { - if (record_type == NULL && t->is_record()) - record_type = t; - - if (t->is_record()) - this->enter_record(t, *name, row_major, packing); - - for (unsigned i = 0; i < t->length; i++) { - const char *field = t->fields.structure[i].name; - size_t new_length = name_length; - - if (t->fields.structure[i].type->is_record()) - this->visit_field(&t->fields.structure[i]); - - /* Append '.field' to the current variable name. 
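- * ("outer.field" in general, or just "field" at the top level; see the
- * two branches below.)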
*/ - if (name_length == 0) { - ralloc_asprintf_rewrite_tail(name, &new_length, "%s", field); - } else { - ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); - } - - /* The layout of structures at the top level of the block is set - * during parsing. For matrices contained in multiple levels of - * structures in the block, the inner structures have no layout. - * These cases must potentially inherit the layout from the outer - * levels. - */ - bool field_row_major = row_major; - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(t->fields.structure[i].matrix_layout); - if (matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR) { - field_row_major = true; - } else if (matrix_layout == GLSL_MATRIX_LAYOUT_COLUMN_MAJOR) { - field_row_major = false; - } - - recursion(t->fields.structure[i].type, name, new_length, - field_row_major, - record_type, - packing, - (i + 1) == t->length, record_array_count); - - /* Only the first leaf-field of the record gets called with the - * record type pointer. - */ - record_type = NULL; - } - - if (t->is_record()) { - (*name)[name_length] = '\0'; - this->leave_record(t, *name, row_major, packing); - } - } else if (t->without_array()->is_record() || - t->without_array()->is_interface() || - (t->is_array() && t->fields.array->is_array())) { - if (record_type == NULL && t->fields.array->is_record()) - record_type = t->fields.array; - - unsigned length = t->length; - /* Shader storage block unsized arrays: add subscript [0] to variable - * names */ - if (t->is_unsized_array()) - length = 1; - - record_array_count *= length; - - for (unsigned i = 0; i < length; i++) { - size_t new_length = name_length; - - /* Append the subscript to the current variable name */ - ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); - - recursion(t->fields.array, name, new_length, row_major, - record_type, - packing, - (i + 1) == t->length, record_array_count); - - /* Only the first leaf-field of the record gets called with the - * record type pointer. - */ - record_type = NULL; - } - } else { - this->set_record_array_count(record_array_count); - this->visit_field(t, *name, row_major, record_type, packing, last_field); - } -} - -void -program_resource_visitor::visit_field(const glsl_type *type, const char *name, - bool row_major, - const glsl_type *, - const unsigned, - bool /* last_field */) -{ - visit_field(type, name, row_major); -} - -void -program_resource_visitor::visit_field(const glsl_struct_field *field) -{ - (void) field; - /* empty */ -} - -void -program_resource_visitor::enter_record(const glsl_type *, const char *, bool, - const unsigned) -{ -} - -void -program_resource_visitor::leave_record(const glsl_type *, const char *, bool, - const unsigned) -{ -} - -void -program_resource_visitor::set_record_array_count(unsigned) -{ -} - -namespace { - -/** - * Class to help calculate the storage requirements for a set of uniforms - * - * As uniforms are added to the active set the number of active uniforms and - * the storage requirements for those uniforms are accumulated. The active - * uniforms are added to the hash table supplied to the constructor. - * - * If the same uniform is added multiple times (i.e., once for each shader - * target), it will only be accounted once. 
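- *
- * For example, a (hypothetical) "uniform mat4 mvp;" declared in both the
- * vertex and fragment shaders gets a single entry in the map, while its
- * component count is still added to each shader's per-stage totals (see
- * the comment in visit_field below).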
- */ -class count_uniform_size : public program_resource_visitor { -public: - count_uniform_size(struct string_to_uint_map *map, - struct string_to_uint_map *hidden_map) - : num_active_uniforms(0), num_hidden_uniforms(0), num_values(0), - num_shader_samplers(0), num_shader_images(0), - num_shader_uniform_components(0), num_shader_subroutines(0), - is_ubo_var(false), is_shader_storage(false), map(map), - hidden_map(hidden_map) - { - /* empty */ - } - - void start_shader() - { - this->num_shader_samplers = 0; - this->num_shader_images = 0; - this->num_shader_uniform_components = 0; - this->num_shader_subroutines = 0; - } - - void process(ir_variable *var) - { - this->current_var = var; - this->is_ubo_var = var->is_in_buffer_block(); - this->is_shader_storage = var->is_in_shader_storage_block(); - if (var->is_interface_instance()) - program_resource_visitor::process(var->get_interface_type(), - var->get_interface_type()->name); - else - program_resource_visitor::process(var); - } - - /** - * Total number of active uniforms counted - */ - unsigned num_active_uniforms; - - unsigned num_hidden_uniforms; - - /** - * Number of data values required to back the storage for the active uniforms - */ - unsigned num_values; - - /** - * Number of samplers used - */ - unsigned num_shader_samplers; - - /** - * Number of images used - */ - unsigned num_shader_images; - - /** - * Number of uniforms used in the current shader - */ - unsigned num_shader_uniform_components; - - /** - * Number of subroutine uniforms used - */ - unsigned num_shader_subroutines; - - bool is_ubo_var; - bool is_shader_storage; - - struct string_to_uint_map *map; - -private: - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major) - { - assert(!type->without_array()->is_record()); - assert(!type->without_array()->is_interface()); - assert(!(type->is_array() && type->fields.array->is_array())); - - (void) row_major; - - /* Count the number of samplers regardless of whether the uniform is - * already in the hash table. The hash table prevents adding the same - * uniform for multiple shader targets, but in this case we want to - * count it for each shader target. - */ - const unsigned values = values_for_type(type); - if (type->contains_subroutine()) { - this->num_shader_subroutines += values; - } else if (type->contains_sampler()) { - this->num_shader_samplers += values; - } else if (type->contains_image()) { - this->num_shader_images += values; - - /* As drivers are likely to represent image uniforms as - * scalar indices, count them against the limit of uniform - * components in the default block. The spec allows image - * uniforms to use up no more than one scalar slot. - */ - if(!is_shader_storage) - this->num_shader_uniform_components += values; - } else { - /* Accumulate the total number of uniform slots used by this shader. - * Note that samplers do not count against this limit because they - * don't use any storage on current hardware. - */ - if (!is_ubo_var && !is_shader_storage) - this->num_shader_uniform_components += values; - } - - /* If the uniform is already in the map, there's nothing more to do. - */ - unsigned id; - if (this->map->get(id, name)) - return; - - if (this->current_var->data.how_declared == ir_var_hidden) { - this->hidden_map->put(this->num_hidden_uniforms, name); - this->num_hidden_uniforms++; - } else { - this->map->put(this->num_active_uniforms-this->num_hidden_uniforms, - name); - } - - /* Each leaf uniform occupies one entry in the list of active - * uniforms. 
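-       * A "uniform vec4 colors[8];", for instance, adds one entry to the
-       * list of active uniforms but accounts for 32 backing data values.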
- */ - this->num_active_uniforms++; - this->num_values += values; - } - - struct string_to_uint_map *hidden_map; - - /** - * Current variable being processed. - */ - ir_variable *current_var; -}; - -} /* anonymous namespace */ - -/** - * Class to help parcel out pieces of backing storage to uniforms - * - * Each uniform processed has some range of the \c gl_constant_value - * structures associated with it. The association is done by finding - * the uniform in the \c string_to_uint_map and using the value from - * the map to connect that slot in the \c gl_uniform_storage table - * with the next available slot in the \c gl_constant_value array. - * - * \warning - * This class assumes that every uniform that will be processed is - * already in the \c string_to_uint_map. In addition, it assumes that - * the \c gl_uniform_storage and \c gl_constant_value arrays are "big - * enough." - */ -class parcel_out_uniform_storage : public program_resource_visitor { -public: - parcel_out_uniform_storage(struct string_to_uint_map *map, - struct gl_uniform_storage *uniforms, - union gl_constant_value *values) - : map(map), uniforms(uniforms), values(values) - { - } - - void start_shader(gl_shader_stage shader_type) - { - assert(shader_type < MESA_SHADER_STAGES); - this->shader_type = shader_type; - - this->shader_samplers_used = 0; - this->shader_shadow_samplers = 0; - this->next_sampler = 0; - this->next_image = 0; - this->next_subroutine = 0; - this->record_array_count = 1; - memset(this->targets, 0, sizeof(this->targets)); - } - - void set_and_process(struct gl_shader_program *prog, - ir_variable *var) - { - current_var = var; - field_counter = 0; - this->record_next_sampler = new string_to_uint_map; - - ubo_block_index = -1; - if (var->is_in_buffer_block()) { - if (var->is_interface_instance() && var->type->is_array()) { - unsigned l = strlen(var->get_interface_type()->name); - - for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { - if (strncmp(var->get_interface_type()->name, - prog->BufferInterfaceBlocks[i].Name, - l) == 0 - && prog->BufferInterfaceBlocks[i].Name[l] == '[') { - ubo_block_index = i; - break; - } - } - } else { - for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { - if (strcmp(var->get_interface_type()->name, - prog->BufferInterfaceBlocks[i].Name) == 0) { - ubo_block_index = i; - break; - } - } - } - assert(ubo_block_index != -1); - - /* Uniform blocks that were specified with an instance name must be - * handled a little bit differently. The name of the variable is the - * name used to reference the uniform block instead of being the name - * of a variable within the block. Therefore, searching for the name - * within the block will fail. - */ - if (var->is_interface_instance()) { - ubo_byte_offset = 0; - process(var->get_interface_type(), - var->get_interface_type()->name); - } else { - const struct gl_uniform_block *const block = - &prog->BufferInterfaceBlocks[ubo_block_index]; - - assert(var->data.location != -1); - - const struct gl_uniform_buffer_variable *const ubo_var = - &block->Uniforms[var->data.location]; - - ubo_byte_offset = ubo_var->Offset; - process(var); - } - } else { - /* Store any explicit location and reset data location so we can - * reuse this variable for storing the uniform slot number. 
- */ - this->explicit_location = current_var->data.location; - current_var->data.location = -1; - - process(var); - } - delete this->record_next_sampler; - } - - int ubo_block_index; - int ubo_byte_offset; - gl_shader_stage shader_type; - -private: - void handle_samplers(const glsl_type *base_type, - struct gl_uniform_storage *uniform, const char *name) - { - if (base_type->is_sampler()) { - uniform->opaque[shader_type].active = true; - - /* Handle multiple samplers inside struct arrays */ - if (this->record_array_count > 1) { - unsigned inner_array_size = MAX2(1, uniform->array_elements); - char *name_copy = ralloc_strdup(NULL, name); - - /* Remove all array subscripts from the sampler name */ - char *str_start; - const char *str_end; - while((str_start = strchr(name_copy, '[')) && - (str_end = strchr(name_copy, ']'))) { - memmove(str_start, str_end + 1, 1 + strlen(str_end)); - } - - unsigned index = 0; - if (this->record_next_sampler->get(index, name_copy)) { - /* In this case, we've already seen this uniform so we just use - * the next sampler index recorded the last time we visited. - */ - uniform->opaque[shader_type].index = index; - index = inner_array_size + uniform->opaque[shader_type].index; - this->record_next_sampler->put(index, name_copy); - - ralloc_free(name_copy); - /* Return as everything else has already been initialised in a - * previous pass. - */ - return; - } else { - /* We've never seen this uniform before so we need to allocate - * enough indices to store it. - * - * Nested struct arrays behave like arrays of arrays so we need - * to increase the index by the total number of elements of the - * sampler in case there is more than one sampler inside the - * structs. This allows the offset to be easily calculated for - * indirect indexing. - */ - uniform->opaque[shader_type].index = this->next_sampler; - this->next_sampler += - inner_array_size * this->record_array_count; - - /* Store the next index for future passes over the struct array - */ - index = uniform->opaque[shader_type].index + inner_array_size; - this->record_next_sampler->put(index, name_copy); - ralloc_free(name_copy); - } - } else { - /* Increment the sampler by 1 for non-arrays and by the number of - * array elements for arrays. - */ - uniform->opaque[shader_type].index = this->next_sampler; - this->next_sampler += MAX2(1, uniform->array_elements); - } - - const gl_texture_index target = base_type->sampler_index(); - const unsigned shadow = base_type->sampler_shadow; - for (unsigned i = uniform->opaque[shader_type].index; - i < MIN2(this->next_sampler, MAX_SAMPLERS); - i++) { - this->targets[i] = target; - this->shader_samplers_used |= 1U << i; - this->shader_shadow_samplers |= shadow << i; - } - } - } - - void handle_images(const glsl_type *base_type, - struct gl_uniform_storage *uniform) - { - if (base_type->is_image()) { - uniform->opaque[shader_type].index = this->next_image; - uniform->opaque[shader_type].active = true; - - /* Increment the image index by 1 for non-arrays and by the - * number of array elements for arrays. - */ - this->next_image += MAX2(1, uniform->array_elements); - - } - } - - void handle_subroutines(const glsl_type *base_type, - struct gl_uniform_storage *uniform) - { - if (base_type->is_subroutine()) { - uniform->opaque[shader_type].index = this->next_subroutine; - uniform->opaque[shader_type].active = true; - - /* Increment the subroutine index by 1 for non-arrays and by the - * number of array elements for arrays. 
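-       * For example, "subroutine uniform func_t f[4];" advances
-       * next_subroutine by 4, while a scalar subroutine uniform advances
-       * it by 1.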
- */ - this->next_subroutine += MAX2(1, uniform->array_elements); - - } - } - - virtual void set_record_array_count(unsigned record_array_count) - { - this->record_array_count = record_array_count; - } - - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major) - { - (void) type; - (void) name; - (void) row_major; - assert(!"Should not get here."); - } - - virtual void enter_record(const glsl_type *type, const char *, - bool row_major, const unsigned packing) { - assert(type->is_record()); - if (this->ubo_block_index == -1) - return; - if (packing == GLSL_INTERFACE_PACKING_STD430) - this->ubo_byte_offset = glsl_align( - this->ubo_byte_offset, type->std430_base_alignment(row_major)); - else - this->ubo_byte_offset = glsl_align( - this->ubo_byte_offset, type->std140_base_alignment(row_major)); - } - - virtual void leave_record(const glsl_type *type, const char *, - bool row_major, const unsigned packing) { - assert(type->is_record()); - if (this->ubo_block_index == -1) - return; - if (packing == GLSL_INTERFACE_PACKING_STD430) - this->ubo_byte_offset = glsl_align( - this->ubo_byte_offset, type->std430_base_alignment(row_major)); - else - this->ubo_byte_offset = glsl_align( - this->ubo_byte_offset, type->std140_base_alignment(row_major)); - } - - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major, const glsl_type *record_type, - const unsigned packing, - bool /* last_field */) - { - assert(!type->without_array()->is_record()); - assert(!type->without_array()->is_interface()); - assert(!(type->is_array() && type->fields.array->is_array())); - - unsigned id; - bool found = this->map->get(id, name); - assert(found); - - if (!found) - return; - - const glsl_type *base_type; - if (type->is_array()) { - this->uniforms[id].array_elements = type->length; - base_type = type->fields.array; - } else { - this->uniforms[id].array_elements = 0; - base_type = type; - } - - /* Initialise opaque data */ - this->uniforms[id].opaque[shader_type].index = ~0; - this->uniforms[id].opaque[shader_type].active = false; - - /* This assigns uniform indices to sampler and image uniforms. */ - handle_samplers(base_type, &this->uniforms[id], name); - handle_images(base_type, &this->uniforms[id]); - handle_subroutines(base_type, &this->uniforms[id]); - - /* For array of arrays or struct arrays the base location may have - * already been set so don't set it again. - */ - if (ubo_block_index == -1 && current_var->data.location == -1) { - current_var->data.location = id; - } - - /* If there is already storage associated with this uniform or if the - * uniform is set as builtin, it means that it was set while processing - * an earlier shader stage. For example, we may be processing the - * uniform in the fragment shader, but the uniform was already processed - * in the vertex shader. - */ - if (this->uniforms[id].storage != NULL || this->uniforms[id].builtin) { - return; - } - - /* Assign explicit locations. */ - if (current_var->data.explicit_location) { - /* Set sequential locations for struct fields. 
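-          * For example, given "layout(location = 2)" on a struct uniform
-          * holding a vec4 and a float, the vec4 field is remapped to
-          * location 2 and the float field to location 3 via field_counter.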
-          */
-         if (current_var->type->without_array()->is_record() ||
-             current_var->type->is_array_of_arrays()) {
-            const unsigned entries = MAX2(1, this->uniforms[id].array_elements);
-            this->uniforms[id].remap_location =
-               this->explicit_location + field_counter;
-            field_counter += entries;
-         } else {
-            this->uniforms[id].remap_location = this->explicit_location;
-         }
-      } else {
-         /* Initialize to indicate that no location is set */
-         this->uniforms[id].remap_location = UNMAPPED_UNIFORM_LOC;
-      }
-
-      this->uniforms[id].name = ralloc_strdup(this->uniforms, name);
-      this->uniforms[id].type = base_type;
-      this->uniforms[id].initialized = 0;
-      this->uniforms[id].num_driver_storage = 0;
-      this->uniforms[id].driver_storage = NULL;
-      this->uniforms[id].atomic_buffer_index = -1;
-      this->uniforms[id].hidden =
-         current_var->data.how_declared == ir_var_hidden;
-      this->uniforms[id].builtin = is_gl_identifier(name);
-
-      /* Do not assign storage if the uniform is builtin */
-      if (!this->uniforms[id].builtin)
-         this->uniforms[id].storage = this->values;
-
-      this->uniforms[id].is_shader_storage =
-         current_var->is_in_shader_storage_block();
-
-      if (this->ubo_block_index != -1) {
-         this->uniforms[id].block_index = this->ubo_block_index;
-
-         unsigned alignment = type->std140_base_alignment(row_major);
-         if (packing == GLSL_INTERFACE_PACKING_STD430)
-            alignment = type->std430_base_alignment(row_major);
-         this->ubo_byte_offset = glsl_align(this->ubo_byte_offset, alignment);
-         this->uniforms[id].offset = this->ubo_byte_offset;
-         if (packing == GLSL_INTERFACE_PACKING_STD430)
-            this->ubo_byte_offset += type->std430_size(row_major);
-         else
-            this->ubo_byte_offset += type->std140_size(row_major);
-
-         if (type->is_array()) {
-            if (packing == GLSL_INTERFACE_PACKING_STD430)
-               this->uniforms[id].array_stride =
-                  type->without_array()->std430_array_stride(row_major);
-            else
-               this->uniforms[id].array_stride =
-                  glsl_align(type->without_array()->std140_size(row_major),
-                             16);
-         } else {
-            this->uniforms[id].array_stride = 0;
-         }
-
-         if (type->without_array()->is_matrix()) {
-            const glsl_type *matrix = type->without_array();
-            const unsigned N = matrix->base_type == GLSL_TYPE_DOUBLE ? 8 : 4;
-            const unsigned items =
-               row_major ? matrix->matrix_columns : matrix->vector_elements;
-
-            assert(items <= 4);
-            if (packing == GLSL_INTERFACE_PACKING_STD430)
-               this->uniforms[id].matrix_stride = items < 3 ? items * N :
-                                                    glsl_align(items * N, 16);
-            else
-               this->uniforms[id].matrix_stride = glsl_align(items * N, 16);
-            this->uniforms[id].row_major = row_major;
-         } else {
-            this->uniforms[id].matrix_stride = 0;
-            this->uniforms[id].row_major = false;
-         }
-      } else {
-         this->uniforms[id].block_index = -1;
-         this->uniforms[id].offset = -1;
-         this->uniforms[id].array_stride = -1;
-         this->uniforms[id].matrix_stride = -1;
-         this->uniforms[id].row_major = false;
-      }
-
-      this->values += values_for_type(type);
-   }
-
-   struct string_to_uint_map *map;
-
-   struct gl_uniform_storage *uniforms;
-   unsigned next_sampler;
-   unsigned next_image;
-   unsigned next_subroutine;
-
-   /**
-    * Field counter is used to ensure that uniform structures with explicit
-    * locations get sequential locations.
-    */
-   unsigned field_counter;
-
-   /**
-    * Current variable being processed.
-    */
-   ir_variable *current_var;
-
-   /* Used to store the explicit location from current_var so that we can
-    * reuse the location field for storing the uniform slot id.
-    */
-   int explicit_location;
-
-   /* Stores total struct array elements including nested structs */
-   unsigned record_array_count;
-
-   /* Map for temporarily storing next sampler index when handling samplers in
-    * struct arrays.
-    */
-   struct string_to_uint_map *record_next_sampler;
-
-public:
-   union gl_constant_value *values;
-
-   gl_texture_index targets[MAX_SAMPLERS];
-
-   /**
-    * Mask of samplers used by the current shader stage.
-    */
-   unsigned shader_samplers_used;
-
-   /**
-    * Mask of samplers used by the current shader stage for shadows.
-    */
-   unsigned shader_shadow_samplers;
-};
-
-/**
- * Merges a uniform block into an array of uniform blocks that may or
- * may not already contain a copy of it.
- *
- * Returns the index of the new block in the array.
- */
-int
-link_cross_validate_uniform_block(void *mem_ctx,
-                                  struct gl_uniform_block **linked_blocks,
-                                  unsigned int *num_linked_blocks,
-                                  struct gl_uniform_block *new_block)
-{
-   for (unsigned int i = 0; i < *num_linked_blocks; i++) {
-      struct gl_uniform_block *old_block = &(*linked_blocks)[i];
-
-      if (strcmp(old_block->Name, new_block->Name) == 0)
-         return link_uniform_blocks_are_compatible(old_block, new_block)
-            ? i : -1;
-   }
-
-   *linked_blocks = reralloc(mem_ctx, *linked_blocks,
-                             struct gl_uniform_block,
-                             *num_linked_blocks + 1);
-   int linked_block_index = (*num_linked_blocks)++;
-   struct gl_uniform_block *linked_block = &(*linked_blocks)[linked_block_index];
-
-   memcpy(linked_block, new_block, sizeof(*new_block));
-   linked_block->Uniforms = ralloc_array(*linked_blocks,
-                                         struct gl_uniform_buffer_variable,
-                                         linked_block->NumUniforms);
-
-   memcpy(linked_block->Uniforms,
-          new_block->Uniforms,
-          sizeof(*linked_block->Uniforms) * linked_block->NumUniforms);
-
-   for (unsigned int i = 0; i < linked_block->NumUniforms; i++) {
-      struct gl_uniform_buffer_variable *ubo_var =
-         &linked_block->Uniforms[i];
-
-      if (ubo_var->Name == ubo_var->IndexName) {
-         ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name);
-         ubo_var->IndexName = ubo_var->Name;
-      } else {
-         ubo_var->Name = ralloc_strdup(*linked_blocks, ubo_var->Name);
-         ubo_var->IndexName = ralloc_strdup(*linked_blocks, ubo_var->IndexName);
-      }
-   }
-
-   return linked_block_index;
-}
-
-/**
- * Walks the IR and updates the references to uniform blocks in the
- * ir_variables to point at the linked shader's list (previously, they
- * would point at the uniform block list in one of the pre-linked
- * shaders).
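- *
- * Variables inside instance-less blocks are matched by name against the
- * block's member list; a struct member is compared up to the first '.'
- * and an array of structs up to the first '[' (the sentinel characters
- * used below).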
- */
-static void
-link_update_uniform_buffer_variables(struct gl_shader *shader)
-{
-   foreach_in_list(ir_instruction, node, shader->ir) {
-      ir_variable *const var = node->as_variable();
-
-      if ((var == NULL) || !var->is_in_buffer_block())
-         continue;
-
-      assert(var->data.mode == ir_var_uniform ||
-             var->data.mode == ir_var_shader_storage);
-
-      if (var->is_interface_instance()) {
-         var->data.location = 0;
-         continue;
-      }
-
-      bool found = false;
-      char sentinel = '\0';
-
-      if (var->type->is_record()) {
-         sentinel = '.';
-      } else if (var->type->is_array() && (var->type->fields.array->is_array()
-                 || var->type->without_array()->is_record())) {
-         sentinel = '[';
-      }
-
-      const unsigned l = strlen(var->name);
-      for (unsigned i = 0; i < shader->NumBufferInterfaceBlocks; i++) {
-         for (unsigned j = 0; j < shader->BufferInterfaceBlocks[i].NumUniforms; j++) {
-            if (sentinel) {
-               const char *begin = shader->BufferInterfaceBlocks[i].Uniforms[j].Name;
-               const char *end = strchr(begin, sentinel);
-
-               if (end == NULL)
-                  continue;
-
-               if ((ptrdiff_t) l != (end - begin))
-                  continue;
-
-               if (strncmp(var->name, begin, l) == 0) {
-                  found = true;
-                  var->data.location = j;
-                  break;
-               }
-            } else if (!strcmp(var->name,
-                               shader->BufferInterfaceBlocks[i].Uniforms[j].Name)) {
-               found = true;
-               var->data.location = j;
-               break;
-            }
-         }
-         if (found)
-            break;
-      }
-      assert(found);
-   }
-}
-
-static void
-link_set_image_access_qualifiers(struct gl_shader_program *prog,
-                                 gl_shader *sh, unsigned shader_stage,
-                                 ir_variable *var, const glsl_type *type,
-                                 char **name, size_t name_length)
-{
-   /* Handle arrays of arrays */
-   if (type->is_array() && type->fields.array->is_array()) {
-      for (unsigned i = 0; i < type->length; i++) {
-         size_t new_length = name_length;
-
-         /* Append the subscript to the current variable name */
-         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
-
-         link_set_image_access_qualifiers(prog, sh, shader_stage, var,
-                                          type->fields.array, name,
-                                          new_length);
-      }
-   } else {
-      unsigned id = 0;
-      bool found = prog->UniformHash->get(id, *name);
-      assert(found);
-      (void) found;
-      const gl_uniform_storage *storage = &prog->UniformStorage[id];
-      const unsigned index = storage->opaque[shader_stage].index;
-      const GLenum access = (var->data.image_read_only ? GL_READ_ONLY :
-                             var->data.image_write_only ? GL_WRITE_ONLY :
-                             GL_READ_WRITE);
-
-      for (unsigned j = 0; j < MAX2(1, storage->array_elements); ++j)
-         sh->ImageAccess[index + j] = access;
-   }
-}
-
-/**
- * Combine the hidden uniform hash map with the uniform hash map so that the
- * hidden uniforms will be given indices at the end of the uniform storage
- * array.
- */
-static void
-assign_hidden_uniform_slot_id(const char *name, unsigned hidden_id,
-                              void *closure)
-{
-   count_uniform_size *uniform_size = (count_uniform_size *) closure;
-   unsigned hidden_uniform_start = uniform_size->num_active_uniforms -
-      uniform_size->num_hidden_uniforms;
-
-   uniform_size->map->put(hidden_uniform_start + hidden_id, name);
-}
-
-void
-link_assign_uniform_locations(struct gl_shader_program *prog,
-                              unsigned int boolean_true)
-{
-   ralloc_free(prog->UniformStorage);
-   prog->UniformStorage = NULL;
-   prog->NumUniformStorage = 0;
-
-   if (prog->UniformHash != NULL) {
-      prog->UniformHash->clear();
-   } else {
-      prog->UniformHash = new string_to_uint_map;
-   }
-
-   /* First pass: Count the uniform resources used by the user-defined
-    * uniforms. While this happens, each active uniform will have an index
-    * assigned to it.
- * - * Note: this is *NOT* the index that is returned to the application by - * glGetUniformLocation. - */ - struct string_to_uint_map *hiddenUniforms = new string_to_uint_map; - count_uniform_size uniform_size(prog->UniformHash, hiddenUniforms); - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_shader *sh = prog->_LinkedShaders[i]; - - if (sh == NULL) - continue; - - /* Uniforms that lack an initializer in the shader code have an initial - * value of zero. This includes sampler uniforms. - * - * Page 24 (page 30 of the PDF) of the GLSL 1.20 spec says: - * - * "The link time initial value is either the value of the variable's - * initializer, if present, or 0 if no initializer is present. Sampler - * types cannot have initializers." - */ - memset(sh->SamplerUnits, 0, sizeof(sh->SamplerUnits)); - memset(sh->ImageUnits, 0, sizeof(sh->ImageUnits)); - - link_update_uniform_buffer_variables(sh); - - /* Reset various per-shader target counts. - */ - uniform_size.start_shader(); - - foreach_in_list(ir_instruction, node, sh->ir) { - ir_variable *const var = node->as_variable(); - - if ((var == NULL) || (var->data.mode != ir_var_uniform && - var->data.mode != ir_var_shader_storage)) - continue; - - uniform_size.process(var); - } - - sh->num_samplers = uniform_size.num_shader_samplers; - sh->NumImages = uniform_size.num_shader_images; - sh->num_uniform_components = uniform_size.num_shader_uniform_components; - sh->num_combined_uniform_components = sh->num_uniform_components; - - for (unsigned i = 0; i < sh->NumBufferInterfaceBlocks; i++) { - if (!sh->BufferInterfaceBlocks[i].IsShaderStorage) { - sh->num_combined_uniform_components += - sh->BufferInterfaceBlocks[i].UniformBufferSize / 4; - } - } - } - - const unsigned num_uniforms = uniform_size.num_active_uniforms; - const unsigned num_data_slots = uniform_size.num_values; - const unsigned hidden_uniforms = uniform_size.num_hidden_uniforms; - - /* assign hidden uniforms a slot id */ - hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size); - delete hiddenUniforms; - - /* On the outside chance that there were no uniforms, bail out. - */ - if (num_uniforms == 0) - return; - - struct gl_uniform_storage *uniforms = - rzalloc_array(prog, struct gl_uniform_storage, num_uniforms); - union gl_constant_value *data = - rzalloc_array(uniforms, union gl_constant_value, num_data_slots); -#ifndef NDEBUG - union gl_constant_value *data_end = &data[num_data_slots]; -#endif - - parcel_out_uniform_storage parcel(prog->UniformHash, uniforms, data); - - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] == NULL) - continue; - - parcel.start_shader((gl_shader_stage)i); - - foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) { - ir_variable *const var = node->as_variable(); - - if ((var == NULL) || (var->data.mode != ir_var_uniform && - var->data.mode != ir_var_shader_storage)) - continue; - - parcel.set_and_process(prog, var); - } - - prog->_LinkedShaders[i]->active_samplers = parcel.shader_samplers_used; - prog->_LinkedShaders[i]->shadow_samplers = parcel.shader_shadow_samplers; - - STATIC_ASSERT(sizeof(prog->_LinkedShaders[i]->SamplerTargets) == - sizeof(parcel.targets)); - memcpy(prog->_LinkedShaders[i]->SamplerTargets, parcel.targets, - sizeof(prog->_LinkedShaders[i]->SamplerTargets)); - } - - /* Reserve all the explicit locations of the active uniforms. 
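-    *
-    * Uniforms declared with "layout(location = N)" already carry a
-    * remap_location, so their slots in UniformRemapTable are claimed
-    * before the remaining uniforms are placed.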
-    */
-   for (unsigned i = 0; i < num_uniforms; i++) {
-      if (uniforms[i].type->is_subroutine() ||
-          uniforms[i].is_shader_storage)
-         continue;
-
-      if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) {
-         /* How many new entries for this uniform? */
-         const unsigned entries = MAX2(1, uniforms[i].array_elements);
-
-         /* Set remap table entries to point to the correct
-          * gl_uniform_storage. */
-         for (unsigned j = 0; j < entries; j++) {
-            unsigned element_loc = uniforms[i].remap_location + j;
-            assert(prog->UniformRemapTable[element_loc] ==
-                   INACTIVE_UNIFORM_EXPLICIT_LOCATION);
-            prog->UniformRemapTable[element_loc] = &uniforms[i];
-         }
-      }
-   }
-
-   /* Reserve locations for the rest of the uniforms. */
-   for (unsigned i = 0; i < num_uniforms; i++) {
-
-      if (uniforms[i].type->is_subroutine() ||
-          uniforms[i].is_shader_storage)
-         continue;
-
-      /* Built-in uniforms should not get any location. */
-      if (uniforms[i].builtin)
-         continue;
-
-      /* Explicit ones have been set already. */
-      if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC)
-         continue;
-
-      /* how many new entries for this uniform? */
-      const unsigned entries = MAX2(1, uniforms[i].array_elements);
-
-      /* resize remap table to fit new entries */
-      prog->UniformRemapTable =
-         reralloc(prog,
-                  prog->UniformRemapTable,
-                  gl_uniform_storage *,
-                  prog->NumUniformRemapTable + entries);
-
-      /* set pointers for this uniform */
-      for (unsigned j = 0; j < entries; j++)
-         prog->UniformRemapTable[prog->NumUniformRemapTable+j] = &uniforms[i];
-
-      /* set the base location in remap table for the uniform */
-      uniforms[i].remap_location = prog->NumUniformRemapTable;
-
-      prog->NumUniformRemapTable += entries;
-   }
-
-   /* Reserve all the explicit locations of the active subroutine uniforms. */
-   for (unsigned i = 0; i < num_uniforms; i++) {
-      if (!uniforms[i].type->is_subroutine())
-         continue;
-
-      if (uniforms[i].remap_location == UNMAPPED_UNIFORM_LOC)
-         continue;
-
-      for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) {
-         struct gl_shader *sh = prog->_LinkedShaders[j];
-         if (!sh)
-            continue;
-
-         if (!uniforms[i].opaque[j].active)
-            continue;
-
-         /* How many new entries for this uniform? */
-         const unsigned entries = MAX2(1, uniforms[i].array_elements);
-
-         /* Set remap table entries to point to the correct
-          * gl_uniform_storage.
*/ - for (unsigned k = 0; k < entries; k++) { - unsigned element_loc = uniforms[i].remap_location + k; - assert(sh->SubroutineUniformRemapTable[element_loc] == - INACTIVE_UNIFORM_EXPLICIT_LOCATION); - sh->SubroutineUniformRemapTable[element_loc] = &uniforms[i]; - } - } - } - - /* reserve subroutine locations */ - for (unsigned i = 0; i < num_uniforms; i++) { - - if (!uniforms[i].type->is_subroutine()) - continue; - const unsigned entries = MAX2(1, uniforms[i].array_elements); - - if (uniforms[i].remap_location != UNMAPPED_UNIFORM_LOC) - continue; - for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { - struct gl_shader *sh = prog->_LinkedShaders[j]; - if (!sh) - continue; - - if (!uniforms[i].opaque[j].active) - continue; - - sh->SubroutineUniformRemapTable = - reralloc(sh, - sh->SubroutineUniformRemapTable, - gl_uniform_storage *, - sh->NumSubroutineUniformRemapTable + entries); - - for (unsigned k = 0; k < entries; k++) - sh->SubroutineUniformRemapTable[sh->NumSubroutineUniformRemapTable + k] = &uniforms[i]; - uniforms[i].remap_location = sh->NumSubroutineUniformRemapTable; - sh->NumSubroutineUniformRemapTable += entries; - } - } - -#ifndef NDEBUG - for (unsigned i = 0; i < num_uniforms; i++) { - assert(uniforms[i].storage != NULL || uniforms[i].builtin); - } - - assert(parcel.values == data_end); -#endif - - prog->NumUniformStorage = num_uniforms; - prog->NumHiddenUniforms = hidden_uniforms; - prog->UniformStorage = uniforms; - - /** - * Scan the program for image uniforms and store image unit access - * information into the gl_shader data structure. - */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - gl_shader *sh = prog->_LinkedShaders[i]; - - if (sh == NULL) - continue; - - foreach_in_list(ir_instruction, node, sh->ir) { - ir_variable *var = node->as_variable(); - - if (var && var->data.mode == ir_var_uniform && - var->type->contains_image()) { - char *name_copy = ralloc_strdup(NULL, var->name); - link_set_image_access_qualifiers(prog, sh, i, var, var->type, - &name_copy, strlen(var->name)); - ralloc_free(name_copy); - } - } - } - - link_set_uniform_initializers(prog, boolean_true); - - return; -} diff --git a/src/glsl/link_varyings.cpp b/src/glsl/link_varyings.cpp deleted file mode 100644 index 264b69ca619..00000000000 --- a/src/glsl/link_varyings.cpp +++ /dev/null @@ -1,1888 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */
-
-/**
- * \file link_varyings.cpp
- *
- * Linker functions related specifically to linking varyings between shader
- * stages.
- */
-
-
-#include "main/mtypes.h"
-#include "glsl_symbol_table.h"
-#include "glsl_parser_extras.h"
-#include "ir_optimization.h"
-#include "linker.h"
-#include "link_varyings.h"
-#include "main/macros.h"
-#include "program/hash_table.h"
-#include "program.h"
-
-
-/**
- * Get the varying type stripped of the outermost array if we're processing
- * a stage whose varyings are arrays indexed by a vertex number (such as
- * geometry shader inputs).
- */
-static const glsl_type *
-get_varying_type(const ir_variable *var, gl_shader_stage stage)
-{
-   const glsl_type *type = var->type;
-
-   if (!var->data.patch &&
-       ((var->data.mode == ir_var_shader_out &&
-         stage == MESA_SHADER_TESS_CTRL) ||
-        (var->data.mode == ir_var_shader_in &&
-         (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
-          stage == MESA_SHADER_GEOMETRY)))) {
-      assert(type->is_array());
-      type = type->fields.array;
-   }
-
-   return type;
-}
-
-/**
- * Validate the types and qualifiers of an output from one stage against the
- * matching input to another stage.
- */
-static void
-cross_validate_types_and_qualifiers(struct gl_shader_program *prog,
-                                    const ir_variable *input,
-                                    const ir_variable *output,
-                                    gl_shader_stage consumer_stage,
-                                    gl_shader_stage producer_stage)
-{
-   /* Check that the types match between stages.
-    */
-   const glsl_type *type_to_match = input->type;
-
-   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
-   const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
-                                   consumer_stage != MESA_SHADER_FRAGMENT) ||
-                                  consumer_stage == MESA_SHADER_GEOMETRY;
-   if (extra_array_level) {
-      assert(type_to_match->is_array());
-      type_to_match = type_to_match->fields.array;
-   }
-
-   if (type_to_match != output->type) {
-      /* There is a bit of a special case for gl_TexCoord. This
-       * built-in is unsized by default. Applications that variably
-       * access it must redeclare it with a size. There is some
-       * language in the GLSL spec that implies the fragment shader
-       * and vertex shader do not have to agree on this size. Other
-       * drivers behave this way, and one or two applications seem to
-       * rely on it.
-       *
-       * Neither declaration needs to be modified here because the array
-       * sizes are fixed later when update_array_sizes is called.
-       *
-       * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
-       *
-       *     "Unlike user-defined varying variables, the built-in
-       *     varying variables don't have a strict one-to-one
-       *     correspondence between the vertex language and the
-       *     fragment language."
-       */
-      if (!output->type->is_array() || !is_gl_identifier(output->name)) {
-         linker_error(prog,
-                      "%s shader output `%s' declared as type `%s', "
-                      "but %s shader input declared as type `%s'\n",
-                      _mesa_shader_stage_to_string(producer_stage),
-                      output->name,
-                      output->type->name,
-                      _mesa_shader_stage_to_string(consumer_stage),
-                      input->type->name);
-         return;
-      }
-   }
-
-   /* Check that all of the qualifiers match between stages.
-    */
-   if (input->data.centroid != output->data.centroid) {
-      linker_error(prog,
-                   "%s shader output `%s' %s centroid qualifier, "
-                   "but %s shader input %s centroid qualifier\n",
-                   _mesa_shader_stage_to_string(producer_stage),
-                   output->name,
-                   (output->data.centroid) ? "has" : "lacks",
-                   _mesa_shader_stage_to_string(consumer_stage),
-                   (input->data.centroid) ? "has" : "lacks");
"has" : "lacks"); - return; - } - - if (input->data.sample != output->data.sample) { - linker_error(prog, - "%s shader output `%s' %s sample qualifier, " - "but %s shader input %s sample qualifier\n", - _mesa_shader_stage_to_string(producer_stage), - output->name, - (output->data.sample) ? "has" : "lacks", - _mesa_shader_stage_to_string(consumer_stage), - (input->data.sample) ? "has" : "lacks"); - return; - } - - if (input->data.patch != output->data.patch) { - linker_error(prog, - "%s shader output `%s' %s patch qualifier, " - "but %s shader input %s patch qualifier\n", - _mesa_shader_stage_to_string(producer_stage), - output->name, - (output->data.patch) ? "has" : "lacks", - _mesa_shader_stage_to_string(consumer_stage), - (input->data.patch) ? "has" : "lacks"); - return; - } - - if (!prog->IsES && input->data.invariant != output->data.invariant) { - linker_error(prog, - "%s shader output `%s' %s invariant qualifier, " - "but %s shader input %s invariant qualifier\n", - _mesa_shader_stage_to_string(producer_stage), - output->name, - (output->data.invariant) ? "has" : "lacks", - _mesa_shader_stage_to_string(consumer_stage), - (input->data.invariant) ? "has" : "lacks"); - return; - } - - /* GLSL >= 4.40 removes text requiring interpolation qualifiers - * to match cross stage, they must only match within the same stage. - * - * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec: - * - * "It is a link-time error if, within the same stage, the interpolation - * qualifiers of variables of the same name do not match. - * - */ - if (input->data.interpolation != output->data.interpolation && - prog->Version < 440) { - linker_error(prog, - "%s shader output `%s' specifies %s " - "interpolation qualifier, " - "but %s shader input specifies %s " - "interpolation qualifier\n", - _mesa_shader_stage_to_string(producer_stage), - output->name, - interpolation_string(output->data.interpolation), - _mesa_shader_stage_to_string(consumer_stage), - interpolation_string(input->data.interpolation)); - return; - } -} - -/** - * Validate front and back color outputs against single color input - */ -static void -cross_validate_front_and_back_color(struct gl_shader_program *prog, - const ir_variable *input, - const ir_variable *front_color, - const ir_variable *back_color, - gl_shader_stage consumer_stage, - gl_shader_stage producer_stage) -{ - if (front_color != NULL && front_color->data.assigned) - cross_validate_types_and_qualifiers(prog, input, front_color, - consumer_stage, producer_stage); - - if (back_color != NULL && back_color->data.assigned) - cross_validate_types_and_qualifiers(prog, input, back_color, - consumer_stage, producer_stage); -} - -/** - * Validate that outputs from one stage match inputs of another - */ -void -cross_validate_outputs_to_inputs(struct gl_shader_program *prog, - gl_shader *producer, gl_shader *consumer) -{ - glsl_symbol_table parameters; - ir_variable *explicit_locations[MAX_VARYING] = { NULL, }; - - /* Find all shader outputs in the "producer" stage. - */ - foreach_in_list(ir_instruction, node, producer->ir) { - ir_variable *const var = node->as_variable(); - - if ((var == NULL) || (var->data.mode != ir_var_shader_out)) - continue; - - if (!var->data.explicit_location - || var->data.location < VARYING_SLOT_VAR0) - parameters.add_variable(var); - else { - /* User-defined varyings with explicit locations are handled - * differently because they do not need to have matching names. 
- */ - const unsigned idx = var->data.location - VARYING_SLOT_VAR0; - - if (explicit_locations[idx] != NULL) { - linker_error(prog, - "%s shader has multiple outputs explicitly " - "assigned to location %d\n", - _mesa_shader_stage_to_string(producer->Stage), - idx); - return; - } - - explicit_locations[idx] = var; - } - } - - - /* Find all shader inputs in the "consumer" stage. Any variables that have - * matching outputs already in the symbol table must have the same type and - * qualifiers. - * - * Exception: if the consumer is the geometry shader, then the inputs - * should be arrays and the type of the array element should match the type - * of the corresponding producer output. - */ - foreach_in_list(ir_instruction, node, consumer->ir) { - ir_variable *const input = node->as_variable(); - - if ((input == NULL) || (input->data.mode != ir_var_shader_in)) - continue; - - if (strcmp(input->name, "gl_Color") == 0 && input->data.used) { - const ir_variable *const front_color = - parameters.get_variable("gl_FrontColor"); - - const ir_variable *const back_color = - parameters.get_variable("gl_BackColor"); - - cross_validate_front_and_back_color(prog, input, - front_color, back_color, - consumer->Stage, producer->Stage); - } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) { - const ir_variable *const front_color = - parameters.get_variable("gl_FrontSecondaryColor"); - - const ir_variable *const back_color = - parameters.get_variable("gl_BackSecondaryColor"); - - cross_validate_front_and_back_color(prog, input, - front_color, back_color, - consumer->Stage, producer->Stage); - } else { - /* The rules for connecting inputs and outputs change in the presence - * of explicit locations. In this case, we no longer care about the - * names of the variables. Instead, we care only about the - * explicitly assigned location. - */ - ir_variable *output = NULL; - if (input->data.explicit_location - && input->data.location >= VARYING_SLOT_VAR0) { - output = explicit_locations[input->data.location - VARYING_SLOT_VAR0]; - - if (output == NULL) { - linker_error(prog, - "%s shader input `%s' with explicit location " - "has no matching output\n", - _mesa_shader_stage_to_string(consumer->Stage), - input->name); - } - } else { - output = parameters.get_variable(input->name); - } - - if (output != NULL) { - cross_validate_types_and_qualifiers(prog, input, output, - consumer->Stage, producer->Stage); - } else { - /* Check for input vars with unmatched output vars in prev stage - * taking into account that interface blocks could have a matching - * output but with different name, so we ignore them. - */ - assert(!input->data.assigned); - if (input->data.used && !input->get_interface_type() && - !input->data.explicit_location && !prog->SeparateShader) - linker_error(prog, - "%s shader input `%s' " - "has no matching output in the previous stage\n", - _mesa_shader_stage_to_string(consumer->Stage), - input->name); - } - } - } -} - -/** - * Demote shader inputs and outputs that are not used in other stages, and - * remove them via dead code elimination. 
- */ -void -remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object, - gl_shader *sh, - enum ir_variable_mode mode) -{ - if (is_separate_shader_object) - return; - - foreach_in_list(ir_instruction, node, sh->ir) { - ir_variable *const var = node->as_variable(); - - if ((var == NULL) || (var->data.mode != int(mode))) - continue; - - /* A shader 'in' or 'out' variable is only really an input or output if - * its value is used by other shader stages. This will cause the - * variable to have a location assigned. - */ - if (var->data.is_unmatched_generic_inout) { - assert(var->data.mode != ir_var_temporary); - var->data.mode = ir_var_auto; - } - } - - /* Eliminate code that is now dead due to unused inputs/outputs being - * demoted. - */ - while (do_dead_code(sh->ir, false)) - ; - -} - -/** - * Initialize this object based on a string that was passed to - * glTransformFeedbackVaryings. - * - * If the input is mal-formed, this call still succeeds, but it sets - * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var() - * will fail to find any matching variable. - */ -void -tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx, - const char *input) -{ - /* We don't have to be pedantic about what is a valid GLSL variable name, - * because any variable with an invalid name can't exist in the IR anyway. - */ - - this->location = -1; - this->orig_name = input; - this->lowered_builtin_array_variable = none; - this->skip_components = 0; - this->next_buffer_separator = false; - this->matched_candidate = NULL; - this->stream_id = 0; - - if (ctx->Extensions.ARB_transform_feedback3) { - /* Parse gl_NextBuffer. */ - if (strcmp(input, "gl_NextBuffer") == 0) { - this->next_buffer_separator = true; - return; - } - - /* Parse gl_SkipComponents. */ - if (strcmp(input, "gl_SkipComponents1") == 0) - this->skip_components = 1; - else if (strcmp(input, "gl_SkipComponents2") == 0) - this->skip_components = 2; - else if (strcmp(input, "gl_SkipComponents3") == 0) - this->skip_components = 3; - else if (strcmp(input, "gl_SkipComponents4") == 0) - this->skip_components = 4; - - if (this->skip_components) - return; - } - - /* Parse a declaration. */ - const char *base_name_end; - long subscript = parse_program_resource_name(input, &base_name_end); - this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input); - if (this->var_name == NULL) { - _mesa_error_no_memory(__func__); - return; - } - - if (subscript >= 0) { - this->array_subscript = subscript; - this->is_subscripted = true; - } else { - this->is_subscripted = false; - } - - /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this - * class must behave specially to account for the fact that gl_ClipDistance - * is converted from a float[8] to a vec4[2]. - */ - if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerClipDistance && - strcmp(this->var_name, "gl_ClipDistance") == 0) { - this->lowered_builtin_array_variable = clip_distance; - } - - if (ctx->Const.LowerTessLevel && - (strcmp(this->var_name, "gl_TessLevelOuter") == 0)) - this->lowered_builtin_array_variable = tess_level_outer; - if (ctx->Const.LowerTessLevel && - (strcmp(this->var_name, "gl_TessLevelInner") == 0)) - this->lowered_builtin_array_variable = tess_level_inner; -} - - -/** - * Determine whether two tfeedback_decl objects refer to the same variable and - * array index (if applicable). 
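- *
- * For example, "foo[2]" is the same as another "foo[2]" but differs from
- * "foo[3]" and from an unsubscripted "foo".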
- */ -bool -tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y) -{ - assert(x.is_varying() && y.is_varying()); - - if (strcmp(x.var_name, y.var_name) != 0) - return false; - if (x.is_subscripted != y.is_subscripted) - return false; - if (x.is_subscripted && x.array_subscript != y.array_subscript) - return false; - return true; -} - - -/** - * Assign a location and stream ID for this tfeedback_decl object based on the - * transform feedback candidate found by find_candidate. - * - * If an error occurs, the error is reported through linker_error() and false - * is returned. - */ -bool -tfeedback_decl::assign_location(struct gl_context *ctx, - struct gl_shader_program *prog) -{ - assert(this->is_varying()); - - unsigned fine_location - = this->matched_candidate->toplevel_var->data.location * 4 - + this->matched_candidate->toplevel_var->data.location_frac - + this->matched_candidate->offset; - - if (this->matched_candidate->type->is_array()) { - /* Array variable */ - const unsigned matrix_cols = - this->matched_candidate->type->fields.array->matrix_columns; - const unsigned vector_elements = - this->matched_candidate->type->fields.array->vector_elements; - const unsigned dmul = - this->matched_candidate->type->fields.array->is_double() ? 2 : 1; - unsigned actual_array_size; - switch (this->lowered_builtin_array_variable) { - case clip_distance: - actual_array_size = prog->LastClipDistanceArraySize; - break; - case tess_level_outer: - actual_array_size = 4; - break; - case tess_level_inner: - actual_array_size = 2; - break; - case none: - default: - actual_array_size = this->matched_candidate->type->array_size(); - break; - } - - if (this->is_subscripted) { - /* Check array bounds. */ - if (this->array_subscript >= actual_array_size) { - linker_error(prog, "Transform feedback varying %s has index " - "%i, but the array size is %u.", - this->orig_name, this->array_subscript, - actual_array_size); - return false; - } - unsigned array_elem_size = this->lowered_builtin_array_variable ? 
-         1 : vector_elements * matrix_cols * dmul;
-         fine_location += array_elem_size * this->array_subscript;
-         this->size = 1;
-      } else {
-         this->size = actual_array_size;
-      }
-      this->vector_elements = vector_elements;
-      this->matrix_columns = matrix_cols;
-      if (this->lowered_builtin_array_variable)
-         this->type = GL_FLOAT;
-      else
-         this->type = this->matched_candidate->type->fields.array->gl_type;
-   } else {
-      /* Regular variable (scalar, vector, or matrix) */
-      if (this->is_subscripted) {
-         linker_error(prog, "Transform feedback varying %s requested, "
-                      "but %s is not an array.",
-                      this->orig_name, this->var_name);
-         return false;
-      }
-      this->size = 1;
-      this->vector_elements = this->matched_candidate->type->vector_elements;
-      this->matrix_columns = this->matched_candidate->type->matrix_columns;
-      this->type = this->matched_candidate->type->gl_type;
-   }
-   this->location = fine_location / 4;
-   this->location_frac = fine_location % 4;
-
-   /* From GL_EXT_transform_feedback:
-    *   A program will fail to link if:
-    *
-    *     * the total number of components to capture in any varying
-    *       variable in <varyings> is greater than the constant
-    *       MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
-    *       buffer mode is SEPARATE_ATTRIBS_EXT;
-    */
-   if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
-       this->num_components() >
-       ctx->Const.MaxTransformFeedbackSeparateComponents) {
-      linker_error(prog, "Transform feedback varying %s exceeds "
-                   "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
-                   this->orig_name);
-      return false;
-   }
-
-   /* Only transform feedback varyings can be assigned to non-zero streams,
-    * so assign the stream id here.
-    */
-   this->stream_id = this->matched_candidate->toplevel_var->data.stream;
-
-   return true;
-}
-
-
-unsigned
-tfeedback_decl::get_num_outputs() const
-{
-   if (!this->is_varying()) {
-      return 0;
-   }
-   return (this->num_components() + this->location_frac + 3)/4;
-}
-
-
-/**
- * Update gl_transform_feedback_info to reflect this tfeedback_decl.
- *
- * If an error occurs, the error is reported through linker_error() and false
- * is returned.
- */
-bool
-tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
-                      struct gl_transform_feedback_info *info,
-                      unsigned buffer, const unsigned max_outputs) const
-{
-   assert(!this->next_buffer_separator);
-
-   /* Handle gl_SkipComponents. */
-   if (this->skip_components) {
-      info->BufferStride[buffer] += this->skip_components;
-      return true;
-   }
-
-   /* From GL_EXT_transform_feedback:
-    *   A program will fail to link if:
-    *
-    *     * the total number of components to capture is greater than
-    *       the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
-    *       and the buffer mode is INTERLEAVED_ATTRIBS_EXT.
- */ - if (prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS && - info->BufferStride[buffer] + this->num_components() > - ctx->Const.MaxTransformFeedbackInterleavedComponents) { - linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " - "limit has been exceeded."); - return false; - } - - unsigned location = this->location; - unsigned location_frac = this->location_frac; - unsigned num_components = this->num_components(); - while (num_components > 0) { - unsigned output_size = MIN2(num_components, 4 - location_frac); - assert(info->NumOutputs < max_outputs); - info->Outputs[info->NumOutputs].ComponentOffset = location_frac; - info->Outputs[info->NumOutputs].OutputRegister = location; - info->Outputs[info->NumOutputs].NumComponents = output_size; - info->Outputs[info->NumOutputs].StreamId = stream_id; - info->Outputs[info->NumOutputs].OutputBuffer = buffer; - info->Outputs[info->NumOutputs].DstOffset = info->BufferStride[buffer]; - ++info->NumOutputs; - info->BufferStride[buffer] += output_size; - info->BufferStream[buffer] = this->stream_id; - num_components -= output_size; - location++; - location_frac = 0; - } - - info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, this->orig_name); - info->Varyings[info->NumVarying].Type = this->type; - info->Varyings[info->NumVarying].Size = this->size; - info->NumVarying++; - - return true; -} - - -const tfeedback_candidate * -tfeedback_decl::find_candidate(gl_shader_program *prog, - hash_table *tfeedback_candidates) -{ - const char *name = this->var_name; - switch (this->lowered_builtin_array_variable) { - case none: - name = this->var_name; - break; - case clip_distance: - name = "gl_ClipDistanceMESA"; - break; - case tess_level_outer: - name = "gl_TessLevelOuterMESA"; - break; - case tess_level_inner: - name = "gl_TessLevelInnerMESA"; - break; - } - this->matched_candidate = (const tfeedback_candidate *) - hash_table_find(tfeedback_candidates, name); - if (!this->matched_candidate) { - /* From GL_EXT_transform_feedback: - * A program will fail to link if: - * - * * any variable name specified in the array is not - * declared as an output in the geometry shader (if present) or - * the vertex shader (if no geometry shader is present); - */ - linker_error(prog, "Transform feedback varying %s undeclared.", - this->orig_name); - } - return this->matched_candidate; -} - - -/** - * Parse all the transform feedback declarations that were passed to - * glTransformFeedbackVaryings() and store them in tfeedback_decl objects. - * - * If an error occurs, the error is reported through linker_error() and false - * is returned. - */ -bool -parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, - const void *mem_ctx, unsigned num_names, - char **varying_names, tfeedback_decl *decls) -{ - for (unsigned i = 0; i < num_names; ++i) { - decls[i].init(ctx, mem_ctx, varying_names[i]); - - if (!decls[i].is_varying()) - continue; - - /* From GL_EXT_transform_feedback: - * A program will fail to link if: - * - * * any two entries in the array specify the same varying - * variable; - * - * We interpret this to mean "any two entries in the array - * specify the same varying variable and array index", since transform - * feedback of arrays would be useless otherwise. 
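-       *
-       * Under that interpretation, capturing "foo[0]" and "foo[1]" together
-       * is legal, while listing "foo[1]" twice is a link error.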
-       */
-      for (unsigned j = 0; j < i; ++j) {
-         if (!decls[j].is_varying())
-            continue;
-
-         if (tfeedback_decl::is_same(decls[i], decls[j])) {
-            linker_error(prog, "Transform feedback varying %s specified "
-                         "more than once.", varying_names[i]);
-            return false;
-         }
-      }
-   }
-   return true;
-}
-
-
-/**
- * Store transform feedback location assignments into
- * prog->LinkedTransformFeedback based on the data stored in tfeedback_decls.
- *
- * If an error occurs, the error is reported through linker_error() and false
- * is returned.
- */
-bool
-store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
-                     unsigned num_tfeedback_decls,
-                     tfeedback_decl *tfeedback_decls)
-{
-   bool separate_attribs_mode =
-      prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
-
-   ralloc_free(prog->LinkedTransformFeedback.Varyings);
-   ralloc_free(prog->LinkedTransformFeedback.Outputs);
-
-   memset(&prog->LinkedTransformFeedback, 0,
-          sizeof(prog->LinkedTransformFeedback));
-
-   prog->LinkedTransformFeedback.Varyings =
-      rzalloc_array(prog,
-                    struct gl_transform_feedback_varying_info,
-                    num_tfeedback_decls);
-
-   unsigned num_outputs = 0;
-   for (unsigned i = 0; i < num_tfeedback_decls; ++i)
-      num_outputs += tfeedback_decls[i].get_num_outputs();
-
-   prog->LinkedTransformFeedback.Outputs =
-      rzalloc_array(prog,
-                    struct gl_transform_feedback_output,
-                    num_outputs);
-
-   unsigned num_buffers = 0;
-
-   if (separate_attribs_mode) {
-      /* GL_SEPARATE_ATTRIBS */
-      for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
-         if (!tfeedback_decls[i].store(ctx, prog, &prog->LinkedTransformFeedback,
-                                       num_buffers, num_outputs))
-            return false;
-
-         num_buffers++;
-      }
-   }
-   else {
-      /* GL_INTERLEAVED_ATTRIBS */
-      int buffer_stream_id = -1;
-      for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
-         if (tfeedback_decls[i].is_next_buffer_separator()) {
-            num_buffers++;
-            buffer_stream_id = -1;
-            continue;
-         } else if (buffer_stream_id == -1) {
-            /* First varying writing to this buffer: remember its stream */
-            buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
-         } else if (buffer_stream_id !=
-                    (int) tfeedback_decls[i].get_stream_id()) {
-            /* Varying writes to the same buffer from a different stream */
-            linker_error(prog,
-                         "Transform feedback can't capture varyings belonging "
-                         "to different vertex streams in a single buffer. "
-                         "Varying %s writes to buffer from stream %u, other "
-                         "varyings in the same buffer write from stream %u.",
-                         tfeedback_decls[i].name(),
-                         tfeedback_decls[i].get_stream_id(),
-                         buffer_stream_id);
-            return false;
-         }
-
-         if (!tfeedback_decls[i].store(ctx, prog,
-                                       &prog->LinkedTransformFeedback,
-                                       num_buffers, num_outputs))
-            return false;
-      }
-      num_buffers++;
-   }
-
-   assert(prog->LinkedTransformFeedback.NumOutputs == num_outputs);
-
-   prog->LinkedTransformFeedback.NumBuffers = num_buffers;
-   return true;
-}
-
-namespace {
-
-/**
- * Data structure recording the relationship between outputs of one shader
- * stage (the "producer") and inputs of another (the "consumer").
- */ -class varying_matches -{ -public: - varying_matches(bool disable_varying_packing, - gl_shader_stage producer_stage, - gl_shader_stage consumer_stage); - ~varying_matches(); - void record(ir_variable *producer_var, ir_variable *consumer_var); - unsigned assign_locations(struct gl_shader_program *prog, - uint64_t reserved_slots, bool separate_shader); - void store_locations() const; - -private: - /** - * If true, this driver disables varying packing, so all varyings need to - * be aligned on slot boundaries, and take up a number of slots equal to - * their number of matrix columns times their array size. - */ - const bool disable_varying_packing; - - /** - * Enum representing the order in which varyings are packed within a - * packing class. - * - * Currently we pack vec4's first, then vec2's, then scalar values, then - * vec3's. This order ensures that the only vectors that are at risk of - * having to be "double parked" (split between two adjacent varying slots) - * are the vec3's. - */ - enum packing_order_enum { - PACKING_ORDER_VEC4, - PACKING_ORDER_VEC2, - PACKING_ORDER_SCALAR, - PACKING_ORDER_VEC3, - }; - - static unsigned compute_packing_class(const ir_variable *var); - static packing_order_enum compute_packing_order(const ir_variable *var); - static int match_comparator(const void *x_generic, const void *y_generic); - - /** - * Structure recording the relationship between a single producer output - * and a single consumer input. - */ - struct match { - /** - * Packing class for this varying, computed by compute_packing_class(). - */ - unsigned packing_class; - - /** - * Packing order for this varying, computed by compute_packing_order(). - */ - packing_order_enum packing_order; - unsigned num_components; - - /** - * The output variable in the producer stage. - */ - ir_variable *producer_var; - - /** - * The input variable in the consumer stage. - */ - ir_variable *consumer_var; - - /** - * The location which has been assigned for this varying. This is - * expressed in multiples of a float, with the first generic varying - * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the - * value 0. - */ - unsigned generic_location; - } *matches; - - /** - * The number of elements in the \c matches array that are currently in - * use. - */ - unsigned num_matches; - - /** - * The number of elements that were set aside for the \c matches array when - * it was allocated. - */ - unsigned matches_capacity; - - gl_shader_stage producer_stage; - gl_shader_stage consumer_stage; -}; - -} /* anonymous namespace */ - -varying_matches::varying_matches(bool disable_varying_packing, - gl_shader_stage producer_stage, - gl_shader_stage consumer_stage) - : disable_varying_packing(disable_varying_packing), - producer_stage(producer_stage), - consumer_stage(consumer_stage) -{ - /* Note: this initial capacity is rather arbitrarily chosen to be large - * enough for many cases without wasting an unreasonable amount of space. - * varying_matches::record() will resize the array if there are more than - * this number of varyings. - */ - this->matches_capacity = 8; - this->matches = (match *) - malloc(sizeof(*this->matches) * this->matches_capacity); - this->num_matches = 0; -} - - -varying_matches::~varying_matches() -{ - free(this->matches); -} - - -/** - * Record the given producer/consumer variable pair in the list of variables - * that should later be assigned locations. 
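- * A typical pair is a vec3 output of the vertex shader together with the
- * matching vec3 input of the fragment shader.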
- *
- * It is permissible for \c consumer_var to be NULL (this happens if a
- * variable is output by the producer and consumed by transform feedback,
- * but not consumed by the consumer).
- *
- * If \c producer_var has already been paired up with a consumer_var, or
- * producer_var is part of fixed pipeline functionality (and hence already
- * has a location assigned), this function has no effect.
- *
- * Note: as a side effect this function may change the interpolation type of
- * \c producer_var, but only when the change couldn't possibly affect
- * rendering.
- */
-void
-varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
-{
-   assert(producer_var != NULL || consumer_var != NULL);
-
-   if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
-       producer_var->data.explicit_location)) ||
-       (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
-       consumer_var->data.explicit_location))) {
-      /* Either a location already exists for this variable (since it is part
-       * of fixed functionality), or it has already been recorded as part of a
-       * previous match.
-       */
-      return;
-   }
-
-   if ((consumer_var == NULL && producer_var->type->contains_integer()) ||
-       (consumer_stage != -1 && consumer_stage != MESA_SHADER_FRAGMENT)) {
-      /* Since this varying is not being consumed by the fragment shader, its
-       * interpolation type cannot possibly affect rendering.
-       * Also, this variable is non-flat and is (or contains) an integer.
-       * If the consumer stage is unknown, don't modify the interpolation
-       * type as it could affect rendering later with separate shaders.
-       *
-       * lower_packed_varyings requires all integer varyings to be flat,
-       * regardless of where they appear.  We can trivially satisfy that
-       * requirement by changing the interpolation type to flat here.
-       */
-      if (producer_var) {
-         producer_var->data.centroid = false;
-         producer_var->data.sample = false;
-         producer_var->data.interpolation = INTERP_QUALIFIER_FLAT;
-      }
-
-      if (consumer_var) {
-         consumer_var->data.centroid = false;
-         consumer_var->data.sample = false;
-         consumer_var->data.interpolation = INTERP_QUALIFIER_FLAT;
-      }
-   }
-
-   if (this->num_matches == this->matches_capacity) {
-      this->matches_capacity *= 2;
-      this->matches = (match *)
-         realloc(this->matches,
-                 sizeof(*this->matches) * this->matches_capacity);
-   }
-
-   const ir_variable *const var = (producer_var != NULL)
-      ? producer_var : consumer_var;
-   const gl_shader_stage stage = (producer_var != NULL)
-      ? producer_stage : consumer_stage;
-   const glsl_type *type = get_varying_type(var, stage);
-
-   this->matches[this->num_matches].packing_class
-      = this->compute_packing_class(var);
-   this->matches[this->num_matches].packing_order
-      = this->compute_packing_order(var);
-   if (this->disable_varying_packing) {
-      unsigned slots = type->count_attribute_slots(false);
-      this->matches[this->num_matches].num_components = slots * 4;
-   } else {
-      this->matches[this->num_matches].num_components
-         = type->component_slots();
-   }
-   this->matches[this->num_matches].producer_var = producer_var;
-   this->matches[this->num_matches].consumer_var = consumer_var;
-   this->num_matches++;
-   if (producer_var)
-      producer_var->data.is_unmatched_generic_inout = 0;
-   if (consumer_var)
-      consumer_var->data.is_unmatched_generic_inout = 0;
-}
-
-
-/**
- * Choose locations for all of the variable matches that were previously
- * passed to varying_matches::record().
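record()'s interpolation rewrite is subtle: it may only fire when the change cannot be observed, i.e. when no fragment stage consumes the varying. A condensed, hedged restatement of that guard as a standalone predicate (the parameters stand in for fields of ir_variable and varying_matches):

    /* Hedged sketch: when may the linker force flat interpolation without
     * changing rendering? */
    static bool
    may_force_flat(bool has_consumer_var, gl_shader_stage consumer_stage,
                   bool contains_integer)
    {
       /* Captured only by transform feedback: integers must still become
        * flat so lower_packed_varyings can pack them. */
       if (!has_consumer_var && contains_integer)
          return true;
       /* Consumed by a known non-fragment stage: interpolation qualifiers
        * are never applied there, so the rewrite is unobservable. */
       if (consumer_stage != (gl_shader_stage) -1 &&
           consumer_stage != MESA_SHADER_FRAGMENT)
          return true;
       /* Fragment consumer, or unknown stage in a separable program: the
        * qualifier is observable and must be left alone. */
       return false;
    }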
- */ -unsigned -varying_matches::assign_locations(struct gl_shader_program *prog, - uint64_t reserved_slots, - bool separate_shader) -{ - /* We disable varying sorting for separate shader programs for the - * following reasons: - * - * 1/ All programs must sort the code in the same order to guarantee the - * interface matching. However varying_matches::record() will change the - * interpolation qualifier of some stages. - * - * 2/ GLSL version 4.50 removes the matching constrain on the interpolation - * qualifier. - * - * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.40 spec: - * - * "The type and presence of interpolation qualifiers of variables with - * the same name declared in all linked shaders for the same cross-stage - * interface must match, otherwise the link command will fail. - * - * When comparing an output from one stage to an input of a subsequent - * stage, the input and output don't match if their interpolation - * qualifiers (or lack thereof) are not the same." - * - * "It is a link-time error if, within the same stage, the interpolation - * qualifiers of variables of the same name do not match." - */ - if (!separate_shader) { - /* Sort varying matches into an order that makes them easy to pack. */ - qsort(this->matches, this->num_matches, sizeof(*this->matches), - &varying_matches::match_comparator); - } - - unsigned generic_location = 0; - unsigned generic_patch_location = MAX_VARYING*4; - - for (unsigned i = 0; i < this->num_matches; i++) { - unsigned *location = &generic_location; - - const ir_variable *var; - const glsl_type *type; - bool is_vertex_input = false; - if (matches[i].consumer_var) { - var = matches[i].consumer_var; - type = get_varying_type(var, consumer_stage); - if (consumer_stage == MESA_SHADER_VERTEX) - is_vertex_input = true; - } else { - var = matches[i].producer_var; - type = get_varying_type(var, producer_stage); - } - - if (var->data.patch) - location = &generic_patch_location; - - /* Advance to the next slot if this varying has a different packing - * class than the previous one, and we're not already on a slot - * boundary. - */ - if (i > 0 && - this->matches[i - 1].packing_class - != this->matches[i].packing_class) { - *location = ALIGN(*location, 4); - } - - unsigned num_elements = type->count_attribute_slots(is_vertex_input); - unsigned slot_end = this->disable_varying_packing ? 4 : - type->without_array()->vector_elements; - slot_end += *location - 1; - - /* FIXME: We could be smarter in the below code and loop back over - * trying to fill any locations that we skipped because we couldn't pack - * the varying between an explicit location. For now just let the user - * hit the linking error if we run out of room and suggest they use - * explicit locations. - */ - for (unsigned j = 0; j < num_elements; j++) { - while ((slot_end < MAX_VARYING * 4u) && - ((reserved_slots & (UINT64_C(1) << *location / 4u) || - (reserved_slots & (UINT64_C(1) << slot_end / 4u))))) { - - *location = ALIGN(*location + 1, 4); - slot_end = *location; - - /* reset the counter and try again */ - j = 0; - } - - /* Increase the slot to make sure there is enough room for next - * array element. - */ - if (this->disable_varying_packing) - slot_end += 4; - else - slot_end += type->without_array()->vector_elements; - } - - if (!var->data.patch && *location >= MAX_VARYING * 4u) { - linker_error(prog, "insufficient contiguous locations available for " - "%s it is possible an array or struct could not be " - "packed between varyings with explicit locations. 
Try " - "using an explicit location for arrays and structs.", - var->name); - } - - this->matches[i].generic_location = *location; - - *location += this->matches[i].num_components; - } - - return (generic_location + 3) / 4; -} - - -/** - * Update the producer and consumer shaders to reflect the locations - * assignments that were made by varying_matches::assign_locations(). - */ -void -varying_matches::store_locations() const -{ - for (unsigned i = 0; i < this->num_matches; i++) { - ir_variable *producer_var = this->matches[i].producer_var; - ir_variable *consumer_var = this->matches[i].consumer_var; - unsigned generic_location = this->matches[i].generic_location; - unsigned slot = generic_location / 4; - unsigned offset = generic_location % 4; - - if (producer_var) { - producer_var->data.location = VARYING_SLOT_VAR0 + slot; - producer_var->data.location_frac = offset; - } - - if (consumer_var) { - assert(consumer_var->data.location == -1); - consumer_var->data.location = VARYING_SLOT_VAR0 + slot; - consumer_var->data.location_frac = offset; - } - } -} - - -/** - * Compute the "packing class" of the given varying. This is an unsigned - * integer with the property that two variables in the same packing class can - * be safely backed into the same vec4. - */ -unsigned -varying_matches::compute_packing_class(const ir_variable *var) -{ - /* Without help from the back-end, there is no way to pack together - * variables with different interpolation types, because - * lower_packed_varyings must choose exactly one interpolation type for - * each packed varying it creates. - * - * However, we can safely pack together floats, ints, and uints, because: - * - * - varyings of base type "int" and "uint" must use the "flat" - * interpolation type, which can only occur in GLSL 1.30 and above. - * - * - On platforms that support GLSL 1.30 and above, lower_packed_varyings - * can store flat floats as ints without losing any information (using - * the ir_unop_bitcast_* opcodes). - * - * Therefore, the packing class depends only on the interpolation type. - */ - unsigned packing_class = var->data.centroid | (var->data.sample << 1) | - (var->data.patch << 2); - packing_class *= 4; - packing_class += var->data.interpolation; - return packing_class; -} - - -/** - * Compute the "packing order" of the given varying. This is a sort key we - * use to determine when to attempt to pack the given varying relative to - * other varyings in the same packing class. - */ -varying_matches::packing_order_enum -varying_matches::compute_packing_order(const ir_variable *var) -{ - const glsl_type *element_type = var->type; - - while (element_type->base_type == GLSL_TYPE_ARRAY) { - element_type = element_type->fields.array; - } - - switch (element_type->component_slots() % 4) { - case 1: return PACKING_ORDER_SCALAR; - case 2: return PACKING_ORDER_VEC2; - case 3: return PACKING_ORDER_VEC3; - case 0: return PACKING_ORDER_VEC4; - default: - assert(!"Unexpected value of vector_elements"); - return PACKING_ORDER_VEC4; - } -} - - -/** - * Comparison function passed to qsort() to sort varyings by packing_class and - * then by packing_order. 
- */ -int -varying_matches::match_comparator(const void *x_generic, const void *y_generic) -{ - const match *x = (const match *) x_generic; - const match *y = (const match *) y_generic; - - if (x->packing_class != y->packing_class) - return x->packing_class - y->packing_class; - return x->packing_order - y->packing_order; -} - - -/** - * Is the given variable a varying variable to be counted against the - * limit in ctx->Const.MaxVarying? - * This includes variables such as texcoords, colors and generic - * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord. - */ -static bool -var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var) -{ - /* Only fragment shaders will take a varying variable as an input */ - if (stage == MESA_SHADER_FRAGMENT && - var->data.mode == ir_var_shader_in) { - switch (var->data.location) { - case VARYING_SLOT_POS: - case VARYING_SLOT_FACE: - case VARYING_SLOT_PNTC: - return false; - default: - return true; - } - } - return false; -} - - -/** - * Visitor class that generates tfeedback_candidate structs describing all - * possible targets of transform feedback. - * - * tfeedback_candidate structs are stored in the hash table - * tfeedback_candidates, which is passed to the constructor. This hash table - * maps varying names to instances of the tfeedback_candidate struct. - */ -class tfeedback_candidate_generator : public program_resource_visitor -{ -public: - tfeedback_candidate_generator(void *mem_ctx, - hash_table *tfeedback_candidates) - : mem_ctx(mem_ctx), - tfeedback_candidates(tfeedback_candidates), - toplevel_var(NULL), - varying_floats(0) - { - } - - void process(ir_variable *var) - { - /* All named varying interface blocks should be flattened by now */ - assert(!var->is_interface_instance()); - - this->toplevel_var = var; - this->varying_floats = 0; - program_resource_visitor::process(var); - } - -private: - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major) - { - assert(!type->without_array()->is_record()); - assert(!type->without_array()->is_interface()); - - (void) row_major; - - tfeedback_candidate *candidate - = rzalloc(this->mem_ctx, tfeedback_candidate); - candidate->toplevel_var = this->toplevel_var; - candidate->type = type; - candidate->offset = this->varying_floats; - hash_table_insert(this->tfeedback_candidates, candidate, - ralloc_strdup(this->mem_ctx, name)); - this->varying_floats += type->component_slots(); - } - - /** - * Memory context used to allocate hash table keys and values. - */ - void * const mem_ctx; - - /** - * Hash table in which tfeedback_candidate objects should be stored. - */ - hash_table * const tfeedback_candidates; - - /** - * Pointer to the toplevel variable that is being traversed. - */ - ir_variable *toplevel_var; - - /** - * Total number of varying floats that have been visited so far. This is - * used to determine the offset to each varying within the toplevel - * variable. 
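tfeedback_candidate_generator flattens a toplevel variable into leaf fields, recording each leaf's offset in floats from the start of the variable. The accumulation itself is simple; a toy version with a hypothetical leaf record in place of the real hash-table plumbing:

    #include <string>
    #include <utility>
    #include <vector>

    /* Hedged sketch: "leaf" stands in for a tfeedback_candidate. */
    struct leaf {
       std::string name;
       unsigned offset;      /* in floats from the start of the variable */
       unsigned components;  /* component_slots() of this field          */
    };

    /* Walk leaf fields in declaration order, assigning running offsets,
     * the same accumulation visit_field() performs above. */
    static std::vector<leaf>
    assign_offsets(const std::vector<std::pair<std::string, unsigned>> &fields)
    {
       std::vector<leaf> out;
       unsigned varying_floats = 0;
       for (const auto &f : fields) {
          out.push_back({f.first, varying_floats, f.second});
          varying_floats += f.second;
       }
       return out;
    }

E.g. for struct S { vec4 foo; float bar[3]; } flattened as v[0].foo, v[0].bar, v[1].foo, v[1].bar, the offsets come out as 0, 4, 7 and 11.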
- */ - unsigned varying_floats; -}; - - -namespace linker { - -bool -populate_consumer_input_sets(void *mem_ctx, exec_list *ir, - hash_table *consumer_inputs, - hash_table *consumer_interface_inputs, - ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) -{ - memset(consumer_inputs_with_locations, - 0, - sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX); - - foreach_in_list(ir_instruction, node, ir) { - ir_variable *const input_var = node->as_variable(); - - if ((input_var != NULL) && (input_var->data.mode == ir_var_shader_in)) { - if (input_var->type->is_interface()) - return false; - - if (input_var->data.explicit_location) { - /* assign_varying_locations only cares about finding the - * ir_variable at the start of a contiguous location block. - * - * - For !producer, consumer_inputs_with_locations isn't used. - * - * - For !consumer, consumer_inputs_with_locations is empty. - * - * For consumer && producer, if you were trying to set some - * ir_variable to the middle of a location block on the other side - * of producer/consumer, cross_validate_outputs_to_inputs() should - * be link-erroring due to either type mismatch or location - * overlaps. If the variables do match up, then they've got a - * matching data.location and you only looked at - * consumer_inputs_with_locations[var->data.location], not any - * following entries for the array/structure. - */ - consumer_inputs_with_locations[input_var->data.location] = - input_var; - } else if (input_var->get_interface_type() != NULL) { - char *const iface_field_name = - ralloc_asprintf(mem_ctx, "%s.%s", - input_var->get_interface_type()->name, - input_var->name); - hash_table_insert(consumer_interface_inputs, input_var, - iface_field_name); - } else { - hash_table_insert(consumer_inputs, input_var, - ralloc_strdup(mem_ctx, input_var->name)); - } - } - } - - return true; -} - -/** - * Find a variable from the consumer that "matches" the specified variable - * - * This function only finds inputs with names that match. There is no - * validation (here) that the types, etc. are compatible. - */ -ir_variable * -get_matching_input(void *mem_ctx, - const ir_variable *output_var, - hash_table *consumer_inputs, - hash_table *consumer_interface_inputs, - ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) -{ - ir_variable *input_var; - - if (output_var->data.explicit_location) { - input_var = consumer_inputs_with_locations[output_var->data.location]; - } else if (output_var->get_interface_type() != NULL) { - char *const iface_field_name = - ralloc_asprintf(mem_ctx, "%s.%s", - output_var->get_interface_type()->name, - output_var->name); - input_var = - (ir_variable *) hash_table_find(consumer_interface_inputs, - iface_field_name); - } else { - input_var = - (ir_variable *) hash_table_find(consumer_inputs, output_var->name); - } - - return (input_var == NULL || input_var->data.mode != ir_var_shader_in) - ? 
      NULL : input_var;
-}
-
-}
-
-static int
-io_variable_cmp(const void *_a, const void *_b)
-{
-   const ir_variable *const a = *(const ir_variable **) _a;
-   const ir_variable *const b = *(const ir_variable **) _b;
-
-   if (a->data.explicit_location && b->data.explicit_location)
-      return b->data.location - a->data.location;
-
-   if (a->data.explicit_location && !b->data.explicit_location)
-      return 1;
-
-   if (!a->data.explicit_location && b->data.explicit_location)
-      return -1;
-
-   return -strcmp(a->name, b->name);
-}
-
-/**
- * Sort the shader IO variables into canonical order
- */
-static void
-canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
-{
-   ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
-   unsigned num_variables = 0;
-
-   foreach_in_list(ir_instruction, node, ir) {
-      ir_variable *const var = node->as_variable();
-
-      if (var == NULL || var->data.mode != io_mode)
-         continue;
-
-      /* If we have already encountered more I/O variables than could
-       * successfully link, bail.
-       */
-      if (num_variables == ARRAY_SIZE(var_table))
-         return;
-
-      var_table[num_variables++] = var;
-   }
-
-   if (num_variables == 0)
-      return;
-
-   /* Sort the list in reverse order (io_variable_cmp handles this).  Later
-    * we're going to push the variables on to the IR list as a stack, so we
-    * want the last variable (in canonical order) to be first in the list.
-    */
-   qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
-
-   /* Remove the variable from its current location in the IR, and put it at
-    * the front.
-    */
-   for (unsigned i = 0; i < num_variables; i++) {
-      var_table[i]->remove();
-      ir->push_head(var_table[i]);
-   }
-}
-
-/**
- * Generate a bitfield map of the explicit locations for shader varyings.
- *
- * In theory a 32-bit value would be enough, but a 64-bit value is future
- * proof.
- */
-uint64_t
-reserved_varying_slot(struct gl_shader *stage, ir_variable_mode io_mode)
-{
-   assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
-   assert(MAX_VARYING <= 64); /* avoid an overflow of the returned value */
-
-   uint64_t slots = 0;
-   int var_slot;
-
-   if (!stage)
-      return slots;
-
-   foreach_in_list(ir_instruction, node, stage->ir) {
-      ir_variable *const var = node->as_variable();
-
-      if (var == NULL || var->data.mode != io_mode ||
-          !var->data.explicit_location ||
-          var->data.location < VARYING_SLOT_VAR0)
-         continue;
-
-      var_slot = var->data.location - VARYING_SLOT_VAR0;
-
-      unsigned num_elements = get_varying_type(var, stage->Stage)
-         ->count_attribute_slots(stage->Stage == MESA_SHADER_VERTEX);
-      for (unsigned i = 0; i < num_elements; i++) {
-         if (var_slot >= 0 && var_slot < MAX_VARYING)
-            slots |= UINT64_C(1) << var_slot;
-         var_slot += 1;
-      }
-   }
-
-   return slots;
-}
-
-
-/**
- * Assign locations for all variables that are produced in one pipeline stage
- * (the "producer") and consumed in the next stage (the "consumer").
- *
- * Variables produced by the producer may also be consumed by transform
- * feedback.
- *
- * \param num_tfeedback_decls is the number of declarations indicating
- *        variables that may be consumed by transform feedback.
- *
- * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
- *        representing the result of parsing the strings passed to
- *        glTransformFeedbackVaryings().  assign_location() will be called
- *        for each of these objects that matches one of the outputs of the
- *        producer.
- *
- * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
- * be NULL.
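reserved_varying_slot() folds every explicitly located varying into a 64-bit occupancy mask, one bit per slot rebased to VARYING_SLOT_VAR0. The core of that fold as a standalone sketch (reserve_slots is a hypothetical helper, and out-of-range slots are skipped just as above):

    #include <stdint.h>

    /* Sketch: mark `count` consecutive slots starting at `first` (already
     * rebased to VAR0) as reserved; slots outside [0, 64) are ignored. */
    static uint64_t
    reserve_slots(uint64_t mask, int first, unsigned count)
    {
       for (unsigned i = 0; i < count; i++) {
          int slot = first + (int) i;
          if (slot >= 0 && slot < 64)
             mask |= UINT64_C(1) << slot;
       }
       return mask;
    }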
In this case, varying locations are assigned solely based on the - * requirements of transform feedback. - */ -bool -assign_varying_locations(struct gl_context *ctx, - void *mem_ctx, - struct gl_shader_program *prog, - gl_shader *producer, gl_shader *consumer, - unsigned num_tfeedback_decls, - tfeedback_decl *tfeedback_decls) -{ - if (ctx->Const.DisableVaryingPacking) { - /* Transform feedback code assumes varyings are packed, so if the driver - * has disabled varying packing, make sure it does not support transform - * feedback. - */ - assert(!ctx->Extensions.EXT_transform_feedback); - } - - /* Tessellation shaders treat inputs and outputs as shared memory and can - * access inputs and outputs of other invocations. - * Therefore, they can't be lowered to temps easily (and definitely not - * efficiently). - */ - bool disable_varying_packing = - ctx->Const.DisableVaryingPacking || - (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || - (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || - (producer && producer->Stage == MESA_SHADER_TESS_CTRL); - - varying_matches matches(disable_varying_packing, - producer ? producer->Stage : (gl_shader_stage)-1, - consumer ? consumer->Stage : (gl_shader_stage)-1); - hash_table *tfeedback_candidates - = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); - hash_table *consumer_inputs - = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); - hash_table *consumer_interface_inputs - = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare); - ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = { - NULL, - }; - - unsigned consumer_vertices = 0; - if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY) - consumer_vertices = prog->Geom.VerticesIn; - - /* Operate in a total of four passes. - * - * 1. Sort inputs / outputs into a canonical order. This is necessary so - * that inputs / outputs of separable shaders will be assigned - * predictable locations regardless of the order in which declarations - * appeared in the shader source. - * - * 2. Assign locations for any matching inputs and outputs. - * - * 3. Mark output variables in the producer that do not have locations as - * not being outputs. This lets the optimizer eliminate them. - * - * 4. Mark input variables in the consumer that do not have locations as - * not being inputs. This lets the optimizer eliminate them. 
- */
-   if (consumer)
-      canonicalize_shader_io(consumer->ir, ir_var_shader_in);
-
-   if (producer)
-      canonicalize_shader_io(producer->ir, ir_var_shader_out);
-
-   if (consumer
-       && !linker::populate_consumer_input_sets(mem_ctx,
-                                                consumer->ir,
-                                                consumer_inputs,
-                                                consumer_interface_inputs,
-                                                consumer_inputs_with_locations)) {
-      assert(!"populate_consumer_input_sets failed");
-      hash_table_dtor(tfeedback_candidates);
-      hash_table_dtor(consumer_inputs);
-      hash_table_dtor(consumer_interface_inputs);
-      return false;
-   }
-
-   if (producer) {
-      foreach_in_list(ir_instruction, node, producer->ir) {
-         ir_variable *const output_var = node->as_variable();
-
-         if ((output_var == NULL) ||
-             (output_var->data.mode != ir_var_shader_out))
-            continue;
-
-         /* Only geometry shaders can use non-zero streams */
-         assert(output_var->data.stream == 0 ||
-                (output_var->data.stream < MAX_VERTEX_STREAMS &&
-                 producer->Stage == MESA_SHADER_GEOMETRY));
-
-         tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates);
-         g.process(output_var);
-
-         ir_variable *const input_var =
-            linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
-                                       consumer_interface_inputs,
-                                       consumer_inputs_with_locations);
-
-         /* If a matching input variable was found, add this output (and the
-          * input) to the set.  If this is a separable program and there is
-          * no consumer stage, add the output.
-          *
-          * Always add TCS outputs. They are shared by all invocations
-          * within a patch and can be used as shared memory.
-          */
-         if (input_var || (prog->SeparateShader && consumer == NULL) ||
-             producer->Type == GL_TESS_CONTROL_SHADER) {
-            matches.record(output_var, input_var);
-         }
-
-         /* Only stream 0 outputs can be consumed in the next stage */
-         if (input_var && output_var->data.stream != 0) {
-            linker_error(prog, "output %s is assigned to stream=%d but "
-                         "is linked to an input, which requires stream=0",
-                         output_var->name, output_var->data.stream);
-            return false;
-         }
-      }
-   } else {
-      /* If there's no producer stage, then this must be a separable program.
-       * For example, we may have a program that has just a fragment shader.
-       * Later this program will be used with some arbitrary vertex (or
-       * geometry) shader program.  This means that locations must be
-       * assigned for all the inputs.
- */
-      foreach_in_list(ir_instruction, node, consumer->ir) {
-         ir_variable *const input_var = node->as_variable();
-
-         if ((input_var == NULL) ||
-             (input_var->data.mode != ir_var_shader_in))
-            continue;
-
-         matches.record(NULL, input_var);
-      }
-   }
-
-   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
-      if (!tfeedback_decls[i].is_varying())
-         continue;
-
-      const tfeedback_candidate *matched_candidate
-         = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
-
-      if (matched_candidate == NULL) {
-         hash_table_dtor(tfeedback_candidates);
-         hash_table_dtor(consumer_inputs);
-         hash_table_dtor(consumer_interface_inputs);
-         return false;
-      }
-
-      if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout)
-         matches.record(matched_candidate->toplevel_var, NULL);
-   }
-
-   const uint64_t reserved_slots =
-      reserved_varying_slot(producer, ir_var_shader_out) |
-      reserved_varying_slot(consumer, ir_var_shader_in);
-
-   const unsigned slots_used = matches.assign_locations(prog, reserved_slots,
-                                                        prog->SeparateShader);
-   matches.store_locations();
-
-   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
-      if (!tfeedback_decls[i].is_varying())
-         continue;
-
-      if (!tfeedback_decls[i].assign_location(ctx, prog)) {
-         hash_table_dtor(tfeedback_candidates);
-         hash_table_dtor(consumer_inputs);
-         hash_table_dtor(consumer_interface_inputs);
-         return false;
-      }
-   }
-
-   hash_table_dtor(tfeedback_candidates);
-   hash_table_dtor(consumer_inputs);
-   hash_table_dtor(consumer_interface_inputs);
-
-   if (consumer && producer) {
-      foreach_in_list(ir_instruction, node, consumer->ir) {
-         ir_variable *const var = node->as_variable();
-
-         if (var && var->data.mode == ir_var_shader_in &&
-             var->data.is_unmatched_generic_inout) {
-            if (prog->IsES) {
-               /*
-                * On Page 91 (Page 97 of the PDF) of the GLSL ES 1.0 spec:
-                *
-                *     If the vertex shader declares but doesn't write to a
-                *     varying and the fragment shader declares and reads it,
-                *     is this an error?
-                *
-                *     RESOLUTION: No.
-                */
-               linker_warning(prog, "%s shader varying %s not written "
-                              "by %s shader.\n",
-                              _mesa_shader_stage_to_string(consumer->Stage),
-                              var->name,
-                              _mesa_shader_stage_to_string(producer->Stage));
-            } else if (prog->Version <= 120) {
-               /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
-                *
-                *     Only those varying variables used (i.e. read) in
-                *     the fragment shader executable must be written to
-                *     by the vertex shader executable; declaring
-                *     superfluous varying variables in a vertex shader is
-                *     permissible.
-                *
-                * We interpret this text as meaning that the VS must
-                * write the variable for the FS to read it.  See
-                * "glsl1-varying read but not written" in piglit.
-                */
-               linker_error(prog, "%s shader varying %s not written "
-                            "by %s shader.\n",
-                            _mesa_shader_stage_to_string(consumer->Stage),
-                            var->name,
-                            _mesa_shader_stage_to_string(producer->Stage));
-            }
-         }
-      }
-
-      /* Now that validation is done it's safe to remove unused varyings.  As
-       * we have both a producer and consumer it's safe to remove unused
-       * varyings even if the program is an SSO, because the stages are being
-       * linked together, i.e. we have a multi-stage SSO.
- */ - remove_unused_shader_inputs_and_outputs(false, producer, - ir_var_shader_out); - remove_unused_shader_inputs_and_outputs(false, consumer, - ir_var_shader_in); - } - - if (!disable_varying_packing) { - if (producer) { - lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_out, - 0, producer); - } - if (consumer) { - lower_packed_varyings(mem_ctx, slots_used, ir_var_shader_in, - consumer_vertices, consumer); - } - } - - return true; -} - -bool -check_against_output_limit(struct gl_context *ctx, - struct gl_shader_program *prog, - gl_shader *producer) -{ - unsigned output_vectors = 0; - - foreach_in_list(ir_instruction, node, producer->ir) { - ir_variable *const var = node->as_variable(); - - if (var && var->data.mode == ir_var_shader_out && - var_counts_against_varying_limit(producer->Stage, var)) { - /* outputs for fragment shader can't be doubles */ - output_vectors += var->type->count_attribute_slots(false); - } - } - - assert(producer->Stage != MESA_SHADER_FRAGMENT); - unsigned max_output_components = - ctx->Const.Program[producer->Stage].MaxOutputComponents; - - const unsigned output_components = output_vectors * 4; - if (output_components > max_output_components) { - if (ctx->API == API_OPENGLES2 || prog->IsES) - linker_error(prog, "%s shader uses too many output vectors " - "(%u > %u)\n", - _mesa_shader_stage_to_string(producer->Stage), - output_vectors, - max_output_components / 4); - else - linker_error(prog, "%s shader uses too many output components " - "(%u > %u)\n", - _mesa_shader_stage_to_string(producer->Stage), - output_components, - max_output_components); - - return false; - } - - return true; -} - -bool -check_against_input_limit(struct gl_context *ctx, - struct gl_shader_program *prog, - gl_shader *consumer) -{ - unsigned input_vectors = 0; - - foreach_in_list(ir_instruction, node, consumer->ir) { - ir_variable *const var = node->as_variable(); - - if (var && var->data.mode == ir_var_shader_in && - var_counts_against_varying_limit(consumer->Stage, var)) { - /* vertex inputs aren't varying counted */ - input_vectors += var->type->count_attribute_slots(false); - } - } - - assert(consumer->Stage != MESA_SHADER_VERTEX); - unsigned max_input_components = - ctx->Const.Program[consumer->Stage].MaxInputComponents; - - const unsigned input_components = input_vectors * 4; - if (input_components > max_input_components) { - if (ctx->API == API_OPENGLES2 || prog->IsES) - linker_error(prog, "%s shader uses too many input vectors " - "(%u > %u)\n", - _mesa_shader_stage_to_string(consumer->Stage), - input_vectors, - max_input_components / 4); - else - linker_error(prog, "%s shader uses too many input components " - "(%u > %u)\n", - _mesa_shader_stage_to_string(consumer->Stage), - input_components, - max_input_components); - - return false; - } - - return true; -} diff --git a/src/glsl/link_varyings.h b/src/glsl/link_varyings.h deleted file mode 100644 index b2812614ecc..00000000000 --- a/src/glsl/link_varyings.h +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and 
this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once -#ifndef GLSL_LINK_VARYINGS_H -#define GLSL_LINK_VARYINGS_H - -/** - * \file link_varyings.h - * - * Linker functions related specifically to linking varyings between shader - * stages. - */ - - -#include "main/glheader.h" - - -struct gl_shader_program; -struct gl_shader; -class ir_variable; - - -/** - * Data structure describing a varying which is available for use in transform - * feedback. - * - * For example, if the vertex shader contains: - * - * struct S { - * vec4 foo; - * float[3] bar; - * }; - * - * varying S[2] v; - * - * Then there would be tfeedback_candidate objects corresponding to the - * following varyings: - * - * v[0].foo - * v[0].bar - * v[1].foo - * v[1].bar - */ -struct tfeedback_candidate -{ - /** - * Toplevel variable containing this varying. In the above example, this - * would point to the declaration of the varying v. - */ - ir_variable *toplevel_var; - - /** - * Type of this varying. In the above example, this would point to the - * glsl_type for "vec4" or "float[3]". - */ - const glsl_type *type; - - /** - * Offset within the toplevel variable where this varying occurs (counted - * in multiples of the size of a float). - */ - unsigned offset; -}; - - -/** - * Data structure tracking information about a transform feedback declaration - * during linking. - */ -class tfeedback_decl -{ -public: - void init(struct gl_context *ctx, const void *mem_ctx, const char *input); - static bool is_same(const tfeedback_decl &x, const tfeedback_decl &y); - bool assign_location(struct gl_context *ctx, - struct gl_shader_program *prog); - unsigned get_num_outputs() const; - bool store(struct gl_context *ctx, struct gl_shader_program *prog, - struct gl_transform_feedback_info *info, unsigned buffer, - const unsigned max_outputs) const; - const tfeedback_candidate *find_candidate(gl_shader_program *prog, - hash_table *tfeedback_candidates); - - bool is_next_buffer_separator() const - { - return this->next_buffer_separator; - } - - bool is_varying() const - { - return !this->next_buffer_separator && !this->skip_components; - } - - const char *name() const - { - return this->orig_name; - } - - unsigned get_stream_id() const - { - return this->stream_id; - } - - /** - * The total number of varying components taken up by this variable. Only - * valid if assign_location() has been called. - */ - unsigned num_components() const - { - if (this->lowered_builtin_array_variable) - return this->size; - else - return this->vector_elements * this->matrix_columns * this->size * - (this->is_double() ? 
2 : 1); - } - - unsigned get_location() const { - return this->location; - } - -private: - - bool is_double() const - { - switch (this->type) { - case GL_DOUBLE: - case GL_DOUBLE_VEC2: - case GL_DOUBLE_VEC3: - case GL_DOUBLE_VEC4: - case GL_DOUBLE_MAT2: - case GL_DOUBLE_MAT2x3: - case GL_DOUBLE_MAT2x4: - case GL_DOUBLE_MAT3: - case GL_DOUBLE_MAT3x2: - case GL_DOUBLE_MAT3x4: - case GL_DOUBLE_MAT4: - case GL_DOUBLE_MAT4x2: - case GL_DOUBLE_MAT4x3: - return true; - default: - return false; - } - } - - /** - * The name that was supplied to glTransformFeedbackVaryings. Used for - * error reporting and glGetTransformFeedbackVarying(). - */ - const char *orig_name; - - /** - * The name of the variable, parsed from orig_name. - */ - const char *var_name; - - /** - * True if the declaration in orig_name represents an array. - */ - bool is_subscripted; - - /** - * If is_subscripted is true, the subscript that was specified in orig_name. - */ - unsigned array_subscript; - - /** - * Non-zero if the variable is gl_ClipDistance, glTessLevelOuter or - * gl_TessLevelInner and the driver lowers it to gl_*MESA. - */ - enum { - none, - clip_distance, - tess_level_outer, - tess_level_inner, - } lowered_builtin_array_variable; - - /** - * The vertex shader output location that the linker assigned for this - * variable. -1 if a location hasn't been assigned yet. - */ - int location; - - /** - * If non-zero, then this variable may be packed along with other variables - * into a single varying slot, so this offset should be applied when - * accessing components. For example, an offset of 1 means that the x - * component of this variable is actually stored in component y of the - * location specified by \c location. - * - * Only valid if location != -1. - */ - unsigned location_frac; - - /** - * If location != -1, the number of vector elements in this variable, or 1 - * if this variable is a scalar. - */ - unsigned vector_elements; - - /** - * If location != -1, the number of matrix columns in this variable, or 1 - * if this variable is not a matrix. - */ - unsigned matrix_columns; - - /** Type of the varying returned by glGetTransformFeedbackVarying() */ - GLenum type; - - /** - * If location != -1, the size that should be returned by - * glGetTransformFeedbackVarying(). - */ - unsigned size; - - /** - * How many components to skip. If non-zero, this is - * gl_SkipComponents{1,2,3,4} from ARB_transform_feedback3. - */ - unsigned skip_components; - - /** - * Whether this is gl_NextBuffer from ARB_transform_feedback3. - */ - bool next_buffer_separator; - - /** - * If find_candidate() has been called, pointer to the tfeedback_candidate - * data structure that was found. Otherwise NULL. - */ - const tfeedback_candidate *matched_candidate; - - /** - * StreamId assigned to this varying (defaults to 0). Can only be set to - * values other than 0 in geometry shaders that use the stream layout - * modifier. Accepted values must be in the range [0, MAX_VERTEX_STREAMS-1]. 
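num_components() above multiplies out the captured variable's shape, with doubles counting twice because each double occupies two float components. The same arithmetic as a self-contained sketch:

    /* Sketch: float components captured for one non-lowered variable. */
    static unsigned
    captured_components(unsigned vector_elements, unsigned matrix_columns,
                        unsigned array_size, bool is_double)
    {
       return vector_elements * matrix_columns * array_size *
              (is_double ? 2 : 1);
    }

    /* e.g. dmat3x4[2]: 4 elements * 3 columns * 2 entries * 2 = 48. */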
- */ - unsigned stream_id; -}; - - -void -cross_validate_outputs_to_inputs(struct gl_shader_program *prog, - gl_shader *producer, gl_shader *consumer); - -bool -parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, - const void *mem_ctx, unsigned num_names, - char **varying_names, tfeedback_decl *decls); - -void -remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object, - gl_shader *sh, - enum ir_variable_mode mode); - -bool -store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, - unsigned num_tfeedback_decls, - tfeedback_decl *tfeedback_decls); - -bool -assign_varying_locations(struct gl_context *ctx, - void *mem_ctx, - struct gl_shader_program *prog, - gl_shader *producer, gl_shader *consumer, - unsigned num_tfeedback_decls, - tfeedback_decl *tfeedback_decls); - -bool -check_against_output_limit(struct gl_context *ctx, - struct gl_shader_program *prog, - gl_shader *producer); - -bool -check_against_input_limit(struct gl_context *ctx, - struct gl_shader_program *prog, - gl_shader *consumer); - -#endif /* GLSL_LINK_VARYINGS_H */ diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp deleted file mode 100644 index 6657777d74c..00000000000 --- a/src/glsl/linker.cpp +++ /dev/null @@ -1,4676 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file linker.cpp - * GLSL linker implementation - * - * Given a set of shaders that are to be linked to generate a final program, - * there are three distinct stages. - * - * In the first stage shaders are partitioned into groups based on the shader - * type. All shaders of a particular type (e.g., vertex shaders) are linked - * together. - * - * - Undefined references in each shader are resolve to definitions in - * another shader. - * - Types and qualifiers of uniforms, outputs, and global variables defined - * in multiple shaders with the same name are verified to be the same. - * - Initializers for uniforms and global variables defined - * in multiple shaders with the same name are verified to be the same. - * - * The result, in the terminology of the GLSL spec, is a set of shader - * executables for each processing unit. - * - * After the first stage is complete, a series of semantic checks are performed - * on each of the shader executables. - * - * - Each shader executable must define a \c main function. 
- * - Each vertex shader executable must write to \c gl_Position.
- * - Each fragment shader executable must write to either \c gl_FragData or
- *   \c gl_FragColor.
- *
- * In the final stage individual shader executables are linked to create a
- * complete executable.
- *
- *   - Types of uniforms defined in multiple shader stages with the same name
- *     are verified to be the same.
- *   - Initializers for uniforms defined in multiple shader stages with the
- *     same name are verified to be the same.
- *   - Types and qualifiers of outputs defined in one stage are verified to
- *     be the same as the types and qualifiers of inputs defined with the
- *     same name in a later stage.
- *
- * \author Ian Romanick <ian.d.romanick@intel.com>
- */
-
-#include <ctype.h>
-#include "util/strndup.h"
-#include "main/core.h"
-#include "glsl_symbol_table.h"
-#include "glsl_parser_extras.h"
-#include "ir.h"
-#include "program.h"
-#include "program/hash_table.h"
-#include "linker.h"
-#include "link_varyings.h"
-#include "ir_optimization.h"
-#include "ir_rvalue_visitor.h"
-#include "ir_uniform.h"
-
-#include "main/shaderobj.h"
-#include "main/enums.h"
-
-
-void linker_error(gl_shader_program *, const char *, ...);
-
-namespace {
-
-/**
- * Visitor that determines whether or not a variable is ever written.
- */
-class find_assignment_visitor : public ir_hierarchical_visitor {
-public:
-   find_assignment_visitor(const char *name)
-      : name(name), found(false)
-   {
-      /* empty */
-   }
-
-   virtual ir_visitor_status visit_enter(ir_assignment *ir)
-   {
-      ir_variable *const var = ir->lhs->variable_referenced();
-
-      if (strcmp(name, var->name) == 0) {
-         found = true;
-         return visit_stop;
-      }
-
-      return visit_continue_with_parent;
-   }
-
-   virtual ir_visitor_status visit_enter(ir_call *ir)
-   {
-      foreach_two_lists(formal_node, &ir->callee->parameters,
-                        actual_node, &ir->actual_parameters) {
-         ir_rvalue *param_rval = (ir_rvalue *) actual_node;
-         ir_variable *sig_param = (ir_variable *) formal_node;
-
-         if (sig_param->data.mode == ir_var_function_out ||
-             sig_param->data.mode == ir_var_function_inout) {
-            ir_variable *var = param_rval->variable_referenced();
-            if (var && strcmp(name, var->name) == 0) {
-               found = true;
-               return visit_stop;
-            }
-         }
-      }
-
-      if (ir->return_deref != NULL) {
-         ir_variable *const var = ir->return_deref->variable_referenced();
-
-         if (strcmp(name, var->name) == 0) {
-            found = true;
-            return visit_stop;
-         }
-      }
-
-      return visit_continue_with_parent;
-   }
-
-   bool variable_found()
-   {
-      return found;
-   }
-
-private:
-   const char *name;       /**< Find writes to a variable with this name. */
-   bool found;             /**< Was a write to the variable found? */
-};
-
-
-/**
- * Visitor that determines whether or not a variable is ever read.
- */
-class find_deref_visitor : public ir_hierarchical_visitor {
-public:
-   find_deref_visitor(const char *name)
-      : name(name), found(false)
-   {
-      /* empty */
-   }
-
-   virtual ir_visitor_status visit(ir_dereference_variable *ir)
-   {
-      if (strcmp(this->name, ir->var->name) == 0) {
-         this->found = true;
-         return visit_stop;
-      }
-
-      return visit_continue;
-   }
-
-   bool variable_found() const
-   {
-      return this->found;
-   }
-
-private:
-   const char *name;       /**< Find reads of a variable with this name. */
-   bool found;             /**< Was a read of the variable found?
*/ -}; - - -class geom_array_resize_visitor : public ir_hierarchical_visitor { -public: - unsigned num_vertices; - gl_shader_program *prog; - - geom_array_resize_visitor(unsigned num_vertices, gl_shader_program *prog) - { - this->num_vertices = num_vertices; - this->prog = prog; - } - - virtual ~geom_array_resize_visitor() - { - /* empty */ - } - - virtual ir_visitor_status visit(ir_variable *var) - { - if (!var->type->is_array() || var->data.mode != ir_var_shader_in) - return visit_continue; - - unsigned size = var->type->length; - - /* Generate a link error if the shader has declared this array with an - * incorrect size. - */ - if (size && size != this->num_vertices) { - linker_error(this->prog, "size of array %s declared as %u, " - "but number of input vertices is %u\n", - var->name, size, this->num_vertices); - return visit_continue; - } - - /* Generate a link error if the shader attempts to access an input - * array using an index too large for its actual size assigned at link - * time. - */ - if (var->data.max_array_access >= this->num_vertices) { - linker_error(this->prog, "geometry shader accesses element %i of " - "%s, but only %i input vertices\n", - var->data.max_array_access, var->name, this->num_vertices); - return visit_continue; - } - - var->type = glsl_type::get_array_instance(var->type->fields.array, - this->num_vertices); - var->data.max_array_access = this->num_vertices - 1; - - return visit_continue; - } - - /* Dereferences of input variables need to be updated so that their type - * matches the newly assigned type of the variable they are accessing. */ - virtual ir_visitor_status visit(ir_dereference_variable *ir) - { - ir->type = ir->var->type; - return visit_continue; - } - - /* Dereferences of 2D input arrays need to be updated so that their type - * matches the newly assigned type of the array they are accessing. */ - virtual ir_visitor_status visit_leave(ir_dereference_array *ir) - { - const glsl_type *const vt = ir->array->type; - if (vt->is_array()) - ir->type = vt->fields.array; - return visit_continue; - } -}; - -class tess_eval_array_resize_visitor : public ir_hierarchical_visitor { -public: - unsigned num_vertices; - gl_shader_program *prog; - - tess_eval_array_resize_visitor(unsigned num_vertices, gl_shader_program *prog) - { - this->num_vertices = num_vertices; - this->prog = prog; - } - - virtual ~tess_eval_array_resize_visitor() - { - /* empty */ - } - - virtual ir_visitor_status visit(ir_variable *var) - { - if (!var->type->is_array() || var->data.mode != ir_var_shader_in || var->data.patch) - return visit_continue; - - var->type = glsl_type::get_array_instance(var->type->fields.array, - this->num_vertices); - var->data.max_array_access = this->num_vertices - 1; - - return visit_continue; - } - - /* Dereferences of input variables need to be updated so that their type - * matches the newly assigned type of the variable they are accessing. */ - virtual ir_visitor_status visit(ir_dereference_variable *ir) - { - ir->type = ir->var->type; - return visit_continue; - } - - /* Dereferences of 2D input arrays need to be updated so that their type - * matches the newly assigned type of the array they are accessing. 
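geom_array_resize_visitor accepts an input array only if its declared size is absent or equal to the input primitive's vertex count, and only if no access indexes past that count. That validation, reduced to a hedged standalone predicate (geom_array_size_ok is hypothetical, not Mesa API):

    /* Sketch: may a geometry input array declared with `declared_size`
     * (0 = unsized) be resized to `num_vertices`? */
    static bool
    geom_array_size_ok(unsigned declared_size, unsigned max_array_access,
                       unsigned num_vertices)
    {
       if (declared_size && declared_size != num_vertices)
          return false;            /* wrong explicit size: link error */
       if (max_array_access >= num_vertices)
          return false;            /* indexed past the real size      */
       return true;
    }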
*/ - virtual ir_visitor_status visit_leave(ir_dereference_array *ir) - { - const glsl_type *const vt = ir->array->type; - if (vt->is_array()) - ir->type = vt->fields.array; - return visit_continue; - } -}; - -class barrier_use_visitor : public ir_hierarchical_visitor { -public: - barrier_use_visitor(gl_shader_program *prog) - : prog(prog), in_main(false), after_return(false), control_flow(0) - { - } - - virtual ~barrier_use_visitor() - { - /* empty */ - } - - virtual ir_visitor_status visit_enter(ir_function *ir) - { - if (strcmp(ir->name, "main") == 0) - in_main = true; - - return visit_continue; - } - - virtual ir_visitor_status visit_leave(ir_function *) - { - in_main = false; - after_return = false; - return visit_continue; - } - - virtual ir_visitor_status visit_leave(ir_return *) - { - after_return = true; - return visit_continue; - } - - virtual ir_visitor_status visit_enter(ir_if *) - { - ++control_flow; - return visit_continue; - } - - virtual ir_visitor_status visit_leave(ir_if *) - { - --control_flow; - return visit_continue; - } - - virtual ir_visitor_status visit_enter(ir_loop *) - { - ++control_flow; - return visit_continue; - } - - virtual ir_visitor_status visit_leave(ir_loop *) - { - --control_flow; - return visit_continue; - } - - /* FINISHME: `switch` is not expressed at the IR level -- it's already - * been lowered to a mess of `if`s. We'll correctly disallow any use of - * barrier() in a conditional path within the switch, but not in a path - * which is always hit. - */ - - virtual ir_visitor_status visit_enter(ir_call *ir) - { - if (ir->use_builtin && strcmp(ir->callee_name(), "barrier") == 0) { - /* Use of barrier(); determine if it is legal: */ - if (!in_main) { - linker_error(prog, "Builtin barrier() may only be used in main"); - return visit_stop; - } - - if (after_return) { - linker_error(prog, "Builtin barrier() may not be used after return"); - return visit_stop; - } - - if (control_flow != 0) { - linker_error(prog, "Builtin barrier() may not be used inside control flow"); - return visit_stop; - } - } - return visit_continue; - } - -private: - gl_shader_program *prog; - bool in_main, after_return; - int control_flow; -}; - -/** - * Visitor that determines the highest stream id to which a (geometry) shader - * emits vertices. It also checks whether End{Stream}Primitive is ever called. 
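barrier_use_visitor tracks three pieces of state, and a barrier() call is legal only when all three agree. The decision reduced to a predicate returning the applicable diagnostic (a sketch mirroring the visitor's fields):

    /* Sketch: is a barrier() call legal at this point in the walk?
     * Returns the error message, or nullptr when the call is legal. */
    static const char *
    barrier_error(bool in_main, bool after_return, int control_flow_depth)
    {
       if (!in_main)
          return "barrier() may only be used in main";
       if (after_return)
          return "barrier() may not be used after return";
       if (control_flow_depth != 0)
          return "barrier() may not be used inside control flow";
       return nullptr;
    }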
- */ -class find_emit_vertex_visitor : public ir_hierarchical_visitor { -public: - find_emit_vertex_visitor(int max_allowed) - : max_stream_allowed(max_allowed), - invalid_stream_id(0), - invalid_stream_id_from_emit_vertex(false), - end_primitive_found(false), - uses_non_zero_stream(false) - { - /* empty */ - } - - virtual ir_visitor_status visit_leave(ir_emit_vertex *ir) - { - int stream_id = ir->stream_id(); - - if (stream_id < 0) { - invalid_stream_id = stream_id; - invalid_stream_id_from_emit_vertex = true; - return visit_stop; - } - - if (stream_id > max_stream_allowed) { - invalid_stream_id = stream_id; - invalid_stream_id_from_emit_vertex = true; - return visit_stop; - } - - if (stream_id != 0) - uses_non_zero_stream = true; - - return visit_continue; - } - - virtual ir_visitor_status visit_leave(ir_end_primitive *ir) - { - end_primitive_found = true; - - int stream_id = ir->stream_id(); - - if (stream_id < 0) { - invalid_stream_id = stream_id; - invalid_stream_id_from_emit_vertex = false; - return visit_stop; - } - - if (stream_id > max_stream_allowed) { - invalid_stream_id = stream_id; - invalid_stream_id_from_emit_vertex = false; - return visit_stop; - } - - if (stream_id != 0) - uses_non_zero_stream = true; - - return visit_continue; - } - - bool error() - { - return invalid_stream_id != 0; - } - - const char *error_func() - { - return invalid_stream_id_from_emit_vertex ? - "EmitStreamVertex" : "EndStreamPrimitive"; - } - - int error_stream() - { - return invalid_stream_id; - } - - bool uses_streams() - { - return uses_non_zero_stream; - } - - bool uses_end_primitive() - { - return end_primitive_found; - } - -private: - int max_stream_allowed; - int invalid_stream_id; - bool invalid_stream_id_from_emit_vertex; - bool end_primitive_found; - bool uses_non_zero_stream; -}; - -/* Class that finds array derefs and check if indexes are dynamic. */ -class dynamic_sampler_array_indexing_visitor : public ir_hierarchical_visitor -{ -public: - dynamic_sampler_array_indexing_visitor() : - dynamic_sampler_array_indexing(false) - { - } - - ir_visitor_status visit_enter(ir_dereference_array *ir) - { - if (!ir->variable_referenced()) - return visit_continue; - - if (!ir->variable_referenced()->type->contains_sampler()) - return visit_continue; - - if (!ir->array_index->constant_expression_value()) { - dynamic_sampler_array_indexing = true; - return visit_stop; - } - return visit_continue; - } - - bool uses_dynamic_sampler_array_indexing() - { - return dynamic_sampler_array_indexing; - } - -private: - bool dynamic_sampler_array_indexing; -}; - -} /* anonymous namespace */ - -void -linker_error(gl_shader_program *prog, const char *fmt, ...) -{ - va_list ap; - - ralloc_strcat(&prog->InfoLog, "error: "); - va_start(ap, fmt); - ralloc_vasprintf_append(&prog->InfoLog, fmt, ap); - va_end(ap); - - prog->LinkStatus = false; -} - - -void -linker_warning(gl_shader_program *prog, const char *fmt, ...) -{ - va_list ap; - - ralloc_strcat(&prog->InfoLog, "warning: "); - va_start(ap, fmt); - ralloc_vasprintf_append(&prog->InfoLog, fmt, ap); - va_end(ap); - -} - - -/** - * Given a string identifying a program resource, break it into a base name - * and an optional array index in square brackets. - * - * If an array index is present, \c out_base_name_end is set to point to the - * "[" that precedes the array index, and the array index itself is returned - * as a long. 
- *
- * If no array index is present (or if the array index is negative or
- * mal-formed), \c out_base_name_end is set to point to the null terminator
- * at the end of the input string, and -1 is returned.
- *
- * Only the final array index is parsed; if the string contains other array
- * indices (or structure field accesses), they are left in the base name.
- *
- * No attempt is made to check that the base name is properly formed;
- * typically the caller will look up the base name in a hash table, so
- * ill-formed base names simply turn into hash table lookup failures.
- */
-long
-parse_program_resource_name(const GLchar *name,
-                            const GLchar **out_base_name_end)
-{
-   /* Section 7.3.1 ("Program Interfaces") of the OpenGL 4.3 spec says:
-    *
-    *     "When an integer array element or block instance number is part of
-    *     the name string, it will be specified in decimal form without a "+"
-    *     or "-" sign or any extra leading zeroes. Additionally, the name
-    *     string will not include white space anywhere in the string."
-    */
-
-   const size_t len = strlen(name);
-   *out_base_name_end = name + len;
-
-   if (len == 0 || name[len-1] != ']')
-      return -1;
-
-   /* Walk backwards over the string looking for a non-digit character.  This
-    * had better be the opening bracket for an array index.
-    *
-    * Initially, i specifies the location of the ']'.  Since the string may
-    * contain only the ']' character, walk backwards very carefully.
-    */
-   unsigned i;
-   for (i = len - 1; (i > 0) && isdigit(name[i-1]); --i)
-      /* empty */ ;
-
-   if ((i == 0) || name[i-1] != '[')
-      return -1;
-
-   long array_index = strtol(&name[i], NULL, 10);
-   if (array_index < 0)
-      return -1;
-
-   /* Check for leading zero */
-   if (name[i] == '0' && name[i+1] != ']')
-      return -1;
-
-   *out_base_name_end = name + (i - 1);
-   return array_index;
-}
-
-
-void
-link_invalidate_variable_locations(exec_list *ir)
-{
-   foreach_in_list(ir_instruction, node, ir) {
-      ir_variable *const var = node->as_variable();
-
-      if (var == NULL)
-         continue;
-
-      /* Only assign locations for variables that lack an explicit location.
-       * Explicit locations are set for all built-in variables, generic
-       * vertex shader inputs (via layout(location=...)), and generic
-       * fragment shader outputs (also via layout(location=...)).
-       */
-      if (!var->data.explicit_location) {
-         var->data.location = -1;
-         var->data.location_frac = 0;
-      }
-
-      /* ir_variable::is_unmatched_generic_inout is used by the linker while
-       * connecting outputs from one stage to inputs of the next stage.
-       */
-      if (var->data.explicit_location &&
-          var->data.location < VARYING_SLOT_VAR0) {
-         var->data.is_unmatched_generic_inout = 0;
-      } else {
-         var->data.is_unmatched_generic_inout = 1;
-      }
-   }
-}
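parse_program_resource_name() is easiest to pin down with concrete inputs. A usage sketch; the expected results follow the contract documented above:

    /* Usage sketch for parse_program_resource_name(). */
    static void
    parse_examples(void)
    {
       const GLchar *end;

       long idx = parse_program_resource_name("palette[12]", &end);
       /* idx == 12, end points at the '[' preceding the index */

       idx = parse_program_resource_name("palette", &end);
       /* idx == -1, end points at the terminating NUL */

       idx = parse_program_resource_name("palette[05]", &end);
       /* idx == -1: a leading zero violates the OpenGL 4.3 name rules */
       (void) idx;
    }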
-
-
-/**
- * Set clip_distance_array_size based on the given shader.
- *
- * Also check for errors based on incorrect usage of gl_ClipVertex and
- * gl_ClipDistance.
- *
- * If an error is found, it is reported through linker_error().
- */
-static void
-analyze_clip_usage(struct gl_shader_program *prog,
-                   struct gl_shader *shader,
-                   GLuint *clip_distance_array_size)
-{
-   *clip_distance_array_size = 0;
-
-   if (!prog->IsES && prog->Version >= 130) {
-      /* From section 7.1 (Vertex Shader Special Variables) of the
-       * GLSL 1.30 spec:
-       *
-       *     "It is an error for a shader to statically write both
-       *      gl_ClipVertex and gl_ClipDistance."
-       *
-       * This does not apply to GLSL ES shaders, since GLSL ES defines
-       * neither gl_ClipVertex nor gl_ClipDistance.
-       */
-      find_assignment_visitor clip_vertex("gl_ClipVertex");
-      find_assignment_visitor clip_distance("gl_ClipDistance");
-
-      clip_vertex.run(shader->ir);
-      clip_distance.run(shader->ir);
-      if (clip_vertex.variable_found() && clip_distance.variable_found()) {
-         linker_error(prog, "%s shader writes to both `gl_ClipVertex' "
-                      "and `gl_ClipDistance'\n",
-                      _mesa_shader_stage_to_string(shader->Stage));
-         return;
-      }
-
-      if (clip_distance.variable_found()) {
-         ir_variable *clip_distance_var =
-            shader->symbols->get_variable("gl_ClipDistance");
-
-         assert(clip_distance_var);
-         *clip_distance_array_size = clip_distance_var->type->length;
-      }
-   }
-}
-
-
-/**
- * Verify that a vertex shader executable meets all semantic requirements.
- *
- * Also sets prog->Vert.ClipDistanceArraySize as a side effect.
- *
- * \param shader  Vertex shader executable to be verified
- */
-void
-validate_vertex_shader_executable(struct gl_shader_program *prog,
-                                  struct gl_shader *shader)
-{
-   if (shader == NULL)
-      return;
-
-   /* From the GLSL 1.10 spec, page 48:
-    *
-    *     "The variable gl_Position is available only in the vertex
-    *      language and is intended for writing the homogeneous vertex
-    *      position. All executions of a well-formed vertex shader
-    *      executable must write a value into this variable. [...] The
-    *      variable gl_Position is available only in the vertex
-    *      language and is intended for writing the homogeneous vertex
-    *      position. All executions of a well-formed vertex shader
-    *      executable must write a value into this variable."
-    *
-    * while in GLSL 1.40 this text is changed to:
-    *
-    *     "The variable gl_Position is available only in the vertex
-    *      language and is intended for writing the homogeneous vertex
-    *      position. It can be written at any time during shader
-    *      execution. It may also be read back by a vertex shader
-    *      after being written. This value will be used by primitive
-    *      assembly, clipping, culling, and other fixed functionality
-    *      operations, if present, that operate on primitives after
-    *      vertex processing has occurred. Its value is undefined if
-    *      the vertex shader executable does not write gl_Position."
-    *
-    * All GLSL ES Versions are similar to GLSL 1.40--failing to write to
-    * gl_Position is not an error.
-    */
-   if (prog->Version < (prog->IsES ? 300 : 140)) {
-      find_assignment_visitor find("gl_Position");
-      find.run(shader->ir);
-      if (!find.variable_found()) {
-         if (prog->IsES) {
-            linker_warning(prog,
-                           "vertex shader does not write to `gl_Position'. "
-                           "Its value is undefined.\n");
-         } else {
-            linker_error(prog,
-                         "vertex shader does not write to `gl_Position'.\n");
\n"); - } - return; - } - } - - analyze_clip_usage(prog, shader, &prog->Vert.ClipDistanceArraySize); -} - -void -validate_tess_eval_shader_executable(struct gl_shader_program *prog, - struct gl_shader *shader) -{ - if (shader == NULL) - return; - - analyze_clip_usage(prog, shader, &prog->TessEval.ClipDistanceArraySize); -} - - -/** - * Verify that a fragment shader executable meets all semantic requirements - * - * \param shader Fragment shader executable to be verified - */ -void -validate_fragment_shader_executable(struct gl_shader_program *prog, - struct gl_shader *shader) -{ - if (shader == NULL) - return; - - find_assignment_visitor frag_color("gl_FragColor"); - find_assignment_visitor frag_data("gl_FragData"); - - frag_color.run(shader->ir); - frag_data.run(shader->ir); - - if (frag_color.variable_found() && frag_data.variable_found()) { - linker_error(prog, "fragment shader writes to both " - "`gl_FragColor' and `gl_FragData'\n"); - } -} - -/** - * Verify that a geometry shader executable meets all semantic requirements - * - * Also sets prog->Geom.VerticesIn, and prog->Geom.ClipDistanceArraySize as - * a side effect. - * - * \param shader Geometry shader executable to be verified - */ -void -validate_geometry_shader_executable(struct gl_shader_program *prog, - struct gl_shader *shader) -{ - if (shader == NULL) - return; - - unsigned num_vertices = vertices_per_prim(prog->Geom.InputType); - prog->Geom.VerticesIn = num_vertices; - - analyze_clip_usage(prog, shader, &prog->Geom.ClipDistanceArraySize); -} - -/** - * Check if geometry shaders emit to non-zero streams and do corresponding - * validations. - */ -static void -validate_geometry_shader_emissions(struct gl_context *ctx, - struct gl_shader_program *prog) -{ - if (prog->_LinkedShaders[MESA_SHADER_GEOMETRY] != NULL) { - find_emit_vertex_visitor emit_vertex(ctx->Const.MaxVertexStreams - 1); - emit_vertex.run(prog->_LinkedShaders[MESA_SHADER_GEOMETRY]->ir); - if (emit_vertex.error()) { - linker_error(prog, "Invalid call %s(%d). Accepted values for the " - "stream parameter are in the range [0, %d].\n", - emit_vertex.error_func(), - emit_vertex.error_stream(), - ctx->Const.MaxVertexStreams - 1); - } - prog->Geom.UsesStreams = emit_vertex.uses_streams(); - prog->Geom.UsesEndPrimitive = emit_vertex.uses_end_primitive(); - - /* From the ARB_gpu_shader5 spec: - * - * "Multiple vertex streams are supported only if the output primitive - * type is declared to be "points". A program will fail to link if it - * contains a geometry shader calling EmitStreamVertex() or - * EndStreamPrimitive() if its output primitive type is not "points". - * - * However, in the same spec: - * - * "The function EmitVertex() is equivalent to calling EmitStreamVertex() - * with set to zero." - * - * And: - * - * "The function EndPrimitive() is equivalent to calling - * EndStreamPrimitive() with set to zero." - * - * Since we can call EmitVertex() and EndPrimitive() when we output - * primitives other than points, calling EmitStreamVertex(0) or - * EmitEndPrimitive(0) should not produce errors. This it also what Nvidia - * does. Currently we only set prog->Geom.UsesStreams to TRUE when - * EmitStreamVertex() or EmitEndPrimitive() are called with a non-zero - * stream. 
- */ - if (prog->Geom.UsesStreams && prog->Geom.OutputType != GL_POINTS) { - linker_error(prog, "EmitStreamVertex(n) and EndStreamPrimitive(n) " - "with n>0 requires point output\n"); - } - } -} - -bool -validate_intrastage_arrays(struct gl_shader_program *prog, - ir_variable *const var, - ir_variable *const existing) -{ - /* Consider the types to be "the same" if both types are arrays - * of the same type and one of the arrays is implicitly sized. - * In addition, set the type of the linked variable to the - * explicitly sized array. - */ - if (var->type->is_array() && existing->type->is_array()) { - if ((var->type->fields.array == existing->type->fields.array) && - ((var->type->length == 0)|| (existing->type->length == 0))) { - if (var->type->length != 0) { - if (var->type->length <= existing->data.max_array_access) { - linker_error(prog, "%s `%s' declared as type " - "`%s' but outermost dimension has an index" - " of `%i'\n", - mode_string(var), - var->name, var->type->name, - existing->data.max_array_access); - } - existing->type = var->type; - return true; - } else if (existing->type->length != 0) { - if(existing->type->length <= var->data.max_array_access && - !existing->data.from_ssbo_unsized_array) { - linker_error(prog, "%s `%s' declared as type " - "`%s' but outermost dimension has an index" - " of `%i'\n", - mode_string(var), - var->name, existing->type->name, - var->data.max_array_access); - } - return true; - } - } else { - /* The arrays of structs could have different glsl_type pointers but - * they are actually the same type. Use record_compare() to check that. - */ - if (existing->type->fields.array->is_record() && - var->type->fields.array->is_record() && - existing->type->fields.array->record_compare(var->type->fields.array)) - return true; - } - } - return false; -} - - -/** - * Perform validation of global variables used across multiple shaders - */ -void -cross_validate_globals(struct gl_shader_program *prog, - struct gl_shader **shader_list, - unsigned num_shaders, - bool uniforms_only) -{ - /* Examine all of the uniforms in all of the shaders and cross validate - * them. - */ - glsl_symbol_table variables; - for (unsigned i = 0; i < num_shaders; i++) { - if (shader_list[i] == NULL) - continue; - - foreach_in_list(ir_instruction, node, shader_list[i]->ir) { - ir_variable *const var = node->as_variable(); - - if (var == NULL) - continue; - - if (uniforms_only && (var->data.mode != ir_var_uniform && var->data.mode != ir_var_shader_storage)) - continue; - - /* don't cross validate subroutine uniforms */ - if (var->type->contains_subroutine()) - continue; - - /* Don't cross validate temporaries that are at global scope. These - * will eventually get pulled into the shaders 'main'. - */ - if (var->data.mode == ir_var_temporary) - continue; - - /* If a global with this name has already been seen, verify that the - * new instance has the same type. In addition, if the globals have - * initializers, the values of the initializers must be the same. - */ - ir_variable *const existing = variables.get_variable(var->name); - if (existing != NULL) { - /* Check if types match. Interface blocks have some special - * rules so we handle those elsewhere. 
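- *
- * For example (illustrative, not from the spec): "uniform vec4 color;"
- * in one shader and "uniform vec3 color;" in another must fail to link
- * here, while matching declarations are merged into one linked variable.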
- */
- if (var->type != existing->type &&
- !var->is_interface_instance()) {
- if (!validate_intrastage_arrays(prog, var, existing)) {
- if (var->type->is_record() && existing->type->is_record()
- && existing->type->record_compare(var->type)) {
- existing->type = var->type;
- } else {
- /* If it is an unsized array in a Shader Storage Block,
- * two different shaders can access different elements.
- * Because of that, they might be converted to different
- * sized arrays; in that case, check that they are
- * compatible, ignoring the array size.
- */
- if (!(var->data.mode == ir_var_shader_storage &&
- var->data.from_ssbo_unsized_array &&
- existing->data.mode == ir_var_shader_storage &&
- existing->data.from_ssbo_unsized_array &&
- var->type->gl_type == existing->type->gl_type)) {
- linker_error(prog, "%s `%s' declared as type "
- "`%s' and type `%s'\n",
- mode_string(var),
- var->name, var->type->name,
- existing->type->name);
- return;
- }
- }
- }
- }
-
- if (var->data.explicit_location) {
- if (existing->data.explicit_location
- && (var->data.location != existing->data.location)) {
- linker_error(prog, "explicit locations for %s "
- "`%s' have differing values\n",
- mode_string(var), var->name);
- return;
- }
-
- existing->data.location = var->data.location;
- existing->data.explicit_location = true;
- } else {
- /* Check if a uniform with an implicit location was marked explicit
- * by an earlier shader stage. If so, mark it explicit in this stage
- * too to make sure later processing does not treat it as an
- * implicit one.
- */
- if (existing->data.explicit_location) {
- var->data.location = existing->data.location;
- var->data.explicit_location = true;
- }
- }
-
- /* From the GLSL 4.20 specification:
- * "A link error will result if two compilation units in a program
- * specify different integer-constant bindings for the same
- * opaque-uniform name. However, it is not an error to specify a
- * binding on some but not all declarations for the same name"
- */
- if (var->data.explicit_binding) {
- if (existing->data.explicit_binding &&
- var->data.binding != existing->data.binding) {
- linker_error(prog, "explicit bindings for %s "
- "`%s' have differing values\n",
- mode_string(var), var->name);
- return;
- }
-
- existing->data.binding = var->data.binding;
- existing->data.explicit_binding = true;
- }
-
- if (var->type->contains_atomic() &&
- var->data.offset != existing->data.offset) {
- linker_error(prog, "offset specifications for %s "
- "`%s' have differing values\n",
- mode_string(var), var->name);
- return;
- }
-
- /* Validate layout qualifiers for gl_FragDepth.
- *
- * From the AMD/ARB_conservative_depth specs:
- *
- * "If gl_FragDepth is redeclared in any fragment shader in a
- * program, it must be redeclared in all fragment shaders in
- * that program that have static assignments to
- * gl_FragDepth. All redeclarations of gl_FragDepth in all
- * fragment shaders in a single program must have the same set
- * of qualifiers."
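- *
- * Illustrative example (not from the spec): if one fragment shader
- * redeclares "layout (depth_greater) out float gl_FragDepth;" and a
- * second fragment shader that assigns gl_FragDepth redeclares it with
- * "layout (depth_any)", the program must fail to link.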
- */ - if (strcmp(var->name, "gl_FragDepth") == 0) { - bool layout_declared = var->data.depth_layout != ir_depth_layout_none; - bool layout_differs = - var->data.depth_layout != existing->data.depth_layout; - - if (layout_declared && layout_differs) { - linker_error(prog, - "All redeclarations of gl_FragDepth in all " - "fragment shaders in a single program must have " - "the same set of qualifiers.\n"); - } - - if (var->data.used && layout_differs) { - linker_error(prog, - "If gl_FragDepth is redeclared with a layout " - "qualifier in any fragment shader, it must be " - "redeclared with the same layout qualifier in " - "all fragment shaders that have assignments to " - "gl_FragDepth\n"); - } - } - - /* Page 35 (page 41 of the PDF) of the GLSL 4.20 spec says: - * - * "If a shared global has multiple initializers, the - * initializers must all be constant expressions, and they - * must all have the same value. Otherwise, a link error will - * result. (A shared global having only one initializer does - * not require that initializer to be a constant expression.)" - * - * Previous to 4.20 the GLSL spec simply said that initializers - * must have the same value. In this case of non-constant - * initializers, this was impossible to determine. As a result, - * no vendor actually implemented that behavior. The 4.20 - * behavior matches the implemented behavior of at least one other - * vendor, so we'll implement that for all GLSL versions. - */ - if (var->constant_initializer != NULL) { - if (existing->constant_initializer != NULL) { - if (!var->constant_initializer->has_value(existing->constant_initializer)) { - linker_error(prog, "initializers for %s " - "`%s' have differing values\n", - mode_string(var), var->name); - return; - } - } else { - /* If the first-seen instance of a particular uniform did not - * have an initializer but a later instance does, copy the - * initializer to the version stored in the symbol table. - */ - /* FINISHME: This is wrong. The constant_value field should - * FINISHME: not be modified! Imagine a case where a shader - * FINISHME: without an initializer is linked in two different - * FINISHME: programs with shaders that have differing - * FINISHME: initializers. Linking with the first will - * FINISHME: modify the shader, and linking with the second - * FINISHME: will fail. - */ - existing->constant_initializer = - var->constant_initializer->clone(ralloc_parent(existing), - NULL); - } - } - - if (var->data.has_initializer) { - if (existing->data.has_initializer - && (var->constant_initializer == NULL - || existing->constant_initializer == NULL)) { - linker_error(prog, - "shared global variable `%s' has multiple " - "non-constant initializers.\n", - var->name); - return; - } - - /* Some instance had an initializer, so keep track of that. In - * this location, all sorts of initializers (constant or - * otherwise) will propagate the existence to the variable - * stored in the symbol table. 
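- *
- * e.g. (illustrative): "uniform float scale = 2.0;" in one shader and
- * a bare "uniform float scale;" in another link fine, and the linked
- * variable keeps the initializer; "= 2.0" and "= 3.0" must fail.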
- */
- existing->data.has_initializer = true;
- }
-
- if (existing->data.invariant != var->data.invariant) {
- linker_error(prog, "declarations for %s `%s' have "
- "mismatching invariant qualifiers\n",
- mode_string(var), var->name);
- return;
- }
- if (existing->data.centroid != var->data.centroid) {
- linker_error(prog, "declarations for %s `%s' have "
- "mismatching centroid qualifiers\n",
- mode_string(var), var->name);
- return;
- }
- if (existing->data.sample != var->data.sample) {
- linker_error(prog, "declarations for %s `%s' have "
- "mismatching sample qualifiers\n",
- mode_string(var), var->name);
- return;
- }
- if (existing->data.image_format != var->data.image_format) {
- linker_error(prog, "declarations for %s `%s' have "
- "mismatching image format qualifiers\n",
- mode_string(var), var->name);
- return;
- }
- } else
- variables.add_variable(var);
- }
- }
-}
-
-
-/**
- * Perform validation of uniforms used across multiple shader stages
- */
-void
-cross_validate_uniforms(struct gl_shader_program *prog)
-{
- cross_validate_globals(prog, prog->_LinkedShaders,
- MESA_SHADER_STAGES, true);
-}
-
-/**
- * Accumulates the array of prog->BufferInterfaceBlocks and checks that all
- * definitions of blocks agree on their contents.
- */
-static bool
-interstage_cross_validate_uniform_blocks(struct gl_shader_program *prog)
-{
- unsigned max_num_uniform_blocks = 0;
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- if (prog->_LinkedShaders[i])
- max_num_uniform_blocks += prog->_LinkedShaders[i]->NumBufferInterfaceBlocks;
- }
-
- for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
- struct gl_shader *sh = prog->_LinkedShaders[i];
-
- prog->InterfaceBlockStageIndex[i] = ralloc_array(prog, int,
- max_num_uniform_blocks);
- for (unsigned int j = 0; j < max_num_uniform_blocks; j++)
- prog->InterfaceBlockStageIndex[i][j] = -1;
-
- if (sh == NULL)
- continue;
-
- for (unsigned int j = 0; j < sh->NumBufferInterfaceBlocks; j++) {
- int index = link_cross_validate_uniform_block(prog,
- &prog->BufferInterfaceBlocks,
- &prog->NumBufferInterfaceBlocks,
- &sh->BufferInterfaceBlocks[j]);
-
- if (index == -1) {
- linker_error(prog, "uniform block `%s' has mismatching definitions\n",
- sh->BufferInterfaceBlocks[j].Name);
- return false;
- }
-
- prog->InterfaceBlockStageIndex[i][index] = j;
- }
- }
-
- return true;
-}
-
-
-/**
- * Populates a shader's symbol table with all global declarations
- */
-static void
-populate_symbol_table(gl_shader *sh)
-{
- sh->symbols = new(sh) glsl_symbol_table;
-
- foreach_in_list(ir_instruction, inst, sh->ir) {
- ir_variable *var;
- ir_function *func;
-
- if ((func = inst->as_function()) != NULL) {
- sh->symbols->add_function(func);
- } else if ((var = inst->as_variable()) != NULL) {
- if (var->data.mode != ir_var_temporary)
- sh->symbols->add_variable(var);
- }
- }
-}
-
-
-/**
- * Remap variables referenced in an instruction tree
- *
- * This is used when instruction trees are cloned from one shader and placed in
- * another. These trees will contain references to \c ir_variable nodes that
- * do not exist in the target shader. This function finds these \c ir_variable
- * references and replaces the references with matching variables in the target
- * shader.
- *
- * If there is no matching variable in the target shader, a clone of the
- * \c ir_variable is made and added to the target shader. The new variable is
- * added to \b both the instruction stream and the symbol table.
- *
- * \param inst IR tree that is to be processed.
- * \param symbols Symbol table containing global scope symbols in the - * linked shader. - * \param instructions Instruction stream where new variable declarations - * should be added. - */ -void -remap_variables(ir_instruction *inst, struct gl_shader *target, - hash_table *temps) -{ - class remap_visitor : public ir_hierarchical_visitor { - public: - remap_visitor(struct gl_shader *target, - hash_table *temps) - { - this->target = target; - this->symbols = target->symbols; - this->instructions = target->ir; - this->temps = temps; - } - - virtual ir_visitor_status visit(ir_dereference_variable *ir) - { - if (ir->var->data.mode == ir_var_temporary) { - ir_variable *var = (ir_variable *) hash_table_find(temps, ir->var); - - assert(var != NULL); - ir->var = var; - return visit_continue; - } - - ir_variable *const existing = - this->symbols->get_variable(ir->var->name); - if (existing != NULL) - ir->var = existing; - else { - ir_variable *copy = ir->var->clone(this->target, NULL); - - this->symbols->add_variable(copy); - this->instructions->push_head(copy); - ir->var = copy; - } - - return visit_continue; - } - - private: - struct gl_shader *target; - glsl_symbol_table *symbols; - exec_list *instructions; - hash_table *temps; - }; - - remap_visitor v(target, temps); - - inst->accept(&v); -} - - -/** - * Move non-declarations from one instruction stream to another - * - * The intended usage pattern of this function is to pass the pointer to the - * head sentinel of a list (i.e., a pointer to the list cast to an \c exec_node - * pointer) for \c last and \c false for \c make_copies on the first - * call. Successive calls pass the return value of the previous call for - * \c last and \c true for \c make_copies. - * - * \param instructions Source instruction stream - * \param last Instruction after which new instructions should be - * inserted in the target instruction stream - * \param make_copies Flag selecting whether instructions in \c instructions - * should be copied (via \c ir_instruction::clone) into the - * target list or moved. - * - * \return - * The new "last" instruction in the target instruction stream. This pointer - * is suitable for use as the \c last parameter of a later call to this - * function. - */ -exec_node * -move_non_declarations(exec_list *instructions, exec_node *last, - bool make_copies, gl_shader *target) -{ - hash_table *temps = NULL; - - if (make_copies) - temps = hash_table_ctor(0, hash_table_pointer_hash, - hash_table_pointer_compare); - - foreach_in_list_safe(ir_instruction, inst, instructions) { - if (inst->as_function()) - continue; - - ir_variable *var = inst->as_variable(); - if ((var != NULL) && (var->data.mode != ir_var_temporary)) - continue; - - assert(inst->as_assignment() - || inst->as_call() - || inst->as_if() /* for initializers with the ?: operator */ - || ((var != NULL) && (var->data.mode == ir_var_temporary))); - - if (make_copies) { - inst = inst->clone(target, NULL); - - if (var != NULL) - hash_table_insert(temps, inst, var); - else - remap_variables(inst, target, temps); - } else { - inst->remove(); - } - - last->insert_after(inst); - last = inst; - } - - if (make_copies) - hash_table_dtor(temps); - - return last; -} - - -/** - * This class is only used in link_intrastage_shaders() below but declaring - * it inside that function leads to compiler warnings with some versions of - * gcc. 
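- *
- * e.g. (illustrative): given "out vec4 colors[];" whose highest recorded
- * access is colors[2], the visitor below rewrites the variable's type to
- * vec4[3], i.e. max_array_access + 1.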
- */ -class array_sizing_visitor : public ir_hierarchical_visitor { -public: - array_sizing_visitor() - : mem_ctx(ralloc_context(NULL)), - unnamed_interfaces(hash_table_ctor(0, hash_table_pointer_hash, - hash_table_pointer_compare)) - { - } - - ~array_sizing_visitor() - { - hash_table_dtor(this->unnamed_interfaces); - ralloc_free(this->mem_ctx); - } - - virtual ir_visitor_status visit(ir_variable *var) - { - const glsl_type *type_without_array; - fixup_type(&var->type, var->data.max_array_access, - var->data.from_ssbo_unsized_array); - type_without_array = var->type->without_array(); - if (var->type->is_interface()) { - if (interface_contains_unsized_arrays(var->type)) { - const glsl_type *new_type = - resize_interface_members(var->type, - var->get_max_ifc_array_access(), - var->is_in_shader_storage_block()); - var->type = new_type; - var->change_interface_type(new_type); - } - } else if (type_without_array->is_interface()) { - if (interface_contains_unsized_arrays(type_without_array)) { - const glsl_type *new_type = - resize_interface_members(type_without_array, - var->get_max_ifc_array_access(), - var->is_in_shader_storage_block()); - var->change_interface_type(new_type); - var->type = update_interface_members_array(var->type, new_type); - } - } else if (const glsl_type *ifc_type = var->get_interface_type()) { - /* Store a pointer to the variable in the unnamed_interfaces - * hashtable. - */ - ir_variable **interface_vars = (ir_variable **) - hash_table_find(this->unnamed_interfaces, ifc_type); - if (interface_vars == NULL) { - interface_vars = rzalloc_array(mem_ctx, ir_variable *, - ifc_type->length); - hash_table_insert(this->unnamed_interfaces, interface_vars, - ifc_type); - } - unsigned index = ifc_type->field_index(var->name); - assert(index < ifc_type->length); - assert(interface_vars[index] == NULL); - interface_vars[index] = var; - } - return visit_continue; - } - - /** - * For each unnamed interface block that was discovered while running the - * visitor, adjust the interface type to reflect the newly assigned array - * sizes, and fix up the ir_variable nodes to point to the new interface - * type. - */ - void fixup_unnamed_interface_types() - { - hash_table_call_foreach(this->unnamed_interfaces, - fixup_unnamed_interface_type, NULL); - } - -private: - /** - * If the type pointed to by \c type represents an unsized array, replace - * it with a sized array whose size is determined by max_array_access. - */ - static void fixup_type(const glsl_type **type, unsigned max_array_access, - bool from_ssbo_unsized_array) - { - if (!from_ssbo_unsized_array && (*type)->is_unsized_array()) { - *type = glsl_type::get_array_instance((*type)->fields.array, - max_array_access + 1); - assert(*type != NULL); - } - } - - static const glsl_type * - update_interface_members_array(const glsl_type *type, - const glsl_type *new_interface_type) - { - const glsl_type *element_type = type->fields.array; - if (element_type->is_array()) { - const glsl_type *new_array_type = - update_interface_members_array(element_type, new_interface_type); - return glsl_type::get_array_instance(new_array_type, type->length); - } else { - return glsl_type::get_array_instance(new_interface_type, - type->length); - } - } - - /** - * Determine whether the given interface type contains unsized arrays (if - * it doesn't, array_sizing_visitor doesn't need to process it). 
- */
- static bool interface_contains_unsized_arrays(const glsl_type *type)
- {
- for (unsigned i = 0; i < type->length; i++) {
- const glsl_type *elem_type = type->fields.structure[i].type;
- if (elem_type->is_unsized_array())
- return true;
- }
- return false;
- }
-
- /**
- * Create a new interface type based on the given type, with unsized arrays
- * replaced by sized arrays whose size is determined by
- * max_ifc_array_access.
- */
- static const glsl_type *
- resize_interface_members(const glsl_type *type,
- const unsigned *max_ifc_array_access,
- bool is_ssbo)
- {
- unsigned num_fields = type->length;
- glsl_struct_field *fields = new glsl_struct_field[num_fields];
- memcpy(fields, type->fields.structure,
- num_fields * sizeof(*fields));
- for (unsigned i = 0; i < num_fields; i++) {
- /* If the last member of an SSBO is an unsized array, we don't
- * replace it with a sized array.
- */
- if (is_ssbo && i == (num_fields - 1))
- fixup_type(&fields[i].type, max_ifc_array_access[i],
- true);
- else
- fixup_type(&fields[i].type, max_ifc_array_access[i],
- false);
- }
- glsl_interface_packing packing =
- (glsl_interface_packing) type->interface_packing;
- const glsl_type *new_ifc_type =
- glsl_type::get_interface_instance(fields, num_fields,
- packing, type->name);
- delete [] fields;
- return new_ifc_type;
- }
-
- static void fixup_unnamed_interface_type(const void *key, void *data,
- void *)
- {
- const glsl_type *ifc_type = (const glsl_type *) key;
- ir_variable **interface_vars = (ir_variable **) data;
- unsigned num_fields = ifc_type->length;
- glsl_struct_field *fields = new glsl_struct_field[num_fields];
- memcpy(fields, ifc_type->fields.structure,
- num_fields * sizeof(*fields));
- bool interface_type_changed = false;
- for (unsigned i = 0; i < num_fields; i++) {
- if (interface_vars[i] != NULL &&
- fields[i].type != interface_vars[i]->type) {
- fields[i].type = interface_vars[i]->type;
- interface_type_changed = true;
- }
- }
- if (!interface_type_changed) {
- delete [] fields;
- return;
- }
- glsl_interface_packing packing =
- (glsl_interface_packing) ifc_type->interface_packing;
- const glsl_type *new_ifc_type =
- glsl_type::get_interface_instance(fields, num_fields, packing,
- ifc_type->name);
- delete [] fields;
- for (unsigned i = 0; i < num_fields; i++) {
- if (interface_vars[i] != NULL)
- interface_vars[i]->change_interface_type(new_ifc_type);
- }
- }
-
- /**
- * Memory context used to allocate the data in \c unnamed_interfaces.
- */
- void *mem_ctx;
-
- /**
- * Hash table from const glsl_type * to an array of ir_variable *'s
- * pointing to the ir_variables constituting each unnamed interface block.
- */
- hash_table *unnamed_interfaces;
-};
-
-
-/**
- * Performs the cross-validation of tessellation control shader vertices and
- * layout qualifiers for the attached tessellation control shaders,
- * and propagates them to the linked TCS and linked shader program.
- */
-static void
-link_tcs_out_layout_qualifiers(struct gl_shader_program *prog,
- struct gl_shader *linked_shader,
- struct gl_shader **shader_list,
- unsigned num_shaders)
-{
- linked_shader->TessCtrl.VerticesOut = 0;
-
- if (linked_shader->Stage != MESA_SHADER_TESS_CTRL)
- return;
-
- /* From the GLSL 4.0 spec (chapter 4.3.8.2):
- *
- * "All tessellation control shader layout declarations in a program
- * must specify the same output patch vertex count.
There must be at - * least one layout qualifier specifying an output patch vertex count - * in any program containing tessellation control shaders; however, - * such a declaration is not required in all tessellation control - * shaders." - */ - - for (unsigned i = 0; i < num_shaders; i++) { - struct gl_shader *shader = shader_list[i]; - - if (shader->TessCtrl.VerticesOut != 0) { - if (linked_shader->TessCtrl.VerticesOut != 0 && - linked_shader->TessCtrl.VerticesOut != shader->TessCtrl.VerticesOut) { - linker_error(prog, "tessellation control shader defined with " - "conflicting output vertex count (%d and %d)\n", - linked_shader->TessCtrl.VerticesOut, - shader->TessCtrl.VerticesOut); - return; - } - linked_shader->TessCtrl.VerticesOut = shader->TessCtrl.VerticesOut; - } - } - - /* Just do the intrastage -> interstage propagation right now, - * since we already know we're in the right type of shader program - * for doing it. - */ - if (linked_shader->TessCtrl.VerticesOut == 0) { - linker_error(prog, "tessellation control shader didn't declare " - "vertices out layout qualifier\n"); - return; - } - prog->TessCtrl.VerticesOut = linked_shader->TessCtrl.VerticesOut; -} - - -/** - * Performs the cross-validation of tessellation evaluation shader - * primitive type, vertex spacing, ordering and point_mode layout qualifiers - * for the attached tessellation evaluation shaders, and propagates them - * to the linked TES and linked shader program. - */ -static void -link_tes_in_layout_qualifiers(struct gl_shader_program *prog, - struct gl_shader *linked_shader, - struct gl_shader **shader_list, - unsigned num_shaders) -{ - linked_shader->TessEval.PrimitiveMode = PRIM_UNKNOWN; - linked_shader->TessEval.Spacing = 0; - linked_shader->TessEval.VertexOrder = 0; - linked_shader->TessEval.PointMode = -1; - - if (linked_shader->Stage != MESA_SHADER_TESS_EVAL) - return; - - /* From the GLSL 4.0 spec (chapter 4.3.8.1): - * - * "At least one tessellation evaluation shader (compilation unit) in - * a program must declare a primitive mode in its input layout. - * Declaration vertex spacing, ordering, and point mode identifiers is - * optional. It is not required that all tessellation evaluation - * shaders in a program declare a primitive mode. If spacing or - * vertex ordering declarations are omitted, the tessellation - * primitive generator will use equal spacing or counter-clockwise - * vertex ordering, respectively. If a point mode declaration is - * omitted, the tessellation primitive generator will produce lines or - * triangles according to the primitive mode." 
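- *
- * e.g. (illustrative): a TES declaring only "layout(triangles) in;"
- * still links; it picks up the equal_spacing and ccw defaults below.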
- */
-
- for (unsigned i = 0; i < num_shaders; i++) {
- struct gl_shader *shader = shader_list[i];
-
- if (shader->TessEval.PrimitiveMode != PRIM_UNKNOWN) {
- if (linked_shader->TessEval.PrimitiveMode != PRIM_UNKNOWN &&
- linked_shader->TessEval.PrimitiveMode != shader->TessEval.PrimitiveMode) {
- linker_error(prog, "tessellation evaluation shader defined with "
- "conflicting input primitive modes.\n");
- return;
- }
- linked_shader->TessEval.PrimitiveMode = shader->TessEval.PrimitiveMode;
- }
-
- if (shader->TessEval.Spacing != 0) {
- if (linked_shader->TessEval.Spacing != 0 &&
- linked_shader->TessEval.Spacing != shader->TessEval.Spacing) {
- linker_error(prog, "tessellation evaluation shader defined with "
- "conflicting vertex spacing.\n");
- return;
- }
- linked_shader->TessEval.Spacing = shader->TessEval.Spacing;
- }
-
- if (shader->TessEval.VertexOrder != 0) {
- if (linked_shader->TessEval.VertexOrder != 0 &&
- linked_shader->TessEval.VertexOrder != shader->TessEval.VertexOrder) {
- linker_error(prog, "tessellation evaluation shader defined with "
- "conflicting ordering.\n");
- return;
- }
- linked_shader->TessEval.VertexOrder = shader->TessEval.VertexOrder;
- }
-
- if (shader->TessEval.PointMode != -1) {
- if (linked_shader->TessEval.PointMode != -1 &&
- linked_shader->TessEval.PointMode != shader->TessEval.PointMode) {
- linker_error(prog, "tessellation evaluation shader defined with "
- "conflicting point modes.\n");
- return;
- }
- linked_shader->TessEval.PointMode = shader->TessEval.PointMode;
- }
-
- }
-
- /* Just do the intrastage -> interstage propagation right now,
- * since we already know we're in the right type of shader program
- * for doing it.
- */
- if (linked_shader->TessEval.PrimitiveMode == PRIM_UNKNOWN) {
- linker_error(prog,
- "tessellation evaluation shader didn't declare input "
- "primitive modes.\n");
- return;
- }
- prog->TessEval.PrimitiveMode = linked_shader->TessEval.PrimitiveMode;
-
- if (linked_shader->TessEval.Spacing == 0)
- linked_shader->TessEval.Spacing = GL_EQUAL;
- prog->TessEval.Spacing = linked_shader->TessEval.Spacing;
-
- if (linked_shader->TessEval.VertexOrder == 0)
- linked_shader->TessEval.VertexOrder = GL_CCW;
- prog->TessEval.VertexOrder = linked_shader->TessEval.VertexOrder;
-
- if (linked_shader->TessEval.PointMode == -1)
- linked_shader->TessEval.PointMode = GL_FALSE;
- prog->TessEval.PointMode = linked_shader->TessEval.PointMode;
-}
-
-
-/**
- * Performs the cross-validation of layout qualifiers specified in
- * redeclaration of gl_FragCoord for the attached fragment shaders,
- * and propagates them to the linked FS and linked shader program.
- */
-static void
-link_fs_input_layout_qualifiers(struct gl_shader_program *prog,
- struct gl_shader *linked_shader,
- struct gl_shader **shader_list,
- unsigned num_shaders)
-{
- linked_shader->redeclares_gl_fragcoord = false;
- linked_shader->uses_gl_fragcoord = false;
- linked_shader->origin_upper_left = false;
- linked_shader->pixel_center_integer = false;
-
- if (linked_shader->Stage != MESA_SHADER_FRAGMENT ||
- (prog->Version < 150 && !prog->ARB_fragment_coord_conventions_enable))
- return;
-
- for (unsigned i = 0; i < num_shaders; i++) {
- struct gl_shader *shader = shader_list[i];
- /* From the GLSL 1.50 spec, page 39:
- *
- * "If gl_FragCoord is redeclared in any fragment shader in a program,
- * it must be redeclared in all the fragment shaders in that program
- * that have a static use of gl_FragCoord."
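- *
- * Illustrative example (not from the spec): if shader A contains
- * "layout(origin_upper_left) in vec4 gl_FragCoord;" and shader B reads
- * gl_FragCoord without redeclaring it, linking A and B must fail.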
- */ - if ((linked_shader->redeclares_gl_fragcoord - && !shader->redeclares_gl_fragcoord - && shader->uses_gl_fragcoord) - || (shader->redeclares_gl_fragcoord - && !linked_shader->redeclares_gl_fragcoord - && linked_shader->uses_gl_fragcoord)) { - linker_error(prog, "fragment shader defined with conflicting " - "layout qualifiers for gl_FragCoord\n"); - } - - /* From the GLSL 1.50 spec, page 39: - * - * "All redeclarations of gl_FragCoord in all fragment shaders in a - * single program must have the same set of qualifiers." - */ - if (linked_shader->redeclares_gl_fragcoord && shader->redeclares_gl_fragcoord - && (shader->origin_upper_left != linked_shader->origin_upper_left - || shader->pixel_center_integer != linked_shader->pixel_center_integer)) { - linker_error(prog, "fragment shader defined with conflicting " - "layout qualifiers for gl_FragCoord\n"); - } - - /* Update the linked shader state. Note that uses_gl_fragcoord should - * accumulate the results. The other values should replace. If there - * are multiple redeclarations, all the fields except uses_gl_fragcoord - * are already known to be the same. - */ - if (shader->redeclares_gl_fragcoord || shader->uses_gl_fragcoord) { - linked_shader->redeclares_gl_fragcoord = - shader->redeclares_gl_fragcoord; - linked_shader->uses_gl_fragcoord = linked_shader->uses_gl_fragcoord - || shader->uses_gl_fragcoord; - linked_shader->origin_upper_left = shader->origin_upper_left; - linked_shader->pixel_center_integer = shader->pixel_center_integer; - } - - linked_shader->EarlyFragmentTests |= shader->EarlyFragmentTests; - } -} - -/** - * Performs the cross-validation of geometry shader max_vertices and - * primitive type layout qualifiers for the attached geometry shaders, - * and propagates them to the linked GS and linked shader program. - */ -static void -link_gs_inout_layout_qualifiers(struct gl_shader_program *prog, - struct gl_shader *linked_shader, - struct gl_shader **shader_list, - unsigned num_shaders) -{ - linked_shader->Geom.VerticesOut = 0; - linked_shader->Geom.Invocations = 0; - linked_shader->Geom.InputType = PRIM_UNKNOWN; - linked_shader->Geom.OutputType = PRIM_UNKNOWN; - - /* No in/out qualifiers defined for anything but GLSL 1.50+ - * geometry shaders so far. - */ - if (linked_shader->Stage != MESA_SHADER_GEOMETRY || prog->Version < 150) - return; - - /* From the GLSL 1.50 spec, page 46: - * - * "All geometry shader output layout declarations in a program - * must declare the same layout and same value for - * max_vertices. There must be at least one geometry output - * layout declaration somewhere in a program, but not all - * geometry shaders (compilation units) are required to - * declare it." 
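- *
- * e.g. (illustrative): "layout(triangle_strip, max_vertices = 3) out;"
- * in one geometry shader and no output layout in a second one links
- * fine; a second declaration with max_vertices = 4 must fail.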
- */ - - for (unsigned i = 0; i < num_shaders; i++) { - struct gl_shader *shader = shader_list[i]; - - if (shader->Geom.InputType != PRIM_UNKNOWN) { - if (linked_shader->Geom.InputType != PRIM_UNKNOWN && - linked_shader->Geom.InputType != shader->Geom.InputType) { - linker_error(prog, "geometry shader defined with conflicting " - "input types\n"); - return; - } - linked_shader->Geom.InputType = shader->Geom.InputType; - } - - if (shader->Geom.OutputType != PRIM_UNKNOWN) { - if (linked_shader->Geom.OutputType != PRIM_UNKNOWN && - linked_shader->Geom.OutputType != shader->Geom.OutputType) { - linker_error(prog, "geometry shader defined with conflicting " - "output types\n"); - return; - } - linked_shader->Geom.OutputType = shader->Geom.OutputType; - } - - if (shader->Geom.VerticesOut != 0) { - if (linked_shader->Geom.VerticesOut != 0 && - linked_shader->Geom.VerticesOut != shader->Geom.VerticesOut) { - linker_error(prog, "geometry shader defined with conflicting " - "output vertex count (%d and %d)\n", - linked_shader->Geom.VerticesOut, - shader->Geom.VerticesOut); - return; - } - linked_shader->Geom.VerticesOut = shader->Geom.VerticesOut; - } - - if (shader->Geom.Invocations != 0) { - if (linked_shader->Geom.Invocations != 0 && - linked_shader->Geom.Invocations != shader->Geom.Invocations) { - linker_error(prog, "geometry shader defined with conflicting " - "invocation count (%d and %d)\n", - linked_shader->Geom.Invocations, - shader->Geom.Invocations); - return; - } - linked_shader->Geom.Invocations = shader->Geom.Invocations; - } - } - - /* Just do the intrastage -> interstage propagation right now, - * since we already know we're in the right type of shader program - * for doing it. - */ - if (linked_shader->Geom.InputType == PRIM_UNKNOWN) { - linker_error(prog, - "geometry shader didn't declare primitive input type\n"); - return; - } - prog->Geom.InputType = linked_shader->Geom.InputType; - - if (linked_shader->Geom.OutputType == PRIM_UNKNOWN) { - linker_error(prog, - "geometry shader didn't declare primitive output type\n"); - return; - } - prog->Geom.OutputType = linked_shader->Geom.OutputType; - - if (linked_shader->Geom.VerticesOut == 0) { - linker_error(prog, - "geometry shader didn't declare max_vertices\n"); - return; - } - prog->Geom.VerticesOut = linked_shader->Geom.VerticesOut; - - if (linked_shader->Geom.Invocations == 0) - linked_shader->Geom.Invocations = 1; - - prog->Geom.Invocations = linked_shader->Geom.Invocations; -} - - -/** - * Perform cross-validation of compute shader local_size_{x,y,z} layout - * qualifiers for the attached compute shaders, and propagate them to the - * linked CS and linked shader program. - */ -static void -link_cs_input_layout_qualifiers(struct gl_shader_program *prog, - struct gl_shader *linked_shader, - struct gl_shader **shader_list, - unsigned num_shaders) -{ - for (int i = 0; i < 3; i++) - linked_shader->Comp.LocalSize[i] = 0; - - /* This function is called for all shader stages, but it only has an effect - * for compute shaders. - */ - if (linked_shader->Stage != MESA_SHADER_COMPUTE) - return; - - /* From the ARB_compute_shader spec, in the section describing local size - * declarations: - * - * If multiple compute shaders attached to a single program object - * declare local work-group size, the declarations must be identical; - * otherwise a link-time error results. 
Furthermore, if a program - * object contains any compute shaders, at least one must contain an - * input layout qualifier specifying the local work sizes of the - * program, or a link-time error will occur. - */ - for (unsigned sh = 0; sh < num_shaders; sh++) { - struct gl_shader *shader = shader_list[sh]; - - if (shader->Comp.LocalSize[0] != 0) { - if (linked_shader->Comp.LocalSize[0] != 0) { - for (int i = 0; i < 3; i++) { - if (linked_shader->Comp.LocalSize[i] != - shader->Comp.LocalSize[i]) { - linker_error(prog, "compute shader defined with conflicting " - "local sizes\n"); - return; - } - } - } - for (int i = 0; i < 3; i++) - linked_shader->Comp.LocalSize[i] = shader->Comp.LocalSize[i]; - } - } - - /* Just do the intrastage -> interstage propagation right now, - * since we already know we're in the right type of shader program - * for doing it. - */ - if (linked_shader->Comp.LocalSize[0] == 0) { - linker_error(prog, "compute shader didn't declare local size\n"); - return; - } - for (int i = 0; i < 3; i++) - prog->Comp.LocalSize[i] = linked_shader->Comp.LocalSize[i]; -} - - -/** - * Combine a group of shaders for a single stage to generate a linked shader - * - * \note - * If this function is supplied a single shader, it is cloned, and the new - * shader is returned. - */ -static struct gl_shader * -link_intrastage_shaders(void *mem_ctx, - struct gl_context *ctx, - struct gl_shader_program *prog, - struct gl_shader **shader_list, - unsigned num_shaders) -{ - struct gl_uniform_block *uniform_blocks = NULL; - - /* Check that global variables defined in multiple shaders are consistent. - */ - cross_validate_globals(prog, shader_list, num_shaders, false); - if (!prog->LinkStatus) - return NULL; - - /* Check that interface blocks defined in multiple shaders are consistent. - */ - validate_intrastage_interface_blocks(prog, (const gl_shader **)shader_list, - num_shaders); - if (!prog->LinkStatus) - return NULL; - - /* Link up uniform blocks defined within this stage. */ - const unsigned num_uniform_blocks = - link_uniform_blocks(mem_ctx, ctx, prog, shader_list, num_shaders, - &uniform_blocks); - if (!prog->LinkStatus) - return NULL; - - /* Check that there is only a single definition of each function signature - * across all shaders. - */ - for (unsigned i = 0; i < (num_shaders - 1); i++) { - foreach_in_list(ir_instruction, node, shader_list[i]->ir) { - ir_function *const f = node->as_function(); - - if (f == NULL) - continue; - - for (unsigned j = i + 1; j < num_shaders; j++) { - ir_function *const other = - shader_list[j]->symbols->get_function(f->name); - - /* If the other shader has no function (and therefore no function - * signatures) with the same name, skip to the next shader. - */ - if (other == NULL) - continue; - - foreach_in_list(ir_function_signature, sig, &f->signatures) { - if (!sig->is_defined || sig->is_builtin()) - continue; - - ir_function_signature *other_sig = - other->exact_matching_signature(NULL, &sig->parameters); - - if ((other_sig != NULL) && other_sig->is_defined - && !other_sig->is_builtin()) { - linker_error(prog, "function `%s' is multiply defined\n", - f->name); - return NULL; - } - } - } - } - } - - /* Find the shader that defines main, and make a clone of it. - * - * Starting with the clone, search for undefined references. If one is - * found, find the shader that defines it. Clone the reference and add - * it to the shader. Repeat until there are no undefined references or - * until a reference cannot be resolved. 
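- *
- * e.g. (illustrative): if main() calls a function foo() that is only
- * defined in another compilation unit of the same stage, foo()'s body
- * is cloned into the linked shader; anything foo() itself references
- * is then resolved the same way on a later iteration.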
- */ - gl_shader *main = NULL; - for (unsigned i = 0; i < num_shaders; i++) { - if (_mesa_get_main_function_signature(shader_list[i]) != NULL) { - main = shader_list[i]; - break; - } - } - - if (main == NULL) { - linker_error(prog, "%s shader lacks `main'\n", - _mesa_shader_stage_to_string(shader_list[0]->Stage)); - return NULL; - } - - gl_shader *linked = ctx->Driver.NewShader(NULL, 0, main->Type); - linked->ir = new(linked) exec_list; - clone_ir_list(mem_ctx, linked->ir, main->ir); - - linked->BufferInterfaceBlocks = uniform_blocks; - linked->NumBufferInterfaceBlocks = num_uniform_blocks; - ralloc_steal(linked, linked->BufferInterfaceBlocks); - - link_fs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); - link_tcs_out_layout_qualifiers(prog, linked, shader_list, num_shaders); - link_tes_in_layout_qualifiers(prog, linked, shader_list, num_shaders); - link_gs_inout_layout_qualifiers(prog, linked, shader_list, num_shaders); - link_cs_input_layout_qualifiers(prog, linked, shader_list, num_shaders); - - populate_symbol_table(linked); - - /* The pointer to the main function in the final linked shader (i.e., the - * copy of the original shader that contained the main function). - */ - ir_function_signature *const main_sig = - _mesa_get_main_function_signature(linked); - - /* Move any instructions other than variable declarations or function - * declarations into main. - */ - exec_node *insertion_point = - move_non_declarations(linked->ir, (exec_node *) &main_sig->body, false, - linked); - - for (unsigned i = 0; i < num_shaders; i++) { - if (shader_list[i] == main) - continue; - - insertion_point = move_non_declarations(shader_list[i]->ir, - insertion_point, true, linked); - } - - /* Check if any shader needs built-in functions. */ - bool need_builtins = false; - for (unsigned i = 0; i < num_shaders; i++) { - if (shader_list[i]->uses_builtin_functions) { - need_builtins = true; - break; - } - } - - bool ok; - if (need_builtins) { - /* Make a temporary array one larger than shader_list, which will hold - * the built-in function shader as well. - */ - gl_shader **linking_shaders = (gl_shader **) - calloc(num_shaders + 1, sizeof(gl_shader *)); - - ok = linking_shaders != NULL; - - if (ok) { - memcpy(linking_shaders, shader_list, num_shaders * sizeof(gl_shader *)); - linking_shaders[num_shaders] = _mesa_glsl_get_builtin_function_shader(); - - ok = link_function_calls(prog, linked, linking_shaders, num_shaders + 1); - - free(linking_shaders); - } else { - _mesa_error_no_memory(__func__); - } - } else { - ok = link_function_calls(prog, linked, shader_list, num_shaders); - } - - - if (!ok) { - _mesa_delete_shader(ctx, linked); - return NULL; - } - - /* At this point linked should contain all of the linked IR, so - * validate it to make sure nothing went wrong. 
- */ - validate_ir_tree(linked->ir); - - /* Set the size of geometry shader input arrays */ - if (linked->Stage == MESA_SHADER_GEOMETRY) { - unsigned num_vertices = vertices_per_prim(prog->Geom.InputType); - geom_array_resize_visitor input_resize_visitor(num_vertices, prog); - foreach_in_list(ir_instruction, ir, linked->ir) { - ir->accept(&input_resize_visitor); - } - } - - if (ctx->Const.VertexID_is_zero_based) - lower_vertex_id(linked); - - /* Validate correct usage of barrier() in the tess control shader */ - if (linked->Stage == MESA_SHADER_TESS_CTRL) { - barrier_use_visitor visitor(prog); - foreach_in_list(ir_instruction, ir, linked->ir) { - ir->accept(&visitor); - } - } - - /* Make a pass over all variable declarations to ensure that arrays with - * unspecified sizes have a size specified. The size is inferred from the - * max_array_access field. - */ - array_sizing_visitor v; - v.run(linked->ir); - v.fixup_unnamed_interface_types(); - - return linked; -} - -/** - * Update the sizes of linked shader uniform arrays to the maximum - * array index used. - * - * From page 81 (page 95 of the PDF) of the OpenGL 2.1 spec: - * - * If one or more elements of an array are active, - * GetActiveUniform will return the name of the array in name, - * subject to the restrictions listed above. The type of the array - * is returned in type. The size parameter contains the highest - * array element index used, plus one. The compiler or linker - * determines the highest index used. There will be only one - * active uniform reported by the GL per uniform array. - - */ -static void -update_array_sizes(struct gl_shader_program *prog) -{ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] == NULL) - continue; - - foreach_in_list(ir_instruction, node, prog->_LinkedShaders[i]->ir) { - ir_variable *const var = node->as_variable(); - - if ((var == NULL) || (var->data.mode != ir_var_uniform) || - !var->type->is_array()) - continue; - - /* GL_ARB_uniform_buffer_object says that std140 uniforms - * will not be eliminated. Since we always do std140, just - * don't resize arrays in UBOs. - * - * Atomic counters are supposed to get deterministic - * locations assigned based on the declaration ordering and - * sizes, array compaction would mess that up. - * - * Subroutine uniforms are not removed. - */ - if (var->is_in_buffer_block() || var->type->contains_atomic() || - var->type->contains_subroutine()) - continue; - - unsigned int size = var->data.max_array_access; - for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { - if (prog->_LinkedShaders[j] == NULL) - continue; - - foreach_in_list(ir_instruction, node2, prog->_LinkedShaders[j]->ir) { - ir_variable *other_var = node2->as_variable(); - if (!other_var) - continue; - - if (strcmp(var->name, other_var->name) == 0 && - other_var->data.max_array_access > size) { - size = other_var->data.max_array_access; - } - } - } - - if (size + 1 != var->type->length) { - /* If this is a built-in uniform (i.e., it's backed by some - * fixed-function state), adjust the number of state slots to - * match the new array size. The number of slots per array entry - * is not known. It seems safe to assume that the total number of - * slots is an integer multiple of the number of array elements. - * Determine the number of slots per array element by dividing by - * the old (total) size. 
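- *
- * Worked example (illustrative): a built-in uniform array of length 8
- * backed by 32 state slots has 32 / 8 = 4 slots per element, so
- * resizing it to size + 1 == 3 elements leaves 3 * 4 = 12 slots.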
- */
- const unsigned num_slots = var->get_num_state_slots();
- if (num_slots > 0) {
- var->set_num_state_slots((size + 1)
- * (num_slots / var->type->length));
- }
-
- var->type = glsl_type::get_array_instance(var->type->fields.array,
- size + 1);
- /* FINISHME: We should update the types of array
- * dereferences of this variable now.
- */
- }
- }
- }
-}
-
-/**
- * Resize tessellation evaluation per-vertex inputs to the size of
- * tessellation control per-vertex outputs.
- */
-static void
-resize_tes_inputs(struct gl_context *ctx,
- struct gl_shader_program *prog)
-{
- if (prog->_LinkedShaders[MESA_SHADER_TESS_EVAL] == NULL)
- return;
-
- gl_shader *const tcs = prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
- gl_shader *const tes = prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
-
- /* If no control shader is present, then the TES inputs are statically
- * sized to MaxPatchVertices; the actual size of the arrays won't be
- * known until draw time.
- */
- const int num_vertices = tcs
- ? tcs->TessCtrl.VerticesOut
- : ctx->Const.MaxPatchVertices;
-
- tess_eval_array_resize_visitor input_resize_visitor(num_vertices, prog);
- foreach_in_list(ir_instruction, ir, tes->ir) {
- ir->accept(&input_resize_visitor);
- }
-
- if (tcs) {
- /* Convert the gl_PatchVerticesIn system value into a constant, since
- * the value is known at this point.
- */
- foreach_in_list(ir_instruction, ir, tes->ir) {
- ir_variable *var = ir->as_variable();
- if (var && var->data.mode == ir_var_system_value &&
- var->data.location == SYSTEM_VALUE_VERTICES_IN) {
- void *mem_ctx = ralloc_parent(var);
- var->data.mode = ir_var_auto;
- var->data.location = 0;
- var->constant_value = new(mem_ctx) ir_constant(num_vertices);
- }
- }
- }
-}
-
-/**
- * Find a contiguous set of available bits in a bitmask.
- *
- * \param used_mask Bits representing used (1) and unused (0) locations
- * \param needed_count Number of contiguous bits needed.
- *
- * \return
- * Base location of the available bits on success or -1 on failure.
- */
-int
-find_available_slots(unsigned used_mask, unsigned needed_count)
-{
- unsigned needed_mask = (1 << needed_count) - 1;
- const int max_bit_to_test = (8 * sizeof(used_mask)) - needed_count;
-
- /* The comparison to 32 is redundant, but without it GCC emits "warning:
- * cannot optimize possibly infinite loops" for the loop below.
- */
- if ((needed_count == 0) || (max_bit_to_test < 0) || (max_bit_to_test > 32))
- return -1;
-
- for (int i = 0; i <= max_bit_to_test; i++) {
- if ((needed_mask & ~used_mask) == needed_mask)
- return i;
-
- needed_mask <<= 1;
- }
-
- return -1;
-}
-
-
-/**
- * Assign locations for either VS inputs or FS outputs
- *
- * \param prog Shader program whose variables need locations assigned
- * \param constants Driver specific constant values for the program.
- * \param target_index Selector for the program target to receive location
- * assignments. Must be either \c MESA_SHADER_VERTEX or
- * \c MESA_SHADER_FRAGMENT.
- *
- * \return
- * If locations are successfully assigned, true is returned. Otherwise an
- * error is emitted to the shader link log and false is returned.
- */
-bool
-assign_attribute_or_color_locations(gl_shader_program *prog,
- struct gl_constants *constants,
- unsigned target_index)
-{
- /* Maximum number of generic locations. This corresponds to either the
- * maximum number of draw buffers or the maximum number of generic
- * attributes.
- */
- unsigned max_index = (target_index == MESA_SHADER_VERTEX) ?
- constants->Program[target_index].MaxAttribs :
- MAX2(constants->MaxDrawBuffers, constants->MaxDualSourceDrawBuffers);
-
- /* Mark invalid locations as being used.
- */
- unsigned used_locations = (max_index >= 32)
- ? ~0 : ~((1 << max_index) - 1);
- unsigned double_storage_locations = 0;
-
- assert((target_index == MESA_SHADER_VERTEX)
- || (target_index == MESA_SHADER_FRAGMENT));
-
- gl_shader *const sh = prog->_LinkedShaders[target_index];
- if (sh == NULL)
- return true;
-
- /* Operate in a total of four passes.
- *
- * 1. Invalidate the location assignments for all vertex shader inputs.
- *
- * 2. Assign locations for inputs that have user-defined (via
- * glBindAttribLocation) locations and outputs that have
- * user-defined locations (via glBindFragDataLocation).
- *
- * 3. Sort the attributes without assigned locations by number of slots
- * required in decreasing order. Fragmentation caused by attribute
- * locations assigned by the application may prevent large attributes
- * from having enough contiguous space.
- *
- * 4. Assign locations to any inputs without assigned locations.
- */
-
- const int generic_base = (target_index == MESA_SHADER_VERTEX)
- ? (int) VERT_ATTRIB_GENERIC0 : (int) FRAG_RESULT_DATA0;
-
- const enum ir_variable_mode direction =
- (target_index == MESA_SHADER_VERTEX)
- ? ir_var_shader_in : ir_var_shader_out;
-
-
- /* Temporary storage for the set of attributes that need locations assigned.
- */
- struct temp_attr {
- unsigned slots;
- ir_variable *var;
-
- /* Used below in the call to qsort. */
- static int compare(const void *a, const void *b)
- {
- const temp_attr *const l = (const temp_attr *) a;
- const temp_attr *const r = (const temp_attr *) b;
-
- /* Reversed because we want a descending order sort below. */
- return r->slots - l->slots;
- }
- } to_assign[16];
-
- unsigned num_attr = 0;
-
- foreach_in_list(ir_instruction, node, sh->ir) {
- ir_variable *const var = node->as_variable();
-
- if ((var == NULL) || (var->data.mode != (unsigned) direction))
- continue;
-
- if (var->data.explicit_location) {
- var->data.is_unmatched_generic_inout = 0;
- if ((var->data.location >= (int)(max_index + generic_base))
- || (var->data.location < 0)) {
- linker_error(prog,
- "invalid explicit location %d specified for `%s'\n",
- (var->data.location < 0)
- ? var->data.location
- : var->data.location - generic_base,
- var->name);
- return false;
- }
- } else if (target_index == MESA_SHADER_VERTEX) {
- unsigned binding;
-
- if (prog->AttributeBindings->get(binding, var->name)) {
- assert(binding >= VERT_ATTRIB_GENERIC0);
- var->data.location = binding;
- var->data.is_unmatched_generic_inout = 0;
- }
- } else if (target_index == MESA_SHADER_FRAGMENT) {
- unsigned binding;
- unsigned index;
-
- if (prog->FragDataBindings->get(binding, var->name)) {
- assert(binding >= FRAG_RESULT_DATA0);
- var->data.location = binding;
- var->data.is_unmatched_generic_inout = 0;
-
- if (prog->FragDataIndexBindings->get(index, var->name)) {
- var->data.index = index;
- }
- }
- }
-
- /* From the GL 4.5 core spec, section 15.2 (Shader Execution):
- *
- * "Output binding assignments will cause LinkProgram to fail:
- * ...
- * If the program has an active output assigned to a location greater
- * than or equal to the value of MAX_DUAL_SOURCE_DRAW_BUFFERS and has
- * an active output assigned an index greater than or equal to one;"
- */
- if (target_index == MESA_SHADER_FRAGMENT && var->data.index >= 1 &&
- var->data.location - generic_base >=
- (int) constants->MaxDualSourceDrawBuffers) {
- linker_error(prog,
- "output location %d >= GL_MAX_DUAL_SOURCE_DRAW_BUFFERS "
- "with index %u for %s\n",
- var->data.location - generic_base, var->data.index,
- var->name);
- return false;
- }
-
- const unsigned slots = var->type->count_attribute_slots(target_index == MESA_SHADER_VERTEX);
-
- /* If the variable is not a built-in and has a location statically
- * assigned in the shader (presumably via a layout qualifier), make sure
- * that it doesn't collide with other assigned locations. Otherwise,
- * add it to the list of variables that need linker-assigned locations.
- */
- if (var->data.location != -1) {
- if (var->data.location >= generic_base && var->data.index < 1) {
- /* From page 61 of the OpenGL 4.0 spec:
- *
- * "LinkProgram will fail if the attribute bindings assigned
- * by BindAttribLocation do not leave not enough space to
- * assign a location for an active matrix attribute or an
- * active attribute array, both of which require multiple
- * contiguous generic attributes."
- *
- * I think the above text prohibits the aliasing of explicit and
- * automatic assignments. But aliasing is allowed in manual
- * assignments of attribute locations. See the comments below for
- * the details.
- *
- * From OpenGL 4.0 spec, page 61:
- *
- * "It is possible for an application to bind more than one
- * attribute name to the same location. This is referred to as
- * aliasing. This will only work if only one of the aliased
- * attributes is active in the executable program, or if no
- * path through the shader consumes more than one attribute of
- * a set of attributes aliased to the same location. A link
- * error can occur if the linker determines that every path
- * through the shader consumes multiple aliased attributes,
- * but implementations are not required to generate an error
- * in this case."
- *
- * From GLSL 4.30 spec, page 54:
- *
- * "A program will fail to link if any two non-vertex shader
- * input variables are assigned to the same location. For
- * vertex shaders, multiple input variables may be assigned
- * to the same location using either layout qualifiers or via
- * the OpenGL API. However, such aliasing is intended only to
- * support vertex shaders where each execution path accesses
- * at most one input per each location. Implementations are
- * permitted, but not required, to generate link-time errors
- * if they detect that every path through the vertex shader
- * executable accesses multiple inputs assigned to any single
- * location. For all shader types, a program will fail to link
- * if explicit location assignments leave the linker unable
- * to find space for other variables without explicit
- * assignments."
- *
- * From OpenGL ES 3.0 spec, page 56:
- *
- * "Binding more than one attribute name to the same location
- * is referred to as aliasing, and is not permitted in OpenGL
- * ES Shading Language 3.00 vertex shaders. LinkProgram will
- * fail when this condition exists. However, aliasing is
- * possible in OpenGL ES Shading Language 1.00 vertex shaders.
-            /* Mask representing the contiguous slots that will be used by
-             * this attribute.
-             */
-            const unsigned attr = var->data.location - generic_base;
-            const unsigned use_mask = (1 << slots) - 1;
-            const char *const string = (target_index == MESA_SHADER_VERTEX)
-               ? "vertex shader input" : "fragment shader output";
-
-            /* Generate a link error if the requested locations for this
-             * attribute exceed the maximum allowed attribute location.
-             */
-            if (attr + slots > max_index) {
-               linker_error(prog,
-                            "insufficient contiguous locations "
-                            "available for %s `%s' %d %d %d\n", string,
-                            var->name, used_locations, use_mask, attr);
-               return false;
-            }
-
-            /* Generate a link error if the set of bits requested for this
-             * attribute overlaps any previously allocated bits.
-             */
-            if ((~(use_mask << attr) & used_locations) != used_locations) {
-               if (target_index == MESA_SHADER_FRAGMENT ||
-                   (prog->IsES && prog->Version >= 300)) {
-                  linker_error(prog,
-                               "overlapping location is assigned "
-                               "to %s `%s' %d %d %d\n", string,
-                               var->name, used_locations, use_mask, attr);
-                  return false;
-               } else {
-                  linker_warning(prog,
-                                 "overlapping location is assigned "
-                                 "to %s `%s' %d %d %d\n", string,
-                                 var->name, used_locations, use_mask, attr);
-               }
-            }
-
-            used_locations |= (use_mask << attr);
-
-            /* From the GL 4.5 core spec, section 11.1.1 (Vertex Attributes):
-             *
-             *     "A program with more than the value of MAX_VERTEX_ATTRIBS
-             *     active attribute variables may fail to link, unless
-             *     device-dependent optimizations are able to make the program
-             *     fit within available hardware resources. For the purposes
-             *     of this test, attribute variables of the type dvec3, dvec4,
-             *     dmat2x3, dmat2x4, dmat3, dmat3x4, dmat4x3, and dmat4 may
-             *     count as consuming twice as many attributes as equivalent
-             *     single-precision types. While these types use the same number
-             *     of generic attributes as their single-precision equivalents,
-             *     implementations are permitted to consume two single-precision
-             *     vectors of internal storage for each three- or four-component
-             *     double-precision vector."
-             *
-             * Mark this attribute slot as taking up twice as much space
-             * so we can count it properly against limits. According to
-             * issue (3) of the GL_ARB_vertex_attrib_64bit behavior, this
-             * is optional behavior, but it seems preferable.
-             */
-            if (var->type->without_array()->is_dual_slot_double())
-               double_storage_locations |= (use_mask << attr);
-         }
-
-         continue;
-      }
-
-      to_assign[num_attr].slots = slots;
-      to_assign[num_attr].var = var;
-      num_attr++;
-   }
-
-   if (target_index == MESA_SHADER_VERTEX) {
-      unsigned total_attribs_size =
-         _mesa_bitcount(used_locations & ((1 << max_index) - 1)) +
-         _mesa_bitcount(double_storage_locations);
-      if (total_attribs_size > max_index) {
-         linker_error(prog,
-                      "attempt to use %d vertex attribute slots, only %d available ",
-                      total_attribs_size, max_index);
-         return false;
-      }
-   }
-
-   /* If all of the attributes were assigned locations by the application (or
-    * are built-in attributes with fixed locations), return early. This should
-    * be the common case.
-    */
-   if (num_attr == 0)
-      return true;
-
-   qsort(to_assign, num_attr, sizeof(to_assign[0]), temp_attr::compare);
-
-   if (target_index == MESA_SHADER_VERTEX) {
-      /* VERT_ATTRIB_GENERIC0 is a pseudo-alias for VERT_ATTRIB_POS. It can
-       * only be explicitly assigned via glBindAttribLocation. Mark it as
-       * reserved to prevent it from being automatically allocated below.
-       */
-      find_deref_visitor find("gl_Vertex");
-      find.run(sh->ir);
-      if (find.variable_found())
-         used_locations |= (1 << 0);
-   }
-
-   for (unsigned i = 0; i < num_attr; i++) {
-      /* Mask representing the contiguous slots that will be used by this
-       * attribute.
-       */
-      const unsigned use_mask = (1 << to_assign[i].slots) - 1;
-
-      int location = find_available_slots(used_locations, to_assign[i].slots);
-
-      if (location < 0) {
-         const char *const string = (target_index == MESA_SHADER_VERTEX)
-            ? "vertex shader input" : "fragment shader output";
-
-         linker_error(prog,
-                      "insufficient contiguous locations "
-                      "available for %s `%s'\n",
-                      string, to_assign[i].var->name);
-         return false;
-      }
-
-      to_assign[i].var->data.location = generic_base + location;
-      to_assign[i].var->data.is_unmatched_generic_inout = 0;
-      used_locations |= (use_mask << location);
-   }
-
-   return true;
-}
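
The `find_available_slots()` helper called in the loop above is defined elsewhere in this file; its behavior amounts to a first-fit scan for a run of free bits. A rough standalone sketch under that assumed semantics (the name `first_fit` and the 32-bit limit are illustrative, not the patch's code):

    #include <stdio.h>

    /* First-fit scan for 'needed' contiguous zero bits in a 32-bit
     * location mask; returns the starting bit index, or -1 if no run
     * is free. Assumes 1 <= needed < 32.
     */
    static int
    first_fit(unsigned used_mask, unsigned needed)
    {
       const unsigned needed_mask = (1u << needed) - 1;

       for (unsigned i = 0; i + needed <= 32; i++) {
          if (((used_mask >> i) & needed_mask) == 0)
             return (int) i;
       }

       return -1;
    }

    int
    main(void)
    {
       /* bits 0..3 taken: a 2-slot attribute lands at location 4 */
       printf("%d\n", first_fit(0xfu, 2));  /* prints 4 */
       return 0;
    }

Sorting the unassigned attributes by slot count in decreasing order before this scan (the qsort above) reduces fragmentation, since the largest contiguous requests are placed first.
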
-
-/**
- * Match explicit locations of outputs to inputs and clear the
- * unmatched flag when a match is found so we don't optimize them away.
- */
-static void
-match_explicit_outputs_to_inputs(struct gl_shader_program *prog,
-                                 gl_shader *producer,
-                                 gl_shader *consumer)
-{
-   glsl_symbol_table parameters;
-   ir_variable *explicit_locations[MAX_VARYING] = { NULL };
-
-   /* Find all shader outputs in the "producer" stage.
-    */
-   foreach_in_list(ir_instruction, node, producer->ir) {
-      ir_variable *const var = node->as_variable();
-
-      if ((var == NULL) || (var->data.mode != ir_var_shader_out))
-         continue;
-
-      if (var->data.explicit_location &&
-          var->data.location >= VARYING_SLOT_VAR0) {
-         const unsigned idx = var->data.location - VARYING_SLOT_VAR0;
-         if (explicit_locations[idx] == NULL)
-            explicit_locations[idx] = var;
-      }
-   }
-
-   /* Match inputs to outputs */
-   foreach_in_list(ir_instruction, node, consumer->ir) {
-      ir_variable *const input = node->as_variable();
-
-      if ((input == NULL) || (input->data.mode != ir_var_shader_in))
-         continue;
-
-      ir_variable *output = NULL;
-      if (input->data.explicit_location
-          && input->data.location >= VARYING_SLOT_VAR0) {
-         output = explicit_locations[input->data.location - VARYING_SLOT_VAR0];
-
-         if (output != NULL) {
-            input->data.is_unmatched_generic_inout = 0;
-            output->data.is_unmatched_generic_inout = 0;
-         }
-      }
-   }
-}
-
-/**
- * Store the gl_FragDepth layout in the gl_shader_program struct.
- */ -static void -store_fragdepth_layout(struct gl_shader_program *prog) -{ - if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) { - return; - } - - struct exec_list *ir = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]->ir; - - /* We don't look up the gl_FragDepth symbol directly because if - * gl_FragDepth is not used in the shader, it's removed from the IR. - * However, the symbol won't be removed from the symbol table. - * - * We're only interested in the cases where the variable is NOT removed - * from the IR. - */ - foreach_in_list(ir_instruction, node, ir) { - ir_variable *const var = node->as_variable(); - - if (var == NULL || var->data.mode != ir_var_shader_out) { - continue; - } - - if (strcmp(var->name, "gl_FragDepth") == 0) { - switch (var->data.depth_layout) { - case ir_depth_layout_none: - prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_NONE; - return; - case ir_depth_layout_any: - prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_ANY; - return; - case ir_depth_layout_greater: - prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_GREATER; - return; - case ir_depth_layout_less: - prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_LESS; - return; - case ir_depth_layout_unchanged: - prog->FragDepthLayout = FRAG_DEPTH_LAYOUT_UNCHANGED; - return; - default: - assert(0); - return; - } - } - } -} - -/** - * Validate the resources used by a program versus the implementation limits - */ -static void -check_resources(struct gl_context *ctx, struct gl_shader_program *prog) -{ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_shader *sh = prog->_LinkedShaders[i]; - - if (sh == NULL) - continue; - - if (sh->num_samplers > ctx->Const.Program[i].MaxTextureImageUnits) { - linker_error(prog, "Too many %s shader texture samplers\n", - _mesa_shader_stage_to_string(i)); - } - - if (sh->num_uniform_components > - ctx->Const.Program[i].MaxUniformComponents) { - if (ctx->Const.GLSLSkipStrictMaxUniformLimitCheck) { - linker_warning(prog, "Too many %s shader default uniform block " - "components, but the driver will try to optimize " - "them out; this is non-portable out-of-spec " - "behavior\n", - _mesa_shader_stage_to_string(i)); - } else { - linker_error(prog, "Too many %s shader default uniform block " - "components\n", - _mesa_shader_stage_to_string(i)); - } - } - - if (sh->num_combined_uniform_components > - ctx->Const.Program[i].MaxCombinedUniformComponents) { - if (ctx->Const.GLSLSkipStrictMaxUniformLimitCheck) { - linker_warning(prog, "Too many %s shader uniform components, " - "but the driver will try to optimize them out; " - "this is non-portable out-of-spec behavior\n", - _mesa_shader_stage_to_string(i)); - } else { - linker_error(prog, "Too many %s shader uniform components\n", - _mesa_shader_stage_to_string(i)); - } - } - } - - unsigned blocks[MESA_SHADER_STAGES] = {0}; - unsigned total_uniform_blocks = 0; - unsigned shader_blocks[MESA_SHADER_STAGES] = {0}; - unsigned total_shader_storage_blocks = 0; - - for (unsigned i = 0; i < prog->NumBufferInterfaceBlocks; i++) { - /* Don't check SSBOs for Uniform Block Size */ - if (!prog->BufferInterfaceBlocks[i].IsShaderStorage && - prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxUniformBlockSize) { - linker_error(prog, "Uniform block %s too big (%d/%d)\n", - prog->BufferInterfaceBlocks[i].Name, - prog->BufferInterfaceBlocks[i].UniformBufferSize, - ctx->Const.MaxUniformBlockSize); - } - - if (prog->BufferInterfaceBlocks[i].IsShaderStorage && - prog->BufferInterfaceBlocks[i].UniformBufferSize > ctx->Const.MaxShaderStorageBlockSize) { - 
linker_error(prog, "Shader storage block %s too big (%d/%d)\n", - prog->BufferInterfaceBlocks[i].Name, - prog->BufferInterfaceBlocks[i].UniformBufferSize, - ctx->Const.MaxShaderStorageBlockSize); - } - - for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { - if (prog->InterfaceBlockStageIndex[j][i] != -1) { - struct gl_shader *sh = prog->_LinkedShaders[j]; - int stage_index = prog->InterfaceBlockStageIndex[j][i]; - if (sh && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage) { - shader_blocks[j]++; - total_shader_storage_blocks++; - } else { - blocks[j]++; - total_uniform_blocks++; - } - } - } - - if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) { - linker_error(prog, "Too many combined uniform blocks (%d/%d)\n", - total_uniform_blocks, - ctx->Const.MaxCombinedUniformBlocks); - } else { - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - const unsigned max_uniform_blocks = - ctx->Const.Program[i].MaxUniformBlocks; - if (blocks[i] > max_uniform_blocks) { - linker_error(prog, "Too many %s uniform blocks (%d/%d)\n", - _mesa_shader_stage_to_string(i), - blocks[i], - max_uniform_blocks); - break; - } - } - } - - if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) { - linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n", - total_shader_storage_blocks, - ctx->Const.MaxCombinedShaderStorageBlocks); - } else { - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - const unsigned max_shader_storage_blocks = - ctx->Const.Program[i].MaxShaderStorageBlocks; - if (shader_blocks[i] > max_shader_storage_blocks) { - linker_error(prog, "Too many %s shader storage blocks (%d/%d)\n", - _mesa_shader_stage_to_string(i), - shader_blocks[i], - max_shader_storage_blocks); - break; - } - } - } - } -} - -static void -link_calculate_subroutine_compat(struct gl_shader_program *prog) -{ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_shader *sh = prog->_LinkedShaders[i]; - int count; - if (!sh) - continue; - - for (unsigned j = 0; j < sh->NumSubroutineUniformRemapTable; j++) { - struct gl_uniform_storage *uni = sh->SubroutineUniformRemapTable[j]; - - if (!uni) - continue; - - count = 0; - for (unsigned f = 0; f < sh->NumSubroutineFunctions; f++) { - struct gl_subroutine_function *fn = &sh->SubroutineFunctions[f]; - for (int k = 0; k < fn->num_compat_types; k++) { - if (fn->types[k] == uni->type) { - count++; - break; - } - } - } - uni->num_compatible_subroutines = count; - } - } -} - -static void -check_subroutine_resources(struct gl_shader_program *prog) -{ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_shader *sh = prog->_LinkedShaders[i]; - - if (sh) { - if (sh->NumSubroutineUniformRemapTable > MAX_SUBROUTINE_UNIFORM_LOCATIONS) - linker_error(prog, "Too many %s shader subroutine uniforms\n", - _mesa_shader_stage_to_string(i)); - } - } -} -/** - * Validate shader image resources. 
- */
-static void
-check_image_resources(struct gl_context *ctx, struct gl_shader_program *prog)
-{
-   unsigned total_image_units = 0;
-   unsigned fragment_outputs = 0;
-   unsigned total_shader_storage_blocks = 0;
-
-   if (!ctx->Extensions.ARB_shader_image_load_store)
-      return;
-
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-      struct gl_shader *sh = prog->_LinkedShaders[i];
-
-      if (sh) {
-         if (sh->NumImages > ctx->Const.Program[i].MaxImageUniforms)
-            linker_error(prog, "Too many %s shader image uniforms (%u > %u)\n",
-                         _mesa_shader_stage_to_string(i), sh->NumImages,
-                         ctx->Const.Program[i].MaxImageUniforms);
-
-         total_image_units += sh->NumImages;
-
-         for (unsigned j = 0; j < prog->NumBufferInterfaceBlocks; j++) {
-            int stage_index = prog->InterfaceBlockStageIndex[i][j];
-            if (stage_index != -1 && sh->BufferInterfaceBlocks[stage_index].IsShaderStorage)
-               total_shader_storage_blocks++;
-         }
-
-         if (i == MESA_SHADER_FRAGMENT) {
-            foreach_in_list(ir_instruction, node, sh->ir) {
-               ir_variable *var = node->as_variable();
-               if (var && var->data.mode == ir_var_shader_out)
-                  /* since there are no double fs outputs - pass false */
-                  fragment_outputs += var->type->count_attribute_slots(false);
-            }
-         }
-      }
-   }
-
-   if (total_image_units > ctx->Const.MaxCombinedImageUniforms)
-      linker_error(prog, "Too many combined image uniforms\n");
-
-   if (total_image_units + fragment_outputs + total_shader_storage_blocks >
-       ctx->Const.MaxCombinedShaderOutputResources)
-      linker_error(prog, "Too many combined image uniforms, shader storage "
-                         "buffers and fragment outputs\n");
-}
-
-
-/**
- * Initializes a variable's explicit location slots to
- * INACTIVE_UNIFORM_EXPLICIT_LOCATION and checks for overlaps with other
- * uniforms that use explicit locations.
- */
-static bool
-reserve_explicit_locations(struct gl_shader_program *prog,
-                           string_to_uint_map *map, ir_variable *var)
-{
-   unsigned slots = var->type->uniform_locations();
-   unsigned max_loc = var->data.location + slots - 1;
-
-   /* Resize remap table if locations do not fit in the current one. */
-   if (max_loc + 1 > prog->NumUniformRemapTable) {
-      prog->UniformRemapTable =
-         reralloc(prog, prog->UniformRemapTable,
-                  gl_uniform_storage *,
-                  max_loc + 1);
-
-      if (!prog->UniformRemapTable) {
-         linker_error(prog, "Out of memory during linking.\n");
-         return false;
-      }
-
-      /* Initialize allocated space. */
-      for (unsigned i = prog->NumUniformRemapTable; i < max_loc + 1; i++)
-         prog->UniformRemapTable[i] = NULL;
-
-      prog->NumUniformRemapTable = max_loc + 1;
-   }
-
-   for (unsigned i = 0; i < slots; i++) {
-      unsigned loc = var->data.location + i;
-
-      /* Check if location is already used. */
-      if (prog->UniformRemapTable[loc] == INACTIVE_UNIFORM_EXPLICIT_LOCATION) {
-
-         /* Possibly same uniform from a different stage, this is ok. */
-         unsigned hash_loc;
-         if (map->get(hash_loc, var->name) && hash_loc == loc - i)
-            continue;
-
-         /* The ARB_explicit_uniform_location specification states:
-          *
-          *     "No two default-block uniform variables in the program can have
-          *     the same location, even if they are unused, otherwise a compiler
-          *     or linker error will be generated."
-          */
-         linker_error(prog,
-                      "location qualifier for uniform %s overlaps "
-                      "previously used location\n",
-                      var->name);
-         return false;
-      }
-
-      /* Initialize location as inactive before optimization
-       * rounds and location assignment.
-       */
-      prog->UniformRemapTable[loc] = INACTIVE_UNIFORM_EXPLICIT_LOCATION;
-   }
-
-   /* Note: the base location is used for arrays.
*/ - map->put(var->data.location, var->name); - - return true; -} - -static bool -reserve_subroutine_explicit_locations(struct gl_shader_program *prog, - struct gl_shader *sh, - ir_variable *var) -{ - unsigned slots = var->type->uniform_locations(); - unsigned max_loc = var->data.location + slots - 1; - - /* Resize remap table if locations do not fit in the current one. */ - if (max_loc + 1 > sh->NumSubroutineUniformRemapTable) { - sh->SubroutineUniformRemapTable = - reralloc(sh, sh->SubroutineUniformRemapTable, - gl_uniform_storage *, - max_loc + 1); - - if (!sh->SubroutineUniformRemapTable) { - linker_error(prog, "Out of memory during linking.\n"); - return false; - } - - /* Initialize allocated space. */ - for (unsigned i = sh->NumSubroutineUniformRemapTable; i < max_loc + 1; i++) - sh->SubroutineUniformRemapTable[i] = NULL; - - sh->NumSubroutineUniformRemapTable = max_loc + 1; - } - - for (unsigned i = 0; i < slots; i++) { - unsigned loc = var->data.location + i; - - /* Check if location is already used. */ - if (sh->SubroutineUniformRemapTable[loc] == INACTIVE_UNIFORM_EXPLICIT_LOCATION) { - - /* ARB_explicit_uniform_location specification states: - * "No two subroutine uniform variables can have the same location - * in the same shader stage, otherwise a compiler or linker error - * will be generated." - */ - linker_error(prog, - "location qualifier for uniform %s overlaps " - "previously used location\n", - var->name); - return false; - } - - /* Initialize location as inactive before optimization - * rounds and location assignment. - */ - sh->SubroutineUniformRemapTable[loc] = INACTIVE_UNIFORM_EXPLICIT_LOCATION; - } - - return true; -} -/** - * Check and reserve all explicit uniform locations, called before - * any optimizations happen to handle also inactive uniforms and - * inactive array elements that may get trimmed away. - */ -static void -check_explicit_uniform_locations(struct gl_context *ctx, - struct gl_shader_program *prog) -{ - if (!ctx->Extensions.ARB_explicit_uniform_location) - return; - - /* This map is used to detect if overlapping explicit locations - * occur with the same uniform (from different stage) or a different one. - */ - string_to_uint_map *uniform_map = new string_to_uint_map; - - if (!uniform_map) { - linker_error(prog, "Out of memory during linking.\n"); - return; - } - - unsigned entries_total = 0; - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_shader *sh = prog->_LinkedShaders[i]; - - if (!sh) - continue; - - foreach_in_list(ir_instruction, node, sh->ir) { - ir_variable *var = node->as_variable(); - if (!var || var->data.mode != ir_var_uniform) - continue; - - entries_total += var->type->uniform_locations(); - - if (var->data.explicit_location) { - bool ret; - if (var->type->without_array()->is_subroutine()) - ret = reserve_subroutine_explicit_locations(prog, sh, var); - else - ret = reserve_explicit_locations(prog, uniform_map, var); - if (!ret) { - delete uniform_map; - return; - } - } - } - } - - /* Verify that total amount of entries for explicit and implicit locations - * is less than MAX_UNIFORM_LOCATIONS. 
- */
-   if (entries_total >= ctx->Const.MaxUserAssignableUniformLocations) {
-      linker_error(prog, "count of uniform locations >= MAX_UNIFORM_LOCATIONS "
-                   "(%u >= %u)", entries_total,
-                   ctx->Const.MaxUserAssignableUniformLocations);
-   }
-   delete uniform_map;
-}
-
-static bool
-should_add_buffer_variable(struct gl_shader_program *shProg,
-                           GLenum type, const char *name)
-{
-   bool found_interface = false;
-   unsigned block_name_len = 0;
-   const char *block_name_dot = strchr(name, '.');
-
-   /* These rules only apply to buffer variables, so we return
-    * true for all other types.
-    */
-   if (type != GL_BUFFER_VARIABLE)
-      return true;
-
-   for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
-      const char *block_name = shProg->BufferInterfaceBlocks[i].Name;
-      block_name_len = strlen(block_name);
-
-      const char *block_square_bracket = strchr(block_name, '[');
-      if (block_square_bracket) {
-         /* The block is part of an array of named interfaces;
-          * for the name comparison we ignore the "[x]" part.
-          */
-         block_name_len -= strlen(block_square_bracket);
-      }
-
-      if (block_name_dot) {
-         /* Check if the variable name starts with the interface
-          * name. The interface name (if present) should have the
-          * same length as the interface block name we are comparing to.
-          */
-         unsigned len = strlen(name) - strlen(block_name_dot);
-         if (len != block_name_len)
-            continue;
-      }
-
-      if (strncmp(block_name, name, block_name_len) == 0) {
-         found_interface = true;
-         break;
-      }
-   }
-
-   /* We remove the interface name from the buffer variable name,
-    * including the dot that follows it.
-    */
-   if (found_interface)
-      name = name + block_name_len + 1;
-
-   /* From the ARB_program_interface_query extension:
-    *
-    *     "For an active shader storage block member declared as an array, an
-    *     entry will be generated only for the first array element, regardless
-    *     of its type. For arrays of aggregate types, the enumeration rules are
-    *     applied recursively for the single enumerated array element."
-    */
-   const char *struct_first_dot = strchr(name, '.');
-   const char *first_square_bracket = strchr(name, '[');
-
-   /* The buffer variable is at top level and it is not an array. */
-   if (!first_square_bracket) {
-      return true;
-   /* The shader storage block member is a struct, so generate the entry. */
-   } else if (struct_first_dot && struct_first_dot < first_square_bracket) {
-      return true;
-   } else {
-      /* The shader storage block member is an array; only generate an entry
-       * for the first array element.
-       */
-      if (strncmp(first_square_bracket, "[0]", 3) == 0)
-         return true;
-   }
-
-   return false;
-}
-
-static bool
-add_program_resource(struct gl_shader_program *prog, GLenum type,
-                     const void *data, uint8_t stages)
-{
-   assert(data);
-
-   /* If resource already exists, do not add it again. */
-   for (unsigned i = 0; i < prog->NumProgramResourceList; i++)
-      if (prog->ProgramResourceList[i].Data == data)
-         return true;
-
-   prog->ProgramResourceList =
-      reralloc(prog,
-               prog->ProgramResourceList,
-               gl_program_resource,
-               prog->NumProgramResourceList + 1);
-
-   if (!prog->ProgramResourceList) {
-      linker_error(prog, "Out of memory during linking.\n");
-      return false;
-   }
-
-   struct gl_program_resource *res =
-      &prog->ProgramResourceList[prog->NumProgramResourceList];
-
-   res->Type = type;
-   res->Data = data;
-   res->StageReferences = stages;
-
-   prog->NumProgramResourceList++;
-
-   return true;
-}
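
For intuition, the "[0] only" enumeration rule implemented by should_add_buffer_variable() above behaves like the simplified check below. This is an illustrative sketch, not the patched code; it deliberately omits the struct-member case the real function also handles.

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Simplified: a top-level array member generates one resource-list
     * entry, via its first element; non-arrays are always listed.
     */
    static bool
    lists_entry(const char *name)
    {
       const char *bracket = strchr(name, '[');

       if (!bracket)
          return true;                          /* not an array */

       return strncmp(bracket, "[0]", 3) == 0;  /* first element only */
    }

    int
    main(void)
    {
       printf("%d\n", lists_entry("v"));     /* 1 */
       printf("%d\n", lists_entry("v[0]"));  /* 1 */
       printf("%d\n", lists_entry("v[1]"));  /* 0 */
       return 0;
    }
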
-/* Checks whether the variable 'var' is a packed varying and whether the
- * given name is part of the packed varying's list.
- *
- * If a variable is a packed varying, it has a name like
- * 'packed:a,b,c' where a, b and c are separate variables.
- */
-static bool
-included_in_packed_varying(ir_variable *var, const char *name)
-{
-   if (strncmp(var->name, "packed:", 7) != 0)
-      return false;
-
-   char *list = strdup(var->name + 7);
-   assert(list);
-
-   bool found = false;
-   char *saveptr;
-   char *token = strtok_r(list, ",", &saveptr);
-   while (token) {
-      if (strcmp(token, name) == 0) {
-         found = true;
-         break;
-      }
-      token = strtok_r(NULL, ",", &saveptr);
-   }
-   free(list);
-   return found;
-}
-
-/**
- * Builds a stage reference bitmask from a variable name.
- */
-static uint8_t
-build_stageref(struct gl_shader_program *shProg, const char *name,
-               unsigned mode)
-{
-   uint8_t stages = 0;
-
-   /* Note that we assume at most 8 stages; if more stages are ever added,
-    * the type used for the reference mask in gl_program_resource will need
-    * to be changed.
-    */
-   assert(MESA_SHADER_STAGES < 8);
-
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-      struct gl_shader *sh = shProg->_LinkedShaders[i];
-      if (!sh)
-         continue;
-
-      /* Shader symbol table may contain variables that have
-       * been optimized away. Search IR for the variable instead.
-       */
-      foreach_in_list(ir_instruction, node, sh->ir) {
-         ir_variable *var = node->as_variable();
-         if (var) {
-            unsigned baselen = strlen(var->name);
-
-            if (included_in_packed_varying(var, name)) {
-               stages |= (1 << i);
-               break;
-            }
-
-            /* Type needs to match if specified, otherwise we might
-             * pick a variable with same name but different interface.
-             */
-            if (var->data.mode != mode)
-               continue;
-
-            if (strncmp(var->name, name, baselen) == 0) {
-               /* Check for exact name matches but also check for arrays and
-                * structs.
-                */
-               if (name[baselen] == '\0' ||
-                   name[baselen] == '[' ||
-                   name[baselen] == '.') {
-                  stages |= (1 << i);
-                  break;
-               }
-            }
-         }
-      }
-   }
-   return stages;
-}
-
-/**
- * Create a gl_shader_variable from an ir_variable.
- */
-static gl_shader_variable *
-create_shader_variable(struct gl_shader_program *shProg, const ir_variable *in)
-{
-   gl_shader_variable *out = ralloc(shProg, struct gl_shader_variable);
-   if (!out)
-      return NULL;
-
-   out->type = in->type;
-   out->name = ralloc_strdup(shProg, in->name);
-
-   if (!out->name)
-      return NULL;
-
-   out->location = in->data.location;
-   out->index = in->data.index;
-   out->patch = in->data.patch;
-   out->mode = in->data.mode;
-
-   return out;
-}
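
The name predicate inside build_stageref() above accepts an exact match plus array/struct derefs of the same base. Extracted into a standalone form for clarity (a sketch with a hypothetical helper name, not the patched code):

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* Does 'name' refer to the variable 'base', either exactly or as an
     * array element / struct member of it?
     */
    static bool
    refers_to(const char *base, const char *name)
    {
       size_t baselen = strlen(base);

       if (strncmp(base, name, baselen) != 0)
          return false;

       return name[baselen] == '\0' ||   /* exact match   */
              name[baselen] == '[' ||    /* array element */
              name[baselen] == '.';      /* struct member */
    }

    int
    main(void)
    {
       printf("%d\n", refers_to("color", "color[2]"));  /* 1 */
       printf("%d\n", refers_to("color", "colorful"));  /* 0 */
       return 0;
    }

The suffix check is what keeps a plain strncmp() from matching unrelated variables that merely share a prefix.
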
-
-static bool
-add_interface_variables(struct gl_shader_program *shProg,
-                        exec_list *ir, GLenum programInterface)
-{
-   foreach_in_list(ir_instruction, node, ir) {
-      ir_variable *var = node->as_variable();
-      uint8_t mask = 0;
-
-      if (!var)
-         continue;
-
-      switch (var->data.mode) {
-      /* From GL 4.3 core spec, section 11.1.1 (Vertex Attributes):
-       * "For GetActiveAttrib, all active vertex shader input variables
-       * are enumerated, including the special built-in inputs gl_VertexID
-       * and gl_InstanceID."
-       */
-      case ir_var_system_value:
-         if (var->data.location != SYSTEM_VALUE_VERTEX_ID &&
-             var->data.location != SYSTEM_VALUE_VERTEX_ID_ZERO_BASE &&
-             var->data.location != SYSTEM_VALUE_INSTANCE_ID)
-            continue;
-         /* Mark special built-in inputs referenced by the vertex stage so
-          * that they are considered active by the shader queries.
-          */
-         mask = (1 << (MESA_SHADER_VERTEX));
-         /* FALLTHROUGH */
-      case ir_var_shader_in:
-         if (programInterface != GL_PROGRAM_INPUT)
-            continue;
-         break;
-      case ir_var_shader_out:
-         if (programInterface != GL_PROGRAM_OUTPUT)
-            continue;
-         break;
-      default:
-         continue;
-      }
-
-      /* Skip packed varyings; packed varyings are handled separately
-       * by add_packed_varyings.
-       */
-      if (strncmp(var->name, "packed:", 7) == 0)
-         continue;
-
-      /* Skip fragdata arrays; these are handled separately
-       * by add_fragdata_arrays.
-       */
-      if (strncmp(var->name, "gl_out_FragData", 15) == 0)
-         continue;
-
-      gl_shader_variable *sha_v = create_shader_variable(shProg, var);
-      if (!sha_v)
-         return false;
-
-      if (!add_program_resource(shProg, programInterface, sha_v,
-                                build_stageref(shProg, sha_v->name,
-                                               sha_v->mode) | mask))
-         return false;
-   }
-   return true;
-}
-
-static bool
-add_packed_varyings(struct gl_shader_program *shProg, int stage, GLenum type)
-{
-   struct gl_shader *sh = shProg->_LinkedShaders[stage];
-   GLenum iface;
-
-   if (!sh || !sh->packed_varyings)
-      return true;
-
-   foreach_in_list(ir_instruction, node, sh->packed_varyings) {
-      ir_variable *var = node->as_variable();
-      if (var) {
-         switch (var->data.mode) {
-         case ir_var_shader_in:
-            iface = GL_PROGRAM_INPUT;
-            break;
-         case ir_var_shader_out:
-            iface = GL_PROGRAM_OUTPUT;
-            break;
-         default:
-            unreachable("unexpected type");
-         }
-
-         if (type == iface) {
-            gl_shader_variable *sha_v = create_shader_variable(shProg, var);
-            if (!sha_v)
-               return false;
-            if (!add_program_resource(shProg, iface, sha_v,
-                                      build_stageref(shProg, sha_v->name,
-                                                     sha_v->mode)))
-               return false;
-         }
-      }
-   }
-   return true;
-}
-
-static bool
-add_fragdata_arrays(struct gl_shader_program *shProg)
-{
-   struct gl_shader *sh = shProg->_LinkedShaders[MESA_SHADER_FRAGMENT];
-
-   if (!sh || !sh->fragdata_arrays)
-      return true;
-
-   foreach_in_list(ir_instruction, node, sh->fragdata_arrays) {
-      ir_variable *var = node->as_variable();
-      if (var) {
-         assert(var->data.mode == ir_var_shader_out);
-         gl_shader_variable *sha_v = create_shader_variable(shProg, var);
-         if (!sha_v)
-            return false;
-         if (!add_program_resource(shProg, GL_PROGRAM_OUTPUT, sha_v,
-                                   1 << MESA_SHADER_FRAGMENT))
-            return false;
-      }
-   }
-   return true;
-}
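
The name-splitting that get_top_level_name() below performs (keep everything before the first '.' or '[', whichever comes first) can be demonstrated in isolation. An illustrative sketch with a hypothetical driver function, not the patched code:

    #include <stdio.h>
    #include <string.h>

    /* Print the top-level portion of a buffer-variable name: the prefix
     * before the first '.' or '[', whichever comes first.
     */
    static void
    demo(const char *name)
    {
       const char *dot = strchr(name, '.');
       const char *bracket = strchr(name, '[');
       size_t n;

       if (!dot && !bracket)
          n = strlen(name);              /* plain name        */
       else if (!bracket || (dot && dot < bracket))
          n = (size_t)(dot - name);      /* "Block.member"    */
       else
          n = (size_t)(bracket - name);  /* "Block[2].member" */

       printf("%s -> %.*s\n", name, (int) n, name);
    }

    int
    main(void)
    {
       demo("var");           /* var   */
       demo("Block.member");  /* Block */
       demo("Block[2].m");    /* Block */
       return 0;
    }
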
-
-static char*
-get_top_level_name(const char *name)
-{
-   const char *first_dot = strchr(name, '.');
-   const char *first_square_bracket = strchr(name, '[');
-   int name_size = 0;
-   /* From the ARB_program_interface_query spec:
-    *
-    *     "For the property TOP_LEVEL_ARRAY_SIZE, a single integer identifying
-    *     the number of active array elements of the top-level shader storage
-    *     block member containing the active variable is written to <params>.
-    *     If the top-level block member is not declared as an array, the value
-    *     one is written to <params>. If the top-level block member is an
-    *     array with no declared size, the value zero is written to <params>."
-    */
-
-   /* The buffer variable is at top level. */
-   if (!first_square_bracket && !first_dot)
-      name_size = strlen(name);
-   else if ((!first_square_bracket ||
-            (first_dot && first_dot < first_square_bracket)))
-      name_size = first_dot - name;
-   else
-      name_size = first_square_bracket - name;
-
-   return strndup(name, name_size);
-}
-
-static char*
-get_var_name(const char *name)
-{
-   const char *first_dot = strchr(name, '.');
-
-   if (!first_dot)
-      return strdup(name);
-
-   return strndup(first_dot+1, strlen(first_dot) - 1);
-}
-
-static bool
-is_top_level_shader_storage_block_member(const char* name,
-                                         const char* interface_name,
-                                         const char* field_name)
-{
-   bool result = false;
-
-   /* If the given variable is already a top-level shader storage
-    * block member, then return array_size = 1.
-    * There are two possibilities: the shader storage block is
-    * instanced or it is not.
-    *
-    * For the instanced case, we construct the name as it would appear
-    * at top level and compare it with the real name. If they are the
-    * same, then the variable is already at top level.
-    *
-    * The full instanced name is: interface name + '.' + var name +
-    * NULL character.
-    */
-   int name_length = strlen(interface_name) + 1 + strlen(field_name) + 1;
-   char *full_instanced_name = (char *) calloc(name_length, sizeof(char));
-   if (!full_instanced_name) {
-      fprintf(stderr, "%s: Cannot allocate space for name\n", __func__);
-      return false;
-   }
-
-   snprintf(full_instanced_name, name_length, "%s.%s",
-            interface_name, field_name);
-
-   /* Check if it's a top-level shader storage block member of an
-    * instanced interface block, or of an unnamed interface block.
-    */
-   if (strcmp(name, full_instanced_name) == 0 ||
-       strcmp(name, field_name) == 0)
-      result = true;
-
-   free(full_instanced_name);
-   return result;
-}
-
-static int
-get_array_size(struct gl_uniform_storage *uni, const glsl_struct_field *field,
-               char *interface_name, char *var_name)
-{
-   /* From the GL_ARB_program_interface_query spec:
-    *
-    *     "For the property TOP_LEVEL_ARRAY_SIZE, a single integer
-    *     identifying the number of active array elements of the top-level
-    *     shader storage block member containing the active variable is
-    *     written to <params>. If the top-level block member is not
-    *     declared as an array, the value one is written to <params>. If
-    *     the top-level block member is an array with no declared size,
-    *     the value zero is written to <params>."
-    */
-   if (is_top_level_shader_storage_block_member(uni->name,
-                                                interface_name,
-                                                var_name))
-      return 1;
-   else if (field->type->is_unsized_array())
-      return 0;
-   else if (field->type->is_array())
-      return field->type->length;
-
-   return 1;
-}
-
-static int
-get_array_stride(struct gl_uniform_storage *uni, const glsl_type *interface,
-                 const glsl_struct_field *field, char *interface_name,
-                 char *var_name)
-{
-   /* From GL_ARB_program_interface_query:
-    *
-    *     "For the property TOP_LEVEL_ARRAY_STRIDE, a single integer
-    *     identifying the stride between array elements of the top-level
-    *     shader storage block member containing the active variable is
-    *     written to <params>. For top-level block members declared as
-    *     arrays, the value written is the difference, in basic machine
-    *     units, between the offsets of the active variable for
-    *     consecutive elements in the top-level array. For top-level
-    *     block members not declared as an array, zero is written to
-    *     <params>."
- */ - if (field->type->is_array()) { - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(field->matrix_layout); - bool row_major = matrix_layout == GLSL_MATRIX_LAYOUT_ROW_MAJOR; - const glsl_type *array_type = field->type->fields.array; - - if (is_top_level_shader_storage_block_member(uni->name, - interface_name, - var_name)) - return 0; - - if (interface->interface_packing != GLSL_INTERFACE_PACKING_STD430) { - if (array_type->is_record() || array_type->is_array()) - return glsl_align(array_type->std140_size(row_major), 16); - else - return MAX2(array_type->std140_base_alignment(row_major), 16); - } else { - return array_type->std430_array_stride(row_major); - } - } - return 0; -} - -static void -calculate_array_size_and_stride(struct gl_shader_program *shProg, - struct gl_uniform_storage *uni) -{ - int block_index = uni->block_index; - int array_size = -1; - int array_stride = -1; - char *var_name = get_top_level_name(uni->name); - char *interface_name = - get_top_level_name(shProg->BufferInterfaceBlocks[block_index].Name); - - if (strcmp(var_name, interface_name) == 0) { - /* Deal with instanced array of SSBOs */ - char *temp_name = get_var_name(uni->name); - if (!temp_name) { - linker_error(shProg, "Out of memory during linking.\n"); - goto write_top_level_array_size_and_stride; - } - free(var_name); - var_name = get_top_level_name(temp_name); - free(temp_name); - if (!var_name) { - linker_error(shProg, "Out of memory during linking.\n"); - goto write_top_level_array_size_and_stride; - } - } - - for (unsigned i = 0; i < shProg->NumShaders; i++) { - if (shProg->Shaders[i] == NULL) - continue; - - const gl_shader *stage = shProg->Shaders[i]; - foreach_in_list(ir_instruction, node, stage->ir) { - ir_variable *var = node->as_variable(); - if (!var || !var->get_interface_type() || - var->data.mode != ir_var_shader_storage) - continue; - - const glsl_type *interface = var->get_interface_type(); - - if (strcmp(interface_name, interface->name) != 0) - continue; - - for (unsigned i = 0; i < interface->length; i++) { - const glsl_struct_field *field = &interface->fields.structure[i]; - if (strcmp(field->name, var_name) != 0) - continue; - - array_stride = get_array_stride(uni, interface, field, - interface_name, var_name); - array_size = get_array_size(uni, field, interface_name, var_name); - goto write_top_level_array_size_and_stride; - } - } - } -write_top_level_array_size_and_stride: - free(interface_name); - free(var_name); - uni->top_level_array_stride = array_stride; - uni->top_level_array_size = array_size; -} - -/** - * Builds up a list of program resources that point to existing - * resource data. - */ -void -build_program_resource_list(struct gl_shader_program *shProg) -{ - /* Rebuild resource list. */ - if (shProg->ProgramResourceList) { - ralloc_free(shProg->ProgramResourceList); - shProg->ProgramResourceList = NULL; - shProg->NumProgramResourceList = 0; - } - - int input_stage = MESA_SHADER_STAGES, output_stage = 0; - - /* Determine first input and final output stage. These are used to - * detect which variables should be enumerated in the resource list - * for GL_PROGRAM_INPUT and GL_PROGRAM_OUTPUT. - */ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (!shProg->_LinkedShaders[i]) - continue; - if (input_stage == MESA_SHADER_STAGES) - input_stage = i; - output_stage = i; - } - - /* Empty shader, no resources. */ - if (input_stage == MESA_SHADER_STAGES && output_stage == 0) - return; - - /* Program interface needs to expose varyings in case of SSO. 
*/ - if (shProg->SeparateShader) { - if (!add_packed_varyings(shProg, input_stage, GL_PROGRAM_INPUT)) - return; - - if (!add_packed_varyings(shProg, output_stage, GL_PROGRAM_OUTPUT)) - return; - } - - if (!add_fragdata_arrays(shProg)) - return; - - /* Add inputs and outputs to the resource list. */ - if (!add_interface_variables(shProg, shProg->_LinkedShaders[input_stage]->ir, - GL_PROGRAM_INPUT)) - return; - - if (!add_interface_variables(shProg, shProg->_LinkedShaders[output_stage]->ir, - GL_PROGRAM_OUTPUT)) - return; - - /* Add transform feedback varyings. */ - if (shProg->LinkedTransformFeedback.NumVarying > 0) { - for (int i = 0; i < shProg->LinkedTransformFeedback.NumVarying; i++) { - if (!add_program_resource(shProg, GL_TRANSFORM_FEEDBACK_VARYING, - &shProg->LinkedTransformFeedback.Varyings[i], - 0)) - return; - } - } - - /* Add uniforms from uniform storage. */ - for (unsigned i = 0; i < shProg->NumUniformStorage; i++) { - /* Do not add uniforms internally used by Mesa. */ - if (shProg->UniformStorage[i].hidden) - continue; - - uint8_t stageref = - build_stageref(shProg, shProg->UniformStorage[i].name, - ir_var_uniform); - - /* Add stagereferences for uniforms in a uniform block. */ - int block_index = shProg->UniformStorage[i].block_index; - if (block_index != -1) { - for (unsigned j = 0; j < MESA_SHADER_STAGES; j++) { - if (shProg->InterfaceBlockStageIndex[j][block_index] != -1) - stageref |= (1 << j); - } - } - - bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage; - GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM; - if (!should_add_buffer_variable(shProg, type, - shProg->UniformStorage[i].name)) - continue; - - if (is_shader_storage) { - calculate_array_size_and_stride(shProg, &shProg->UniformStorage[i]); - } - - if (!add_program_resource(shProg, type, - &shProg->UniformStorage[i], stageref)) - return; - } - - /* Add program uniform blocks and shader storage blocks. */ - for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { - bool is_shader_storage = shProg->BufferInterfaceBlocks[i].IsShaderStorage; - GLenum type = is_shader_storage ? GL_SHADER_STORAGE_BLOCK : GL_UNIFORM_BLOCK; - if (!add_program_resource(shProg, type, - &shProg->BufferInterfaceBlocks[i], 0)) - return; - } - - /* Add atomic counter buffers. 
*/ - for (unsigned i = 0; i < shProg->NumAtomicBuffers; i++) { - if (!add_program_resource(shProg, GL_ATOMIC_COUNTER_BUFFER, - &shProg->AtomicBuffers[i], 0)) - return; - } - - for (unsigned i = 0; i < shProg->NumUniformStorage; i++) { - GLenum type; - if (!shProg->UniformStorage[i].hidden) - continue; - - for (int j = MESA_SHADER_VERTEX; j < MESA_SHADER_STAGES; j++) { - if (!shProg->UniformStorage[i].opaque[j].active || - !shProg->UniformStorage[i].type->is_subroutine()) - continue; - - type = _mesa_shader_stage_to_subroutine_uniform((gl_shader_stage)j); - /* add shader subroutines */ - if (!add_program_resource(shProg, type, &shProg->UniformStorage[i], 0)) - return; - } - } - - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - struct gl_shader *sh = shProg->_LinkedShaders[i]; - GLuint type; - - if (!sh) - continue; - - type = _mesa_shader_stage_to_subroutine((gl_shader_stage)i); - for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { - if (!add_program_resource(shProg, type, &sh->SubroutineFunctions[j], 0)) - return; - } - } -} - -/** - * This check is done to make sure we allow only constant expression - * indexing and "constant-index-expression" (indexing with an expression - * that includes loop induction variable). - */ -static bool -validate_sampler_array_indexing(struct gl_context *ctx, - struct gl_shader_program *prog) -{ - dynamic_sampler_array_indexing_visitor v; - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] == NULL) - continue; - - bool no_dynamic_indexing = - ctx->Const.ShaderCompilerOptions[i].EmitNoIndirectSampler; - - /* Search for array derefs in shader. */ - v.run(prog->_LinkedShaders[i]->ir); - if (v.uses_dynamic_sampler_array_indexing()) { - const char *msg = "sampler arrays indexed with non-constant " - "expressions is forbidden in GLSL %s %u"; - /* Backend has indicated that it has no dynamic indexing support. */ - if (no_dynamic_indexing) { - linker_error(prog, msg, prog->IsES ? "ES" : "", prog->Version); - return false; - } else { - linker_warning(prog, msg, prog->IsES ? "ES" : "", prog->Version); - } - } - } - return true; -} - -static void -link_assign_subroutine_types(struct gl_shader_program *prog) -{ - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - gl_shader *sh = prog->_LinkedShaders[i]; - - if (sh == NULL) - continue; - - foreach_in_list(ir_instruction, node, sh->ir) { - ir_function *fn = node->as_function(); - if (!fn) - continue; - - if (fn->is_subroutine) - sh->NumSubroutineUniformTypes++; - - if (!fn->num_subroutine_types) - continue; - - sh->SubroutineFunctions = reralloc(sh, sh->SubroutineFunctions, - struct gl_subroutine_function, - sh->NumSubroutineFunctions + 1); - sh->SubroutineFunctions[sh->NumSubroutineFunctions].name = ralloc_strdup(sh, fn->name); - sh->SubroutineFunctions[sh->NumSubroutineFunctions].num_compat_types = fn->num_subroutine_types; - sh->SubroutineFunctions[sh->NumSubroutineFunctions].types = - ralloc_array(sh, const struct glsl_type *, - fn->num_subroutine_types); - - /* From Section 4.4.4(Subroutine Function Layout Qualifiers) of the - * GLSL 4.5 spec: - * - * "Each subroutine with an index qualifier in the shader must be - * given a unique index, otherwise a compile or link error will be - * generated." 
- */ - for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { - if (sh->SubroutineFunctions[j].index != -1 && - sh->SubroutineFunctions[j].index == fn->subroutine_index) { - linker_error(prog, "each subroutine index qualifier in the " - "shader must be unique\n"); - return; - } - } - sh->SubroutineFunctions[sh->NumSubroutineFunctions].index = - fn->subroutine_index; - - for (int j = 0; j < fn->num_subroutine_types; j++) - sh->SubroutineFunctions[sh->NumSubroutineFunctions].types[j] = fn->subroutine_types[j]; - sh->NumSubroutineFunctions++; - } - - /* Assign index for subroutines without an explicit index*/ - int index = 0; - for (unsigned j = 0; j < sh->NumSubroutineFunctions; j++) { - while (sh->SubroutineFunctions[j].index == -1) { - for (unsigned k = 0; k < sh->NumSubroutineFunctions; k++) { - if (sh->SubroutineFunctions[k].index == index) - break; - else if (k == sh->NumSubroutineFunctions - 1) - sh->SubroutineFunctions[j].index = index; - } - index++; - } - } - } -} - -static void -split_ubos_and_ssbos(void *mem_ctx, - struct gl_uniform_block *blocks, - unsigned num_blocks, - struct gl_uniform_block ***ubos, - unsigned *num_ubos, - unsigned **ubo_interface_block_indices, - struct gl_uniform_block ***ssbos, - unsigned *num_ssbos, - unsigned **ssbo_interface_block_indices) -{ - unsigned num_ubo_blocks = 0; - unsigned num_ssbo_blocks = 0; - - for (unsigned i = 0; i < num_blocks; i++) { - if (blocks[i].IsShaderStorage) - num_ssbo_blocks++; - else - num_ubo_blocks++; - } - - *ubos = ralloc_array(mem_ctx, gl_uniform_block *, num_ubo_blocks); - *num_ubos = 0; - - *ssbos = ralloc_array(mem_ctx, gl_uniform_block *, num_ssbo_blocks); - *num_ssbos = 0; - - if (ubo_interface_block_indices) - *ubo_interface_block_indices = - ralloc_array(mem_ctx, unsigned, num_ubo_blocks); - - if (ssbo_interface_block_indices) - *ssbo_interface_block_indices = - ralloc_array(mem_ctx, unsigned, num_ssbo_blocks); - - for (unsigned i = 0; i < num_blocks; i++) { - if (blocks[i].IsShaderStorage) { - (*ssbos)[*num_ssbos] = &blocks[i]; - if (ssbo_interface_block_indices) - (*ssbo_interface_block_indices)[*num_ssbos] = i; - (*num_ssbos)++; - } else { - (*ubos)[*num_ubos] = &blocks[i]; - if (ubo_interface_block_indices) - (*ubo_interface_block_indices)[*num_ubos] = i; - (*num_ubos)++; - } - } - - assert(*num_ubos + *num_ssbos == num_blocks); -} - -static void -set_always_active_io(exec_list *ir, ir_variable_mode io_mode) -{ - assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); - - foreach_in_list(ir_instruction, node, ir) { - ir_variable *const var = node->as_variable(); - - if (var == NULL || var->data.mode != io_mode) - continue; - - /* Don't set always active on builtins that haven't been redeclared */ - if (var->data.how_declared == ir_var_declared_implicitly) - continue; - - var->data.always_active_io = true; - } -} - -/** - * When separate shader programs are enabled, only input/outputs between - * the stages of a multi-stage separate program can be safely removed - * from the shader interface. Other inputs/outputs must remain active. - */ -static void -disable_varying_optimizations_for_sso(struct gl_shader_program *prog) -{ - unsigned first, last; - assert(prog->SeparateShader); - - first = MESA_SHADER_STAGES; - last = 0; - - /* Determine first and last stage. 
Excluding the compute stage */ - for (unsigned i = 0; i < MESA_SHADER_COMPUTE; i++) { - if (!prog->_LinkedShaders[i]) - continue; - if (first == MESA_SHADER_STAGES) - first = i; - last = i; - } - - if (first == MESA_SHADER_STAGES) - return; - - for (unsigned stage = 0; stage < MESA_SHADER_STAGES; stage++) { - gl_shader *sh = prog->_LinkedShaders[stage]; - if (!sh) - continue; - - if (first == last) { - /* For a single shader program only allow inputs to the vertex shader - * and outputs from the fragment shader to be removed. - */ - if (stage != MESA_SHADER_VERTEX) - set_always_active_io(sh->ir, ir_var_shader_in); - if (stage != MESA_SHADER_FRAGMENT) - set_always_active_io(sh->ir, ir_var_shader_out); - } else { - /* For multi-stage separate shader programs only allow inputs and - * outputs between the shader stages to be removed as well as inputs - * to the vertex shader and outputs from the fragment shader. - */ - if (stage == first && stage != MESA_SHADER_VERTEX) - set_always_active_io(sh->ir, ir_var_shader_in); - else if (stage == last && stage != MESA_SHADER_FRAGMENT) - set_always_active_io(sh->ir, ir_var_shader_out); - } - } -} - -void -link_shaders(struct gl_context *ctx, struct gl_shader_program *prog) -{ - tfeedback_decl *tfeedback_decls = NULL; - unsigned num_tfeedback_decls = prog->TransformFeedback.NumVarying; - - void *mem_ctx = ralloc_context(NULL); // temporary linker context - - prog->LinkStatus = true; /* All error paths will set this to false */ - prog->Validated = false; - prog->_Used = false; - - prog->ARB_fragment_coord_conventions_enable = false; - - /* Separate the shaders into groups based on their type. - */ - struct gl_shader **shader_list[MESA_SHADER_STAGES]; - unsigned num_shaders[MESA_SHADER_STAGES]; - - for (int i = 0; i < MESA_SHADER_STAGES; i++) { - shader_list[i] = (struct gl_shader **) - calloc(prog->NumShaders, sizeof(struct gl_shader *)); - num_shaders[i] = 0; - } - - unsigned min_version = UINT_MAX; - unsigned max_version = 0; - const bool is_es_prog = - (prog->NumShaders > 0 && prog->Shaders[0]->IsES) ? true : false; - for (unsigned i = 0; i < prog->NumShaders; i++) { - min_version = MIN2(min_version, prog->Shaders[i]->Version); - max_version = MAX2(max_version, prog->Shaders[i]->Version); - - if (prog->Shaders[i]->IsES != is_es_prog) { - linker_error(prog, "all shaders must use same shading " - "language version\n"); - goto done; - } - - if (prog->Shaders[i]->ARB_fragment_coord_conventions_enable) { - prog->ARB_fragment_coord_conventions_enable = true; - } - - gl_shader_stage shader_type = prog->Shaders[i]->Stage; - shader_list[shader_type][num_shaders[shader_type]] = prog->Shaders[i]; - num_shaders[shader_type]++; - } - - /* In desktop GLSL, different shader versions may be linked together. In - * GLSL ES, all shader versions must be the same. - */ - if (is_es_prog && min_version != max_version) { - linker_error(prog, "all shaders must use same shading " - "language version\n"); - goto done; - } - - prog->Version = max_version; - prog->IsES = is_es_prog; - - /* From OpenGL 4.5 Core specification (7.3 Program Objects): - * "Linking can fail for a variety of reasons as specified in the OpenGL - * Shading Language Specification, as well as any of the following - * reasons: - * - * * No shader objects are attached to program. - * - * ..." - * - * Same rule applies for OpenGL ES >= 3.1. 
- */ - - if (prog->NumShaders == 0 && - ((ctx->API == API_OPENGL_CORE && ctx->Version >= 45) || - (ctx->API == API_OPENGLES2 && ctx->Version >= 31))) { - linker_error(prog, "No shader objects are attached to program.\n"); - goto done; - } - - /* Some shaders have to be linked with some other shaders present. - */ - if (num_shaders[MESA_SHADER_GEOMETRY] > 0 && - num_shaders[MESA_SHADER_VERTEX] == 0 && - !prog->SeparateShader) { - linker_error(prog, "Geometry shader must be linked with " - "vertex shader\n"); - goto done; - } - if (num_shaders[MESA_SHADER_TESS_EVAL] > 0 && - num_shaders[MESA_SHADER_VERTEX] == 0 && - !prog->SeparateShader) { - linker_error(prog, "Tessellation evaluation shader must be linked with " - "vertex shader\n"); - goto done; - } - if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 && - num_shaders[MESA_SHADER_VERTEX] == 0 && - !prog->SeparateShader) { - linker_error(prog, "Tessellation control shader must be linked with " - "vertex shader\n"); - goto done; - } - - /* The spec is self-contradictory here. It allows linking without a tess - * eval shader, but that can only be used with transform feedback and - * rasterization disabled. However, transform feedback isn't allowed - * with GL_PATCHES, so it can't be used. - * - * More investigation showed that the idea of transform feedback after - * a tess control shader was dropped, because some hw vendors couldn't - * support tessellation without a tess eval shader, but the linker section - * wasn't updated to reflect that. - * - * All specifications (ARB_tessellation_shader, GL 4.0-4.5) have this - * spec bug. - * - * Do what's reasonable and always require a tess eval shader if a tess - * control shader is present. - */ - if (num_shaders[MESA_SHADER_TESS_CTRL] > 0 && - num_shaders[MESA_SHADER_TESS_EVAL] == 0 && - !prog->SeparateShader) { - linker_error(prog, "Tessellation control shader must be linked with " - "tessellation evaluation shader\n"); - goto done; - } - - /* Compute shaders have additional restrictions. */ - if (num_shaders[MESA_SHADER_COMPUTE] > 0 && - num_shaders[MESA_SHADER_COMPUTE] != prog->NumShaders) { - linker_error(prog, "Compute shaders may not be linked with any other " - "type of shader\n"); - } - - for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] != NULL) - _mesa_delete_shader(ctx, prog->_LinkedShaders[i]); - - prog->_LinkedShaders[i] = NULL; - } - - /* Link all shaders for a particular stage and validate the result. 
- */
-   for (int stage = 0; stage < MESA_SHADER_STAGES; stage++) {
-      if (num_shaders[stage] > 0) {
-         gl_shader *const sh =
-            link_intrastage_shaders(mem_ctx, ctx, prog, shader_list[stage],
-                                    num_shaders[stage]);
-
-         if (!prog->LinkStatus) {
-            if (sh)
-               _mesa_delete_shader(ctx, sh);
-            goto done;
-         }
-
-         switch (stage) {
-         case MESA_SHADER_VERTEX:
-            validate_vertex_shader_executable(prog, sh);
-            break;
-         case MESA_SHADER_TESS_CTRL:
-            /* nothing to be done */
-            break;
-         case MESA_SHADER_TESS_EVAL:
-            validate_tess_eval_shader_executable(prog, sh);
-            break;
-         case MESA_SHADER_GEOMETRY:
-            validate_geometry_shader_executable(prog, sh);
-            break;
-         case MESA_SHADER_FRAGMENT:
-            validate_fragment_shader_executable(prog, sh);
-            break;
-         }
-         if (!prog->LinkStatus) {
-            if (sh)
-               _mesa_delete_shader(ctx, sh);
-            goto done;
-         }
-
-         _mesa_reference_shader(ctx, &prog->_LinkedShaders[stage], sh);
-      }
-   }
-
-   if (num_shaders[MESA_SHADER_GEOMETRY] > 0)
-      prog->LastClipDistanceArraySize = prog->Geom.ClipDistanceArraySize;
-   else if (num_shaders[MESA_SHADER_TESS_EVAL] > 0)
-      prog->LastClipDistanceArraySize = prog->TessEval.ClipDistanceArraySize;
-   else if (num_shaders[MESA_SHADER_VERTEX] > 0)
-      prog->LastClipDistanceArraySize = prog->Vert.ClipDistanceArraySize;
-   else
-      prog->LastClipDistanceArraySize = 0; /* Not used */
-
-   /* Here begins the inter-stage linking phase. Some initial validation is
-    * performed, then locations are assigned for uniforms, attributes, and
-    * varyings.
-    */
-   cross_validate_uniforms(prog);
-   if (!prog->LinkStatus)
-      goto done;
-
-   unsigned first, last, prev;
-
-   first = MESA_SHADER_STAGES;
-   last = 0;
-
-   /* Determine first and last stage. */
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-      if (!prog->_LinkedShaders[i])
-         continue;
-      if (first == MESA_SHADER_STAGES)
-         first = i;
-      last = i;
-   }
-
-   check_explicit_uniform_locations(ctx, prog);
-   link_assign_subroutine_types(prog);
-
-   if (!prog->LinkStatus)
-      goto done;
-
-   resize_tes_inputs(ctx, prog);
-
-   /* Validate the inputs of each stage with the output of the preceding
-    * stage.
-    */
-   prev = first;
-   for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) {
-      if (prog->_LinkedShaders[i] == NULL)
-         continue;
-
-      validate_interstage_inout_blocks(prog, prog->_LinkedShaders[prev],
-                                       prog->_LinkedShaders[i]);
-      if (!prog->LinkStatus)
-         goto done;
-
-      cross_validate_outputs_to_inputs(prog,
-                                       prog->_LinkedShaders[prev],
-                                       prog->_LinkedShaders[i]);
-      if (!prog->LinkStatus)
-         goto done;
-
-      prev = i;
-   }
-
-   /* Cross-validate uniform blocks between shader stages */
-   validate_interstage_uniform_blocks(prog, prog->_LinkedShaders,
-                                      MESA_SHADER_STAGES);
-   if (!prog->LinkStatus)
-      goto done;
-
-   for (unsigned int i = 0; i < MESA_SHADER_STAGES; i++) {
-      if (prog->_LinkedShaders[i] != NULL)
-         lower_named_interface_blocks(mem_ctx, prog->_LinkedShaders[i]);
-   }
-
-   /* Implement the GLSL 1.30+ rule for discard vs infinite loops. Do
-    * it before optimization because we want most of the checks to get
-    * dropped thanks to constant propagation.
-    *
-    * This rule also applies to GLSL ES 3.00.
-    */
-   if (max_version >= (is_es_prog ? 300 : 130)) {
-      struct gl_shader *sh = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
-      if (sh) {
-         lower_discard_flow(sh->ir);
-      }
-   }
-
-   if (prog->SeparateShader)
-      disable_varying_optimizations_for_sso(prog);
-
-   if (!interstage_cross_validate_uniform_blocks(prog))
-      goto done;
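
The optimization loop that follows runs do_common_optimization() to a fixed point: each call returns whether any pass made progress, and linking keeps iterating until none does. A generic sketch of that pattern (toy pass and IR types are stand-ins, not Mesa's API):

    #include <stdbool.h>
    #include <stdio.h>

    typedef bool (*pass_fn)(int *ir);

    static bool
    halve(int *ir)   /* toy pass: "makes progress" while *ir > 1 */
    {
       if (*ir > 1) {
          *ir /= 2;
          return true;
       }
       return false;
    }

    /* Rerun every pass until a full sweep reports no progress. */
    static void
    run_to_fixed_point(pass_fn *passes, unsigned n, int *ir)
    {
       bool progress;

       do {
          progress = false;
          for (unsigned i = 0; i < n; i++)
             progress = passes[i](ir) || progress;
       } while (progress);
    }

    int
    main(void)
    {
       pass_fn passes[] = { halve };
       int ir = 100;

       run_to_fixed_point(passes, 1, &ir);
       printf("%d\n", ir);   /* 1: no further progress possible */
       return 0;
    }
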
-   /* Do common optimization before assigning storage for attributes,
-    * uniforms, and varyings. Later optimization could possibly make
-    * some of that unused.
-    */
-   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
-      if (prog->_LinkedShaders[i] == NULL)
-         continue;
-
-      detect_recursion_linked(prog, prog->_LinkedShaders[i]->ir);
-      if (!prog->LinkStatus)
-         goto done;
-
-      if (ctx->Const.ShaderCompilerOptions[i].LowerClipDistance) {
-         lower_clip_distance(prog->_LinkedShaders[i]);
-      }
-
-      if (ctx->Const.LowerTessLevel) {
-         lower_tess_level(prog->_LinkedShaders[i]);
-      }
-
-      while (do_common_optimization(prog->_LinkedShaders[i]->ir, true, false,
-                                    &ctx->Const.ShaderCompilerOptions[i],
-                                    ctx->Const.NativeIntegers))
-         ;
-
-      lower_const_arrays_to_uniforms(prog->_LinkedShaders[i]->ir);
-   }
-
-   /* Validation for special cases where we allow sampler array indexing
-    * with a loop induction variable. This check emits a warning or error
-    * depending on whether the backend can handle dynamic indexing.
-    */
-   if ((!prog->IsES && prog->Version < 130) ||
-       (prog->IsES && prog->Version < 300)) {
-      if (!validate_sampler_array_indexing(ctx, prog))
-         goto done;
-   }
-
-   /* Check and validate stream emissions in geometry shaders */
-   validate_geometry_shader_emissions(ctx, prog);
-
-   /* Mark all generic shader inputs and outputs as unpaired. */
-   for (unsigned i = MESA_SHADER_VERTEX; i <= MESA_SHADER_FRAGMENT; i++) {
-      if (prog->_LinkedShaders[i] != NULL) {
-         link_invalidate_variable_locations(prog->_LinkedShaders[i]->ir);
-      }
-   }
-
-   prev = first;
-   for (unsigned i = prev + 1; i <= MESA_SHADER_FRAGMENT; i++) {
-      if (prog->_LinkedShaders[i] == NULL)
-         continue;
-
-      match_explicit_outputs_to_inputs(prog, prog->_LinkedShaders[prev],
-                                       prog->_LinkedShaders[i]);
-      prev = i;
-   }
-
-   if (!assign_attribute_or_color_locations(prog, &ctx->Const,
-                                            MESA_SHADER_VERTEX)) {
-      goto done;
-   }
-
-   if (!assign_attribute_or_color_locations(prog, &ctx->Const,
-                                            MESA_SHADER_FRAGMENT)) {
-      goto done;
-   }
-
-   if (num_tfeedback_decls != 0) {
-      /* From GL_EXT_transform_feedback:
-       *   A program will fail to link if:
-       *
-       *   * the <count> specified by TransformFeedbackVaryingsEXT is
-       *     non-zero, but the program object has no vertex or geometry
-       *     shader;
-       */
-      if (first == MESA_SHADER_FRAGMENT) {
-         linker_error(prog, "Transform feedback varyings specified, but "
-                      "no vertex or geometry shader is present.\n");
-         goto done;
-      }
-
-      tfeedback_decls = ralloc_array(mem_ctx, tfeedback_decl,
-                                     prog->TransformFeedback.NumVarying);
-      if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
-                                 prog->TransformFeedback.VaryingNames,
-                                 tfeedback_decls))
-         goto done;
-   }
-
-   /* Linking the stages in the opposite order (from fragment to vertex)
-    * ensures that inter-shader outputs written to in an earlier stage are
-    * eliminated if they are (transitively) not used in a later stage.
-    */
-   int next;
-
-   if (first < MESA_SHADER_FRAGMENT) {
-      gl_shader *const sh = prog->_LinkedShaders[last];
-
-      if (first != MESA_SHADER_VERTEX) {
-         /* There was no vertex shader, but we still have to assign varying
-          * locations for use by tessellation/geometry shader inputs in SSO.
-          *
-          * If the shader is not separable (i.e., prog->SeparateShader is
-          * false), linking will have already failed when first is not
-          * MESA_SHADER_VERTEX.
- */ - if (!assign_varying_locations(ctx, mem_ctx, prog, - NULL, prog->_LinkedShaders[first], - num_tfeedback_decls, tfeedback_decls)) - goto done; - } - - if (last != MESA_SHADER_FRAGMENT && - (num_tfeedback_decls != 0 || prog->SeparateShader)) { - /* There was no fragment shader, but we still have to assign varying - * locations for use by transform feedback. - */ - if (!assign_varying_locations(ctx, mem_ctx, prog, - sh, NULL, - num_tfeedback_decls, tfeedback_decls)) - goto done; - } - - do_dead_builtin_varyings(ctx, sh, NULL, - num_tfeedback_decls, tfeedback_decls); - - remove_unused_shader_inputs_and_outputs(prog->SeparateShader, sh, - ir_var_shader_out); - } - else if (first == MESA_SHADER_FRAGMENT) { - /* If the program only contains a fragment shader... - */ - gl_shader *const sh = prog->_LinkedShaders[first]; - - do_dead_builtin_varyings(ctx, NULL, sh, - num_tfeedback_decls, tfeedback_decls); - - if (prog->SeparateShader) { - if (!assign_varying_locations(ctx, mem_ctx, prog, - NULL /* producer */, - sh /* consumer */, - 0 /* num_tfeedback_decls */, - NULL /* tfeedback_decls */)) - goto done; - } else { - remove_unused_shader_inputs_and_outputs(false, sh, - ir_var_shader_in); - } - } - - next = last; - for (int i = next - 1; i >= 0; i--) { - if (prog->_LinkedShaders[i] == NULL) - continue; - - gl_shader *const sh_i = prog->_LinkedShaders[i]; - gl_shader *const sh_next = prog->_LinkedShaders[next]; - - if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next, - next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, - tfeedback_decls)) - goto done; - - do_dead_builtin_varyings(ctx, sh_i, sh_next, - next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, - tfeedback_decls); - - /* This must be done after all dead varyings are eliminated. */ - if (!check_against_output_limit(ctx, prog, sh_i)) - goto done; - if (!check_against_input_limit(ctx, prog, sh_next)) - goto done; - - next = i; - } - - if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls)) - goto done; - - update_array_sizes(prog); - link_assign_uniform_locations(prog, ctx->Const.UniformBooleanTrue); - link_assign_atomic_counter_resources(ctx, prog); - store_fragdepth_layout(prog); - - link_calculate_subroutine_compat(prog); - check_resources(ctx, prog); - check_subroutine_resources(prog); - check_image_resources(ctx, prog); - link_check_atomic_counter_resources(ctx, prog); - - if (!prog->LinkStatus) - goto done; - - /* OpenGL ES requires that a vertex shader and a fragment shader both be - * present in a linked program. GL_ARB_ES2_compatibility doesn't say - * anything about shader linking when one of the shaders (vertex or - * fragment shader) is absent. So, the extension shouldn't change the - * behavior specified in the GLSL specification. - */ - if (!prog->SeparateShader && ctx->API == API_OPENGLES2) { - /* With ES < 3.1 one always needs both a vertex and a fragment shader. */ - if (ctx->Version < 31) { - if (prog->_LinkedShaders[MESA_SHADER_VERTEX] == NULL) { - linker_error(prog, "program lacks a vertex shader\n"); - } else if (prog->_LinkedShaders[MESA_SHADER_FRAGMENT] == NULL) { - linker_error(prog, "program lacks a fragment shader\n"); - } - } else { - /* From OpenGL ES 3.1 specification (7.3 Program Objects): - * "Linking can fail for a variety of reasons as specified in the - * OpenGL ES Shading Language Specification, as well as any of the - * following reasons: - * - * ...
- * - * * program contains objects to form either a vertex shader or - * fragment shader, and program is not separable, and does not - * contain objects to form both a vertex shader and fragment - * shader." - */ - if (!!prog->_LinkedShaders[MESA_SHADER_VERTEX] ^ - !!prog->_LinkedShaders[MESA_SHADER_FRAGMENT]) { - linker_error(prog, "Program needs to contain both vertex and " - "fragment shaders.\n"); - } - } - } - - /* Split BufferInterfaceBlocks into UniformBlocks and ShaderStorageBlocks - * for gl_shader_program and gl_shader, so that drivers that need separate - * index spaces for each set can have that. - */ - for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] != NULL) { - gl_shader *sh = prog->_LinkedShaders[i]; - split_ubos_and_ssbos(sh, - sh->BufferInterfaceBlocks, - sh->NumBufferInterfaceBlocks, - &sh->UniformBlocks, - &sh->NumUniformBlocks, - NULL, - &sh->ShaderStorageBlocks, - &sh->NumShaderStorageBlocks, - NULL); - } - } - - split_ubos_and_ssbos(prog, - prog->BufferInterfaceBlocks, - prog->NumBufferInterfaceBlocks, - &prog->UniformBlocks, - &prog->NumUniformBlocks, - &prog->UboInterfaceBlockIndex, - &prog->ShaderStorageBlocks, - &prog->NumShaderStorageBlocks, - &prog->SsboInterfaceBlockIndex); - - /* FINISHME: Assign fragment shader output locations. */ - - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - if (prog->_LinkedShaders[i] == NULL) - continue; - - if (ctx->Const.ShaderCompilerOptions[i].LowerBufferInterfaceBlocks) - lower_ubo_reference(prog->_LinkedShaders[i]); - - if (ctx->Const.ShaderCompilerOptions[i].LowerShaderSharedVariables) - lower_shared_reference(prog->_LinkedShaders[i], - &prog->Comp.SharedSize); - - lower_vector_derefs(prog->_LinkedShaders[i]); - } - -done: - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { - free(shader_list[i]); - if (prog->_LinkedShaders[i] == NULL) - continue; - - /* Do a final validation step to make sure that the IR wasn't - * invalidated by any modifications performed after intrastage linking. - */ - validate_ir_tree(prog->_LinkedShaders[i]->ir); - - /* Retain any live IR, but trash the rest. */ - reparent_ir(prog->_LinkedShaders[i]->ir, prog->_LinkedShaders[i]->ir); - - /* The symbol table in the linked shaders may contain references to - * variables that were removed (e.g., unused uniforms). Since it may - * contain junk, there is no possible valid use. Delete it and set the - * pointer to NULL. - */ - delete prog->_LinkedShaders[i]->symbols; - prog->_LinkedShaders[i]->symbols = NULL; - } - - ralloc_free(mem_ctx); -} diff --git a/src/glsl/linker.h b/src/glsl/linker.h deleted file mode 100644 index c80be1c7e22..00000000000 --- a/src/glsl/linker.h +++ /dev/null @@ -1,205 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once -#ifndef GLSL_LINKER_H -#define GLSL_LINKER_H - -extern bool -link_function_calls(gl_shader_program *prog, gl_shader *main, - gl_shader **shader_list, unsigned num_shaders); - -extern void -link_invalidate_variable_locations(exec_list *ir); - -extern void -link_assign_uniform_locations(struct gl_shader_program *prog, - unsigned int boolean_true); - -extern void -link_set_uniform_initializers(struct gl_shader_program *prog, - unsigned int boolean_true); - -extern int -link_cross_validate_uniform_block(void *mem_ctx, - struct gl_uniform_block **linked_blocks, - unsigned int *num_linked_blocks, - struct gl_uniform_block *new_block); - -extern bool -link_uniform_blocks_are_compatible(const gl_uniform_block *a, - const gl_uniform_block *b); - -extern unsigned -link_uniform_blocks(void *mem_ctx, - struct gl_context *ctx, - struct gl_shader_program *prog, - struct gl_shader **shader_list, - unsigned num_shaders, - struct gl_uniform_block **blocks_ret); - -bool -validate_intrastage_arrays(struct gl_shader_program *prog, - ir_variable *const var, - ir_variable *const existing); - -void -validate_intrastage_interface_blocks(struct gl_shader_program *prog, - const gl_shader **shader_list, - unsigned num_shaders); - -void -validate_interstage_inout_blocks(struct gl_shader_program *prog, - const gl_shader *producer, - const gl_shader *consumer); - -void -validate_interstage_uniform_blocks(struct gl_shader_program *prog, - gl_shader **stages, int num_stages); - -extern void -link_assign_atomic_counter_resources(struct gl_context *ctx, - struct gl_shader_program *prog); - -extern void -link_check_atomic_counter_resources(struct gl_context *ctx, - struct gl_shader_program *prog); - -/** - * Class for processing all of the leaf fields of a variable that corresponds - * to a program resource. - * - * The leaf fields are all the parts of the variable that the application - * could query using \c glGetProgramResourceIndex (or that could be returned - * by \c glGetProgramResourceName). - * - * Classes may derive from this class to implement specific functionality. - * This class only provides the mechanism to iterate over the leaves. Derived - * classes must implement \c ::visit_field and may override \c ::process. - */ -class program_resource_visitor { -public: - /** - * Begin processing a variable - * - * Classes that overload this function should call \c ::process from the - * base class to start the recursive processing of the variable. - * - * \param var The variable that is to be processed - * - * Calls \c ::visit_field for each leaf of the variable. - * - * \warning - * When processing a uniform block, this entry should only be used in cases - * where the row / column ordering of matrices in the block does not - * matter. For example, enumerating the names of members of the block, but - * not for determining the offsets of members. - */ - void process(ir_variable *var); - - /** - * Begin processing a variable of a structured type.
- * - * This flavor of \c process should be used to handle structured types - * (i.e., structures, interfaces, or arrays thereof) that need special - * name handling. A common usage is to handle cases where the block name - * (instead of the instance name) is used for an interface block. - * - * \param type Type that is to be processed, associated with \c name - * \param name Base name of the structured variable being processed - * - * \note - * \c type must be \c GLSL_TYPE_RECORD, \c GLSL_TYPE_INTERFACE, or an array - * thereof. - */ - void process(const glsl_type *type, const char *name); - -protected: - /** - * Method invoked for each leaf of the variable - * - * \param type Type of the field. - * \param name Fully qualified name of the field. - * \param row_major For a matrix type, is it stored row-major. - * \param record_type Type of the record containing the field. - * \param last_field Set if \c name is the last field of the structure - * containing it. This will always be false for items - * not contained in a structure or interface block. - * - * The default implementation just calls the other \c visit_field method. - */ - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major, const glsl_type *record_type, - const unsigned packing, - bool last_field); - - /** - * Method invoked for each leaf of the variable - * - * \param type Type of the field. - * \param name Fully qualified name of the field. - * \param row_major For a matrix type, is it stored row-major. - */ - virtual void visit_field(const glsl_type *type, const char *name, - bool row_major) = 0; - - /** - * Visit a record before visiting its fields - * - * For structures-of-structures or interfaces-of-structures, this visits - * the inner structure before visiting its fields. - * - * The default implementation does nothing. - */ - virtual void visit_field(const glsl_struct_field *field); - - virtual void enter_record(const glsl_type *type, const char *name, - bool row_major, const unsigned packing); - - virtual void leave_record(const glsl_type *type, const char *name, - bool row_major, const unsigned packing); - - virtual void set_record_array_count(unsigned record_array_count); - -private: - /** - * \param name_length Length of the current name \b not including the - * terminating \c NUL character. - * \param last_field Set if \c name is the last field of the structure - * containing it. This will always be false for items - * not contained in a structure or interface block.
- */ - void recursion(const glsl_type *t, char **name, size_t name_length, - bool row_major, const glsl_type *record_type, - const unsigned packing, - bool last_field, unsigned record_array_count); -}; - -void -linker_error(gl_shader_program *prog, const char *fmt, ...); - -void -linker_warning(gl_shader_program *prog, const char *fmt, ...); - -#endif /* GLSL_LINKER_H */ diff --git a/src/glsl/list.h b/src/glsl/list.h deleted file mode 100644 index a1c4d82b017..00000000000 --- a/src/glsl/list.h +++ /dev/null @@ -1,700 +0,0 @@ -/* - * Copyright © 2008, 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file list.h - * \brief Doubly-linked list abstract container type. - * - * Each doubly-linked list has a sentinel head and tail node. These nodes - * contain no data. The head sentinel can be identified by its \c prev - * pointer being \c NULL. The tail sentinel can be identified by its - * \c next pointer being \c NULL. - * - * A list is empty if either the head sentinel's \c next pointer points to the - * tail sentinel or the tail sentinel's \c prev pointer points to the head - * sentinel. - * - * Instead of tracking two separate \c node structures and a \c list structure - * that points to them, the sentinel nodes are in a single structure. Noting - * that each sentinel node always has one \c NULL pointer, the \c NULL - * pointers occupy the same memory location. The \c list structure - * contains the following: - * - * - A \c head pointer that represents the \c next pointer of the - * head sentinel node. - * - A \c tail pointer that represents the \c prev pointer of the head - * sentinel node and the \c next pointer of the tail sentinel node. This - * pointer is \b always \c NULL. - * - A \c tail_prev pointer that represents the \c prev pointer of the - * tail sentinel node. - * - * Therefore, if \c head->next is \c NULL or \c tail_prev->prev is \c NULL, - * the list is empty. - * - * Do note that this means that the list nodes will contain pointers into the - * list structure itself and as a result you may not \c realloc() an \c - * exec_list or any structure in which an \c exec_list is embedded. - * - * To anyone familiar with "exec lists" on the Amiga, this structure should - * be immediately recognizable. See the following link for the original Amiga - * operating system documentation on the subject.
- * - * http://www.natami.net/dev/Libraries_Manual_guide/node02D7.html - * - * \author Ian Romanick - */ - -#pragma once -#ifndef LIST_CONTAINER_H -#define LIST_CONTAINER_H - -#ifndef __cplusplus -#include <stddef.h> -#endif -#include <assert.h> - -#include "util/ralloc.h" - -struct exec_node { - struct exec_node *next; - struct exec_node *prev; - -#ifdef __cplusplus - DECLARE_RALLOC_CXX_OPERATORS(exec_node) - - exec_node() : next(NULL), prev(NULL) - { - /* empty */ - } - - const exec_node *get_next() const; - exec_node *get_next(); - - const exec_node *get_prev() const; - exec_node *get_prev(); - - void remove(); - - /** - * Link a node with itself - * - * This creates a sort of degenerate list that is occasionally useful. - */ - void self_link(); - - /** - * Insert a node in the list after the current node - */ - void insert_after(exec_node *after); - /** - * Insert a node in the list before the current node - */ - void insert_before(exec_node *before); - - /** - * Insert another list in the list before the current node - */ - void insert_before(struct exec_list *before); - - /** - * Replace the current node with the given node. - */ - void replace_with(exec_node *replacement); - - /** - * Is this the sentinel at the tail of the list? - */ - bool is_tail_sentinel() const; - - /** - * Is this the sentinel at the head of the list? - */ - bool is_head_sentinel() const; -#endif -}; - -static inline void -exec_node_init(struct exec_node *n) -{ - n->next = NULL; - n->prev = NULL; -} - -static inline const struct exec_node * -exec_node_get_next_const(const struct exec_node *n) -{ - return n->next; -} - -static inline struct exec_node * -exec_node_get_next(struct exec_node *n) -{ - return n->next; -} - -static inline const struct exec_node * -exec_node_get_prev_const(const struct exec_node *n) -{ - return n->prev; -} - -static inline struct exec_node * -exec_node_get_prev(struct exec_node *n) -{ - return n->prev; -} - -static inline void -exec_node_remove(struct exec_node *n) -{ - n->next->prev = n->prev; - n->prev->next = n->next; - n->next = NULL; - n->prev = NULL; -} - -static inline void -exec_node_self_link(struct exec_node *n) -{ - n->next = n; - n->prev = n; -} - -static inline void -exec_node_insert_after(struct exec_node *n, struct exec_node *after) -{ - after->next = n->next; - after->prev = n; - - n->next->prev = after; - n->next = after; -} - -static inline void -exec_node_insert_node_before(struct exec_node *n, struct exec_node *before) -{ - before->next = n; - before->prev = n->prev; - - n->prev->next = before; - n->prev = before; -} - -static inline void -exec_node_replace_with(struct exec_node *n, struct exec_node *replacement) -{ - replacement->prev = n->prev; - replacement->next = n->next; - - n->prev->next = replacement; - n->next->prev = replacement; -} - -static inline bool -exec_node_is_tail_sentinel(const struct exec_node *n) -{ - return n->next == NULL; -} - -static inline bool -exec_node_is_head_sentinel(const struct exec_node *n) -{ - return n->prev == NULL; -} - -#ifdef __cplusplus -inline const exec_node *exec_node::get_next() const -{ - return exec_node_get_next_const(this); -} - -inline exec_node *exec_node::get_next() -{ - return exec_node_get_next(this); -} - -inline const exec_node *exec_node::get_prev() const -{ - return exec_node_get_prev_const(this); -} - -inline exec_node *exec_node::get_prev() -{ - return exec_node_get_prev(this); -} - -inline void exec_node::remove() -{ - exec_node_remove(this); -} - -inline void exec_node::self_link() -{ - exec_node_self_link(this); -} -
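As a brief editorial aside on the API being moved (not part of the patch): the following minimal sketch shows how the embedded-node idiom above is typically used from C. The payload type `item`, the include path, and the values are illustrative assumptions; a real build would also need the in-tree util/ralloc.h that list.h pulls in.

#include "list.h"   /* the header shown above; path is an assumption */

struct item {
   int value;
   struct exec_node link;   /* list node embedded in the payload */
};

static int
sum_items(void)
{
   struct exec_list list;
   struct item a = { 1, { NULL, NULL } };
   struct item b = { 2, { NULL, NULL } };
   int sum = 0;

   exec_list_make_empty(&list);
   exec_list_push_tail(&list, &a.link);
   exec_list_push_tail(&list, &b.link);

   /* Walk forward; the tail sentinel is the node whose next pointer is
    * NULL, exactly the idiom exec_list_length() uses below.
    */
   for (struct exec_node *n = list.head; n->next != NULL; n = n->next)
      sum += exec_node_data(struct item, n, link)->value;

   return sum;   /* 1 + 2 == 3 */
}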
-inline void exec_node::insert_after(exec_node *after) -{ - exec_node_insert_after(this, after); -} - -inline void exec_node::insert_before(exec_node *before) -{ - exec_node_insert_node_before(this, before); -} - -inline void exec_node::replace_with(exec_node *replacement) -{ - exec_node_replace_with(this, replacement); -} - -inline bool exec_node::is_tail_sentinel() const -{ - return exec_node_is_tail_sentinel(this); -} - -inline bool exec_node::is_head_sentinel() const -{ - return exec_node_is_head_sentinel(this); -} -#endif - -#ifdef __cplusplus -/* This macro will not work correctly if `t' uses virtual inheritance. If you - * are using virtual inheritance, you deserve a slow and painful death. Enjoy! - */ -#define exec_list_offsetof(t, f, p) \ - (((char *) &((t *) p)->f) - ((char *) p)) -#else -#define exec_list_offsetof(t, f, p) offsetof(t, f) -#endif - -/** - * Get a pointer to the structure containing an exec_node - * - * Given a pointer to an \c exec_node embedded in a structure, get a pointer to - * the containing structure. - * - * \param type Base type of the structure containing the node - * \param node Pointer to the \c exec_node - * \param field Name of the field in \c type that is the embedded \c exec_node - */ -#define exec_node_data(type, node, field) \ - ((type *) (((char *) node) - exec_list_offsetof(type, field, node))) - -#ifdef __cplusplus -struct exec_node; -#endif - -struct exec_list { - struct exec_node *head; - struct exec_node *tail; - struct exec_node *tail_pred; - -#ifdef __cplusplus - DECLARE_RALLOC_CXX_OPERATORS(exec_list) - - exec_list() - { - make_empty(); - } - - void make_empty(); - - bool is_empty() const; - - const exec_node *get_head() const; - exec_node *get_head(); - - const exec_node *get_tail() const; - exec_node *get_tail(); - - unsigned length() const; - - void push_head(exec_node *n); - void push_tail(exec_node *n); - void push_degenerate_list_at_head(exec_node *n); - - /** - * Remove the first node from a list and return it - * - * \return - * The first node in the list or \c NULL if the list is empty. - * - * \sa exec_list::get_head - */ - exec_node *pop_head(); - - /** - * Move all of the nodes from this list to the target list - */ - void move_nodes_to(exec_list *target); - - /** - * Append all nodes from the source list to the end of the target list - */ - void append_list(exec_list *source); - - /** - * Prepend all nodes from the source list to the beginning of the target - * list - */ - void prepend_list(exec_list *source); -#endif -}; - -static inline void -exec_list_make_empty(struct exec_list *list) -{ - list->head = (struct exec_node *) & list->tail; - list->tail = NULL; - list->tail_pred = (struct exec_node *) & list->head; -} - -static inline bool -exec_list_is_empty(const struct exec_list *list) -{ - /* There are three ways to test whether a list is empty or not. - * - * - Check to see if the \c head points to the \c tail. - * - Check to see if the \c tail_pred points to the \c head. - * - Check to see if the \c head is the sentinel node by testing whether - * its \c next pointer is \c NULL. - * - * The first two methods tend to generate better code on modern systems - * because they save a pointer dereference. - */ - return list->head == (struct exec_node *) &list->tail; -} - -static inline const struct exec_node * -exec_list_get_head_const(const struct exec_list *list) -{ - return !exec_list_is_empty(list) ?
list->head : NULL; -} - -static inline struct exec_node * -exec_list_get_head(struct exec_list *list) -{ - return !exec_list_is_empty(list) ? list->head : NULL; -} - -static inline const struct exec_node * -exec_list_get_tail_const(const struct exec_list *list) -{ - return !exec_list_is_empty(list) ? list->tail_pred : NULL; -} - -static inline struct exec_node * -exec_list_get_tail(struct exec_list *list) -{ - return !exec_list_is_empty(list) ? list->tail_pred : NULL; -} - -static inline unsigned -exec_list_length(const struct exec_list *list) -{ - unsigned size = 0; - struct exec_node *node; - - for (node = list->head; node->next != NULL; node = node->next) { - size++; - } - - return size; -} - -static inline void -exec_list_push_head(struct exec_list *list, struct exec_node *n) -{ - n->next = list->head; - n->prev = (struct exec_node *) &list->head; - - n->next->prev = n; - list->head = n; -} - -static inline void -exec_list_push_tail(struct exec_list *list, struct exec_node *n) -{ - n->next = (struct exec_node *) &list->tail; - n->prev = list->tail_pred; - - n->prev->next = n; - list->tail_pred = n; -} - -static inline void -exec_list_push_degenerate_list_at_head(struct exec_list *list, struct exec_node *n) -{ - assert(n->prev->next == n); - - n->prev->next = list->head; - list->head->prev = n->prev; - n->prev = (struct exec_node *) &list->head; - list->head = n; -} - -static inline struct exec_node * -exec_list_pop_head(struct exec_list *list) -{ - struct exec_node *const n = exec_list_get_head(list); - if (n != NULL) - exec_node_remove(n); - - return n; -} - -static inline void -exec_list_move_nodes_to(struct exec_list *list, struct exec_list *target) -{ - if (exec_list_is_empty(list)) { - exec_list_make_empty(target); - } else { - target->head = list->head; - target->tail = NULL; - target->tail_pred = list->tail_pred; - - target->head->prev = (struct exec_node *) &target->head; - target->tail_pred->next = (struct exec_node *) &target->tail; - - exec_list_make_empty(list); - } -} - -static inline void -exec_list_append(struct exec_list *list, struct exec_list *source) -{ - if (exec_list_is_empty(source)) - return; - - /* Link the first node of the source with the last node of the target list. - */ - list->tail_pred->next = source->head; - source->head->prev = list->tail_pred; - - /* Make the tail of the source list be the tail of the target list. - */ - list->tail_pred = source->tail_pred; - list->tail_pred->next = (struct exec_node *) &list->tail; - - /* Make the source list empty for good measure. - */ - exec_list_make_empty(source); -} - -static inline void -exec_list_prepend(struct exec_list *list, struct exec_list *source) -{ - exec_list_append(source, list); - exec_list_move_nodes_to(source, list); -} - -static inline void -exec_node_insert_list_before(struct exec_node *n, struct exec_list *before) -{ - if (exec_list_is_empty(before)) - return; - - before->tail_pred->next = n; - before->head->prev = n->prev; - - n->prev->next = before->head; - n->prev = before->tail_pred; - - exec_list_make_empty(before); -} - -static inline void -exec_list_validate(const struct exec_list *list) -{ - const struct exec_node *node; - - assert(list->head->prev == (const struct exec_node *) &list->head); - assert(list->tail == NULL); - assert(list->tail_pred->next == (const struct exec_node *) &list->tail); - - /* We could try to use one of the iterators below for this but they all - * either require C++ or assume the exec_node is embedded in a structure - * which is not the case for this function.
- */ - for (node = list->head; node->next != NULL; node = node->next) { - assert(node->next->prev == node); - assert(node->prev->next == node); - } -} - -#ifdef __cplusplus -inline void exec_list::make_empty() -{ - exec_list_make_empty(this); -} - -inline bool exec_list::is_empty() const -{ - return exec_list_is_empty(this); -} - -inline const exec_node *exec_list::get_head() const -{ - return exec_list_get_head_const(this); -} - -inline exec_node *exec_list::get_head() -{ - return exec_list_get_head(this); -} - -inline const exec_node *exec_list::get_tail() const -{ - return exec_list_get_tail_const(this); -} - -inline exec_node *exec_list::get_tail() -{ - return exec_list_get_tail(this); -} - -inline unsigned exec_list::length() const -{ - return exec_list_length(this); -} - -inline void exec_list::push_head(exec_node *n) -{ - exec_list_push_head(this, n); -} - -inline void exec_list::push_tail(exec_node *n) -{ - exec_list_push_tail(this, n); -} - -inline void exec_list::push_degenerate_list_at_head(exec_node *n) -{ - exec_list_push_degenerate_list_at_head(this, n); -} - -inline exec_node *exec_list::pop_head() -{ - return exec_list_pop_head(this); -} - -inline void exec_list::move_nodes_to(exec_list *target) -{ - exec_list_move_nodes_to(this, target); -} - -inline void exec_list::append_list(exec_list *source) -{ - exec_list_append(this, source); -} - -inline void exec_list::prepend_list(exec_list *source) -{ - exec_list_prepend(this, source); -} - -inline void exec_node::insert_before(exec_list *before) -{ - exec_node_insert_list_before(this, before); -} -#endif - -#define foreach_in_list(__type, __inst, __list) \ - for (__type *(__inst) = (__type *)(__list)->head; \ - !(__inst)->is_tail_sentinel(); \ - (__inst) = (__type *)(__inst)->next) - -#define foreach_in_list_reverse(__type, __inst, __list) \ - for (__type *(__inst) = (__type *)(__list)->tail_pred; \ - !(__inst)->is_head_sentinel(); \ - (__inst) = (__type *)(__inst)->prev) - -/** - * This version is safe even if the current node is removed. - */ -#define foreach_in_list_safe(__type, __node, __list) \ - for (__type *__node = (__type *)(__list)->head, \ - *__next = (__type *)__node->next; \ - __next != NULL; \ - __node = __next, __next = (__type *)__next->next) - -#define foreach_in_list_reverse_safe(__type, __node, __list) \ - for (__type *__node = (__type *)(__list)->tail_pred, \ - *__prev = (__type *)__node->prev; \ - __prev != NULL; \ - __node = __prev, __prev = (__type *)__prev->prev) - -#define foreach_in_list_use_after(__type, __inst, __list) \ - __type *(__inst); \ - for ((__inst) = (__type *)(__list)->head; \ - !(__inst)->is_tail_sentinel(); \ - (__inst) = (__type *)(__inst)->next) -/** - * Iterate through two lists at once. Stops at the end of the shorter list. - * - * This is safe against either current node being removed or replaced. 
- */ -#define foreach_two_lists(__node1, __list1, __node2, __list2) \ - for (struct exec_node * __node1 = (__list1)->head, \ - * __node2 = (__list2)->head, \ - * __next1 = __node1->next, \ - * __next2 = __node2->next \ - ; __next1 != NULL && __next2 != NULL \ - ; __node1 = __next1, \ - __node2 = __next2, \ - __next1 = __next1->next, \ - __next2 = __next2->next) - -#define foreach_list_typed(__type, __node, __field, __list) \ - for (__type * __node = \ - exec_node_data(__type, (__list)->head, __field); \ - (__node)->__field.next != NULL; \ - (__node) = exec_node_data(__type, (__node)->__field.next, __field)) - -#define foreach_list_typed_reverse(__type, __node, __field, __list) \ - for (__type * __node = \ - exec_node_data(__type, (__list)->tail_pred, __field); \ - (__node)->__field.prev != NULL; \ - (__node) = exec_node_data(__type, (__node)->__field.prev, __field)) - -#define foreach_list_typed_safe(__type, __node, __field, __list) \ - for (__type * __node = \ - exec_node_data(__type, (__list)->head, __field), \ - * __next = \ - exec_node_data(__type, (__node)->__field.next, __field); \ - (__node)->__field.next != NULL; \ - __node = __next, __next = \ - exec_node_data(__type, (__next)->__field.next, __field)) - -#define foreach_list_typed_reverse_safe(__type, __node, __field, __list) \ - for (__type * __node = \ - exec_node_data(__type, (__list)->tail_pred, __field), \ - * __prev = \ - exec_node_data(__type, (__node)->__field.prev, __field); \ - (__node)->__field.prev != NULL; \ - __node = __prev, __prev = \ - exec_node_data(__type, (__prev)->__field.prev, __field)) - -#endif /* LIST_CONTAINER_H */ diff --git a/src/glsl/loop_analysis.cpp b/src/glsl/loop_analysis.cpp deleted file mode 100644 index 096a80abb34..00000000000 --- a/src/glsl/loop_analysis.cpp +++ /dev/null @@ -1,640 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "compiler/glsl_types.h" -#include "loop_analysis.h" -#include "ir_hierarchical_visitor.h" - -static bool is_loop_terminator(ir_if *ir); - -static bool all_expression_operands_are_loop_constant(ir_rvalue *, - hash_table *); - -static ir_rvalue *get_basic_induction_increment(ir_assignment *, hash_table *); - - -/** - * Record the fact that the given loop variable was referenced inside the loop. - * - * \arg in_assignee is true if the reference was on the LHS of an assignment. 
- * - * \arg in_conditional_code_or_nested_loop is true if the reference occurred - * inside an if statement or a nested loop. - * - * \arg current_assignment is the ir_assignment node that the loop variable is - * on the LHS of, if any (ignored if \c in_assignee is false). - */ -void -loop_variable::record_reference(bool in_assignee, - bool in_conditional_code_or_nested_loop, - ir_assignment *current_assignment) -{ - if (in_assignee) { - assert(current_assignment != NULL); - - if (in_conditional_code_or_nested_loop || - current_assignment->condition != NULL) { - this->conditional_or_nested_assignment = true; - } - - if (this->first_assignment == NULL) { - assert(this->num_assignments == 0); - - this->first_assignment = current_assignment; - } - - this->num_assignments++; - } else if (this->first_assignment == current_assignment) { - /* This catches the case where the variable is used in the RHS of an - * assignment where it is also in the LHS. - */ - this->read_before_write = true; - } -} - - -loop_state::loop_state() -{ - this->ht = hash_table_ctor(0, hash_table_pointer_hash, - hash_table_pointer_compare); - this->mem_ctx = ralloc_context(NULL); - this->loop_found = false; -} - - -loop_state::~loop_state() -{ - hash_table_dtor(this->ht); - ralloc_free(this->mem_ctx); -} - - -loop_variable_state * -loop_state::insert(ir_loop *ir) -{ - loop_variable_state *ls = new(this->mem_ctx) loop_variable_state; - - hash_table_insert(this->ht, ls, ir); - this->loop_found = true; - - return ls; -} - - -loop_variable_state * -loop_state::get(const ir_loop *ir) -{ - return (loop_variable_state *) hash_table_find(this->ht, ir); -} - - -loop_variable * -loop_variable_state::get(const ir_variable *ir) -{ - return (loop_variable *) hash_table_find(this->var_hash, ir); -} - - -loop_variable * -loop_variable_state::insert(ir_variable *var) -{ - void *mem_ctx = ralloc_parent(this); - loop_variable *lv = rzalloc(mem_ctx, loop_variable); - - lv->var = var; - - hash_table_insert(this->var_hash, lv, lv->var); - this->variables.push_tail(lv); - - return lv; -} - - -loop_terminator * -loop_variable_state::insert(ir_if *if_stmt) -{ - void *mem_ctx = ralloc_parent(this); - loop_terminator *t = new(mem_ctx) loop_terminator(); - - t->ir = if_stmt; - this->terminators.push_tail(t); - - return t; -} - - -/** - * If the given variable already is recorded in the state for this loop, - * return the corresponding loop_variable object that records information - * about it. - * - * Otherwise, create a new loop_variable object to record information about - * the variable, and set its \c read_before_write field appropriately based on - * \c in_assignee. - * - * \arg in_assignee is true if this variable was encountered on the LHS of an - * assignment. 
- */ -loop_variable * -loop_variable_state::get_or_insert(ir_variable *var, bool in_assignee) -{ - loop_variable *lv = this->get(var); - - if (lv == NULL) { - lv = this->insert(var); - lv->read_before_write = !in_assignee; - } - - return lv; -} - - -namespace { - -class loop_analysis : public ir_hierarchical_visitor { -public: - loop_analysis(loop_state *loops); - - virtual ir_visitor_status visit(ir_loop_jump *); - virtual ir_visitor_status visit(ir_dereference_variable *); - - virtual ir_visitor_status visit_enter(ir_call *); - - virtual ir_visitor_status visit_enter(ir_loop *); - virtual ir_visitor_status visit_leave(ir_loop *); - virtual ir_visitor_status visit_enter(ir_assignment *); - virtual ir_visitor_status visit_leave(ir_assignment *); - virtual ir_visitor_status visit_enter(ir_if *); - virtual ir_visitor_status visit_leave(ir_if *); - - loop_state *loops; - - int if_statement_depth; - - ir_assignment *current_assignment; - - exec_list state; -}; - -} /* anonymous namespace */ - -loop_analysis::loop_analysis(loop_state *loops) - : loops(loops), if_statement_depth(0), current_assignment(NULL) -{ - /* empty */ -} - - -ir_visitor_status -loop_analysis::visit(ir_loop_jump *ir) -{ - (void) ir; - - assert(!this->state.is_empty()); - - loop_variable_state *const ls = - (loop_variable_state *) this->state.get_head(); - - ls->num_loop_jumps++; - - return visit_continue; -} - - -ir_visitor_status -loop_analysis::visit_enter(ir_call *) -{ - /* Mark every loop that we're currently analyzing as containing an ir_call - * (even those at outer nesting levels). - */ - foreach_in_list(loop_variable_state, ls, &this->state) { - ls->contains_calls = true; - } - - return visit_continue_with_parent; -} - - -ir_visitor_status -loop_analysis::visit(ir_dereference_variable *ir) -{ - /* If we're not somewhere inside a loop, there's nothing to do. - */ - if (this->state.is_empty()) - return visit_continue; - - bool nested = false; - - foreach_in_list(loop_variable_state, ls, &this->state) { - ir_variable *var = ir->variable_referenced(); - loop_variable *lv = ls->get_or_insert(var, this->in_assignee); - - lv->record_reference(this->in_assignee, - nested || this->if_statement_depth > 0, - this->current_assignment); - nested = true; - } - - return visit_continue; -} - -ir_visitor_status -loop_analysis::visit_enter(ir_loop *ir) -{ - loop_variable_state *ls = this->loops->insert(ir); - this->state.push_head(ls); - - return visit_continue; -} - -ir_visitor_status -loop_analysis::visit_leave(ir_loop *ir) -{ - loop_variable_state *const ls = - (loop_variable_state *) this->state.pop_head(); - - /* Function calls may contain side effects. These could alter any of our - * variables in ways that cannot be known, and may even terminate shader - * execution (say, calling discard in the fragment shader). So we can't - * rely on any of our analysis about assignments to variables. - * - * We could perform some conservative analysis (prove there's no statically - * possible assignment, etc.) but it isn't worth it for now; function - * inlining will allow us to unroll loops anyway. - */ - if (ls->contains_calls) - return visit_continue; - - foreach_in_list(ir_instruction, node, &ir->body_instructions) { - /* Skip over declarations at the start of a loop. 
- */ - if (node->as_variable()) - continue; - - ir_if *if_stmt = ((ir_instruction *) node)->as_if(); - - if ((if_stmt != NULL) && is_loop_terminator(if_stmt)) - ls->insert(if_stmt); - else - break; - } - - - foreach_in_list_safe(loop_variable, lv, &ls->variables) { - /* Move variables that are already marked as being loop constant to - * a separate list. These trivially don't need to be tested. - */ - if (lv->is_loop_constant()) { - lv->remove(); - ls->constants.push_tail(lv); - } - } - - /* Each variable assigned in the loop that isn't already marked as being loop - * constant might still be loop constant. The requirements at this point - * are: - * - * - Variable is written before it is read. - * - * - Only one assignment to the variable. - * - * - All operands on the RHS of the assignment are also loop constants. - * - * The last requirement is the reason for the progress loop. A variable - * marked as a loop constant on one pass may allow other variables to be - * marked as loop constant on following passes. - */ - bool progress; - do { - progress = false; - - foreach_in_list_safe(loop_variable, lv, &ls->variables) { - if (lv->conditional_or_nested_assignment || (lv->num_assignments > 1)) - continue; - - /* Process the RHS of the assignment. If all of the variables - * accessed there are loop constants, then add this variable to the - * set of loop constants as well. - */ - ir_rvalue *const rhs = lv->first_assignment->rhs; - if (all_expression_operands_are_loop_constant(rhs, ls->var_hash)) { - lv->rhs_clean = true; - - if (lv->is_loop_constant()) { - progress = true; - - lv->remove(); - ls->constants.push_tail(lv); - } - } - } - } while (progress); - - /* The remaining variables that are not loop invariant might be loop - * induction variables. - */ - foreach_in_list_safe(loop_variable, lv, &ls->variables) { - /* If there is more than one assignment to a variable, it cannot be a - * loop induction variable. This isn't strictly true, but this is a - * very simple induction variable detector, and it can't handle more - * complex cases. - */ - if (lv->num_assignments > 1) - continue; - - /* All of the variables with zero assignments in the loop are loop - * invariant, and they should have already been filtered out. - */ - assert(lv->num_assignments == 1); - assert(lv->first_assignment != NULL); - - /* The assignment to the variable in the loop must be unconditional and - * not inside a nested loop. - */ - if (lv->conditional_or_nested_assignment) - continue; - - /* Basic loop induction variables have a single assignment in the loop - * that has the form 'VAR = VAR + i' or 'VAR = VAR - i' where i is a - * loop invariant. - */ - ir_rvalue *const inc = - get_basic_induction_increment(lv->first_assignment, ls->var_hash); - if (inc != NULL) { - lv->increment = inc; - - lv->remove(); - ls->induction_variables.push_tail(lv); - } - } - - /* Search the loop terminating conditions for those of the form 'i < c' - * where i is a loop induction variable, c is a constant, and < is any - * relative operator. From each of these we can infer an iteration count. - * Also figure out which terminator (if any) produces the smallest - * iteration count--this is the limiting terminator. - */ - foreach_in_list(loop_terminator, t, &ls->terminators) { - ir_if *if_stmt = t->ir; - - /* If-statements can be either 'if (expr)' or 'if (deref)'. We only care - * about the former here.
- */ - ir_expression *cond = if_stmt->condition->as_expression(); - if (cond == NULL) - continue; - - switch (cond->operation) { - case ir_binop_less: - case ir_binop_greater: - case ir_binop_lequal: - case ir_binop_gequal: { - /* The expressions that we care about will either be of the form - * 'counter < limit' or 'limit < counter'. Figure out which is - * which. - */ - ir_rvalue *counter = cond->operands[0]->as_dereference_variable(); - ir_constant *limit = cond->operands[1]->as_constant(); - enum ir_expression_operation cmp = cond->operation; - - if (limit == NULL) { - counter = cond->operands[1]->as_dereference_variable(); - limit = cond->operands[0]->as_constant(); - - switch (cmp) { - case ir_binop_less: cmp = ir_binop_greater; break; - case ir_binop_greater: cmp = ir_binop_less; break; - case ir_binop_lequal: cmp = ir_binop_gequal; break; - case ir_binop_gequal: cmp = ir_binop_lequal; break; - default: assert(!"Should not get here."); - } - } - - if ((counter == NULL) || (limit == NULL)) - break; - - ir_variable *var = counter->variable_referenced(); - - ir_rvalue *init = find_initial_value(ir, var); - - loop_variable *lv = ls->get(var); - if (lv != NULL && lv->is_induction_var()) { - t->iterations = calculate_iterations(init, limit, lv->increment, - cmp); - - if (t->iterations >= 0 && - (ls->limiting_terminator == NULL || - t->iterations < ls->limiting_terminator->iterations)) { - ls->limiting_terminator = t; - } - } - break; - } - - default: - break; - } - } - - return visit_continue; -} - -ir_visitor_status -loop_analysis::visit_enter(ir_if *ir) -{ - (void) ir; - - if (!this->state.is_empty()) - this->if_statement_depth++; - - return visit_continue; -} - -ir_visitor_status -loop_analysis::visit_leave(ir_if *ir) -{ - (void) ir; - - if (!this->state.is_empty()) - this->if_statement_depth--; - - return visit_continue; -} - -ir_visitor_status -loop_analysis::visit_enter(ir_assignment *ir) -{ - /* If we're not somewhere inside a loop, there's nothing to do. - */ - if (this->state.is_empty()) - return visit_continue_with_parent; - - this->current_assignment = ir; - - return visit_continue; -} - -ir_visitor_status -loop_analysis::visit_leave(ir_assignment *ir) -{ - /* Since the visit_enter exits with visit_continue_with_parent for this - * case, the loop state stack should never be empty here. - */ - assert(!this->state.is_empty()); - - assert(this->current_assignment == ir); - this->current_assignment = NULL; - - return visit_continue; -} - - -class examine_rhs : public ir_hierarchical_visitor { -public: - examine_rhs(hash_table *loop_variables) - { - this->only_uses_loop_constants = true; - this->loop_variables = loop_variables; - } - - virtual ir_visitor_status visit(ir_dereference_variable *ir) - { - loop_variable *lv = - (loop_variable *) hash_table_find(this->loop_variables, ir->var); - - assert(lv != NULL); - - if (lv->is_loop_constant()) { - return visit_continue; - } else { - this->only_uses_loop_constants = false; - return visit_stop; - } - } - - hash_table *loop_variables; - bool only_uses_loop_constants; -}; - - -bool -all_expression_operands_are_loop_constant(ir_rvalue *ir, hash_table *variables) -{ - examine_rhs v(variables); - - ir->accept(&v); - - return v.only_uses_loop_constants; -} - - -ir_rvalue * -get_basic_induction_increment(ir_assignment *ir, hash_table *var_hash) -{ - /* The RHS must be a binary expression. 
- */ - ir_expression *const rhs = ir->rhs->as_expression(); - if ((rhs == NULL) - || ((rhs->operation != ir_binop_add) - && (rhs->operation != ir_binop_sub))) - return NULL; - - /* One of the operands of the expression must be the variable assigned. - * If the operation is subtraction, the variable in question must be the - * "left" operand. - */ - ir_variable *const var = ir->lhs->variable_referenced(); - - ir_variable *const op0 = rhs->operands[0]->variable_referenced(); - ir_variable *const op1 = rhs->operands[1]->variable_referenced(); - - if (((op0 != var) && (op1 != var)) - || ((op1 == var) && (rhs->operation == ir_binop_sub))) - return NULL; - - ir_rvalue *inc = (op0 == var) ? rhs->operands[1] : rhs->operands[0]; - - if (inc->as_constant() == NULL) { - ir_variable *const inc_var = inc->variable_referenced(); - if (inc_var != NULL) { - loop_variable *lv = - (loop_variable *) hash_table_find(var_hash, inc_var); - - if (lv == NULL || !lv->is_loop_constant()) { - assert(lv != NULL); - inc = NULL; - } - } else - inc = NULL; - } - - if ((inc != NULL) && (rhs->operation == ir_binop_sub)) { - void *mem_ctx = ralloc_parent(ir); - - inc = new(mem_ctx) ir_expression(ir_unop_neg, - inc->type, - inc->clone(mem_ctx, NULL), - NULL); - } - - return inc; -} - - -/** - * Detect whether an if-statement is a loop terminating condition - * - * Detects if-statements of the form - * - * (if (expression bool ...) (break)) - */ -bool -is_loop_terminator(ir_if *ir) -{ - if (!ir->else_instructions.is_empty()) - return false; - - ir_instruction *const inst = - (ir_instruction *) ir->then_instructions.get_head(); - if (inst == NULL) - return false; - - if (inst->ir_type != ir_type_loop_jump) - return false; - - ir_loop_jump *const jump = (ir_loop_jump *) inst; - if (jump->mode != ir_loop_jump::jump_break) - return false; - - return true; -} - - -loop_state * -analyze_loop_variables(exec_list *instructions) -{ - loop_state *loops = new loop_state; - loop_analysis v(loops); - - v.run(instructions); - return v.loops; -} diff --git a/src/glsl/loop_analysis.h b/src/glsl/loop_analysis.h deleted file mode 100644 index 3b1971d7edc..00000000000 --- a/src/glsl/loop_analysis.h +++ /dev/null @@ -1,259 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE.
- */ - -#pragma once -#ifndef LOOP_ANALYSIS_H -#define LOOP_ANALYSIS_H - -#include "ir.h" -#include "program/hash_table.h" - -/** - * Analyze and classify all variables used in all loops in the instruction list - */ -extern class loop_state * -analyze_loop_variables(exec_list *instructions); - - -/** - * Fill in loop control fields - * - * Based on analysis of loop variables, this function tries to remove - * redundant sequences in the loop of the form - * - * (if (expression bool ...) (break)) - * - * For example, if it is provable that one loop exit condition will - * always be satisfied before another, the unnecessary exit condition will be - * removed. - */ -extern bool -set_loop_controls(exec_list *instructions, loop_state *ls); - - -extern bool -unroll_loops(exec_list *instructions, loop_state *ls, - const struct gl_shader_compiler_options *options); - -ir_rvalue * -find_initial_value(ir_loop *loop, ir_variable *var); - -int -calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, - enum ir_expression_operation op); - - -/** - * Tracking for all variables used in a loop - */ -class loop_variable_state : public exec_node { -public: - class loop_variable *get(const ir_variable *); - class loop_variable *insert(ir_variable *); - class loop_variable *get_or_insert(ir_variable *, bool in_assignee); - class loop_terminator *insert(ir_if *); - - - /** - * Variables that have not yet been classified - */ - exec_list variables; - - /** - * Variables whose values are constant within the body of the loop - * - * This list contains \c loop_variable objects. - */ - exec_list constants; - - /** - * Induction variables for this loop - * - * This list contains \c loop_variable objects. - */ - exec_list induction_variables; - - /** - * Simple if-statements that lead to the termination of the loop - * - * This list contains \c loop_terminator objects. - * - * \sa is_loop_terminator - */ - exec_list terminators; - - /** - * If any of the terminators in \c terminators leads to termination of the - * loop after a constant number of iterations, this is the terminator that - * leads to termination after the smallest number of iterations. Otherwise - * NULL. - */ - loop_terminator *limiting_terminator; - - /** - * Hash table containing all variables accessed in this loop - */ - hash_table *var_hash; - - /** - * Number of ir_loop_jump instructions that operate on this loop - */ - unsigned num_loop_jumps; - - /** - * Whether this loop contains any function calls. - */ - bool contains_calls; - - loop_variable_state() - { - this->num_loop_jumps = 0; - this->contains_calls = false; - this->var_hash = hash_table_ctor(0, hash_table_pointer_hash, - hash_table_pointer_compare); - this->limiting_terminator = NULL; - } - - ~loop_variable_state() - { - hash_table_dtor(this->var_hash); - } - - DECLARE_RALLOC_CXX_OPERATORS(loop_variable_state) -}; - - -class loop_variable : public exec_node { -public: - /** The variable in question. */ - ir_variable *var; - - /** Is the variable read in the loop before it is written? */ - bool read_before_write; - - /** Are all variables in the RHS of the assignment loop constants? */ - bool rhs_clean; - - /** - * Is there an assignment to the variable that is conditional, or inside a - * nested loop? - */ - bool conditional_or_nested_assignment; - - /** Reference to the first assignment to the variable in the loop body. */ - ir_assignment *first_assignment; - - /** Number of assignments to the variable in the loop body. 
*/ - unsigned num_assignments; - - /** - * Increment value for a loop induction variable - * - * If this is a loop induction variable, the amount by which the variable - * is incremented on each iteration through the loop. - * - * If this is not a loop induction variable, NULL. - */ - ir_rvalue *increment; - - - inline bool is_induction_var() const - { - /* Induction variables always have a non-null increment, and vice - * versa. - */ - return this->increment != NULL; - } - - - inline bool is_loop_constant() const - { - const bool is_const = (this->num_assignments == 0) - || (((this->num_assignments == 1) - && !this->conditional_or_nested_assignment - && !this->read_before_write - && this->rhs_clean) || this->var->data.read_only); - - /* If the RHS of *the* assignment is clean, then there must be exactly - * one assignment of the variable. - */ - assert((this->rhs_clean && (this->num_assignments == 1)) - || !this->rhs_clean); - - return is_const; - } - - void record_reference(bool in_assignee, - bool in_conditional_code_or_nested_loop, - ir_assignment *current_assignment); -}; - - -class loop_terminator : public exec_node { -public: - loop_terminator() - : ir(NULL), iterations(-1) - { - } - - /** - * Statement which terminates the loop. - */ - ir_if *ir; - - /** - * The number of iterations after which the terminator is known to - * terminate the loop (if that is a fixed value). Otherwise -1. - */ - int iterations; -}; - - -class loop_state { -public: - ~loop_state(); - - /** - * Get the loop variable state data for a particular loop - */ - loop_variable_state *get(const ir_loop *); - - loop_variable_state *insert(ir_loop *ir); - - bool loop_found; - -private: - loop_state(); - - /** - * Hash table containing all loops that have been analyzed. - */ - hash_table *ht; - - void *mem_ctx; - - friend loop_state *analyze_loop_variables(exec_list *instructions); -}; - -#endif /* LOOP_ANALYSIS_H */ diff --git a/src/glsl/loop_controls.cpp b/src/glsl/loop_controls.cpp deleted file mode 100644 index c717605ec74..00000000000 --- a/src/glsl/loop_controls.cpp +++ /dev/null @@ -1,246 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -#include <limits.h> -#include "main/compiler.h" -#include "compiler/glsl_types.h" -#include "loop_analysis.h" -#include "ir_hierarchical_visitor.h" - -/** - * Find an initializer of a variable outside a loop - * - * Works backwards from the loop to find the pre-loop value of the variable. - * This is used, for example, to find the initial value of loop induction - * variables. - * - * \param loop Loop where \c var is an induction variable - * \param var Variable whose initializer is to be found - * - * \return - * The \c ir_rvalue assigned to the variable outside the loop. May return - * \c NULL if no initializer can be found. - */ -ir_rvalue * -find_initial_value(ir_loop *loop, ir_variable *var) -{ - for (exec_node *node = loop->prev; - !node->is_head_sentinel(); - node = node->prev) { - ir_instruction *ir = (ir_instruction *) node; - - switch (ir->ir_type) { - case ir_type_call: - case ir_type_loop: - case ir_type_loop_jump: - case ir_type_return: - case ir_type_if: - return NULL; - - case ir_type_function: - case ir_type_function_signature: - assert(!"Should not get here."); - return NULL; - - case ir_type_assignment: { - ir_assignment *assign = ir->as_assignment(); - ir_variable *assignee = assign->lhs->whole_variable_referenced(); - - if (assignee == var) - return (assign->condition != NULL) ? NULL : assign->rhs; - - break; - } - - default: - break; - } - } - - return NULL; -} - - -int -calculate_iterations(ir_rvalue *from, ir_rvalue *to, ir_rvalue *increment, - enum ir_expression_operation op) -{ - if (from == NULL || to == NULL || increment == NULL) - return -1; - - void *mem_ctx = ralloc_context(NULL); - - ir_expression *const sub = - new(mem_ctx) ir_expression(ir_binop_sub, from->type, to, from); - - ir_expression *const div = - new(mem_ctx) ir_expression(ir_binop_div, sub->type, sub, increment); - - ir_constant *iter = div->constant_expression_value(); - - if (iter == NULL) - return -1; - - if (!iter->type->is_integer()) { - const ir_expression_operation op = iter->type->is_double() - ? ir_unop_d2i : ir_unop_f2i; - ir_rvalue *cast = - new(mem_ctx) ir_expression(op, glsl_type::int_type, iter, NULL); - - iter = cast->constant_expression_value(); - } - - int iter_value = iter->get_int_component(0); - - /* Make sure that the calculated number of iterations satisfies the exit - * condition. This is needed to catch off-by-one errors and some types of - * ill-formed loops. For example, we need to detect that the following - * loop does not have a maximum iteration count. - * - * for (float x = 0.0; x != 0.9; x += 0.2) - * ; - */ - const int bias[] = { -1, 0, 1 }; - bool valid_loop = false; - - for (unsigned i = 0; i < ARRAY_SIZE(bias); i++) { - /* Increment may be of type int, uint or float.
*/ - switch (increment->type->base_type) { - case GLSL_TYPE_INT: - iter = new(mem_ctx) ir_constant(iter_value + bias[i]); - break; - case GLSL_TYPE_UINT: - iter = new(mem_ctx) ir_constant(unsigned(iter_value + bias[i])); - break; - case GLSL_TYPE_FLOAT: - iter = new(mem_ctx) ir_constant(float(iter_value + bias[i])); - break; - case GLSL_TYPE_DOUBLE: - iter = new(mem_ctx) ir_constant(double(iter_value + bias[i])); - break; - default: - unreachable("Unsupported type for loop iterator."); - } - - ir_expression *const mul = - new(mem_ctx) ir_expression(ir_binop_mul, increment->type, iter, - increment); - - ir_expression *const add = - new(mem_ctx) ir_expression(ir_binop_add, mul->type, mul, from); - - ir_expression *const cmp = - new(mem_ctx) ir_expression(op, glsl_type::bool_type, add, to); - - ir_constant *const cmp_result = cmp->constant_expression_value(); - - assert(cmp_result != NULL); - if (cmp_result->get_bool_component(0)) { - iter_value += bias[i]; - valid_loop = true; - break; - } - } - - ralloc_free(mem_ctx); - return (valid_loop) ? iter_value : -1; -} - -namespace { - -class loop_control_visitor : public ir_hierarchical_visitor { -public: - loop_control_visitor(loop_state *state) - { - this->state = state; - this->progress = false; - } - - virtual ir_visitor_status visit_leave(ir_loop *ir); - - loop_state *state; - - bool progress; -}; - -} /* anonymous namespace */ - -ir_visitor_status -loop_control_visitor::visit_leave(ir_loop *ir) -{ - loop_variable_state *const ls = this->state->get(ir); - - /* If we've entered a loop that hasn't been analyzed, something really, - * really bad has happened. - */ - if (ls == NULL) { - assert(ls != NULL); - return visit_continue; - } - - if (ls->limiting_terminator != NULL) { - /* If the limiting terminator has an iteration count of zero, then we've - * proven that the loop cannot run, so delete it. - */ - int iterations = ls->limiting_terminator->iterations; - if (iterations == 0) { - ir->remove(); - this->progress = true; - return visit_continue; - } - } - - /* Remove the conditional break statements associated with all terminators - * that are associated with a fixed iteration count, except for the one - * associated with the limiting terminator--that one needs to stay, since - * it terminates the loop. Exception: if the loop still has a normative - * bound, then that terminates the loop, so we don't even need the limiting - * terminator. 
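As an illustration of the iteration counting in calculate_iterations() above, here is a simplified, integer-only sketch with invented names (not the Mesa API; the real code also constant-folds float/double counts through f2i/d2i casts):

   /* Assume the loop exits once `from + n * increment >= to`.  Returns -1
    * when no bias makes the exit condition hold; the real pass uses this to
    * reject ill-formed loops such as `for (float x = 0.0; x != 0.9; x += 0.2)`.
    */
   static int iterations_for(int from, int to, int increment)
   {
      if (increment == 0)
         return -1;
      const int iter = (to - from) / increment;
      const int bias[] = { -1, 0, 1 };
      for (unsigned i = 0; i < 3; i++) {
         const int candidate = iter + bias[i];
         if (from + candidate * increment >= to)
            return candidate;
      }
      return -1;
   }

For example, iterations_for(0, 10, 3) first computes 10 / 3 = 3, then the bias search settles on 4, matching the four iterations 0, 3, 6, 9.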
- */ - foreach_in_list(loop_terminator, t, &ls->terminators) { - if (t->iterations < 0) - continue; - - if (t != ls->limiting_terminator) { - t->ir->remove(); - - assert(ls->num_loop_jumps > 0); - ls->num_loop_jumps--; - - this->progress = true; - } - } - - return visit_continue; -} - - -bool -set_loop_controls(exec_list *instructions, loop_state *ls) -{ - loop_control_visitor v(ls); - - v.run(instructions); - - return v.progress; -} diff --git a/src/glsl/loop_unroll.cpp b/src/glsl/loop_unroll.cpp deleted file mode 100644 index aea2743cdb1..00000000000 --- a/src/glsl/loop_unroll.cpp +++ /dev/null @@ -1,432 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "compiler/glsl_types.h" -#include "loop_analysis.h" -#include "ir_hierarchical_visitor.h" - -#include "main/mtypes.h" - -namespace { - -class loop_unroll_visitor : public ir_hierarchical_visitor { -public: - loop_unroll_visitor(loop_state *state, - const struct gl_shader_compiler_options *options) - { - this->state = state; - this->progress = false; - this->options = options; - } - - virtual ir_visitor_status visit_leave(ir_loop *ir); - void simple_unroll(ir_loop *ir, int iterations); - void complex_unroll(ir_loop *ir, int iterations, - bool continue_from_then_branch); - void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest); - - loop_state *state; - - bool progress; - const struct gl_shader_compiler_options *options; -}; - -} /* anonymous namespace */ - -static bool -is_break(ir_instruction *ir) -{ - return ir != NULL && ir->ir_type == ir_type_loop_jump - && ((ir_loop_jump *) ir)->is_break(); -} - -class loop_unroll_count : public ir_hierarchical_visitor { -public: - int nodes; - bool unsupported_variable_indexing; - bool array_indexed_by_induction_var_with_exact_iterations; - /* If there are nested loops, the node count will be inaccurate. 
*/ - bool nested_loop; - - loop_unroll_count(exec_list *list, loop_variable_state *ls, - const struct gl_shader_compiler_options *options) - : ls(ls), options(options) - { - nodes = 0; - nested_loop = false; - unsupported_variable_indexing = false; - array_indexed_by_induction_var_with_exact_iterations = false; - - run(list); - } - - virtual ir_visitor_status visit_enter(ir_assignment *) - { - nodes++; - return visit_continue; - } - - virtual ir_visitor_status visit_enter(ir_expression *) - { - nodes++; - return visit_continue; - } - - virtual ir_visitor_status visit_enter(ir_loop *) - { - nested_loop = true; - return visit_continue; - } - - virtual ir_visitor_status visit_enter(ir_dereference_array *ir) - { - /* Force unroll in case of dynamic indexing with sampler arrays - * when EmitNoIndirectSampler is set. - */ - if (options->EmitNoIndirectSampler) { - if ((ir->array->type->is_array() && - ir->array->type->contains_sampler()) && - !ir->array_index->constant_expression_value()) { - unsupported_variable_indexing = true; - return visit_continue; - } - } - - /* Check for arrays variably-indexed by a loop induction variable. - * Unrolling the loop may convert that access into constant-indexing. - * - * Many drivers don't support particular kinds of variable indexing, - * and have to resort to using lower_variable_index_to_cond_assign to - * handle it. This results in huge amounts of horrible code, so we'd - * like to avoid that if possible. Here, we just note that it will - * happen. - */ - if ((ir->array->type->is_array() || ir->array->type->is_matrix()) && - !ir->array_index->as_constant()) { - ir_variable *array = ir->array->variable_referenced(); - loop_variable *lv = ls->get(ir->array_index->variable_referenced()); - if (array && lv && lv->is_induction_var()) { - /* If an array is indexed by a loop induction variable, and the - * array size is exactly the number of loop iterations, this is - * probably a simple for-loop trying to access each element in - * turn; the application may expect it to be unrolled. - */ - if (int(array->type->length) == ls->limiting_terminator->iterations) - array_indexed_by_induction_var_with_exact_iterations = true; - - switch (array->data.mode) { - case ir_var_auto: - case ir_var_temporary: - case ir_var_const_in: - case ir_var_function_in: - case ir_var_function_out: - case ir_var_function_inout: - if (options->EmitNoIndirectTemp) - unsupported_variable_indexing = true; - break; - case ir_var_uniform: - case ir_var_shader_storage: - if (options->EmitNoIndirectUniform) - unsupported_variable_indexing = true; - break; - case ir_var_shader_in: - if (options->EmitNoIndirectInput) - unsupported_variable_indexing = true; - break; - case ir_var_shader_out: - if (options->EmitNoIndirectOutput) - unsupported_variable_indexing = true; - break; - } - } - } - return visit_continue; - } - -private: - loop_variable_state *ls; - const struct gl_shader_compiler_options *options; -}; - - -/** - * Unroll a loop which does not contain any jumps. For example, if the input - * is: - * - * (loop (...) ...instrs...) - * - * And the iteration count is 3, the output will be: - * - * ...instrs... ...instrs... ...instrs... 
- */ -void -loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) -{ - void *const mem_ctx = ralloc_parent(ir); - - for (int i = 0; i < iterations; i++) { - exec_list copy_list; - - copy_list.make_empty(); - clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); - - ir->insert_before(©_list); - } - - /* The loop has been replaced by the unrolled copies. Remove the original - * loop from the IR sequence. - */ - ir->remove(); - - this->progress = true; -} - - -/** - * Unroll a loop whose last statement is an ir_if. If \c - * continue_from_then_branch is true, the loop is repeated only when the - * "then" branch of the if is taken; otherwise it is repeated only when the - * "else" branch of the if is taken. - * - * For example, if the input is: - * - * (loop (...) - * ...body... - * (if (cond) - * (...then_instrs...) - * (...else_instrs...))) - * - * And the iteration count is 3, and \c continue_from_then_branch is true, - * then the output will be: - * - * ...body... - * (if (cond) - * (...then_instrs... - * ...body... - * (if (cond) - * (...then_instrs... - * ...body... - * (if (cond) - * (...then_instrs...) - * (...else_instrs...))) - * (...else_instrs...))) - * (...else_instrs)) - */ -void -loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations, - bool continue_from_then_branch) -{ - void *const mem_ctx = ralloc_parent(ir); - ir_instruction *ir_to_replace = ir; - - for (int i = 0; i < iterations; i++) { - exec_list copy_list; - - copy_list.make_empty(); - clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); - - ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if(); - assert(ir_if != NULL); - - ir_to_replace->insert_before(©_list); - ir_to_replace->remove(); - - /* placeholder that will be removed in the next iteration */ - ir_to_replace = - new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue); - - exec_list *const list = (continue_from_then_branch) - ? &ir_if->then_instructions : &ir_if->else_instructions; - - list->push_tail(ir_to_replace); - } - - ir_to_replace->remove(); - - this->progress = true; -} - - -/** - * Move all of the instructions which follow \c ir_if to the end of - * \c splice_dest. - * - * For example, in the code snippet: - * - * (if (cond) - * (...then_instructions... - * break) - * (...else_instructions...)) - * ...post_if_instructions... - * - * If \c ir_if points to the "if" instruction, and \c splice_dest points to - * (...else_instructions...), the code snippet is transformed into: - * - * (if (cond) - * (...then_instructions... - * break) - * (...else_instructions... - * ...post_if_instructions...)) - */ -void -loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if, - exec_list *splice_dest) -{ - while (!ir_if->get_next()->is_tail_sentinel()) { - ir_instruction *move_ir = (ir_instruction *) ir_if->get_next(); - - move_ir->remove(); - splice_dest->push_tail(move_ir); - } -} - - -ir_visitor_status -loop_unroll_visitor::visit_leave(ir_loop *ir) -{ - loop_variable_state *const ls = this->state->get(ir); - int iterations; - - /* If we've entered a loop that hasn't been analyzed, something really, - * really bad has happened. - */ - if (ls == NULL) { - assert(ls != NULL); - return visit_continue; - } - - /* Don't try to unroll loops where the number of iterations is not known - * at compile-time. 
- */ - if (ls->limiting_terminator == NULL) - return visit_continue; - - iterations = ls->limiting_terminator->iterations; - - const int max_iterations = options->MaxUnrollIterations; - - /* Don't try to unroll loops that have zillions of iterations either. - */ - if (iterations > max_iterations) - return visit_continue; - - /* Don't try to unroll nested loops and loops with a huge body. - */ - loop_unroll_count count(&ir->body_instructions, ls, options); - - bool loop_too_large = - count.nested_loop || count.nodes * iterations > max_iterations * 5; - - if (loop_too_large && !count.unsupported_variable_indexing && - !count.array_indexed_by_induction_var_with_exact_iterations) - return visit_continue; - - /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps. - * We'll be removing the limiting terminator before we unroll. - */ - assert(ls->num_loop_jumps > 0); - unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1; - - if (predicted_num_loop_jumps > 1) - return visit_continue; - - if (predicted_num_loop_jumps == 0) { - ls->limiting_terminator->ir->remove(); - simple_unroll(ir, iterations); - return visit_continue; - } - - ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail(); - assert(last_ir != NULL); - - if (is_break(last_ir)) { - /* If the only loop-jump is a break at the end of the loop, the loop - * will execute exactly once. Remove the break and use the simple - * unroller with an iteration count of 1. - */ - last_ir->remove(); - - ls->limiting_terminator->ir->remove(); - simple_unroll(ir, 1); - return visit_continue; - } - - /* recognize loops in the form produced by ir_lower_jumps */ - foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) { - /* Skip the limiting terminator, since it will go away when we - * unroll. - */ - if (cur_ir == ls->limiting_terminator->ir) - continue; - - ir_if *ir_if = cur_ir->as_if(); - if (ir_if != NULL) { - /* Determine which if-statement branch, if any, ends with a - * break. The branch that did *not* have the break will get a - * temporary continue inserted in each iteration of the loop - * unroll. - * - * Note that since ls->num_loop_jumps is <= 1, it is impossible - * for both branches to end with a break. - */ - ir_instruction *ir_if_last = - (ir_instruction *) ir_if->then_instructions.get_tail(); - - if (is_break(ir_if_last)) { - ls->limiting_terminator->ir->remove(); - splice_post_if_instructions(ir_if, &ir_if->else_instructions); - ir_if_last->remove(); - complex_unroll(ir, iterations, false); - return visit_continue; - } else { - ir_if_last = - (ir_instruction *) ir_if->else_instructions.get_tail(); - - if (is_break(ir_if_last)) { - ls->limiting_terminator->ir->remove(); - splice_post_if_instructions(ir_if, &ir_if->then_instructions); - ir_if_last->remove(); - complex_unroll(ir, iterations, true); - return visit_continue; - } - } - } - } - - /* Did not find the break statement. It must be in a complex if-nesting, - * so don't try to unroll. 
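The decision sequence of visit_leave(ir_loop) above can be condensed into one sketch (invented names; the exception where unsupported variable indexing forces unrolling of an oversized body is omitted):

   enum unroll_action { KEEP_LOOP, SIMPLE, SIMPLE_ONCE, COMPLEX };

   static enum unroll_action
   decide_unroll(bool count_known, int iterations, int max_iterations,
                 bool nested_loop, int body_nodes, unsigned num_loop_jumps,
                 bool ends_in_break, bool if_break_form)
   {
      if (!count_known || iterations > max_iterations)
         return KEEP_LOOP;
      if (nested_loop || body_nodes * iterations > max_iterations * 5)
         return KEEP_LOOP;               /* body too large */
      /* The limiting terminator is removed before unrolling. */
      const unsigned remaining_jumps = num_loop_jumps - 1;
      if (remaining_jumps > 1)
         return KEEP_LOOP;
      if (remaining_jumps == 0)
         return SIMPLE;                  /* straight-line body */
      if (ends_in_break)
         return SIMPLE_ONCE;             /* loop runs exactly once */
      return if_break_form ? COMPLEX : KEEP_LOOP;
   }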
- */ - return visit_continue; -} - - -bool -unroll_loops(exec_list *instructions, loop_state *ls, - const struct gl_shader_compiler_options *options) -{ - loop_unroll_visitor v(ls, options); - - v.run(instructions); - - return v.progress; -} diff --git a/src/glsl/lower_buffer_access.cpp b/src/glsl/lower_buffer_access.cpp deleted file mode 100644 index f8c8d140ea8..00000000000 --- a/src/glsl/lower_buffer_access.cpp +++ /dev/null @@ -1,490 +0,0 @@ -/* - * Copyright (c) 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_buffer_access.cpp - * - * Helper for IR lowering pass to replace dereferences of buffer object based - * shader variables with intrinsic function calls. - * - * This helper is used by lowering passes for UBOs, SSBOs and compute shader - * shared variables. - */ - -#include "lower_buffer_access.h" -#include "ir_builder.h" -#include "main/macros.h" -#include "util/list.h" -#include "glsl_parser_extras.h" - -using namespace ir_builder; - -namespace lower_buffer_access { - -static inline int -writemask_for_size(unsigned n) -{ - return ((1 << n) - 1); -} - -/** - * Takes a deref and recursively calls itself to break the deref down to the - * point that the reads or writes generated are contiguous scalars or vectors. - */ -void -lower_buffer_access::emit_access(void *mem_ctx, - bool is_write, - ir_dereference *deref, - ir_variable *base_offset, - unsigned int deref_offset, - bool row_major, - int matrix_columns, - unsigned int packing, - unsigned int write_mask) -{ - if (deref->type->is_record()) { - unsigned int field_offset = 0; - - for (unsigned i = 0; i < deref->type->length; i++) { - const struct glsl_struct_field *field = - &deref->type->fields.structure[i]; - ir_dereference *field_deref = - new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), - field->name); - - field_offset = - glsl_align(field_offset, - field->type->std140_base_alignment(row_major)); - - emit_access(mem_ctx, is_write, field_deref, base_offset, - deref_offset + field_offset, - row_major, 1, packing, - writemask_for_size(field_deref->type->vector_elements)); - - field_offset += field->type->std140_size(row_major); - } - return; - } - - if (deref->type->is_array()) { - unsigned array_stride = packing == GLSL_INTERFACE_PACKING_STD430 ? 
- deref->type->fields.array->std430_array_stride(row_major) : - glsl_align(deref->type->fields.array->std140_size(row_major), 16); - - for (unsigned i = 0; i < deref->type->length; i++) { - ir_constant *element = new(mem_ctx) ir_constant(i); - ir_dereference *element_deref = - new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), - element); - emit_access(mem_ctx, is_write, element_deref, base_offset, - deref_offset + i * array_stride, - row_major, 1, packing, - writemask_for_size(element_deref->type->vector_elements)); - } - return; - } - - if (deref->type->is_matrix()) { - for (unsigned i = 0; i < deref->type->matrix_columns; i++) { - ir_constant *col = new(mem_ctx) ir_constant(i); - ir_dereference *col_deref = - new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), col); - - if (row_major) { - /* For a row-major matrix, the next column starts at the next - * element. - */ - int size_mul = deref->type->is_double() ? 8 : 4; - emit_access(mem_ctx, is_write, col_deref, base_offset, - deref_offset + i * size_mul, - row_major, deref->type->matrix_columns, packing, - writemask_for_size(col_deref->type->vector_elements)); - } else { - int size_mul; - - /* std430 doesn't round up vec2 size to a vec4 size */ - if (packing == GLSL_INTERFACE_PACKING_STD430 && - deref->type->vector_elements == 2 && - !deref->type->is_double()) { - size_mul = 8; - } else { - /* std140 always rounds the stride of arrays (and matrices) to a - * vec4, so matrices are always 16 between columns/rows. With - * doubles, they will be 32 apart when there are more than 2 rows. - * - * For both std140 and std430, if the member is a - * three-component vector with components consuming N basic - * machine units, the base alignment is 4N. For vec4, base - * alignment is 4N. - */ - size_mul = (deref->type->is_double() && - deref->type->vector_elements > 2) ? 32 : 16; - } - - emit_access(mem_ctx, is_write, col_deref, base_offset, - deref_offset + i * size_mul, - row_major, deref->type->matrix_columns, packing, - writemask_for_size(col_deref->type->vector_elements)); - } - } - return; - } - - assert(deref->type->is_scalar() || deref->type->is_vector()); - - if (!row_major) { - ir_rvalue *offset = - add(base_offset, new(mem_ctx) ir_constant(deref_offset)); - unsigned mask = - is_write ? write_mask : (1 << deref->type->vector_elements) - 1; - insert_buffer_access(mem_ctx, deref, deref->type, offset, mask, -1); - } else { - unsigned N = deref->type->is_double() ? 8 : 4; - - /* We're dereffing a column out of a row-major matrix, so we - * gather the vector from each stored row. - */ - assert(deref->type->base_type == GLSL_TYPE_FLOAT || - deref->type->base_type == GLSL_TYPE_DOUBLE); - /* Matrices, row_major or not, are stored as if they were - * arrays of vectors of the appropriate size in std140. - * Arrays have their strides rounded up to a vec4, so the - * matrix stride is always 16. However a double matrix may either be 16 - * or 32 depending on the number of columns. - */ - assert(matrix_columns <= 4); - unsigned matrix_stride = 0; - /* Matrix stride for std430 mat2xY matrices is not rounded up to - * vec4 size. From OpenGL 4.3 spec, section 7.6.2.2 "Standard Uniform - * Block Layout": - * - * "2. If the member is a two- or four-component vector with components - * consuming N basic machine units, the base alignment is 2N or 4N, - * respectively." [...] - * "4. 
If the member is an array of scalars or vectors, the base alignment - * and array stride are set to match the base alignment of a single array - * element, according to rules (1), (2), and (3), and rounded up to the - * base alignment of a vec4." [...] - * "7. If the member is a row-major matrix with C columns and R rows, the - * matrix is stored identically to an array of R row vectors with C - * components each, according to rule (4)." [...] - * "When using the std430 storage layout, shader storage blocks will be - * laid out in buffer storage identically to uniform and shader storage - * blocks using the std140 layout, except that the base alignment and - * stride of arrays of scalars and vectors in rule 4 and of structures in - * rule 9 are not rounded up a multiple of the base alignment of a vec4." - */ - if (packing == GLSL_INTERFACE_PACKING_STD430 && matrix_columns == 2) - matrix_stride = 2 * N; - else - matrix_stride = glsl_align(matrix_columns * N, 16); - - const glsl_type *deref_type = deref->type->base_type == GLSL_TYPE_FLOAT ? - glsl_type::float_type : glsl_type::double_type; - - for (unsigned i = 0; i < deref->type->vector_elements; i++) { - ir_rvalue *chan_offset = - add(base_offset, - new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); - if (!is_write || ((1U << i) & write_mask)) - insert_buffer_access(mem_ctx, deref, deref_type, chan_offset, - (1U << i), i); - } - } -} - -/** - * Determine if a thing being dereferenced is row-major - * - * There is some trickery here. - * - * If the thing being dereferenced is a member of uniform block \b without an - * instance name, then the name of the \c ir_variable is the field name of an - * interface type. If this field is row-major, then the thing referenced is - * row-major. - * - * If the thing being dereferenced is a member of uniform block \b with an - * instance name, then the last dereference in the tree will be an - * \c ir_dereference_record. If that record field is row-major, then the - * thing referenced is row-major. 
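The matrix-stride selection above boils down to the following rule of thumb (a sketch with invented helper names; N is the scalar size in bytes, 4 for float and 8 for double):

   static unsigned align16(unsigned v) { return (v + 15) & ~15u; }

   /* Byte stride between the rows that a row-major column gather walks. */
   static unsigned
   matrix_stride_bytes(bool std430, unsigned matrix_columns, unsigned N)
   {
      if (std430 && matrix_columns == 2)
         return 2 * N;                     /* std430 mat2xY keeps a 2N stride */
      return align16(matrix_columns * N);  /* else round up to a vec4 */
   }

So mat2 rows sit 8 bytes apart under std430 but 16 under std140, while dmat3/dmat4 rows land 32 bytes apart under either layout.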
- */ -bool -lower_buffer_access::is_dereferenced_thing_row_major(const ir_rvalue *deref) -{ - bool matrix = false; - const ir_rvalue *ir = deref; - - while (true) { - matrix = matrix || ir->type->without_array()->is_matrix(); - - switch (ir->ir_type) { - case ir_type_dereference_array: { - const ir_dereference_array *const array_deref = - (const ir_dereference_array *) ir; - - ir = array_deref->array; - break; - } - - case ir_type_dereference_record: { - const ir_dereference_record *const record_deref = - (const ir_dereference_record *) ir; - - ir = record_deref->record; - - const int idx = ir->type->field_index(record_deref->field); - assert(idx >= 0); - - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(ir->type->fields.structure[idx].matrix_layout); - - switch (matrix_layout) { - case GLSL_MATRIX_LAYOUT_INHERITED: - break; - case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: - return false; - case GLSL_MATRIX_LAYOUT_ROW_MAJOR: - return matrix || deref->type->without_array()->is_record(); - } - - break; - } - - case ir_type_dereference_variable: { - const ir_dereference_variable *const var_deref = - (const ir_dereference_variable *) ir; - - const enum glsl_matrix_layout matrix_layout = - glsl_matrix_layout(var_deref->var->data.matrix_layout); - - switch (matrix_layout) { - case GLSL_MATRIX_LAYOUT_INHERITED: { - /* For interface block matrix variables we handle inherited - * layouts at HIR generation time, but we don't do that for shared - * variables, which are always column-major - */ - ir_variable *var = deref->variable_referenced(); - assert((var->is_in_buffer_block() && !matrix) || - var->data.mode == ir_var_shader_shared); - return false; - } - case GLSL_MATRIX_LAYOUT_COLUMN_MAJOR: - return false; - case GLSL_MATRIX_LAYOUT_ROW_MAJOR: - return matrix || deref->type->without_array()->is_record(); - } - - unreachable("invalid matrix layout"); - break; - } - - default: - return false; - } - } - - /* The tree must have ended with a dereference that wasn't an - * ir_dereference_variable. That is invalid, and it should be impossible. - */ - unreachable("invalid dereference tree"); - return false; -} - -/** - * This function initializes various values that will be used later by - * emit_access when actually emitting loads or stores. - * - * Note: const_offset is an input as well as an output, clients must - * initialize it to the offset of the variable in the underlying block, and - * this function will adjust it by adding the constant offset of the member - * being accessed into that variable. - */ -void -lower_buffer_access::setup_buffer_access(void *mem_ctx, - ir_variable *var, - ir_rvalue *deref, - ir_rvalue **offset, - unsigned *const_offset, - bool *row_major, - int *matrix_columns, - unsigned packing) -{ - *offset = new(mem_ctx) ir_constant(0u); - *row_major = is_dereferenced_thing_row_major(deref); - *matrix_columns = 1; - - /* Calculate the offset to the start of the region of the UBO - * dereferenced by *rvalue. This may be a variable offset if an - * array dereference has a variable index. - */ - while (deref) { - switch (deref->ir_type) { - case ir_type_dereference_variable: { - deref = NULL; - break; - } - - case ir_type_dereference_array: { - ir_dereference_array *deref_array = (ir_dereference_array *) deref; - unsigned array_stride; - if (deref_array->array->type->is_vector()) { - /* We get this when storing or loading a component out of a vector - * with a non-constant index. This happens for v[i] = f where v is - * a vector (or m[i][j] = f where m is a matrix). 
If we don't - * lower that here, it gets turned into v = vector_insert(v, i, - * f), which loads the entire vector, modifies one component and - * then writes the entire thing back. That breaks if another - * thread or SIMD channel is modifying the same vector. - */ - array_stride = 4; - if (deref_array->array->type->is_double()) - array_stride *= 2; - } else if (deref_array->array->type->is_matrix() && *row_major) { - /* When loading a vector out of a row major matrix, the - * step between the columns (vectors) is the size of a - * float, while the step between the rows (elements of a - * vector) is handled below in emit_ubo_loads. - */ - array_stride = 4; - if (deref_array->array->type->is_double()) - array_stride *= 2; - *matrix_columns = deref_array->array->type->matrix_columns; - } else if (deref_array->type->without_array()->is_interface()) { - /* We're processing an array dereference of an interface instance - * array. The thing being dereferenced *must* be a variable - * dereference because interfaces cannot be embedded in other - * types. In terms of calculating the offsets for the lowering - * pass, we don't care about the array index. All elements of an - * interface instance array will have the same offsets relative to - * the base of the block that backs them. - */ - deref = deref_array->array->as_dereference(); - break; - } else { - /* Whether or not the field is row-major (because it might be a - * bvec2 or something) does not affect the array itself. We need - * to know whether an array element in its entirety is row-major. - */ - const bool array_row_major = - is_dereferenced_thing_row_major(deref_array); - - /* The array type will give the correct interface packing - * information - */ - if (packing == GLSL_INTERFACE_PACKING_STD430) { - array_stride = deref_array->type->std430_array_stride(array_row_major); - } else { - array_stride = deref_array->type->std140_size(array_row_major); - array_stride = glsl_align(array_stride, 16); - } - } - - ir_rvalue *array_index = deref_array->array_index; - if (array_index->type->base_type == GLSL_TYPE_INT) - array_index = i2u(array_index); - - ir_constant *const_index = - array_index->constant_expression_value(NULL); - if (const_index) { - *const_offset += array_stride * const_index->value.u[0]; - } else { - *offset = add(*offset, - mul(array_index, - new(mem_ctx) ir_constant(array_stride))); - } - deref = deref_array->array->as_dereference(); - break; - } - - case ir_type_dereference_record: { - ir_dereference_record *deref_record = (ir_dereference_record *) deref; - const glsl_type *struct_type = deref_record->record->type; - unsigned intra_struct_offset = 0; - - for (unsigned int i = 0; i < struct_type->length; i++) { - const glsl_type *type = struct_type->fields.structure[i].type; - - ir_dereference_record *field_deref = new(mem_ctx) - ir_dereference_record(deref_record->record, - struct_type->fields.structure[i].name); - const bool field_row_major = - is_dereferenced_thing_row_major(field_deref); - - ralloc_free(field_deref); - - unsigned field_align = 0; - - if (packing == GLSL_INTERFACE_PACKING_STD430) - field_align = type->std430_base_alignment(field_row_major); - else - field_align = type->std140_base_alignment(field_row_major); - - intra_struct_offset = glsl_align(intra_struct_offset, field_align); - - if (strcmp(struct_type->fields.structure[i].name, - deref_record->field) == 0) - break; - - if (packing == GLSL_INTERFACE_PACKING_STD430) - intra_struct_offset += type->std430_size(field_row_major); - else - intra_struct_offset 
+= type->std140_size(field_row_major); - - /* If the field just examined was itself a structure, apply rule - * #9: - * - * "The structure may have padding at the end; the base offset - * of the member following the sub-structure is rounded up to - * the next multiple of the base alignment of the structure." - */ - if (type->without_array()->is_record()) { - intra_struct_offset = glsl_align(intra_struct_offset, - field_align); - - } - } - - *const_offset += intra_struct_offset; - deref = deref_record->record->as_dereference(); - break; - } - - case ir_type_swizzle: { - ir_swizzle *deref_swizzle = (ir_swizzle *) deref; - - assert(deref_swizzle->mask.num_components == 1); - - *const_offset += deref_swizzle->mask.x * sizeof(int); - deref = deref_swizzle->val->as_dereference(); - break; - } - - default: - assert(!"not reached"); - deref = NULL; - break; - } - } -} - -} /* namespace lower_buffer_access */ diff --git a/src/glsl/lower_buffer_access.h b/src/glsl/lower_buffer_access.h deleted file mode 100644 index cc4614e9792..00000000000 --- a/src/glsl/lower_buffer_access.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Copyright (c) 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_buffer_access.h - * - * Helper for IR lowering pass to replace dereferences of buffer object based - * shader variables with intrinsic function calls. - * - * This helper is used by lowering passes for UBOs, SSBOs and compute shader - * shared variables. 
- */ - -#pragma once -#ifndef LOWER_BUFFER_ACCESS_H -#define LOWER_BUFFER_ACCESS_H - -#include "ir.h" -#include "ir_rvalue_visitor.h" - -namespace lower_buffer_access { - -class lower_buffer_access : public ir_rvalue_enter_visitor { -public: - virtual void - insert_buffer_access(void *mem_ctx, ir_dereference *deref, - const glsl_type *type, ir_rvalue *offset, - unsigned mask, int channel) = 0; - - void emit_access(void *mem_ctx, bool is_write, ir_dereference *deref, - ir_variable *base_offset, unsigned int deref_offset, - bool row_major, int matrix_columns, - unsigned int packing, unsigned int write_mask); - - bool is_dereferenced_thing_row_major(const ir_rvalue *deref); - - void setup_buffer_access(void *mem_ctx, ir_variable *var, ir_rvalue *deref, - ir_rvalue **offset, unsigned *const_offset, - bool *row_major, int *matrix_columns, - unsigned packing); -}; - -} /* namespace lower_buffer_access */ - -#endif /* LOWER_BUFFER_ACCESS_H */ diff --git a/src/glsl/lower_clip_distance.cpp b/src/glsl/lower_clip_distance.cpp deleted file mode 100644 index 1ada215796c..00000000000 --- a/src/glsl/lower_clip_distance.cpp +++ /dev/null @@ -1,574 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_clip_distance.cpp - * - * This pass accounts for the difference between the way - * gl_ClipDistance is declared in standard GLSL (as an array of - * floats), and the way it is frequently implemented in hardware (as - * a pair of vec4s, with four clip distances packed into each). - * - * The declaration of gl_ClipDistance is replaced with a declaration - * of gl_ClipDistanceMESA, and any references to gl_ClipDistance are - * translated to refer to gl_ClipDistanceMESA with the appropriate - * swizzling of array indices. For instance: - * - * gl_ClipDistance[i] - * - * is translated into: - * - * gl_ClipDistanceMESA[i>>2][i&3] - * - * Since some hardware may not internally represent gl_ClipDistance as a pair - * of vec4's, this lowering pass is optional. To enable it, set the - * LowerClipDistance flag in gl_shader_compiler_options to true. 
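The index arithmetic this pass relies on can be checked in isolation; a standalone sketch (not Mesa code):

   #include <stdio.h>

   int main(void)
   {
      /* Eight clip distances pack into two vec4s: i >> 2 selects the vec4,
       * i & 3 selects the component within it. */
      for (int i = 0; i < 8; i++)
         printf("gl_ClipDistance[%d] -> gl_ClipDistanceMESA[%d][%d]\n",
                i, i >> 2, i & 3);
      return 0;
   }

gl_ClipDistance[5], for instance, becomes component 1 (that is, .y) of gl_ClipDistanceMESA[1].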
- */ - -#include "glsl_symbol_table.h" -#include "ir_rvalue_visitor.h" -#include "ir.h" -#include "program/prog_instruction.h" /* For WRITEMASK_* */ - -namespace { - -class lower_clip_distance_visitor : public ir_rvalue_visitor { -public: - explicit lower_clip_distance_visitor(gl_shader_stage shader_stage) - : progress(false), old_clip_distance_out_var(NULL), - old_clip_distance_in_var(NULL), new_clip_distance_out_var(NULL), - new_clip_distance_in_var(NULL), shader_stage(shader_stage) - { - } - - virtual ir_visitor_status visit(ir_variable *); - void create_indices(ir_rvalue*, ir_rvalue *&, ir_rvalue *&); - bool is_clip_distance_vec8(ir_rvalue *ir); - ir_rvalue *lower_clip_distance_vec8(ir_rvalue *ir); - virtual ir_visitor_status visit_leave(ir_assignment *); - void visit_new_assignment(ir_assignment *ir); - virtual ir_visitor_status visit_leave(ir_call *); - - virtual void handle_rvalue(ir_rvalue **rvalue); - - void fix_lhs(ir_assignment *); - - bool progress; - - /** - * Pointer to the declaration of gl_ClipDistance, if found. - * - * Note: - * - * - the in_var is for geometry and both tessellation shader inputs only. - * - * - since gl_ClipDistance is available in tessellation control, - * tessellation evaluation and geometry shaders as both an input - * and an output, it's possible for both old_clip_distance_out_var - * and old_clip_distance_in_var to be non-null. - */ - ir_variable *old_clip_distance_out_var; - ir_variable *old_clip_distance_in_var; - - /** - * Pointer to the newly-created gl_ClipDistanceMESA variable. - */ - ir_variable *new_clip_distance_out_var; - ir_variable *new_clip_distance_in_var; - - /** - * Type of shader we are compiling (e.g. MESA_SHADER_VERTEX) - */ - const gl_shader_stage shader_stage; -}; - -} /* anonymous namespace */ - -/** - * Replace any declaration of gl_ClipDistance as an array of floats with a - * declaration of gl_ClipDistanceMESA as an array of vec4's. - */ -ir_visitor_status -lower_clip_distance_visitor::visit(ir_variable *ir) -{ - ir_variable **old_var; - ir_variable **new_var; - - if (!ir->name || strcmp(ir->name, "gl_ClipDistance") != 0) - return visit_continue; - assert (ir->type->is_array()); - - if (ir->data.mode == ir_var_shader_out) { - if (this->old_clip_distance_out_var) - return visit_continue; - old_var = &old_clip_distance_out_var; - new_var = &new_clip_distance_out_var; - } else if (ir->data.mode == ir_var_shader_in) { - if (this->old_clip_distance_in_var) - return visit_continue; - old_var = &old_clip_distance_in_var; - new_var = &new_clip_distance_in_var; - } else { - unreachable("not reached"); - } - - this->progress = true; - - if (!ir->type->fields.array->is_array()) { - /* gl_ClipDistance (used for vertex, tessellation evaluation and - * geometry output, and fragment input). 
- */ - assert((ir->data.mode == ir_var_shader_in && - this->shader_stage == MESA_SHADER_FRAGMENT) || - (ir->data.mode == ir_var_shader_out && - (this->shader_stage == MESA_SHADER_VERTEX || - this->shader_stage == MESA_SHADER_TESS_EVAL || - this->shader_stage == MESA_SHADER_GEOMETRY))); - - *old_var = ir; - assert (ir->type->fields.array == glsl_type::float_type); - unsigned new_size = (ir->type->array_size() + 3) / 4; - - /* Clone the old var so that we inherit all of its properties */ - *new_var = ir->clone(ralloc_parent(ir), NULL); - - /* And change the properties that we need to change */ - (*new_var)->name = ralloc_strdup(*new_var, "gl_ClipDistanceMESA"); - (*new_var)->type = glsl_type::get_array_instance(glsl_type::vec4_type, - new_size); - (*new_var)->data.max_array_access = ir->data.max_array_access / 4; - - ir->replace_with(*new_var); - } else { - /* 2D gl_ClipDistance (used for tessellation control, tessellation - * evaluation and geometry input, and tessellation control output). - */ - assert((ir->data.mode == ir_var_shader_in && - (this->shader_stage == MESA_SHADER_GEOMETRY || - this->shader_stage == MESA_SHADER_TESS_EVAL)) || - this->shader_stage == MESA_SHADER_TESS_CTRL); - - *old_var = ir; - assert (ir->type->fields.array->fields.array == glsl_type::float_type); - unsigned new_size = (ir->type->fields.array->array_size() + 3) / 4; - - /* Clone the old var so that we inherit all of its properties */ - *new_var = ir->clone(ralloc_parent(ir), NULL); - - /* And change the properties that we need to change */ - (*new_var)->name = ralloc_strdup(*new_var, "gl_ClipDistanceMESA"); - (*new_var)->type = glsl_type::get_array_instance( - glsl_type::get_array_instance(glsl_type::vec4_type, - new_size), - ir->type->array_size()); - (*new_var)->data.max_array_access = ir->data.max_array_access / 4; - - ir->replace_with(*new_var); - } - - return visit_continue; -} - - -/** - * Create the necessary GLSL rvalues to index into gl_ClipDistanceMESA based - * on the rvalue previously used to index into gl_ClipDistance. - * - * \param array_index Selects one of the vec4's in gl_ClipDistanceMESA - * \param swizzle_index Selects a component within the vec4 selected by - * array_index. - */ -void -lower_clip_distance_visitor::create_indices(ir_rvalue *old_index, - ir_rvalue *&array_index, - ir_rvalue *&swizzle_index) -{ - void *ctx = ralloc_parent(old_index); - - /* Make sure old_index is a signed int so that the bitwise "shift" and - * "and" operations below type check properly. - */ - if (old_index->type != glsl_type::int_type) { - assert (old_index->type == glsl_type::uint_type); - old_index = new(ctx) ir_expression(ir_unop_u2i, old_index); - } - - ir_constant *old_index_constant = old_index->constant_expression_value(); - if (old_index_constant) { - /* gl_ClipDistance is being accessed via a constant index. Don't bother - * creating expressions to calculate the lowered indices. Just create - * constants. - */ - int const_val = old_index_constant->get_int_component(0); - array_index = new(ctx) ir_constant(const_val / 4); - swizzle_index = new(ctx) ir_constant(const_val % 4); - } else { - /* Create a variable to hold the value of old_index (so that we - * don't compute it twice). 
- */ - ir_variable *old_index_var = new(ctx) ir_variable( - glsl_type::int_type, "clip_distance_index", ir_var_temporary); - this->base_ir->insert_before(old_index_var); - this->base_ir->insert_before(new(ctx) ir_assignment( - new(ctx) ir_dereference_variable(old_index_var), old_index)); - - /* Create the expression clip_distance_index / 4. Do this as a bit - * shift because that's likely to be more efficient. - */ - array_index = new(ctx) ir_expression( - ir_binop_rshift, new(ctx) ir_dereference_variable(old_index_var), - new(ctx) ir_constant(2)); - - /* Create the expression clip_distance_index % 4. Do this as a bitwise - * AND because that's likely to be more efficient. - */ - swizzle_index = new(ctx) ir_expression( - ir_binop_bit_and, new(ctx) ir_dereference_variable(old_index_var), - new(ctx) ir_constant(3)); - } -} - - -/** - * Determine whether the given rvalue describes an array of 8 floats that - * needs to be lowered to an array of 2 vec4's; that is, determine whether it - * matches one of the following patterns: - * - * - gl_ClipDistance (if gl_ClipDistance is 1D) - * - gl_ClipDistance[i] (if gl_ClipDistance is 2D) - */ -bool -lower_clip_distance_visitor::is_clip_distance_vec8(ir_rvalue *ir) -{ - /* Note that geometry shaders contain gl_ClipDistance both as an input - * (which is a 2D array) and an output (which is a 1D array), so it's - * possible for both this->old_clip_distance_out_var and - * this->old_clip_distance_in_var to be non-NULL in the same shader. - */ - - if (!ir->type->is_array()) - return false; - if (ir->type->fields.array != glsl_type::float_type) - return false; - - if (this->old_clip_distance_out_var) { - if (ir->variable_referenced() == this->old_clip_distance_out_var) - return true; - } - if (this->old_clip_distance_in_var) { - assert(this->shader_stage == MESA_SHADER_TESS_CTRL || - this->shader_stage == MESA_SHADER_TESS_EVAL || - this->shader_stage == MESA_SHADER_GEOMETRY || - this->shader_stage == MESA_SHADER_FRAGMENT); - - if (ir->variable_referenced() == this->old_clip_distance_in_var) - return true; - } - return false; -} - - -/** - * If the given ir satisfies is_clip_distance_vec8(), return new ir - * representing its lowered equivalent. That is, map: - * - * - gl_ClipDistance => gl_ClipDistanceMESA (if gl_ClipDistance is 1D) - * - gl_ClipDistance[i] => gl_ClipDistanceMESA[i] (if gl_ClipDistance is 2D) - * - * Otherwise return NULL. 
- */ -ir_rvalue * -lower_clip_distance_visitor::lower_clip_distance_vec8(ir_rvalue *ir) -{ - if (!ir->type->is_array()) - return NULL; - if (ir->type->fields.array != glsl_type::float_type) - return NULL; - - ir_variable **new_var = NULL; - if (this->old_clip_distance_out_var) { - if (ir->variable_referenced() == this->old_clip_distance_out_var) - new_var = &this->new_clip_distance_out_var; - } - if (this->old_clip_distance_in_var) { - if (ir->variable_referenced() == this->old_clip_distance_in_var) - new_var = &this->new_clip_distance_in_var; - } - if (new_var == NULL) - return NULL; - - if (ir->as_dereference_variable()) { - return new(ralloc_parent(ir)) ir_dereference_variable(*new_var); - } else { - ir_dereference_array *array_ref = ir->as_dereference_array(); - assert(array_ref); - assert(array_ref->array->as_dereference_variable()); - - return new(ralloc_parent(ir)) - ir_dereference_array(*new_var, array_ref->array_index); - } -} - - -void -lower_clip_distance_visitor::handle_rvalue(ir_rvalue **rv) -{ - if (*rv == NULL) - return; - - ir_dereference_array *const array_deref = (*rv)->as_dereference_array(); - if (array_deref == NULL) - return; - - /* Replace any expression that indexes one of the floats in gl_ClipDistance - * with an expression that indexes into one of the vec4's in - * gl_ClipDistanceMESA and accesses the appropriate component. - */ - ir_rvalue *lowered_vec8 = - this->lower_clip_distance_vec8(array_deref->array); - if (lowered_vec8 != NULL) { - this->progress = true; - ir_rvalue *array_index; - ir_rvalue *swizzle_index; - this->create_indices(array_deref->array_index, array_index, swizzle_index); - void *mem_ctx = ralloc_parent(array_deref); - - ir_dereference_array *const new_array_deref = - new(mem_ctx) ir_dereference_array(lowered_vec8, array_index); - - ir_expression *const expr = - new(mem_ctx) ir_expression(ir_binop_vector_extract, - new_array_deref, - swizzle_index); - - *rv = expr; - } -} - -void -lower_clip_distance_visitor::fix_lhs(ir_assignment *ir) -{ - if (ir->lhs->ir_type == ir_type_expression) { - void *mem_ctx = ralloc_parent(ir); - ir_expression *const expr = (ir_expression *) ir->lhs; - - /* The expression must be of the form: - * - * (vector_extract gl_ClipDistanceMESA[i], j). - */ - assert(expr->operation == ir_binop_vector_extract); - assert(expr->operands[0]->ir_type == ir_type_dereference_array); - assert(expr->operands[0]->type == glsl_type::vec4_type); - - ir_dereference *const new_lhs = (ir_dereference *) expr->operands[0]; - ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, - glsl_type::vec4_type, - new_lhs->clone(mem_ctx, NULL), - ir->rhs, - expr->operands[1]); - ir->set_lhs(new_lhs); - ir->write_mask = WRITEMASK_XYZW; - } -} - -/** - * Replace any assignment having the 1D gl_ClipDistance (undereferenced) as - * its LHS or RHS with a sequence of assignments, one for each component of - * the array. Each of these assignments is lowered to refer to - * gl_ClipDistanceMESA as appropriate. - * - * We need to do a similar replacement for 2D gl_ClipDistance, however since - * it's an input, the only case we need to address is where a 1D slice of it - * is the entire RHS of an assignment, e.g.: - * - * foo = gl_in[i].gl_ClipDistance - */ -ir_visitor_status -lower_clip_distance_visitor::visit_leave(ir_assignment *ir) -{ - /* First invoke the base class visitor. This causes handle_rvalue() to be - * called on ir->rhs and ir->condition. 
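In effect, an 8-element bulk copy such as `foo = gl_ClipDistance;` unrolls into eight scalar moves out of the packed pair of vec4s; a plain-C rendering of the resulting data movement (stand-in types, not the IR):

   struct vec4 { float v[4]; };

   static void
   unpack_clip_distances(const struct vec4 packed[2], float out[8])
   {
      for (int i = 0; i < 8; i++)
         out[i] = packed[i >> 2].v[i & 3];  /* gl_ClipDistanceMESA[i>>2][i&3] */
   }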
- */ - ir_rvalue_visitor::visit_leave(ir); - - if (this->is_clip_distance_vec8(ir->lhs) || - this->is_clip_distance_vec8(ir->rhs)) { - /* LHS or RHS of the assignment is the entire 1D gl_ClipDistance array - * (or a 1D slice of a 2D gl_ClipDistance input array). Since we are - * reshaping gl_ClipDistance from an array of floats to an array of - * vec4's, this isn't going to work as a bulk assignment anymore, so - * unroll it to element-by-element assignments and lower each of them. - * - * Note: to unroll into element-by-element assignments, we need to make - * clones of the LHS and RHS. This is safe because expressions and - * l-values are side-effect free. - */ - void *ctx = ralloc_parent(ir); - int array_size = ir->lhs->type->array_size(); - for (int i = 0; i < array_size; ++i) { - ir_dereference_array *new_lhs = new(ctx) ir_dereference_array( - ir->lhs->clone(ctx, NULL), new(ctx) ir_constant(i)); - ir_dereference_array *new_rhs = new(ctx) ir_dereference_array( - ir->rhs->clone(ctx, NULL), new(ctx) ir_constant(i)); - this->handle_rvalue((ir_rvalue **) &new_rhs); - - /* Handle the LHS after creating the new assignment. This must - * happen in this order because handle_rvalue may replace the old LHS - * with an ir_expression of ir_binop_vector_extract. Since this is - * not a valid l-value, this will cause an assertion in the - * ir_assignment constructor to fail. - * - * If this occurs, replace the mangled LHS with a dereference of the - * vector, and replace the RHS with an ir_triop_vector_insert. - */ - ir_assignment *const assign = new(ctx) ir_assignment(new_lhs, new_rhs); - this->handle_rvalue((ir_rvalue **) &assign->lhs); - this->fix_lhs(assign); - - this->base_ir->insert_before(assign); - } - ir->remove(); - - return visit_continue; - } - - /* Handle the LHS as if it were an r-value. Normally - * rvalue_visit(ir_assignment *) only visits the RHS, but we need to lower - * expressions in the LHS as well. - * - * This may cause the LHS to get replaced with an ir_expression of - * ir_binop_vector_extract. If this occurs, replace it with a dereference - * of the vector, and replace the RHS with an ir_triop_vector_insert. - */ - handle_rvalue((ir_rvalue **)&ir->lhs); - this->fix_lhs(ir); - - return rvalue_visit(ir); -} - - -/** - * Set up base_ir properly and call visit_leave() on a newly created - * ir_assignment node. This is used in cases where we have to insert an - * ir_assignment in a place where we know the hierarchical visitor won't see - * it. - */ -void -lower_clip_distance_visitor::visit_new_assignment(ir_assignment *ir) -{ - ir_instruction *old_base_ir = this->base_ir; - this->base_ir = ir; - ir->accept(this); - this->base_ir = old_base_ir; -} - - -/** - * If a 1D gl_ClipDistance variable appears as an argument in an ir_call - * expression, replace it with a temporary variable, and make sure the ir_call - * is preceded and/or followed by assignments that copy the contents of the - * temporary variable to and/or from gl_ClipDistance. Each of these - * assignments is then lowered to refer to gl_ClipDistanceMESA. 
- * - * We need to do a similar replacement for 2D gl_ClipDistance, however since - * it's an input, the only case we need to address is where a 1D slice of it - * is passed as an "in" parameter to an ir_call, e.g.: - * - * foo(gl_in[i].gl_ClipDistance) - */ -ir_visitor_status -lower_clip_distance_visitor::visit_leave(ir_call *ir) -{ - void *ctx = ralloc_parent(ir); - - const exec_node *formal_param_node = ir->callee->parameters.head; - const exec_node *actual_param_node = ir->actual_parameters.head; - while (!actual_param_node->is_tail_sentinel()) { - ir_variable *formal_param = (ir_variable *) formal_param_node; - ir_rvalue *actual_param = (ir_rvalue *) actual_param_node; - - /* Advance formal_param_node and actual_param_node now so that we can - * safely replace actual_param with another node, if necessary, below. - */ - formal_param_node = formal_param_node->next; - actual_param_node = actual_param_node->next; - - if (this->is_clip_distance_vec8(actual_param)) { - /* User is trying to pass the whole 1D gl_ClipDistance array (or a 1D - * slice of a 2D gl_ClipDistance array) to a function call. Since we - * are reshaping gl_ClipDistance from an array of floats to an array - * of vec4's, this isn't going to work anymore, so use a temporary - * array instead. - */ - ir_variable *temp_clip_distance = new(ctx) ir_variable( - actual_param->type, "temp_clip_distance", ir_var_temporary); - this->base_ir->insert_before(temp_clip_distance); - actual_param->replace_with( - new(ctx) ir_dereference_variable(temp_clip_distance)); - if (formal_param->data.mode == ir_var_function_in - || formal_param->data.mode == ir_var_function_inout) { - /* Copy from gl_ClipDistance to the temporary before the call. - * Since we are going to insert this copy before the current - * instruction, we need to visit it afterwards to make sure it - * gets lowered. - */ - ir_assignment *new_assignment = new(ctx) ir_assignment( - new(ctx) ir_dereference_variable(temp_clip_distance), - actual_param->clone(ctx, NULL)); - this->base_ir->insert_before(new_assignment); - this->visit_new_assignment(new_assignment); - } - if (formal_param->data.mode == ir_var_function_out - || formal_param->data.mode == ir_var_function_inout) { - /* Copy from the temporary to gl_ClipDistance after the call. - * Since visit_list_elements() has already decided which - * instruction it's going to visit next, we need to visit - * afterwards to make sure it gets lowered. 
- */ - ir_assignment *new_assignment = new(ctx) ir_assignment( - actual_param->clone(ctx, NULL), - new(ctx) ir_dereference_variable(temp_clip_distance)); - this->base_ir->insert_after(new_assignment); - this->visit_new_assignment(new_assignment); - } - } - } - - return rvalue_visit(ir); -} - - -bool -lower_clip_distance(gl_shader *shader) -{ - lower_clip_distance_visitor v(shader->Stage); - - visit_list_elements(&v, shader->ir); - - if (v.new_clip_distance_out_var) - shader->symbols->add_variable(v.new_clip_distance_out_var); - if (v.new_clip_distance_in_var) - shader->symbols->add_variable(v.new_clip_distance_in_var); - - return v.progress; -} diff --git a/src/glsl/lower_const_arrays_to_uniforms.cpp b/src/glsl/lower_const_arrays_to_uniforms.cpp deleted file mode 100644 index 2d024d4b78c..00000000000 --- a/src/glsl/lower_const_arrays_to_uniforms.cpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_const_arrays_to_uniforms.cpp - * - * Lower constant arrays to uniform arrays. - * - * Some driver backends (such as i965 and nouveau) don't handle constant arrays - * gracefully, instead treating them as ordinary writable temporary arrays. - * Since arrays can be large, this often means spilling them to scratch memory, - * which usually involves a large number of instructions. - * - * This must be called prior to link_set_uniform_initializers(); we need the - * linker to process our new uniform's constant initializer. - * - * This should be called after optimizations, since those can result in - * splitting and removing arrays that are indexed by constant expressions. 
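By way of illustration (GLSL, with an invented uniform name; the real pass derives the name from the dereference's address via "constarray__%p"):

   // Before: backends without constant-array support see a writable
   // temporary array, which may be spilled to scratch memory.
   float kernel[3] = float[](0.25, 0.5, 0.25);
   sum += kernel[i] * texel;

   // After: the initializer lives in a hidden, read-only uniform that
   // link_set_uniform_initializers() fills in at link time.
   uniform float constarray__example[3];
   sum += constarray__example[i] * texel;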
- */ -#include "ir.h" -#include "ir_visitor.h" -#include "ir_rvalue_visitor.h" -#include "compiler/glsl_types.h" - -namespace { -class lower_const_array_visitor : public ir_rvalue_visitor { -public: - lower_const_array_visitor(exec_list *insts) - { - instructions = insts; - progress = false; - } - - bool run() - { - visit_list_elements(this, instructions); - return progress; - } - - void handle_rvalue(ir_rvalue **rvalue); - -private: - exec_list *instructions; - bool progress; -}; - -void -lower_const_array_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - if (!*rvalue) - return; - - ir_dereference_array *dra = (*rvalue)->as_dereference_array(); - if (!dra) - return; - - ir_constant *con = dra->array->as_constant(); - if (!con || !con->type->is_array()) - return; - - void *mem_ctx = ralloc_parent(con); - - char *uniform_name = ralloc_asprintf(mem_ctx, "constarray__%p", dra); - - ir_variable *uni = - new(mem_ctx) ir_variable(con->type, uniform_name, ir_var_uniform); - uni->constant_initializer = con; - uni->constant_value = con; - uni->data.has_initializer = true; - uni->data.how_declared = ir_var_hidden; - uni->data.read_only = true; - /* Assume the whole thing is accessed. */ - uni->data.max_array_access = uni->type->length - 1; - instructions->push_head(uni); - - ir_dereference_variable *varref = new(mem_ctx) ir_dereference_variable(uni); - *rvalue = new(mem_ctx) ir_dereference_array(varref, dra->array_index); - - progress = true; -} - -} /* anonymous namespace */ - -bool -lower_const_arrays_to_uniforms(exec_list *instructions) -{ - lower_const_array_visitor v(instructions); - return v.run(); -} diff --git a/src/glsl/lower_discard.cpp b/src/glsl/lower_discard.cpp deleted file mode 100644 index b62eb20dcb4..00000000000 --- a/src/glsl/lower_discard.cpp +++ /dev/null @@ -1,201 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_discard.cpp - * - * This pass moves discards out of if-statements. 
- * - * Case 1: The "then" branch contains a conditional discard: - * --------------------------------------------------------- - * - * if (cond1) { - * s1; - * discard cond2; - * s2; - * } else { - * s3; - * } - * - * becomes: - * - * temp = false; - * if (cond1) { - * s1; - * temp = cond2; - * s2; - * } else { - * s3; - * } - * discard temp; - * - * Case 2: The "else" branch contains a conditional discard: - * --------------------------------------------------------- - * - * if (cond1) { - * s1; - * } else { - * s2; - * discard cond2; - * s3; - * } - * - * becomes: - * - * temp = false; - * if (cond1) { - * s1; - * } else { - * s2; - * temp = cond2; - * s3; - * } - * discard temp; - * - * Case 3: Both branches contain a conditional discard: - * ---------------------------------------------------- - * - * if (cond1) { - * s1; - * discard cond2; - * s2; - * } else { - * s3; - * discard cond3; - * s4; - * } - * - * becomes: - * - * temp = false; - * if (cond1) { - * s1; - * temp = cond2; - * s2; - * } else { - * s3; - * temp = cond3; - * s4; - * } - * discard temp; - * - * If there are multiple conditional discards, we need only deal with one of - * them. Repeatedly applying this pass will take care of the others. - * - * Unconditional discards are treated as having a condition of "true". - */ - -#include "compiler/glsl_types.h" -#include "ir.h" - -namespace { - -class lower_discard_visitor : public ir_hierarchical_visitor { -public: - lower_discard_visitor() - { - this->progress = false; - } - - ir_visitor_status visit_leave(ir_if *); - - bool progress; -}; - -} /* anonymous namespace */ - -bool -lower_discard(exec_list *instructions) -{ - lower_discard_visitor v; - - visit_list_elements(&v, instructions); - - return v.progress; -} - - -static ir_discard * -find_discard(exec_list &instructions) -{ - foreach_in_list(ir_instruction, node, &instructions) { - ir_discard *ir = node->as_discard(); - if (ir != NULL) - return ir; - } - return NULL; -} - - -static void -replace_discard(void *mem_ctx, ir_variable *var, ir_discard *ir) -{ - ir_rvalue *condition = ir->condition; - - /* For unconditional discards, use "true" as the condition. */ - if (condition == NULL) - condition = new(mem_ctx) ir_constant(true); - - ir_assignment *assignment = - new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(var), - condition, NULL); - - ir->replace_with(assignment); -} - - -ir_visitor_status -lower_discard_visitor::visit_leave(ir_if *ir) -{ - ir_discard *then_discard = find_discard(ir->then_instructions); - ir_discard *else_discard = find_discard(ir->else_instructions); - - if (then_discard == NULL && else_discard == NULL) - return visit_continue; - - void *mem_ctx = ralloc_parent(ir); - - ir_variable *temp = new(mem_ctx) ir_variable(glsl_type::bool_type, - "discard_cond_temp", - ir_var_temporary); - ir_assignment *temp_initializer = - new(mem_ctx) ir_assignment(new(mem_ctx) ir_dereference_variable(temp), - new(mem_ctx) ir_constant(false), NULL); - - ir->insert_before(temp); - ir->insert_before(temp_initializer); - - if (then_discard != NULL) - replace_discard(mem_ctx, temp, then_discard); - - if (else_discard != NULL) - replace_discard(mem_ctx, temp, else_discard); - - ir_discard *discard = then_discard != NULL ? 
then_discard : else_discard; - discard->condition = new(mem_ctx) ir_dereference_variable(temp); - ir->insert_after(discard); - - this->progress = true; - - return visit_continue; -} diff --git a/src/glsl/lower_discard_flow.cpp b/src/glsl/lower_discard_flow.cpp deleted file mode 100644 index 9d0a56b230d..00000000000 --- a/src/glsl/lower_discard_flow.cpp +++ /dev/null @@ -1,155 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** @file lower_discard_flow.cpp - * - * Implements the GLSL 1.30 revision 9 rule for fragment shader - * discard handling: - * - * "Control flow exits the shader, and subsequent implicit or - * explicit derivatives are undefined when this control flow is - * non-uniform (meaning different fragments within the primitive - * take different control paths)." - * - * There seem to be two conflicting things here. "Control flow exits - * the shader" sounds like the discarded fragments should effectively - * jump to the end of the shader, but that breaks derivatives in the - * case of uniform control flow and causes rendering failure in the - * bushes in Unigine Tropics. - * - * The question, then, is whether the intent was "loops stop at the - * point that the only active channels left are discarded pixels" or - * "discarded pixels become inactive at the point that control flow - * returns to the top of a loop". This implements the second - * interpretation. 
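- *
- * As a rough sketch of the second interpretation (not verbatim IR), the
- * pass adds a "discarded" flag and turns
- *
- *    loop {
- *       ...
- *       discard cond;
- *       ...
- *    }
- *
- * into approximately
- *
- *    discarded = false;        // at the top of main()
- *    loop {
- *       ...
- *       discarded = cond;      // "true" for an unconditional discard
- *       discard (discarded);
- *       ...
- *       if (discarded)
- *          break;              // also inserted before each continue
- *    }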
- */ - -#include "compiler/glsl_types.h" -#include "ir.h" -#include "program/hash_table.h" - -namespace { - -class lower_discard_flow_visitor : public ir_hierarchical_visitor { -public: - lower_discard_flow_visitor(ir_variable *discarded) - : discarded(discarded) - { - mem_ctx = ralloc_parent(discarded); - } - - ~lower_discard_flow_visitor() - { - } - - ir_visitor_status visit_enter(ir_discard *ir); - ir_visitor_status visit_enter(ir_loop_jump *ir); - ir_visitor_status visit_enter(ir_loop *ir); - ir_visitor_status visit_enter(ir_function_signature *ir); - - ir_if *generate_discard_break(); - - ir_variable *discarded; - void *mem_ctx; -}; - -} /* anonymous namespace */ - -ir_visitor_status -lower_discard_flow_visitor::visit_enter(ir_loop_jump *ir) -{ - if (ir->mode != ir_loop_jump::jump_continue) - return visit_continue; - - ir->insert_before(generate_discard_break()); - - return visit_continue; -} - -ir_visitor_status -lower_discard_flow_visitor::visit_enter(ir_discard *ir) -{ - ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(discarded); - ir_rvalue *rhs; - if (ir->condition) { - /* discarded <- condition, use (var_ref discarded) as the condition */ - rhs = ir->condition; - ir->condition = new(mem_ctx) ir_dereference_variable(discarded); - } else { - rhs = new(mem_ctx) ir_constant(true); - } - ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, rhs); - ir->insert_before(assign); - - return visit_continue; -} - -ir_visitor_status -lower_discard_flow_visitor::visit_enter(ir_loop *ir) -{ - ir->body_instructions.push_tail(generate_discard_break()); - - return visit_continue; -} - -ir_visitor_status -lower_discard_flow_visitor::visit_enter(ir_function_signature *ir) -{ - if (strcmp(ir->function_name(), "main") != 0) - return visit_continue; - - ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(discarded); - ir_rvalue *rhs = new(mem_ctx) ir_constant(false); - ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, rhs); - ir->body.push_head(assign); - - return visit_continue; -} - -ir_if * -lower_discard_flow_visitor::generate_discard_break() -{ - ir_rvalue *if_condition = new(mem_ctx) ir_dereference_variable(discarded); - ir_if *if_inst = new(mem_ctx) ir_if(if_condition); - - ir_instruction *br = new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_break); - if_inst->then_instructions.push_tail(br); - - return if_inst; -} - -void -lower_discard_flow(exec_list *ir) -{ - void *mem_ctx = ir; - - ir_variable *var = new(mem_ctx) ir_variable(glsl_type::bool_type, - "discarded", - ir_var_temporary); - - ir->push_head(var); - - lower_discard_flow_visitor v(var); - - visit_list_elements(&v, ir); -} diff --git a/src/glsl/lower_if_to_cond_assign.cpp b/src/glsl/lower_if_to_cond_assign.cpp deleted file mode 100644 index 6a7034794b2..00000000000 --- a/src/glsl/lower_if_to_cond_assign.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_if_to_cond_assign.cpp - * - * This attempts to flatten if-statements to conditional assignments for - * GPUs with limited or no flow control support. - * - * It can't handle other control flow being inside of its block, such - * as calls or loops. Hopefully loop unrolling and inlining will take - * care of those. - * - * Drivers for GPUs with no control flow support should simply call - * - * lower_if_to_cond_assign(instructions) - * - * to attempt to flatten all if-statements. - * - * Some GPUs (such as i965 prior to gen6) do support control flow, but have a - * maximum nesting depth N. Drivers for such hardware can call - * - * lower_if_to_cond_assign(instructions, N) - * - * to attempt to flatten any if-statements appearing at depth > N. - */ - -#include "compiler/glsl_types.h" -#include "ir.h" -#include "program/hash_table.h" - -namespace { - -class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor { -public: - ir_if_to_cond_assign_visitor(unsigned max_depth) - { - this->progress = false; - this->max_depth = max_depth; - this->depth = 0; - - this->condition_variables = hash_table_ctor(0, hash_table_pointer_hash, - hash_table_pointer_compare); - } - - ~ir_if_to_cond_assign_visitor() - { - hash_table_dtor(this->condition_variables); - } - - ir_visitor_status visit_enter(ir_if *); - ir_visitor_status visit_leave(ir_if *); - - bool progress; - unsigned max_depth; - unsigned depth; - - struct hash_table *condition_variables; -}; - -} /* anonymous namespace */ - -bool -lower_if_to_cond_assign(exec_list *instructions, unsigned max_depth) -{ - if (max_depth == UINT_MAX) - return false; - - ir_if_to_cond_assign_visitor v(max_depth); - - visit_list_elements(&v, instructions); - - return v.progress; -} - -void -check_control_flow(ir_instruction *ir, void *data) -{ - bool *found_control_flow = (bool *)data; - switch (ir->ir_type) { - case ir_type_call: - case ir_type_discard: - case ir_type_loop: - case ir_type_loop_jump: - case ir_type_return: - *found_control_flow = true; - break; - default: - break; - } -} - -void -move_block_to_cond_assign(void *mem_ctx, - ir_if *if_ir, ir_rvalue *cond_expr, - exec_list *instructions, - struct hash_table *ht) -{ - foreach_in_list_safe(ir_instruction, ir, instructions) { - if (ir->ir_type == ir_type_assignment) { - ir_assignment *assign = (ir_assignment *)ir; - - if (hash_table_find(ht, assign) == NULL) { - hash_table_insert(ht, assign, assign); - - /* If the LHS of the assignment is a condition variable that was - * previously added, insert an additional assignment of false to - * the variable. 
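- *
- * Rough illustration (variable names invented for clarity): lowering the
- * inner if of
- *
- *    if (a) { if (b) { x = 1; } }
- *
- * first emits "t_inner = b;" plus a conditional assignment to x. When
- * the outer if is lowered afterwards, "t_inner = b;" has a condition
- * variable as its LHS, so it is rewritten to "t_inner = t_outer && b;",
- * keeping the inner assignments predicated on both conditions.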
- */ - const bool assign_to_cv = - hash_table_find(ht, assign->lhs->variable_referenced()) != NULL; - - if (!assign->condition) { - if (assign_to_cv) { - assign->rhs = - new(mem_ctx) ir_expression(ir_binop_logic_and, - glsl_type::bool_type, - cond_expr->clone(mem_ctx, NULL), - assign->rhs); - } else { - assign->condition = cond_expr->clone(mem_ctx, NULL); - } - } else { - assign->condition = - new(mem_ctx) ir_expression(ir_binop_logic_and, - glsl_type::bool_type, - cond_expr->clone(mem_ctx, NULL), - assign->condition); - } - } - } - - /* Now, move from the if block to the block surrounding it. */ - ir->remove(); - if_ir->insert_before(ir); - } -} - -ir_visitor_status -ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir) -{ - (void) ir; - this->depth++; - - return visit_continue; -} - -ir_visitor_status -ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir) -{ - /* Only flatten when beyond the GPU's maximum supported nesting depth. */ - if (this->depth-- <= this->max_depth) - return visit_continue; - - bool found_control_flow = false; - ir_assignment *assign; - - /* Check that both blocks don't contain anything we can't support. */ - foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) { - visit_tree(then_ir, check_control_flow, &found_control_flow); - } - foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) { - visit_tree(else_ir, check_control_flow, &found_control_flow); - } - if (found_control_flow) - return visit_continue; - - void *mem_ctx = ralloc_parent(ir); - - /* Store the condition to a variable. Move all of the instructions from - * the then-clause of the if-statement. Use the condition variable as a - * condition for all assignments. - */ - ir_variable *const then_var = - new(mem_ctx) ir_variable(glsl_type::bool_type, - "if_to_cond_assign_then", - ir_var_temporary); - ir->insert_before(then_var); - - ir_dereference_variable *then_cond = - new(mem_ctx) ir_dereference_variable(then_var); - - assign = new(mem_ctx) ir_assignment(then_cond, ir->condition); - ir->insert_before(assign); - - move_block_to_cond_assign(mem_ctx, ir, then_cond, - &ir->then_instructions, - this->condition_variables); - - /* Add the new condition variable to the hash table. This allows us to - * find this variable when lowering other (enclosing) if-statements. - */ - hash_table_insert(this->condition_variables, then_var, then_var); - - /* If there are instructions in the else-clause, store the inverse of the - * condition to a variable. Move all of the instructions from the - * else-clause if the if-statement. Use the (inverse) condition variable - * as a condition for all assignments. - */ - if (!ir->else_instructions.is_empty()) { - ir_variable *const else_var = - new(mem_ctx) ir_variable(glsl_type::bool_type, - "if_to_cond_assign_else", - ir_var_temporary); - ir->insert_before(else_var); - - ir_dereference_variable *else_cond = - new(mem_ctx) ir_dereference_variable(else_var); - - ir_rvalue *inverse = - new(mem_ctx) ir_expression(ir_unop_logic_not, - then_cond->clone(mem_ctx, NULL)); - - assign = new(mem_ctx) ir_assignment(else_cond, inverse); - ir->insert_before(assign); - - move_block_to_cond_assign(mem_ctx, ir, else_cond, - &ir->else_instructions, - this->condition_variables); - - /* Add the new condition variable to the hash table. This allows us to - * find this variable when lowering other (enclosing) if-statements. 
- */ - hash_table_insert(this->condition_variables, else_var, else_var); - } - - ir->remove(); - - this->progress = true; - - return visit_continue; -} diff --git a/src/glsl/lower_instructions.cpp b/src/glsl/lower_instructions.cpp deleted file mode 100644 index 1875149b7a6..00000000000 --- a/src/glsl/lower_instructions.cpp +++ /dev/null @@ -1,1061 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_instructions.cpp - * - * Many GPUs lack native instructions for certain expression operations, and - * must replace them with some other expression tree. This pass lowers some - * of the most common cases, allowing the lowering code to be implemented once - * rather than in each driver backend. - * - * Currently supported transformations: - * - SUB_TO_ADD_NEG - * - DIV_TO_MUL_RCP - * - INT_DIV_TO_MUL_RCP - * - EXP_TO_EXP2 - * - POW_TO_EXP2 - * - LOG_TO_LOG2 - * - MOD_TO_FLOOR - * - LDEXP_TO_ARITH - * - DFREXP_TO_ARITH - * - CARRY_TO_ARITH - * - BORROW_TO_ARITH - * - SAT_TO_CLAMP - * - DOPS_TO_DFRAC - * - * SUB_TO_ADD_NEG: - * --------------- - * Breaks an ir_binop_sub expression down to add(op0, neg(op1)) - * - * This simplifies expression reassociation, and for many backends - * there is no subtract operation separate from adding the negation. - * For backends with native subtract operations, they will probably - * want to recognize add(op0, neg(op1)) or the other way around to - * produce a subtract anyway. - * - * DIV_TO_MUL_RCP and INT_DIV_TO_MUL_RCP: - * -------------------------------------- - * Breaks an ir_binop_div expression down to op0 * (rcp(op1)). - * - * Many GPUs don't have a divide instruction (945 and 965 included), - * but they do have an RCP instruction to compute an approximate - * reciprocal. By breaking the operation down, constant reciprocals - * can get constant folded. - * - * DIV_TO_MUL_RCP only lowers floating point division; INT_DIV_TO_MUL_RCP - * handles the integer case, converting to and from floating point so that - * RCP is possible. - * - * EXP_TO_EXP2 and LOG_TO_LOG2: - * ---------------------------- - * Many GPUs don't have a base e log or exponent instruction, but they - * do have base 2 versions, so this pass converts exp and log to exp2 - * and log2 operations. - * - * POW_TO_EXP2: - * ----------- - * Many older GPUs don't have an x**y instruction. For these GPUs, convert - * x**y to 2**(y * log2(x)). 
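- *
- * For example, pow(4.0, 1.5) is rewritten to exp2(1.5 * log2(4.0)) =
- * exp2(3.0) = 8.0.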
- * - * MOD_TO_FLOOR: - * ------------- - * Breaks an ir_binop_mod expression down to (op0 - op1 * floor(op0 / op1)) - * - * Many GPUs don't have a MOD instruction (945 and 965 included), and - * if we have to break it down like this anyway, it gives an - * opportunity to do things like constant fold the (1.0 / op1) easily. - * - * Note: before we used to implement this as op1 * fract(op / op1) but this - * implementation had significant precision errors. - * - * LDEXP_TO_ARITH: - * ------------- - * Converts ir_binop_ldexp to arithmetic and bit operations for float sources. - * - * DFREXP_DLDEXP_TO_ARITH: - * --------------- - * Converts ir_binop_ldexp, ir_unop_frexp_sig, and ir_unop_frexp_exp to - * arithmetic and bit ops for double arguments. - * - * CARRY_TO_ARITH: - * --------------- - * Converts ir_carry into (x + y) < x. - * - * BORROW_TO_ARITH: - * ---------------- - * Converts ir_borrow into (x < y). - * - * SAT_TO_CLAMP: - * ------------- - * Converts ir_unop_saturate into min(max(x, 0.0), 1.0) - * - * DOPS_TO_DFRAC: - * -------------- - * Converts double trunc, ceil, floor, round to fract - */ - -#include "c99_math.h" -#include "program/prog_instruction.h" /* for swizzle */ -#include "compiler/glsl_types.h" -#include "ir.h" -#include "ir_builder.h" -#include "ir_optimization.h" - -using namespace ir_builder; - -namespace { - -class lower_instructions_visitor : public ir_hierarchical_visitor { -public: - lower_instructions_visitor(unsigned lower) - : progress(false), lower(lower) { } - - ir_visitor_status visit_leave(ir_expression *); - - bool progress; - -private: - unsigned lower; /** Bitfield of which operations to lower */ - - void sub_to_add_neg(ir_expression *); - void div_to_mul_rcp(ir_expression *); - void int_div_to_mul_rcp(ir_expression *); - void mod_to_floor(ir_expression *); - void exp_to_exp2(ir_expression *); - void pow_to_exp2(ir_expression *); - void log_to_log2(ir_expression *); - void ldexp_to_arith(ir_expression *); - void dldexp_to_arith(ir_expression *); - void dfrexp_sig_to_arith(ir_expression *); - void dfrexp_exp_to_arith(ir_expression *); - void carry_to_arith(ir_expression *); - void borrow_to_arith(ir_expression *); - void sat_to_clamp(ir_expression *); - void double_dot_to_fma(ir_expression *); - void double_lrp(ir_expression *); - void dceil_to_dfrac(ir_expression *); - void dfloor_to_dfrac(ir_expression *); - void dround_even_to_dfrac(ir_expression *); - void dtrunc_to_dfrac(ir_expression *); - void dsign_to_csel(ir_expression *); -}; - -} /* anonymous namespace */ - -/** - * Determine if a particular type of lowering should occur - */ -#define lowering(x) (this->lower & x) - -bool -lower_instructions(exec_list *instructions, unsigned what_to_lower) -{ - lower_instructions_visitor v(what_to_lower); - - visit_list_elements(&v, instructions); - return v.progress; -} - -void -lower_instructions_visitor::sub_to_add_neg(ir_expression *ir) -{ - ir->operation = ir_binop_add; - ir->operands[1] = new(ir) ir_expression(ir_unop_neg, ir->operands[1]->type, - ir->operands[1], NULL); - this->progress = true; -} - -void -lower_instructions_visitor::div_to_mul_rcp(ir_expression *ir) -{ - assert(ir->operands[1]->type->is_float() || ir->operands[1]->type->is_double()); - - /* New expression for the 1.0 / op1 */ - ir_rvalue *expr; - expr = new(ir) ir_expression(ir_unop_rcp, - ir->operands[1]->type, - ir->operands[1]); - - /* op0 / op1 -> op0 * (1.0 / op1) */ - ir->operation = ir_binop_mul; - ir->operands[1] = expr; - - this->progress = true; -} - -void 
-lower_instructions_visitor::int_div_to_mul_rcp(ir_expression *ir) -{ - assert(ir->operands[1]->type->is_integer()); - - /* Be careful with integer division -- we need to do it as a - * float and re-truncate, since rcp(n > 1) of an integer would - * just be 0. - */ - ir_rvalue *op0, *op1; - const struct glsl_type *vec_type; - - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, - ir->operands[1]->type->vector_elements, - ir->operands[1]->type->matrix_columns); - - if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) - op1 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[1], NULL); - else - op1 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[1], NULL); - - op1 = new(ir) ir_expression(ir_unop_rcp, op1->type, op1, NULL); - - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, - ir->operands[0]->type->vector_elements, - ir->operands[0]->type->matrix_columns); - - if (ir->operands[0]->type->base_type == GLSL_TYPE_INT) - op0 = new(ir) ir_expression(ir_unop_i2f, vec_type, ir->operands[0], NULL); - else - op0 = new(ir) ir_expression(ir_unop_u2f, vec_type, ir->operands[0], NULL); - - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, - ir->type->vector_elements, - ir->type->matrix_columns); - - op0 = new(ir) ir_expression(ir_binop_mul, vec_type, op0, op1); - - if (ir->operands[1]->type->base_type == GLSL_TYPE_INT) { - ir->operation = ir_unop_f2i; - ir->operands[0] = op0; - } else { - ir->operation = ir_unop_i2u; - ir->operands[0] = new(ir) ir_expression(ir_unop_f2i, op0); - } - ir->operands[1] = NULL; - - this->progress = true; -} - -void -lower_instructions_visitor::exp_to_exp2(ir_expression *ir) -{ - ir_constant *log2_e = new(ir) ir_constant(float(M_LOG2E)); - - ir->operation = ir_unop_exp2; - ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[0]->type, - ir->operands[0], log2_e); - this->progress = true; -} - -void -lower_instructions_visitor::pow_to_exp2(ir_expression *ir) -{ - ir_expression *const log2_x = - new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type, - ir->operands[0]); - - ir->operation = ir_unop_exp2; - ir->operands[0] = new(ir) ir_expression(ir_binop_mul, ir->operands[1]->type, - ir->operands[1], log2_x); - ir->operands[1] = NULL; - this->progress = true; -} - -void -lower_instructions_visitor::log_to_log2(ir_expression *ir) -{ - ir->operation = ir_binop_mul; - ir->operands[0] = new(ir) ir_expression(ir_unop_log2, ir->operands[0]->type, - ir->operands[0], NULL); - ir->operands[1] = new(ir) ir_constant(float(1.0 / M_LOG2E)); - this->progress = true; -} - -void -lower_instructions_visitor::mod_to_floor(ir_expression *ir) -{ - ir_variable *x = new(ir) ir_variable(ir->operands[0]->type, "mod_x", - ir_var_temporary); - ir_variable *y = new(ir) ir_variable(ir->operands[1]->type, "mod_y", - ir_var_temporary); - this->base_ir->insert_before(x); - this->base_ir->insert_before(y); - - ir_assignment *const assign_x = - new(ir) ir_assignment(new(ir) ir_dereference_variable(x), - ir->operands[0], NULL); - ir_assignment *const assign_y = - new(ir) ir_assignment(new(ir) ir_dereference_variable(y), - ir->operands[1], NULL); - - this->base_ir->insert_before(assign_x); - this->base_ir->insert_before(assign_y); - - ir_expression *const div_expr = - new(ir) ir_expression(ir_binop_div, x->type, - new(ir) ir_dereference_variable(x), - new(ir) ir_dereference_variable(y)); - - /* Don't generate new IR that would need to be lowered in an additional - * pass. 
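- * The ir_binop_div expression built here is newly created, and the
- * hierarchical visitor has already moved past this point, so it would
- * not be revisited; lower it (and the floor below) inline instead.
- *
- * Worked example of the overall rewrite: mod(7.5, 2.0) becomes
- * 7.5 - 2.0 * floor(7.5 / 2.0) = 7.5 - 2.0 * 3.0 = 1.5.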
- */
- if (lowering(DIV_TO_MUL_RCP) && (ir->type->is_float() || ir->type->is_double()))
- div_to_mul_rcp(div_expr);
-
- ir_expression *const floor_expr =
- new(ir) ir_expression(ir_unop_floor, x->type, div_expr);
-
- if (lowering(DOPS_TO_DFRAC) && ir->type->is_double())
- dfloor_to_dfrac(floor_expr);
-
- ir_expression *const mul_expr =
- new(ir) ir_expression(ir_binop_mul,
- new(ir) ir_dereference_variable(y),
- floor_expr);
-
- ir->operation = ir_binop_sub;
- ir->operands[0] = new(ir) ir_dereference_variable(x);
- ir->operands[1] = mul_expr;
- this->progress = true;
-}
-
-void
-lower_instructions_visitor::ldexp_to_arith(ir_expression *ir)
-{
- /* Translates
- * ir_binop_ldexp x exp
- * into
- *
- * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
- * resulting_biased_exp = extracted_biased_exp + exp;
- *
- * if (resulting_biased_exp < 1) {
- * return copysign(0.0, x);
- * }
- *
- * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
- * lshift(i2u(resulting_biased_exp), exp_shift));
- *
- * which we can't actually implement as such, since the GLSL IR doesn't
- * have vectorized if-statements. We actually implement it without branches
- * using conditional-select:
- *
- * extracted_biased_exp = rshift(bitcast_f2i(abs(x)), exp_shift);
- * resulting_biased_exp = extracted_biased_exp + exp;
- *
- * is_not_zero_or_underflow = gequal(resulting_biased_exp, 1);
- * x = csel(is_not_zero_or_underflow, x, copysign(0.0f, x));
- * resulting_biased_exp = csel(is_not_zero_or_underflow,
- * resulting_biased_exp, 0);
- *
- * return bitcast_u2f((bitcast_f2u(x) & sign_mantissa_mask) |
- * lshift(i2u(resulting_biased_exp), exp_shift));
- */
-
- const unsigned vec_elem = ir->type->vector_elements;
-
- /* Types */
- const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
- const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
-
- /* Constants */
- ir_constant *zeroi = ir_constant::zero(ir, ivec);
-
- ir_constant *sign_mask = new(ir) ir_constant(0x80000000u, vec_elem);
-
- ir_constant *exp_shift = new(ir) ir_constant(23, vec_elem);
- ir_constant *exp_width = new(ir) ir_constant(8, vec_elem);
-
- /* Temporary variables */
- ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
- ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
-
- ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
- ir_var_temporary);
-
- ir_variable *extracted_biased_exp =
- new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
- ir_variable *resulting_biased_exp =
- new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
-
- ir_variable *is_not_zero_or_underflow =
- new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
-
- ir_instruction &i = *base_ir;
-
- /* Copy <x> and <exp> arguments. */
- i.insert_before(x);
- i.insert_before(assign(x, ir->operands[0]));
- i.insert_before(exp);
- i.insert_before(assign(exp, ir->operands[1]));
-
- /* Extract the biased exponent from <x>. */
- i.insert_before(extracted_biased_exp);
- i.insert_before(assign(extracted_biased_exp,
- rshift(bitcast_f2i(abs(x)), exp_shift)));
-
- i.insert_before(resulting_biased_exp);
- i.insert_before(assign(resulting_biased_exp,
- add(extracted_biased_exp, exp)));
-
- /* Test if result is ±0.0, subnormal, or underflow by checking if the
- * resulting biased exponent would be less than 0x1. If so, the result is
- * 0.0 with the sign of x.
(Actually, invert the conditions so that
- * immediate values are the second arguments, which is better for i965)
- */
- i.insert_before(zero_sign_x);
- i.insert_before(assign(zero_sign_x,
- bitcast_u2f(bit_and(bitcast_f2u(x), sign_mask))));
-
- i.insert_before(is_not_zero_or_underflow);
- i.insert_before(assign(is_not_zero_or_underflow,
- gequal(resulting_biased_exp,
- new(ir) ir_constant(0x1, vec_elem))));
- i.insert_before(assign(x, csel(is_not_zero_or_underflow,
- x, zero_sign_x)));
- i.insert_before(assign(resulting_biased_exp,
- csel(is_not_zero_or_underflow,
- resulting_biased_exp, zeroi)));
-
- /* We could test for overflows by checking if the resulting biased exponent
- * would be greater than 0xFE. Turns out we don't need to because the GLSL
- * spec says:
- *
- * "If this product is too large to be represented in the
- * floating-point type, the result is undefined."
- */
-
- ir_constant *exp_shift_clone = exp_shift->clone(ir, NULL);
- ir->operation = ir_unop_bitcast_i2f;
- ir->operands[0] = bitfield_insert(bitcast_f2i(x), resulting_biased_exp,
- exp_shift_clone, exp_width);
- ir->operands[1] = NULL;
-
- this->progress = true;
-}
-
-void
-lower_instructions_visitor::dldexp_to_arith(ir_expression *ir)
-{
- /* See ldexp_to_arith for structure. Uses frexp_exp to extract the exponent
- * from the significand.
- */
-
- const unsigned vec_elem = ir->type->vector_elements;
-
- /* Types */
- const glsl_type *ivec = glsl_type::get_instance(GLSL_TYPE_INT, vec_elem, 1);
- const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1);
-
- /* Constants */
- ir_constant *zeroi = ir_constant::zero(ir, ivec);
-
- ir_constant *sign_mask = new(ir) ir_constant(0x80000000u);
-
- ir_constant *exp_shift = new(ir) ir_constant(20u);
- ir_constant *exp_width = new(ir) ir_constant(11u);
- ir_constant *exp_bias = new(ir) ir_constant(1022, vec_elem);
-
- /* Temporary variables */
- ir_variable *x = new(ir) ir_variable(ir->type, "x", ir_var_temporary);
- ir_variable *exp = new(ir) ir_variable(ivec, "exp", ir_var_temporary);
-
- ir_variable *zero_sign_x = new(ir) ir_variable(ir->type, "zero_sign_x",
- ir_var_temporary);
-
- ir_variable *extracted_biased_exp =
- new(ir) ir_variable(ivec, "extracted_biased_exp", ir_var_temporary);
- ir_variable *resulting_biased_exp =
- new(ir) ir_variable(ivec, "resulting_biased_exp", ir_var_temporary);
-
- ir_variable *is_not_zero_or_underflow =
- new(ir) ir_variable(bvec, "is_not_zero_or_underflow", ir_var_temporary);
-
- ir_instruction &i = *base_ir;
-
- /* Copy <x> and <exp> arguments. */
- i.insert_before(x);
- i.insert_before(assign(x, ir->operands[0]));
- i.insert_before(exp);
- i.insert_before(assign(exp, ir->operands[1]));
-
- ir_expression *frexp_exp = expr(ir_unop_frexp_exp, x);
- if (lowering(DFREXP_DLDEXP_TO_ARITH))
- dfrexp_exp_to_arith(frexp_exp);
-
- /* Extract the biased exponent from <x>. */
- i.insert_before(extracted_biased_exp);
- i.insert_before(assign(extracted_biased_exp, add(frexp_exp, exp_bias)));
-
- i.insert_before(resulting_biased_exp);
- i.insert_before(assign(resulting_biased_exp,
- add(extracted_biased_exp, exp)));
-
- /* Test if result is ±0.0, subnormal, or underflow by checking if the
- * resulting biased exponent would be less than 0x1. If so, the result is
- * 0.0 with the sign of x. (Actually, invert the conditions so that
- * immediate values are the second arguments, which is better for i965)
- * TODO: Implement in a vector fashion.
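- * (There is no vector version of unpackDouble2x32, hence the
- * per-component loops below.)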
- */ - i.insert_before(zero_sign_x); - for (unsigned elem = 0; elem < vec_elem; elem++) { - ir_variable *unpacked = - new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); - i.insert_before(unpacked); - i.insert_before( - assign(unpacked, - expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1)))); - i.insert_before(assign(unpacked, bit_and(swizzle_y(unpacked), sign_mask->clone(ir, NULL)), - WRITEMASK_Y)); - i.insert_before(assign(unpacked, ir_constant::zero(ir, glsl_type::uint_type), WRITEMASK_X)); - i.insert_before(assign(zero_sign_x, - expr(ir_unop_pack_double_2x32, unpacked), - 1 << elem)); - } - i.insert_before(is_not_zero_or_underflow); - i.insert_before(assign(is_not_zero_or_underflow, - gequal(resulting_biased_exp, - new(ir) ir_constant(0x1, vec_elem)))); - i.insert_before(assign(x, csel(is_not_zero_or_underflow, - x, zero_sign_x))); - i.insert_before(assign(resulting_biased_exp, - csel(is_not_zero_or_underflow, - resulting_biased_exp, zeroi))); - - /* We could test for overflows by checking if the resulting biased exponent - * would be greater than 0xFE. Turns out we don't need to because the GLSL - * spec says: - * - * "If this product is too large to be represented in the - * floating-point type, the result is undefined." - */ - - ir_rvalue *results[4] = {NULL}; - for (unsigned elem = 0; elem < vec_elem; elem++) { - ir_variable *unpacked = - new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); - i.insert_before(unpacked); - i.insert_before( - assign(unpacked, - expr(ir_unop_unpack_double_2x32, swizzle(x, elem, 1)))); - - ir_expression *bfi = bitfield_insert( - swizzle_y(unpacked), - i2u(swizzle(resulting_biased_exp, elem, 1)), - exp_shift->clone(ir, NULL), - exp_width->clone(ir, NULL)); - - i.insert_before(assign(unpacked, bfi, WRITEMASK_Y)); - - results[elem] = expr(ir_unop_pack_double_2x32, unpacked); - } - - ir->operation = ir_quadop_vector; - ir->operands[0] = results[0]; - ir->operands[1] = results[1]; - ir->operands[2] = results[2]; - ir->operands[3] = results[3]; - - /* Don't generate new IR that would need to be lowered in an additional - * pass. - */ - - this->progress = true; -} - -void -lower_instructions_visitor::dfrexp_sig_to_arith(ir_expression *ir) -{ - const unsigned vec_elem = ir->type->vector_elements; - const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); - - /* Double-precision floating-point values are stored as - * 1 sign bit; - * 11 exponent bits; - * 52 mantissa bits. - * - * We're just extracting the significand here, so we only need to modify - * the upper 32-bit uint. Unfortunately we must extract each double - * independently as there is no vector version of unpackDouble. - */ - - ir_instruction &i = *base_ir; - - ir_variable *is_not_zero = - new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary); - ir_rvalue *results[4] = {NULL}; - - ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem); - i.insert_before(is_not_zero); - i.insert_before( - assign(is_not_zero, - nequal(abs(ir->operands[0]->clone(ir, NULL)), dzero))); - - /* TODO: Remake this as more vector-friendly when int64 support is - * available. - */ - for (unsigned elem = 0; elem < vec_elem; elem++) { - ir_constant *zero = new(ir) ir_constant(0u, 1); - ir_constant *sign_mantissa_mask = new(ir) ir_constant(0x800fffffu, 1); - - /* Exponent of double floating-point values in the range [0.5, 1.0). 
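- * The high dword 0x3fe00000 encodes a biased exponent of 0x3fe = 1022,
- * i.e. an unbiased exponent of -1, which scales the implicit 1.m
- * mantissa into [0.5, 1.0).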
*/ - ir_constant *exponent_value = new(ir) ir_constant(0x3fe00000u, 1); - - ir_variable *bits = - new(ir) ir_variable(glsl_type::uint_type, "bits", ir_var_temporary); - ir_variable *unpacked = - new(ir) ir_variable(glsl_type::uvec2_type, "unpacked", ir_var_temporary); - - ir_rvalue *x = swizzle(ir->operands[0]->clone(ir, NULL), elem, 1); - - i.insert_before(bits); - i.insert_before(unpacked); - i.insert_before(assign(unpacked, expr(ir_unop_unpack_double_2x32, x))); - - /* Manipulate the high uint to remove the exponent and replace it with - * either the default exponent or zero. - */ - i.insert_before(assign(bits, swizzle_y(unpacked))); - i.insert_before(assign(bits, bit_and(bits, sign_mantissa_mask))); - i.insert_before(assign(bits, bit_or(bits, - csel(swizzle(is_not_zero, elem, 1), - exponent_value, - zero)))); - i.insert_before(assign(unpacked, bits, WRITEMASK_Y)); - results[elem] = expr(ir_unop_pack_double_2x32, unpacked); - } - - /* Put the dvec back together */ - ir->operation = ir_quadop_vector; - ir->operands[0] = results[0]; - ir->operands[1] = results[1]; - ir->operands[2] = results[2]; - ir->operands[3] = results[3]; - - this->progress = true; -} - -void -lower_instructions_visitor::dfrexp_exp_to_arith(ir_expression *ir) -{ - const unsigned vec_elem = ir->type->vector_elements; - const glsl_type *bvec = glsl_type::get_instance(GLSL_TYPE_BOOL, vec_elem, 1); - const glsl_type *uvec = glsl_type::get_instance(GLSL_TYPE_UINT, vec_elem, 1); - - /* Double-precision floating-point values are stored as - * 1 sign bit; - * 11 exponent bits; - * 52 mantissa bits. - * - * We're just extracting the exponent here, so we only care about the upper - * 32-bit uint. - */ - - ir_instruction &i = *base_ir; - - ir_variable *is_not_zero = - new(ir) ir_variable(bvec, "is_not_zero", ir_var_temporary); - ir_variable *high_words = - new(ir) ir_variable(uvec, "high_words", ir_var_temporary); - ir_constant *dzero = new(ir) ir_constant(0.0, vec_elem); - ir_constant *izero = new(ir) ir_constant(0, vec_elem); - - ir_rvalue *absval = abs(ir->operands[0]); - - i.insert_before(is_not_zero); - i.insert_before(high_words); - i.insert_before(assign(is_not_zero, nequal(absval->clone(ir, NULL), dzero))); - - /* Extract all of the upper uints. */ - for (unsigned elem = 0; elem < vec_elem; elem++) { - ir_rvalue *x = swizzle(absval->clone(ir, NULL), elem, 1); - - i.insert_before(assign(high_words, - swizzle_y(expr(ir_unop_unpack_double_2x32, x)), - 1 << elem)); - - } - ir_constant *exponent_shift = new(ir) ir_constant(20, vec_elem); - ir_constant *exponent_bias = new(ir) ir_constant(-1022, vec_elem); - - /* For non-zero inputs, shift the exponent down and apply bias. 
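- * For example, x = 1.0 has the high dword 0x3ff00000; shifting right
- * by 20 yields the biased exponent 1023, and adding -1022 gives 1,
- * matching frexp(1.0) = 0.5 * 2**1.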
*/ - ir->operation = ir_triop_csel; - ir->operands[0] = new(ir) ir_dereference_variable(is_not_zero); - ir->operands[1] = add(exponent_bias, u2i(rshift(high_words, exponent_shift))); - ir->operands[2] = izero; - - this->progress = true; -} - -void -lower_instructions_visitor::carry_to_arith(ir_expression *ir) -{ - /* Translates - * ir_binop_carry x y - * into - * sum = ir_binop_add x y - * bcarry = ir_binop_less sum x - * carry = ir_unop_b2i bcarry - */ - - ir_rvalue *x_clone = ir->operands[0]->clone(ir, NULL); - ir->operation = ir_unop_i2u; - ir->operands[0] = b2i(less(add(ir->operands[0], ir->operands[1]), x_clone)); - ir->operands[1] = NULL; - - this->progress = true; -} - -void -lower_instructions_visitor::borrow_to_arith(ir_expression *ir) -{ - /* Translates - * ir_binop_borrow x y - * into - * bcarry = ir_binop_less x y - * carry = ir_unop_b2i bcarry - */ - - ir->operation = ir_unop_i2u; - ir->operands[0] = b2i(less(ir->operands[0], ir->operands[1])); - ir->operands[1] = NULL; - - this->progress = true; -} - -void -lower_instructions_visitor::sat_to_clamp(ir_expression *ir) -{ - /* Translates - * ir_unop_saturate x - * into - * ir_binop_min (ir_binop_max(x, 0.0), 1.0) - */ - - ir->operation = ir_binop_min; - ir->operands[0] = new(ir) ir_expression(ir_binop_max, ir->operands[0]->type, - ir->operands[0], - new(ir) ir_constant(0.0f)); - ir->operands[1] = new(ir) ir_constant(1.0f); - - this->progress = true; -} - -void -lower_instructions_visitor::double_dot_to_fma(ir_expression *ir) -{ - ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type->get_base_type(), "dot_res", - ir_var_temporary); - this->base_ir->insert_before(temp); - - int nc = ir->operands[0]->type->components(); - for (int i = nc - 1; i >= 1; i--) { - ir_assignment *assig; - if (i == (nc - 1)) { - assig = assign(temp, mul(swizzle(ir->operands[0]->clone(ir, NULL), i, 1), - swizzle(ir->operands[1]->clone(ir, NULL), i, 1))); - } else { - assig = assign(temp, fma(swizzle(ir->operands[0]->clone(ir, NULL), i, 1), - swizzle(ir->operands[1]->clone(ir, NULL), i, 1), - temp)); - } - this->base_ir->insert_before(assig); - } - - ir->operation = ir_triop_fma; - ir->operands[0] = swizzle(ir->operands[0], 0, 1); - ir->operands[1] = swizzle(ir->operands[1], 0, 1); - ir->operands[2] = new(ir) ir_dereference_variable(temp); - - this->progress = true; - -} - -void -lower_instructions_visitor::double_lrp(ir_expression *ir) -{ - int swizval; - ir_rvalue *op0 = ir->operands[0], *op2 = ir->operands[2]; - ir_constant *one = new(ir) ir_constant(1.0, op2->type->vector_elements); - - switch (op2->type->vector_elements) { - case 1: - swizval = SWIZZLE_XXXX; - break; - default: - assert(op0->type->vector_elements == op2->type->vector_elements); - swizval = SWIZZLE_XYZW; - break; - } - - ir->operation = ir_triop_fma; - ir->operands[0] = swizzle(op2, swizval, op0->type->vector_elements); - ir->operands[2] = mul(sub(one, op2->clone(ir, NULL)), op0); - - this->progress = true; -} - -void -lower_instructions_visitor::dceil_to_dfrac(ir_expression *ir) -{ - /* - * frtemp = frac(x); - * temp = sub(x, frtemp); - * result = temp + ((frtemp != 0.0) ? 
1.0 : 0.0); - */ - ir_instruction &i = *base_ir; - ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); - ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); - ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", - ir_var_temporary); - - i.insert_before(frtemp); - i.insert_before(assign(frtemp, fract(ir->operands[0]))); - - ir->operation = ir_binop_add; - ir->operands[0] = sub(ir->operands[0]->clone(ir, NULL), frtemp); - ir->operands[1] = csel(nequal(frtemp, zero), one, zero->clone(ir, NULL)); - - this->progress = true; -} - -void -lower_instructions_visitor::dfloor_to_dfrac(ir_expression *ir) -{ - /* - * frtemp = frac(x); - * result = sub(x, frtemp); - */ - ir->operation = ir_binop_sub; - ir->operands[1] = fract(ir->operands[0]->clone(ir, NULL)); - - this->progress = true; -} -void -lower_instructions_visitor::dround_even_to_dfrac(ir_expression *ir) -{ - /* - * insane but works - * temp = x + 0.5; - * frtemp = frac(temp); - * t2 = sub(temp, frtemp); - * if (frac(x) == 0.5) - * result = frac(t2 * 0.5) == 0 ? t2 : t2 - 1; - * else - * result = t2; - - */ - ir_instruction &i = *base_ir; - ir_variable *frtemp = new(ir) ir_variable(ir->operands[0]->type, "frtemp", - ir_var_temporary); - ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", - ir_var_temporary); - ir_variable *t2 = new(ir) ir_variable(ir->operands[0]->type, "t2", - ir_var_temporary); - ir_constant *p5 = new(ir) ir_constant(0.5, ir->operands[0]->type->vector_elements); - ir_constant *one = new(ir) ir_constant(1.0, ir->operands[0]->type->vector_elements); - ir_constant *zero = new(ir) ir_constant(0.0, ir->operands[0]->type->vector_elements); - - i.insert_before(temp); - i.insert_before(assign(temp, add(ir->operands[0], p5))); - - i.insert_before(frtemp); - i.insert_before(assign(frtemp, fract(temp))); - - i.insert_before(t2); - i.insert_before(assign(t2, sub(temp, frtemp))); - - ir->operation = ir_triop_csel; - ir->operands[0] = equal(fract(ir->operands[0]->clone(ir, NULL)), - p5->clone(ir, NULL)); - ir->operands[1] = csel(equal(fract(mul(t2, p5->clone(ir, NULL))), - zero), - t2, - sub(t2, one)); - ir->operands[2] = new(ir) ir_dereference_variable(t2); - - this->progress = true; -} - -void -lower_instructions_visitor::dtrunc_to_dfrac(ir_expression *ir) -{ - /* - * frtemp = frac(x); - * temp = sub(x, frtemp); - * result = x >= 0 ? temp : temp + (frtemp == 0.0) ? 0 : 1; - */ - ir_rvalue *arg = ir->operands[0]; - ir_instruction &i = *base_ir; - - ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements); - ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements); - ir_variable *frtemp = new(ir) ir_variable(arg->type, "frtemp", - ir_var_temporary); - ir_variable *temp = new(ir) ir_variable(ir->operands[0]->type, "temp", - ir_var_temporary); - - i.insert_before(frtemp); - i.insert_before(assign(frtemp, fract(arg))); - i.insert_before(temp); - i.insert_before(assign(temp, sub(arg->clone(ir, NULL), frtemp))); - - ir->operation = ir_triop_csel; - ir->operands[0] = gequal(arg->clone(ir, NULL), zero); - ir->operands[1] = new (ir) ir_dereference_variable(temp); - ir->operands[2] = add(temp, - csel(equal(frtemp, zero->clone(ir, NULL)), - zero->clone(ir, NULL), - one)); - - this->progress = true; -} - -void -lower_instructions_visitor::dsign_to_csel(ir_expression *ir) -{ - /* - * temp = x > 0.0 ? 1.0 : 0.0; - * result = x < 0.0 ? 
-1.0 : temp; - */ - ir_rvalue *arg = ir->operands[0]; - ir_constant *zero = new(ir) ir_constant(0.0, arg->type->vector_elements); - ir_constant *one = new(ir) ir_constant(1.0, arg->type->vector_elements); - ir_constant *neg_one = new(ir) ir_constant(-1.0, arg->type->vector_elements); - - ir->operation = ir_triop_csel; - ir->operands[0] = less(arg->clone(ir, NULL), - zero->clone(ir, NULL)); - ir->operands[1] = neg_one; - ir->operands[2] = csel(greater(arg, zero), - one, - zero->clone(ir, NULL)); - - this->progress = true; -} - -ir_visitor_status -lower_instructions_visitor::visit_leave(ir_expression *ir) -{ - switch (ir->operation) { - case ir_binop_dot: - if (ir->operands[0]->type->is_double()) - double_dot_to_fma(ir); - break; - case ir_triop_lrp: - if (ir->operands[0]->type->is_double()) - double_lrp(ir); - break; - case ir_binop_sub: - if (lowering(SUB_TO_ADD_NEG)) - sub_to_add_neg(ir); - break; - - case ir_binop_div: - if (ir->operands[1]->type->is_integer() && lowering(INT_DIV_TO_MUL_RCP)) - int_div_to_mul_rcp(ir); - else if ((ir->operands[1]->type->is_float() || - ir->operands[1]->type->is_double()) && lowering(DIV_TO_MUL_RCP)) - div_to_mul_rcp(ir); - break; - - case ir_unop_exp: - if (lowering(EXP_TO_EXP2)) - exp_to_exp2(ir); - break; - - case ir_unop_log: - if (lowering(LOG_TO_LOG2)) - log_to_log2(ir); - break; - - case ir_binop_mod: - if (lowering(MOD_TO_FLOOR) && (ir->type->is_float() || ir->type->is_double())) - mod_to_floor(ir); - break; - - case ir_binop_pow: - if (lowering(POW_TO_EXP2)) - pow_to_exp2(ir); - break; - - case ir_binop_ldexp: - if (lowering(LDEXP_TO_ARITH) && ir->type->is_float()) - ldexp_to_arith(ir); - if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->type->is_double()) - dldexp_to_arith(ir); - break; - - case ir_unop_frexp_exp: - if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double()) - dfrexp_exp_to_arith(ir); - break; - - case ir_unop_frexp_sig: - if (lowering(DFREXP_DLDEXP_TO_ARITH) && ir->operands[0]->type->is_double()) - dfrexp_sig_to_arith(ir); - break; - - case ir_binop_carry: - if (lowering(CARRY_TO_ARITH)) - carry_to_arith(ir); - break; - - case ir_binop_borrow: - if (lowering(BORROW_TO_ARITH)) - borrow_to_arith(ir); - break; - - case ir_unop_saturate: - if (lowering(SAT_TO_CLAMP)) - sat_to_clamp(ir); - break; - - case ir_unop_trunc: - if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) - dtrunc_to_dfrac(ir); - break; - - case ir_unop_ceil: - if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) - dceil_to_dfrac(ir); - break; - - case ir_unop_floor: - if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) - dfloor_to_dfrac(ir); - break; - - case ir_unop_round_even: - if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) - dround_even_to_dfrac(ir); - break; - - case ir_unop_sign: - if (lowering(DOPS_TO_DFRAC) && ir->type->is_double()) - dsign_to_csel(ir); - break; - default: - return visit_continue; - } - - return visit_continue; -} diff --git a/src/glsl/lower_jumps.cpp b/src/glsl/lower_jumps.cpp deleted file mode 100644 index 3cfa2e00ae8..00000000000 --- a/src/glsl/lower_jumps.cpp +++ /dev/null @@ -1,1022 +0,0 @@ -/* - * Copyright © 2010 Luca Barbieri - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * 
Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_jumps.cpp - * - * This pass lowers jumps (break, continue, and return) to if/else structures. - * - * It can be asked to: - * 1. Pull jumps out of ifs where possible - * 2. Remove all "continue"s, replacing them with an "execute flag" - * 3. Replace all "break" with a single conditional one at the end of the loop - * 4. Replace all "return"s with a single return at the end of the function, - * for the main function and/or other functions - * - * Applying this pass gives several benefits: - * 1. All functions can be inlined. - * 2. nv40 and other pre-DX10 chips without "continue" can be supported - * 3. nv30 and other pre-DX10 chips with no control flow at all are better - * supported - * - * Continues are lowered by adding a per-loop "execute flag", initialized to - * true, that when cleared inhibits all execution until the end of the loop. - * - * Breaks are lowered to continues, plus setting a "break flag" that is checked - * at the end of the loop, and trigger the unique "break". - * - * Returns are lowered to breaks/continues, plus adding a "return flag" that - * causes loops to break again out of their enclosing loops until all the - * loops are exited: then the "execute flag" logic will ignore everything - * until the end of the function. - * - * Note that "continue" and "return" can also be implemented by adding - * a dummy loop and using break. - * However, this is bad for hardware with limited nesting depth, and - * prevents further optimization, and thus is not currently performed. - */ - -#include "compiler/glsl_types.h" -#include -#include "ir.h" - -/** - * Enum recording the result of analyzing how control flow might exit - * an IR node. - * - * Each possible value of jump_strength indicates a strictly stronger - * guarantee on control flow than the previous value. - * - * The ordering of strengths roughly reflects the way jumps are - * lowered: jumps with higher strength tend to be lowered to jumps of - * lower strength. Accordingly, strength is used as a heuristic to - * determine which lowering to perform first. - * - * This enum is also used by get_jump_strength() to categorize - * instructions as either break, continue, return, or other. When - * used in this fashion, strength_always_clears_execute_flag is not - * used. - * - * The control flow analysis made by this optimization pass makes two - * simplifying assumptions: - * - * - It ignores discard instructions, since they are lowered by a - * separate pass (lower_discard.cpp). - * - * - It assumes it is always possible for control to flow from a loop - * to the instruction immediately following it. Technically, this - * is not true (since all execution paths through the loop might - * jump back to the top, or return from the function). 
- * - * Both of these simplifying assumtions are safe, since they can never - * cause reachable code to be incorrectly classified as unreachable; - * they can only do the opposite. - */ -enum jump_strength -{ - /** - * Analysis has produced no guarantee on how control flow might - * exit this IR node. It might fall out the bottom (with or - * without clearing the execute flag, if present), or it might - * continue to the top of the innermost enclosing loop, break out - * of it, or return from the function. - */ - strength_none, - - /** - * The only way control can fall out the bottom of this node is - * through a code path that clears the execute flag. It might also - * continue to the top of the innermost enclosing loop, break out - * of it, or return from the function. - */ - strength_always_clears_execute_flag, - - /** - * Control cannot fall out the bottom of this node. It might - * continue to the top of the innermost enclosing loop, break out - * of it, or return from the function. - */ - strength_continue, - - /** - * Control cannot fall out the bottom of this node, or continue the - * top of the innermost enclosing loop. It can only break out of - * it or return from the function. - */ - strength_break, - - /** - * Control cannot fall out the bottom of this node, continue to the - * top of the innermost enclosing loop, or break out of it. It can - * only return from the function. - */ - strength_return -}; - -namespace { - -struct block_record -{ - /* minimum jump strength (of lowered IR, not pre-lowering IR) - * - * If the block ends with a jump, must be the strength of the jump. - * Otherwise, the jump would be dead and have been deleted before) - * - * If the block doesn't end with a jump, it can be different than strength_none if all paths before it lead to some jump - * (e.g. an if with a return in one branch, and a break in the other, while not lowering them) - * Note that identical jumps are usually unified though. - */ - jump_strength min_strength; - - /* can anything clear the execute flag? */ - bool may_clear_execute_flag; - - block_record() - { - this->min_strength = strength_none; - this->may_clear_execute_flag = false; - } -}; - -struct loop_record -{ - ir_function_signature* signature; - ir_loop* loop; - - /* used to avoid lowering the break used to represent lowered breaks */ - unsigned nesting_depth; - bool in_if_at_the_end_of_the_loop; - - bool may_set_return_flag; - - ir_variable* break_flag; - ir_variable* execute_flag; /* cleared to emulate continue */ - - loop_record(ir_function_signature* p_signature = 0, ir_loop* p_loop = 0) - { - this->signature = p_signature; - this->loop = p_loop; - this->nesting_depth = 0; - this->in_if_at_the_end_of_the_loop = false; - this->may_set_return_flag = false; - this->break_flag = 0; - this->execute_flag = 0; - } - - ir_variable* get_execute_flag() - { - /* also supported for the "function loop" */ - if(!this->execute_flag) { - exec_list& list = this->loop ? 
this->loop->body_instructions : signature->body; - this->execute_flag = new(this->signature) ir_variable(glsl_type::bool_type, "execute_flag", ir_var_temporary); - list.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(execute_flag), new(this->signature) ir_constant(true), 0)); - list.push_head(this->execute_flag); - } - return this->execute_flag; - } - - ir_variable* get_break_flag() - { - assert(this->loop); - if(!this->break_flag) { - this->break_flag = new(this->signature) ir_variable(glsl_type::bool_type, "break_flag", ir_var_temporary); - this->loop->insert_before(this->break_flag); - this->loop->insert_before(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(break_flag), new(this->signature) ir_constant(false), 0)); - } - return this->break_flag; - } -}; - -struct function_record -{ - ir_function_signature* signature; - ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */ - ir_variable* return_value; - bool lower_return; - unsigned nesting_depth; - - function_record(ir_function_signature* p_signature = 0, - bool lower_return = false) - { - this->signature = p_signature; - this->return_flag = 0; - this->return_value = 0; - this->nesting_depth = 0; - this->lower_return = lower_return; - } - - ir_variable* get_return_flag() - { - if(!this->return_flag) { - this->return_flag = new(this->signature) ir_variable(glsl_type::bool_type, "return_flag", ir_var_temporary); - this->signature->body.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(return_flag), new(this->signature) ir_constant(false), 0)); - this->signature->body.push_head(this->return_flag); - } - return this->return_flag; - } - - ir_variable* get_return_value() - { - if(!this->return_value) { - assert(!this->signature->return_type->is_void()); - return_value = new(this->signature) ir_variable(this->signature->return_type, "return_value", ir_var_temporary); - this->signature->body.push_head(this->return_value); - } - return this->return_value; - } -}; - -struct ir_lower_jumps_visitor : public ir_control_flow_visitor { - /* Postconditions: on exit of any visit() function: - * - * ANALYSIS: this->block.min_strength, - * this->block.may_clear_execute_flag, and - * this->loop.may_set_return_flag are updated to reflect the - * characteristics of the visited statement. - * - * DEAD_CODE_ELIMINATION: If this->block.min_strength is not - * strength_none, the visited node is at the end of its exec_list. - * In other words, any unreachable statements that follow the - * visited statement in its exec_list have been removed. - * - * CONTAINED_JUMPS_LOWERED: If the visited statement contains other - * statements, then should_lower_jump() is false for all of the - * return, break, or continue statements it contains. - * - * Note that visiting a jump does not lower it. That is the - * responsibility of the statement (or function signature) that - * contains the jump. 
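- *
- * As a rough sketch of the execute-flag strategy (the exact IR
- * differs), with lower_continue enabled
- *
- *    loop { a; if (c) continue; b; }
- *
- * is rewritten to approximately
- *
- *    loop {
- *       execute_flag = true;
- *       a;
- *       if (c) { execute_flag = false; }
- *       if (execute_flag) { b; }
- *    }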
- */ - - bool progress; - - struct function_record function; - struct loop_record loop; - struct block_record block; - - bool pull_out_jumps; - bool lower_continue; - bool lower_break; - bool lower_sub_return; - bool lower_main_return; - - ir_lower_jumps_visitor() - : progress(false), - pull_out_jumps(false), - lower_continue(false), - lower_break(false), - lower_sub_return(false), - lower_main_return(false) - { - } - - void truncate_after_instruction(exec_node *ir) - { - if (!ir) - return; - - while (!ir->get_next()->is_tail_sentinel()) { - ((ir_instruction *)ir->get_next())->remove(); - this->progress = true; - } - } - - void move_outer_block_inside(ir_instruction *ir, exec_list *inner_block) - { - while (!ir->get_next()->is_tail_sentinel()) { - ir_instruction *move_ir = (ir_instruction *)ir->get_next(); - - move_ir->remove(); - inner_block->push_tail(move_ir); - } - } - - /** - * Insert the instructions necessary to lower a return statement, - * before the given return instruction. - */ - void insert_lowered_return(ir_return *ir) - { - ir_variable* return_flag = this->function.get_return_flag(); - if(!this->function.signature->return_type->is_void()) { - ir_variable* return_value = this->function.get_return_value(); - ir->insert_before( - new(ir) ir_assignment( - new (ir) ir_dereference_variable(return_value), - ir->value)); - } - ir->insert_before( - new(ir) ir_assignment( - new (ir) ir_dereference_variable(return_flag), - new (ir) ir_constant(true))); - this->loop.may_set_return_flag = true; - } - - /** - * If the given instruction is a return, lower it to instructions - * that store the return value (if there is one), set the return - * flag, and then break. - * - * It is safe to pass NULL to this function. - */ - void lower_return_unconditionally(ir_instruction *ir) - { - if (get_jump_strength(ir) != strength_return) { - return; - } - insert_lowered_return((ir_return*)ir); - ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); - } - - /** - * Create the necessary instruction to replace a break instruction. - */ - ir_instruction *create_lowered_break() - { - void *ctx = this->function.signature; - return new(ctx) ir_assignment( - new(ctx) ir_dereference_variable(this->loop.get_break_flag()), - new(ctx) ir_constant(true), - 0); - } - - /** - * If the given instruction is a break, lower it to an instruction - * that sets the break flag, without consulting - * should_lower_jump(). - * - * It is safe to pass NULL to this function. - */ - void lower_break_unconditionally(ir_instruction *ir) - { - if (get_jump_strength(ir) != strength_break) { - return; - } - ir->replace_with(create_lowered_break()); - } - - /** - * If the block ends in a conditional or unconditional break, lower - * it, even though should_lower_jump() says it needn't be lowered. - */ - void lower_final_breaks(exec_list *block) - { - ir_instruction *ir = (ir_instruction *) block->get_tail(); - lower_break_unconditionally(ir); - ir_if *ir_if = ir->as_if(); - if (ir_if) { - lower_break_unconditionally( - (ir_instruction *) ir_if->then_instructions.get_tail()); - lower_break_unconditionally( - (ir_instruction *) ir_if->else_instructions.get_tail()); - } - } - - virtual void visit(class ir_loop_jump * ir) - { - /* Eliminate all instructions after each one, since they are - * unreachable. This satisfies the DEAD_CODE_ELIMINATION - * postcondition. - */ - truncate_after_instruction(ir); - - /* Set this->block.min_strength based on this instruction. This - * satisfies the ANALYSIS postcondition. 
It is not necessary to
- * update this->block.may_clear_execute_flag or
- * this->loop.may_set_return_flag, because an unlowered jump
- * instruction can't change any flags.
- */
- this->block.min_strength = ir->is_break() ? strength_break : strength_continue;
-
- /* The CONTAINED_JUMPS_LOWERED postcondition is already
- * satisfied, because jump statements can't contain other
- * statements.
- */
- }
-
- virtual void visit(class ir_return * ir)
- {
- /* Eliminate all instructions after each one, since they are
- * unreachable. This satisfies the DEAD_CODE_ELIMINATION
- * postcondition.
- */
- truncate_after_instruction(ir);
-
- /* Set this->block.min_strength based on this instruction. This
- * satisfies the ANALYSIS postcondition. It is not necessary to
- * update this->block.may_clear_execute_flag or
- * this->loop.may_set_return_flag, because an unlowered return
- * instruction can't change any flags.
- */
- this->block.min_strength = strength_return;
-
- /* The CONTAINED_JUMPS_LOWERED postcondition is already
- * satisfied, because jump statements can't contain other
- * statements.
- */
- }
-
- virtual void visit(class ir_discard * ir)
- {
- /* Nothing needs to be done. The ANALYSIS and
- * DEAD_CODE_ELIMINATION postconditions are already satisfied,
- * because discard statements are ignored by this optimization
- * pass. The CONTAINED_JUMPS_LOWERED postcondition is already
- * satisfied, because discard statements can't contain other
- * statements.
- */
- (void) ir;
- }
-
- enum jump_strength get_jump_strength(ir_instruction* ir)
- {
- if(!ir)
- return strength_none;
- else if(ir->ir_type == ir_type_loop_jump) {
- if(((ir_loop_jump*)ir)->is_break())
- return strength_break;
- else
- return strength_continue;
- } else if(ir->ir_type == ir_type_return)
- return strength_return;
- else
- return strength_none;
- }
-
- bool should_lower_jump(ir_jump* ir)
- {
- unsigned strength = get_jump_strength(ir);
- bool lower;
- switch(strength)
- {
- case strength_none:
- lower = false; /* don't change this, code relies on it */
- break;
- case strength_continue:
- lower = lower_continue;
- break;
- case strength_break:
- assert(this->loop.loop);
- /* never lower "canonical break" */
- if(ir->get_next()->is_tail_sentinel() && (this->loop.nesting_depth == 0
- || (this->loop.nesting_depth == 1 && this->loop.in_if_at_the_end_of_the_loop)))
- lower = false;
- else
- lower = lower_break;
- break;
- case strength_return:
- /* never lower a return at the end of the function */
- if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
- lower = false;
- else
- lower = this->function.lower_return;
- break;
- }
- return lower;
- }
-
- block_record visit_block(exec_list* list)
- {
- /* Note: since visiting a node may change that node's next
- * pointer, we can't use visit_exec_list(), because
- * visit_exec_list() caches the node's next pointer before
- * visiting it. So we use foreach_in_list() instead.
- *
- * foreach_in_list() isn't safe if the node being visited gets
- * removed, but fortunately this visitor doesn't do that.
- */ - - block_record saved_block = this->block; - this->block = block_record(); - foreach_in_list(ir_instruction, node, list) { - node->accept(this); - } - block_record ret = this->block; - this->block = saved_block; - return ret; - } - - virtual void visit(ir_if *ir) - { - if(this->loop.nesting_depth == 0 && ir->get_next()->is_tail_sentinel()) - this->loop.in_if_at_the_end_of_the_loop = true; - - ++this->function.nesting_depth; - ++this->loop.nesting_depth; - - block_record block_records[2]; - ir_jump* jumps[2]; - - /* Recursively lower nested jumps. This satisfies the - * CONTAINED_JUMPS_LOWERED postcondition, except in the case of - * unconditional jumps at the end of ir->then_instructions and - * ir->else_instructions, which are handled below. - */ - block_records[0] = visit_block(&ir->then_instructions); - block_records[1] = visit_block(&ir->else_instructions); - -retry: /* we get here if we put code after the if inside a branch */ - - /* Determine which of ir->then_instructions and - * ir->else_instructions end with an unconditional jump. - */ - for(unsigned i = 0; i < 2; ++i) { - exec_list& list = i ? ir->else_instructions : ir->then_instructions; - jumps[i] = 0; - if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail())) - jumps[i] = (ir_jump*)list.get_tail(); - } - - /* Loop until we have satisfied the CONTAINED_JUMPS_LOWERED - * postcondition by lowering jumps in both then_instructions and - * else_instructions. - */ - for(;;) { - /* Determine the types of the jumps that terminate - * ir->then_instructions and ir->else_instructions. - */ - jump_strength jump_strengths[2]; - - for(unsigned i = 0; i < 2; ++i) { - if(jumps[i]) { - jump_strengths[i] = block_records[i].min_strength; - assert(jump_strengths[i] == get_jump_strength(jumps[i])); - } else - jump_strengths[i] = strength_none; - } - - /* If both code paths end in a jump, and the jumps are the - * same, and we are pulling out jumps, replace them with a - * single jump that comes after the if instruction. The new - * jump will be visited next, and it will be lowered if - * necessary by the loop or conditional that encloses it. - */ - if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) { - bool unify = true; - if(jump_strengths[0] == strength_continue) - ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_continue)); - else if(jump_strengths[0] == strength_break) - ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); - /* FINISHME: unify returns with identical expressions */ - else if(jump_strengths[0] == strength_return && this->function.signature->return_type->is_void()) - ir->insert_after(new(ir) ir_return(NULL)); - else - unify = false; - - if(unify) { - jumps[0]->remove(); - jumps[1]->remove(); - this->progress = true; - - /* Update jumps[] to reflect the fact that the jumps - * are gone, and update block_records[] to reflect the - * fact that control can now flow to the next - * instruction. - */ - jumps[0] = 0; - jumps[1] = 0; - block_records[0].min_strength = strength_none; - block_records[1].min_strength = strength_none; - - /* The CONTAINED_JUMPS_LOWERED postcondition is now - * satisfied, so we can break out of the loop. 
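- *
- * For example (an illustrative sketch with made-up names), with
- * pull_out_jumps enabled,
- *
- *    if (c) { foo(); break; } else { bar(); break; }
- *
- * becomes
- *
- *    if (c) { foo(); } else { bar(); }
- *    break;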
- */
- break;
- }
- }
-
- /* lower a jump: if both need to be lowered, start with the strongest one, so that
- * we might later unify the lowered version with the other one
- */
- bool should_lower[2];
- for(unsigned i = 0; i < 2; ++i)
- should_lower[i] = should_lower_jump(jumps[i]);
-
- int lower;
- if(should_lower[1] && should_lower[0])
- lower = jump_strengths[1] > jump_strengths[0];
- else if(should_lower[0])
- lower = 0;
- else if(should_lower[1])
- lower = 1;
- else
- /* Neither code path ends in a jump that needs to be
- * lowered, so the CONTAINED_JUMPS_LOWERED postcondition
- * is satisfied and we can break out of the loop.
- */
- break;
-
- if(jump_strengths[lower] == strength_return) {
- /* To lower a return, we create a return flag (if the
- * function doesn't have one already) and add instructions
- * that: 1. store the return value (if this function has a
- * non-void return) and 2. set the return flag
- */
- insert_lowered_return((ir_return*)jumps[lower]);
- if(this->loop.loop) {
- /* If we are in a loop, replace the return instruction
- * with a break instruction, and then loop so that the
- * break instruction can be lowered if necessary.
- */
- ir_loop_jump* lowered = 0;
- lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
- /* Note: we must update block_records and jumps to
- * reflect the fact that the control path has been
- * altered from a return to a break.
- */
- block_records[lower].min_strength = strength_break;
- jumps[lower]->replace_with(lowered);
- jumps[lower] = lowered;
- } else {
- /* If we are not in a loop, we then proceed as we would
- * for a continue statement (set the execute flag to
- * false to prevent the rest of the function from
- * executing).
- */
- goto lower_continue;
- }
- this->progress = true;
- } else if(jump_strengths[lower] == strength_break) {
- /* To lower a break, we create a break flag (if the loop
- * doesn't have one already) and add an instruction that
- * sets it.
- *
- * Then we proceed as we would for a continue statement
- * (set the execute flag to false to prevent the rest of
- * the loop body from executing).
- *
- * The visit() function for the loop will ensure that the
- * break flag is checked after executing the loop body.
- */
- jumps[lower]->insert_before(create_lowered_break());
- goto lower_continue;
- } else if(jump_strengths[lower] == strength_continue) {
-lower_continue:
- /* To lower a continue, we create an execute flag (if the
- * loop doesn't have one already) and replace the continue
- * with an instruction that clears it.
- *
- * Note that this code path gets exercised when lowering
- * return statements that are not inside a loop, so
- * this->loop must be initialized even outside of loops.
- */
- ir_variable* execute_flag = this->loop.get_execute_flag();
- jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0));
- /* Note: we must update block_records and jumps to reflect
- * the fact that the control path has been altered to an
- * instruction that clears the execute flag.
- */
- jumps[lower] = 0;
- block_records[lower].min_strength = strength_always_clears_execute_flag;
- block_records[lower].may_clear_execute_flag = true;
- this->progress = true;
-
- /* Let the loop run again, in case the other branch of the
- * if needs to be lowered too.
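- *
- * As an illustrative sketch (made-up names, declaration of the
- * flag hoisted), lowering the continue in
- *
- *    while (c2) { if (c) continue; foo(); }
- *
- * yields roughly:
- *
- *    while (c2) {
- *       execute_flag = true;
- *       if (c) { execute_flag = false; }
- *       if (execute_flag) { foo(); }
- *    }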
- */
- }
- }
-
- /* move a jump out if possible */
- if(pull_out_jumps) {
- /* If one of the branches ends in a jump, and control cannot
- * fall out the bottom of the other branch, then we can move
- * the jump after the if.
- *
- * Set move_out to the branch we are moving a jump out of.
- */
- int move_out = -1;
- if(jumps[0] && block_records[1].min_strength >= strength_continue)
- move_out = 0;
- else if(jumps[1] && block_records[0].min_strength >= strength_continue)
- move_out = 1;
-
- if(move_out >= 0)
- {
- jumps[move_out]->remove();
- ir->insert_after(jumps[move_out]);
- /* Note: we must update block_records and jumps to reflect
- * the fact that the jump has been moved out of the if.
- */
- jumps[move_out] = 0;
- block_records[move_out].min_strength = strength_none;
- this->progress = true;
- }
- }
-
- /* Now satisfy the ANALYSIS postcondition by setting
- * this->block.min_strength and
- * this->block.may_clear_execute_flag based on the
- * characteristics of the two branches.
- */
- if(block_records[0].min_strength < block_records[1].min_strength)
- this->block.min_strength = block_records[0].min_strength;
- else
- this->block.min_strength = block_records[1].min_strength;
- this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag;
-
- /* Now we need to clean up the instructions that follow the
- * if.
- *
- * If those instructions are unreachable, then satisfy the
- * DEAD_CODE_ELIMINATION postcondition by eliminating them.
- * Otherwise that postcondition is already satisfied.
- */
- if(this->block.min_strength)
- truncate_after_instruction(ir);
- else if(this->block.may_clear_execute_flag)
- {
- /* If the "if" instruction might clear the execute flag, then
- * we need to guard any instructions that follow so that they
- * are only executed if the execute flag is set.
- *
- * If one of the branches of the "if" always clears the
- * execute flag, and the other branch never clears it, then
- * this is easy: just move all the instructions following the
- * "if" into the branch that never clears it.
- */
- int move_into = -1;
- if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag)
- move_into = 1;
- else if(block_records[1].min_strength && !block_records[0].may_clear_execute_flag)
- move_into = 0;
-
- if(move_into >= 0) {
- assert(!block_records[move_into].min_strength && !block_records[move_into].may_clear_execute_flag); /* otherwise, we just truncated */
-
- exec_list* list = move_into ? &ir->else_instructions : &ir->then_instructions;
- exec_node* next = ir->get_next();
- if(!next->is_tail_sentinel()) {
- move_outer_block_inside(ir, list);
-
- /* If any instructions moved, then we need to visit
- * them (since they are now inside the "if"). Since
- * block_records[move_into] is in its default state
- * (see assertion above), we can safely replace
- * block_records[move_into] with the result of this
- * analysis.
- */
- exec_list list;
- list.head = next;
- block_records[move_into] = visit_block(&list);
-
- /*
- * Then we need to re-start our jump lowering, since one
- * of the instructions we moved might be a jump that
- * needs to be lowered.
- */
- this->progress = true;
- goto retry;
- }
- } else {
- /* If we get here, then the simple case didn't apply; we
- * need to actually guard the instructions that follow.
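- *
- * As an illustrative sketch (made-up names), when the "if"
- * may clear the execute flag,
- *
- *    if (c) { ...may clear execute_flag... }
- *    foo();
- *
- * is rewritten so that the trailing code is guarded:
- *
- *    if (c) { ... }
- *    if (execute_flag) { foo(); }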
- *
- * To avoid creating unnecessarily-deep nesting, first
- * look through the instructions that follow and unwrap
- * any instructions that are already wrapped in the
- * appropriate guard.
- */
- ir_instruction* ir_after;
- for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();)
- {
- ir_if* ir_if = ir_after->as_if();
- if(ir_if && ir_if->else_instructions.is_empty()) {
- ir_dereference_variable* ir_if_cond_deref = ir_if->condition->as_dereference_variable();
- if(ir_if_cond_deref && ir_if_cond_deref->var == this->loop.execute_flag) {
- ir_instruction* ir_next = (ir_instruction*)ir_after->get_next();
- ir_after->insert_before(&ir_if->then_instructions);
- ir_after->remove();
- ir_after = ir_next;
- continue;
- }
- }
- ir_after = (ir_instruction*)ir_after->get_next();
-
- /* only set this if we find any unprotected instruction */
- this->progress = true;
- }
-
- /* Then, wrap all the instructions that follow in a single
- * guard.
- */
- if(!ir->get_next()->is_tail_sentinel()) {
- assert(this->loop.execute_flag);
- ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag));
- move_outer_block_inside(ir, &if_execute->then_instructions);
- ir->insert_after(if_execute);
- }
- }
- }
- --this->loop.nesting_depth;
- --this->function.nesting_depth;
- }
-
- virtual void visit(ir_loop *ir)
- {
- /* Visit the body of the loop, with a fresh data structure in
- * this->loop so that the analysis we do here won't bleed into
- * enclosing loops.
- *
- * We assume that all code after a loop is reachable from the
- * loop (see comments on enum jump_strength), so the
- * DEAD_CODE_ELIMINATION postcondition is automatically
- * satisfied, as is the block.min_strength portion of the
- * ANALYSIS postcondition.
- *
- * The block.may_clear_execute_flag portion of the ANALYSIS
- * postcondition is automatically satisfied because execute
- * flags do not propagate outside of loops.
- *
- * The loop.may_set_return_flag portion of the ANALYSIS
- * postcondition is handled below.
- */
- ++this->function.nesting_depth;
- loop_record saved_loop = this->loop;
- this->loop = loop_record(this->function.signature, ir);
-
- /* Recursively lower nested jumps. This satisfies the
- * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
- * an unconditional continue or return at the bottom of the
- * loop, which are handled below.
- */
- block_record body = visit_block(&ir->body_instructions);
-
- /* If the loop ends in an unconditional continue, eliminate it
- * because it is redundant.
- */
- ir_instruction *ir_last
- = (ir_instruction *) ir->body_instructions.get_tail();
- if (get_jump_strength(ir_last) == strength_continue) {
- ir_last->remove();
- }
-
- /* If the loop ends in an unconditional return, and we are
- * lowering returns, lower it.
- */
- if (this->function.lower_return)
- lower_return_unconditionally(ir_last);
-
- if(body.min_strength >= strength_break) {
- /* FINISHME: If the min_strength of the loop body is
- * strength_break or strength_return, that means that it
- * isn't a loop at all, since control flow always leaves the
- * body of the loop via break or return. In principle the
- * loop could be eliminated in this case. This optimization
- * is not implemented yet.
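- *
- * For example (illustrative, made-up names), in
- *
- *    while (c) { foo(); break; }
- *
- * the body always leaves via the break, so the construct is
- * really straight-line code.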
- */ - } - - if(this->loop.break_flag) { - /* We only get here if we are lowering breaks */ - assert (lower_break); - - /* If a break flag was generated while visiting the body of - * the loop, then at least one break was lowered, so we need - * to generate an if statement at the end of the loop that - * does a "break" if the break flag is set. The break we - * generate won't violate the CONTAINED_JUMPS_LOWERED - * postcondition, because should_lower_jump() always returns - * false for a break that happens at the end of a loop. - * - * However, if the loop already ends in a conditional or - * unconditional break, then we need to lower that break, - * because it won't be at the end of the loop anymore. - */ - lower_final_breaks(&ir->body_instructions); - - ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag)); - break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); - ir->body_instructions.push_tail(break_if); - } - - /* If the body of the loop may set the return flag, then at - * least one return was lowered to a break, so we need to ensure - * that the return flag is checked after the body of the loop is - * executed. - */ - if(this->loop.may_set_return_flag) { - assert(this->function.return_flag); - /* Generate the if statement to check the return flag */ - ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag)); - /* Note: we also need to propagate the knowledge that the - * return flag may get set to the outer context. This - * satisfies the loop.may_set_return_flag part of the - * ANALYSIS postcondition. - */ - saved_loop.may_set_return_flag = true; - if(saved_loop.loop) - /* If this loop is nested inside another one, then the if - * statement that we generated should break out of that - * loop if the return flag is set. Caller will lower that - * break statement if necessary. - */ - return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break)); - else - /* Otherwise, all we need to do is ensure that the - * instructions that follow are only executed if the - * return flag is clear. We can do that by moving those - * instructions into the else clause of the generated if - * statement. - */ - move_outer_block_inside(ir, &return_if->else_instructions); - ir->insert_after(return_if); - } - - this->loop = saved_loop; - --this->function.nesting_depth; - } - - virtual void visit(ir_function_signature *ir) - { - /* these are not strictly necessary */ - assert(!this->function.signature); - assert(!this->loop.loop); - - bool lower_return; - if (strcmp(ir->function_name(), "main") == 0) - lower_return = lower_main_return; - else - lower_return = lower_sub_return; - - function_record saved_function = this->function; - loop_record saved_loop = this->loop; - this->function = function_record(ir, lower_return); - this->loop = loop_record(ir); - - assert(!this->loop.loop); - - /* Visit the body of the function to lower any jumps that occur - * in it, except possibly an unconditional return statement at - * the end of it. - */ - visit_block(&ir->body); - - /* If the body ended in an unconditional return of non-void, - * then we don't need to lower it because it's the one canonical - * return. - * - * If the body ended in a return of void, eliminate it because - * it is redundant. 
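- *
- * For example (illustrative), in
- *
- *    void main() { foo(); return; }
- *
- * the trailing "return" is simply removed.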
- */ - if (ir->return_type->is_void() && - get_jump_strength((ir_instruction *) ir->body.get_tail())) { - ir_jump *jump = (ir_jump *) ir->body.get_tail(); - assert (jump->ir_type == ir_type_return); - jump->remove(); - } - - if(this->function.return_value) - ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value))); - - this->loop = saved_loop; - this->function = saved_function; - } - - virtual void visit(class ir_function * ir) - { - visit_block(&ir->signatures); - } -}; - -} /* anonymous namespace */ - -bool -do_lower_jumps(exec_list *instructions, bool pull_out_jumps, bool lower_sub_return, bool lower_main_return, bool lower_continue, bool lower_break) -{ - ir_lower_jumps_visitor v; - v.pull_out_jumps = pull_out_jumps; - v.lower_continue = lower_continue; - v.lower_break = lower_break; - v.lower_sub_return = lower_sub_return; - v.lower_main_return = lower_main_return; - - bool progress_ever = false; - do { - v.progress = false; - visit_exec_list(instructions, &v); - progress_ever = v.progress || progress_ever; - } while (v.progress); - - return progress_ever; -} diff --git a/src/glsl/lower_mat_op_to_vec.cpp b/src/glsl/lower_mat_op_to_vec.cpp deleted file mode 100644 index 266fdc6a250..00000000000 --- a/src/glsl/lower_mat_op_to_vec.cpp +++ /dev/null @@ -1,438 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_mat_op_to_vec.cpp - * - * Breaks matrix operation expressions down to a series of vector operations. - * - * Generally this is how we have to codegen matrix operations for a - * GPU, so this gives us the chance to constant fold operations on a - * column or row. 
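- *
- * As an illustrative sketch (not literal output of this pass),
- * with a and b of type mat2,
- *
- *    mat2 m = a + b;
- *
- * is broken down into roughly:
- *
- *    m[0] = a[0] + b[0];
- *    m[1] = a[1] + b[1];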
- */ - -#include "ir.h" -#include "ir_expression_flattening.h" -#include "compiler/glsl_types.h" - -namespace { - -class ir_mat_op_to_vec_visitor : public ir_hierarchical_visitor { -public: - ir_mat_op_to_vec_visitor() - { - this->made_progress = false; - this->mem_ctx = NULL; - } - - ir_visitor_status visit_leave(ir_assignment *); - - ir_dereference *get_column(ir_dereference *val, int col); - ir_rvalue *get_element(ir_dereference *val, int col, int row); - - void do_mul_mat_mat(ir_dereference *result, - ir_dereference *a, ir_dereference *b); - void do_mul_mat_vec(ir_dereference *result, - ir_dereference *a, ir_dereference *b); - void do_mul_vec_mat(ir_dereference *result, - ir_dereference *a, ir_dereference *b); - void do_mul_mat_scalar(ir_dereference *result, - ir_dereference *a, ir_dereference *b); - void do_equal_mat_mat(ir_dereference *result, ir_dereference *a, - ir_dereference *b, bool test_equal); - - void *mem_ctx; - bool made_progress; -}; - -} /* anonymous namespace */ - -static bool -mat_op_to_vec_predicate(ir_instruction *ir) -{ - ir_expression *expr = ir->as_expression(); - unsigned int i; - - if (!expr) - return false; - - for (i = 0; i < expr->get_num_operands(); i++) { - if (expr->operands[i]->type->is_matrix()) - return true; - } - - return false; -} - -bool -do_mat_op_to_vec(exec_list *instructions) -{ - ir_mat_op_to_vec_visitor v; - - /* Pull out any matrix expression to a separate assignment to a - * temp. This will make our handling of the breakdown to - * operations on the matrix's vector components much easier. - */ - do_expression_flattening(instructions, mat_op_to_vec_predicate); - - visit_list_elements(&v, instructions); - - return v.made_progress; -} - -ir_rvalue * -ir_mat_op_to_vec_visitor::get_element(ir_dereference *val, int col, int row) -{ - val = get_column(val, col); - - return new(mem_ctx) ir_swizzle(val, row, 0, 0, 0, 1); -} - -ir_dereference * -ir_mat_op_to_vec_visitor::get_column(ir_dereference *val, int row) -{ - val = val->clone(mem_ctx, NULL); - - if (val->type->is_matrix()) { - val = new(mem_ctx) ir_dereference_array(val, - new(mem_ctx) ir_constant(row)); - } - - return val; -} - -void -ir_mat_op_to_vec_visitor::do_mul_mat_mat(ir_dereference *result, - ir_dereference *a, - ir_dereference *b) -{ - unsigned b_col, i; - ir_assignment *assign; - ir_expression *expr; - - for (b_col = 0; b_col < b->type->matrix_columns; b_col++) { - /* first column */ - expr = new(mem_ctx) ir_expression(ir_binop_mul, - get_column(a, 0), - get_element(b, b_col, 0)); - - /* following columns */ - for (i = 1; i < a->type->matrix_columns; i++) { - ir_expression *mul_expr; - - mul_expr = new(mem_ctx) ir_expression(ir_binop_mul, - get_column(a, i), - get_element(b, b_col, i)); - expr = new(mem_ctx) ir_expression(ir_binop_add, - expr, - mul_expr); - } - - assign = new(mem_ctx) ir_assignment(get_column(result, b_col), expr); - base_ir->insert_before(assign); - } -} - -void -ir_mat_op_to_vec_visitor::do_mul_mat_vec(ir_dereference *result, - ir_dereference *a, - ir_dereference *b) -{ - unsigned i; - ir_assignment *assign; - ir_expression *expr; - - /* first column */ - expr = new(mem_ctx) ir_expression(ir_binop_mul, - get_column(a, 0), - get_element(b, 0, 0)); - - /* following columns */ - for (i = 1; i < a->type->matrix_columns; i++) { - ir_expression *mul_expr; - - mul_expr = new(mem_ctx) ir_expression(ir_binop_mul, - get_column(a, i), - get_element(b, 0, i)); - expr = new(mem_ctx) ir_expression(ir_binop_add, expr, mul_expr); - } - - result = result->clone(mem_ctx, NULL); - 
assign = new(mem_ctx) ir_assignment(result, expr); - base_ir->insert_before(assign); -} - -void -ir_mat_op_to_vec_visitor::do_mul_vec_mat(ir_dereference *result, - ir_dereference *a, - ir_dereference *b) -{ - unsigned i; - - for (i = 0; i < b->type->matrix_columns; i++) { - ir_rvalue *column_result; - ir_expression *column_expr; - ir_assignment *column_assign; - - column_result = result->clone(mem_ctx, NULL); - column_result = new(mem_ctx) ir_swizzle(column_result, i, 0, 0, 0, 1); - - column_expr = new(mem_ctx) ir_expression(ir_binop_dot, - a->clone(mem_ctx, NULL), - get_column(b, i)); - - column_assign = new(mem_ctx) ir_assignment(column_result, - column_expr); - base_ir->insert_before(column_assign); - } -} - -void -ir_mat_op_to_vec_visitor::do_mul_mat_scalar(ir_dereference *result, - ir_dereference *a, - ir_dereference *b) -{ - unsigned i; - - for (i = 0; i < a->type->matrix_columns; i++) { - ir_expression *column_expr; - ir_assignment *column_assign; - - column_expr = new(mem_ctx) ir_expression(ir_binop_mul, - get_column(a, i), - b->clone(mem_ctx, NULL)); - - column_assign = new(mem_ctx) ir_assignment(get_column(result, i), - column_expr); - base_ir->insert_before(column_assign); - } -} - -void -ir_mat_op_to_vec_visitor::do_equal_mat_mat(ir_dereference *result, - ir_dereference *a, - ir_dereference *b, - bool test_equal) -{ - /* This essentially implements the following GLSL: - * - * bool equal(mat4 a, mat4 b) - * { - * return !any(bvec4(a[0] != b[0], - * a[1] != b[1], - * a[2] != b[2], - * a[3] != b[3]); - * } - * - * bool nequal(mat4 a, mat4 b) - * { - * return any(bvec4(a[0] != b[0], - * a[1] != b[1], - * a[2] != b[2], - * a[3] != b[3]); - * } - */ - const unsigned columns = a->type->matrix_columns; - const glsl_type *const bvec_type = - glsl_type::get_instance(GLSL_TYPE_BOOL, columns, 1); - - ir_variable *const tmp_bvec = - new(this->mem_ctx) ir_variable(bvec_type, "mat_cmp_bvec", - ir_var_temporary); - this->base_ir->insert_before(tmp_bvec); - - for (unsigned i = 0; i < columns; i++) { - ir_expression *const cmp = - new(this->mem_ctx) ir_expression(ir_binop_any_nequal, - get_column(a, i), - get_column(b, i)); - - ir_dereference *const lhs = - new(this->mem_ctx) ir_dereference_variable(tmp_bvec); - - ir_assignment *const assign = - new(this->mem_ctx) ir_assignment(lhs, cmp, NULL, (1U << i)); - - this->base_ir->insert_before(assign); - } - - ir_rvalue *const val = new(this->mem_ctx) ir_dereference_variable(tmp_bvec); - uint8_t vec_elems = val->type->vector_elements; - ir_expression *any = - new(this->mem_ctx) ir_expression(ir_binop_any_nequal, val, - new(this->mem_ctx) ir_constant(false, - vec_elems)); - - if (test_equal) - any = new(this->mem_ctx) ir_expression(ir_unop_logic_not, any); - - ir_assignment *const assign = - new(mem_ctx) ir_assignment(result->clone(mem_ctx, NULL), any); - base_ir->insert_before(assign); -} - -static bool -has_matrix_operand(const ir_expression *expr, unsigned &columns) -{ - for (unsigned i = 0; i < expr->get_num_operands(); i++) { - if (expr->operands[i]->type->is_matrix()) { - columns = expr->operands[i]->type->matrix_columns; - return true; - } - } - - return false; -} - - -ir_visitor_status -ir_mat_op_to_vec_visitor::visit_leave(ir_assignment *orig_assign) -{ - ir_expression *orig_expr = orig_assign->rhs->as_expression(); - unsigned int i, matrix_columns = 1; - ir_dereference *op[2]; - - if (!orig_expr) - return visit_continue; - - if (!has_matrix_operand(orig_expr, matrix_columns)) - return visit_continue; - - assert(orig_expr->get_num_operands() 
<= 2);
-
- mem_ctx = ralloc_parent(orig_assign);
-
- ir_dereference_variable *result =
- orig_assign->lhs->as_dereference_variable();
- assert(result);
-
- /* Store the expression operands in temps so we can use them
- * multiple times.
- */
- for (i = 0; i < orig_expr->get_num_operands(); i++) {
- ir_assignment *assign;
- ir_dereference *deref = orig_expr->operands[i]->as_dereference();
-
- /* Avoid making a temporary if we don't need to, to avoid aliasing. */
- if (deref &&
- deref->variable_referenced() != result->variable_referenced()) {
- op[i] = deref;
- continue;
- }
-
- /* Otherwise (generally when the operand is not a dereference),
- * store the operand in a temporary.
- */
- ir_variable *var = new(mem_ctx) ir_variable(orig_expr->operands[i]->type,
- "mat_op_to_vec",
- ir_var_temporary);
- base_ir->insert_before(var);
-
- /* Note that we use this dereference for the assignment. That means
- * that others that want to use op[i] have to clone the deref.
- */
- op[i] = new(mem_ctx) ir_dereference_variable(var);
- assign = new(mem_ctx) ir_assignment(op[i], orig_expr->operands[i]);
- base_ir->insert_before(assign);
- }
-
- /* OK, time to break down this matrix operation. */
- switch (orig_expr->operation) {
- case ir_unop_d2f:
- case ir_unop_f2d:
- case ir_unop_neg: {
- /* Apply the operation to each column. */
- for (i = 0; i < matrix_columns; i++) {
- ir_expression *column_expr;
- ir_assignment *column_assign;
-
- column_expr = new(mem_ctx) ir_expression(orig_expr->operation,
- get_column(op[0], i));
-
- column_assign = new(mem_ctx) ir_assignment(get_column(result, i),
- column_expr);
- assert(column_assign->write_mask != 0);
- base_ir->insert_before(column_assign);
- }
- break;
- }
- case ir_binop_add:
- case ir_binop_sub:
- case ir_binop_div:
- case ir_binop_mod: {
- /* For most operations, the matrix version just goes
- * column-wise through, applying the operation to each column.
- */ - for (i = 0; i < matrix_columns; i++) { - ir_expression *column_expr; - ir_assignment *column_assign; - - column_expr = new(mem_ctx) ir_expression(orig_expr->operation, - get_column(op[0], i), - get_column(op[1], i)); - - column_assign = new(mem_ctx) ir_assignment(get_column(result, i), - column_expr); - assert(column_assign->write_mask != 0); - base_ir->insert_before(column_assign); - } - break; - } - case ir_binop_mul: - if (op[0]->type->is_matrix()) { - if (op[1]->type->is_matrix()) { - do_mul_mat_mat(result, op[0], op[1]); - } else if (op[1]->type->is_vector()) { - do_mul_mat_vec(result, op[0], op[1]); - } else { - assert(op[1]->type->is_scalar()); - do_mul_mat_scalar(result, op[0], op[1]); - } - } else { - assert(op[1]->type->is_matrix()); - if (op[0]->type->is_vector()) { - do_mul_vec_mat(result, op[0], op[1]); - } else { - assert(op[0]->type->is_scalar()); - do_mul_mat_scalar(result, op[1], op[0]); - } - } - break; - - case ir_binop_all_equal: - case ir_binop_any_nequal: - do_equal_mat_mat(result, op[1], op[0], - (orig_expr->operation == ir_binop_all_equal)); - break; - - default: - printf("FINISHME: Handle matrix operation for %s\n", - orig_expr->operator_string()); - abort(); - } - orig_assign->remove(); - this->made_progress = true; - - return visit_continue; -} diff --git a/src/glsl/lower_named_interface_blocks.cpp b/src/glsl/lower_named_interface_blocks.cpp deleted file mode 100644 index f29eba4f75f..00000000000 --- a/src/glsl/lower_named_interface_blocks.cpp +++ /dev/null @@ -1,280 +0,0 @@ -/* - * Copyright (c) 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_named_interface_blocks.cpp - * - * This lowering pass converts all interface blocks with instance names - * into interface blocks without an instance name. - * - * For example, the following shader: - * - * out block { - * float block_var; - * } inst_name; - * - * main() - * { - * inst_name.block_var = 0.0; - * } - * - * Is rewritten to: - * - * out block { - * float block_var; - * }; - * - * main() - * { - * block_var = 0.0; - * } - * - * This takes place after the shader code has already been verified with - * the interface name in place. - * - * The linking phase will use the interface block name rather than the - * interface's instance name when linking interfaces. - * - * This modification to the ir allows our currently existing dead code - * elimination to work with interface blocks without changes. 
- */
-
-#include "glsl_symbol_table.h"
-#include "ir.h"
-#include "ir_optimization.h"
-#include "ir_rvalue_visitor.h"
-#include "program/hash_table.h"
-
-static const glsl_type *
-process_array_type(const glsl_type *type, unsigned idx)
-{
- const glsl_type *element_type = type->fields.array;
- if (element_type->is_array()) {
- const glsl_type *new_array_type = process_array_type(element_type, idx);
- return glsl_type::get_array_instance(new_array_type, type->length);
- } else {
- return glsl_type::get_array_instance(
- element_type->fields.structure[idx].type, type->length);
- }
-}
-
-static ir_rvalue *
-process_array_ir(void * const mem_ctx,
- ir_dereference_array *deref_array_prev,
- ir_rvalue *deref_var)
-{
- ir_dereference_array *deref_array =
- deref_array_prev->array->as_dereference_array();
-
- if (deref_array == NULL) {
- return new(mem_ctx) ir_dereference_array(deref_var,
- deref_array_prev->array_index);
- } else {
- deref_array = (ir_dereference_array *) process_array_ir(mem_ctx,
- deref_array,
- deref_var);
- return new(mem_ctx) ir_dereference_array(deref_array,
- deref_array_prev->array_index);
- }
-}
-
-namespace {
-
-class flatten_named_interface_blocks_declarations : public ir_rvalue_visitor
-{
-public:
- void * const mem_ctx;
- hash_table *interface_namespace;
-
- flatten_named_interface_blocks_declarations(void *mem_ctx)
- : mem_ctx(mem_ctx),
- interface_namespace(NULL)
- {
- }
-
- void run(exec_list *instructions);
-
- virtual ir_visitor_status visit_leave(ir_assignment *);
- virtual void handle_rvalue(ir_rvalue **rvalue);
-};
-
-} /* anonymous namespace */
-
-void
-flatten_named_interface_blocks_declarations::run(exec_list *instructions)
-{
- interface_namespace = hash_table_ctor(0, hash_table_string_hash,
- hash_table_string_compare);
-
- /* First pass: adjust instance block variables with an instance name
- * to not have an instance name.
- *
- * The interface block variables are stored in the interface_namespace
- * hash table so they can be used in the second pass.
- */
- foreach_in_list_safe(ir_instruction, node, instructions) {
- ir_variable *var = node->as_variable();
- if (!var || !var->is_interface_instance())
- continue;
-
- /* It should be possible to handle uniforms during this pass,
- * but this will require changes to the other uniform block
- * support code.
- */
- if (var->data.mode == ir_var_uniform ||
- var->data.mode == ir_var_shader_storage)
- continue;
-
- const glsl_type * iface_t = var->type->without_array();
- exec_node *insert_pos = var;
-
- assert (iface_t->is_interface());
-
- for (unsigned i = 0; i < iface_t->length; i++) {
- const char * field_name = iface_t->fields.structure[i].name;
- char *iface_field_name =
- ralloc_asprintf(mem_ctx, "%s %s.%s.%s",
- var->data.mode == ir_var_shader_in ? 
"in" : "out", - iface_t->name, var->name, field_name); - - ir_variable *found_var = - (ir_variable *) hash_table_find(interface_namespace, - iface_field_name); - if (!found_var) { - ir_variable *new_var; - char *var_name = - ralloc_strdup(mem_ctx, iface_t->fields.structure[i].name); - if (!var->type->is_array()) { - new_var = - new(mem_ctx) ir_variable(iface_t->fields.structure[i].type, - var_name, - (ir_variable_mode) var->data.mode); - new_var->data.from_named_ifc_block_nonarray = 1; - } else { - const glsl_type *new_array_type = - process_array_type(var->type, i); - new_var = - new(mem_ctx) ir_variable(new_array_type, - var_name, - (ir_variable_mode) var->data.mode); - new_var->data.from_named_ifc_block_array = 1; - } - new_var->data.location = iface_t->fields.structure[i].location; - new_var->data.explicit_location = (new_var->data.location >= 0); - new_var->data.interpolation = - iface_t->fields.structure[i].interpolation; - new_var->data.centroid = iface_t->fields.structure[i].centroid; - new_var->data.sample = iface_t->fields.structure[i].sample; - new_var->data.patch = iface_t->fields.structure[i].patch; - new_var->data.stream = var->data.stream; - new_var->data.how_declared = var->data.how_declared; - - new_var->init_interface_type(iface_t); - hash_table_insert(interface_namespace, new_var, - iface_field_name); - insert_pos->insert_after(new_var); - insert_pos = new_var; - } - } - var->remove(); - } - - /* Second pass: visit all ir_dereference_record instances, and if they - * reference an interface block, then flatten the refererence out. - */ - visit_list_elements(this, instructions); - hash_table_dtor(interface_namespace); - interface_namespace = NULL; -} - -ir_visitor_status -flatten_named_interface_blocks_declarations::visit_leave(ir_assignment *ir) -{ - ir_dereference_record *lhs_rec = ir->lhs->as_dereference_record(); - if (lhs_rec) { - ir_rvalue *lhs_rec_tmp = lhs_rec; - handle_rvalue(&lhs_rec_tmp); - if (lhs_rec_tmp != lhs_rec) { - ir->set_lhs(lhs_rec_tmp); - } - } - return rvalue_visit(ir); -} - -void -flatten_named_interface_blocks_declarations::handle_rvalue(ir_rvalue **rvalue) -{ - if (*rvalue == NULL) - return; - - ir_dereference_record *ir = (*rvalue)->as_dereference_record(); - if (ir == NULL) - return; - - ir_variable *var = ir->variable_referenced(); - if (var == NULL) - return; - - if (!var->is_interface_instance()) - return; - - /* It should be possible to handle uniforms during this pass, - * but, this will require changes to the other uniform block - * support code. - */ - if (var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage) - return; - - if (var->get_interface_type() != NULL) { - char *iface_field_name = - ralloc_asprintf(mem_ctx, "%s %s.%s.%s", - var->data.mode == ir_var_shader_in ? 
"in" : "out", - var->get_interface_type()->name, - var->name, ir->field); - /* Find the variable in the set of flattened interface blocks */ - ir_variable *found_var = - (ir_variable *) hash_table_find(interface_namespace, - iface_field_name); - assert(found_var); - - ir_dereference_variable *deref_var = - new(mem_ctx) ir_dereference_variable(found_var); - - ir_dereference_array *deref_array = - ir->record->as_dereference_array(); - if (deref_array != NULL) { - *rvalue = process_array_ir(mem_ctx, deref_array, - (ir_rvalue *)deref_var); - } else { - *rvalue = deref_var; - } - } -} - -void -lower_named_interface_blocks(void *mem_ctx, gl_shader *shader) -{ - flatten_named_interface_blocks_declarations v_decl(mem_ctx); - v_decl.run(shader->ir); -} - diff --git a/src/glsl/lower_noise.cpp b/src/glsl/lower_noise.cpp deleted file mode 100644 index 85f59b675e0..00000000000 --- a/src/glsl/lower_noise.cpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_noise.cpp - * IR lower pass to remove noise opcodes. - * - * \author Ian Romanick - */ - -#include "ir.h" -#include "ir_rvalue_visitor.h" - -class lower_noise_visitor : public ir_rvalue_visitor { -public: - lower_noise_visitor() : progress(false) - { - /* empty */ - } - - void handle_rvalue(ir_rvalue **rvalue) - { - if (!*rvalue) - return; - - ir_expression *expr = (*rvalue)->as_expression(); - if (!expr) - return; - - /* In the future, ir_unop_noise may be replaced by a call to a function - * that implements noise. No hardware has a noise instruction. 
- */ - if (expr->operation == ir_unop_noise) { - *rvalue = ir_constant::zero(ralloc_parent(expr), expr->type); - this->progress = true; - } - } - - bool progress; -}; - - -bool -lower_noise(exec_list *instructions) -{ - lower_noise_visitor v; - - visit_list_elements(&v, instructions); - - return v.progress; -} diff --git a/src/glsl/lower_offset_array.cpp b/src/glsl/lower_offset_array.cpp deleted file mode 100644 index 96486c3a711..00000000000 --- a/src/glsl/lower_offset_array.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_offset_array.cpp - * - * IR lower pass to decompose ir_texture ir_tg4 with an array of offsets - * into four ir_tg4s with a single ivec2 offset, select the .w component of each, - * and return those four values packed into a gvec4. 
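- *
- * As an illustrative GLSL sketch (made-up names, assuming the
- * ARB_gpu_shader5 gather built-ins),
- *
- *    vec4 r = textureGatherOffsets(s, p, offsets);
- *
- * is effectively lowered to:
- *
- *    vec4 r;
- *    r.x = textureGatherOffset(s, p, offsets[0]).w;
- *    r.y = textureGatherOffset(s, p, offsets[1]).w;
- *    r.z = textureGatherOffset(s, p, offsets[2]).w;
- *    r.w = textureGatherOffset(s, p, offsets[3]).w;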
- * - * \author Chris Forbes - */ - -#include "compiler/glsl_types.h" -#include "ir.h" -#include "ir_builder.h" -#include "ir_optimization.h" -#include "ir_rvalue_visitor.h" - -using namespace ir_builder; - -class lower_offset_array_visitor : public ir_rvalue_visitor { -public: - lower_offset_array_visitor() - { - progress = false; - } - - void handle_rvalue(ir_rvalue **rv); - - bool progress; -}; - -void -lower_offset_array_visitor::handle_rvalue(ir_rvalue **rv) -{ - if (*rv == NULL || (*rv)->ir_type != ir_type_texture) - return; - - ir_texture *ir = (ir_texture *) *rv; - if (ir->op != ir_tg4 || !ir->offset || !ir->offset->type->is_array()) - return; - - void *mem_ctx = ralloc_parent(ir); - - ir_variable *var = - new (mem_ctx) ir_variable(ir->type, "result", ir_var_temporary); - base_ir->insert_before(var); - - for (int i = 0; i < 4; i++) { - ir_texture *tex = ir->clone(mem_ctx, NULL); - tex->offset = new (mem_ctx) ir_dereference_array(tex->offset, - new (mem_ctx) ir_constant(i)); - - base_ir->insert_before(assign(var, swizzle_w(tex), 1 << i)); - } - - *rv = new (mem_ctx) ir_dereference_variable(var); - - progress = true; -} - -bool -lower_offset_arrays(exec_list *instructions) -{ - lower_offset_array_visitor v; - - visit_list_elements(&v, instructions); - - return v.progress; -} diff --git a/src/glsl/lower_output_reads.cpp b/src/glsl/lower_output_reads.cpp deleted file mode 100644 index 79488df2932..00000000000 --- a/src/glsl/lower_output_reads.cpp +++ /dev/null @@ -1,178 +0,0 @@ -/* - * Copyright © 2012 Vincent Lejeune - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "ir.h" -#include "program/hash_table.h" - -/** - * \file lower_output_reads.cpp - * - * In GLSL, shader output variables (such as varyings) can be both read and - * written. However, on some hardware, reading an output register causes - * trouble. - * - * This pass creates temporary shadow copies of every (used) shader output, - * and replaces all accesses to use those instead. It also adds code to the - * main() function to copy the final values to the actual shader outputs. - */ - -namespace { - -class output_read_remover : public ir_hierarchical_visitor { -protected: - /** - * A hash table mapping from the original ir_variable shader outputs - * (ir_var_shader_out mode) to the new temporaries to be used instead. 
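- *
- * As an illustrative sketch of the overall transformation (not
- * literal output, made-up names), a shader such as
- *
- *    out vec4 color;
- *    void main() { color = a; color *= b; }
- *
- * is rewritten along the lines of:
- *
- *    out vec4 color;
- *    void main() { vec4 tmp; tmp = a; tmp *= b; color = tmp; }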
- */
- hash_table *replacements;
-
- void *mem_ctx;
-
- unsigned stage;
-public:
- output_read_remover(unsigned stage);
- ~output_read_remover();
- virtual ir_visitor_status visit(class ir_dereference_variable *);
- virtual ir_visitor_status visit_leave(class ir_emit_vertex *);
- virtual ir_visitor_status visit_leave(class ir_return *);
- virtual ir_visitor_status visit_leave(class ir_function_signature *);
-};
-
-} /* anonymous namespace */
-
-/**
- * Hash function for the output variables - computes the hash of the name.
- * NOTE: We're using the name string to ensure that the hash doesn't depend
- * on any random factors; otherwise the output_read_remover could emit the
- * assignments in a random order.
- *
- * NOTE: If you want to reuse this function, please take into account that
- * the names of the variables are generally non-unique.
- */
-static unsigned
-hash_table_var_hash(const void *key)
-{
- const ir_variable * var = static_cast<const ir_variable *>(key);
- return hash_table_string_hash(var->name);
-}
-
-output_read_remover::output_read_remover(unsigned stage)
-{
- this->stage = stage;
- mem_ctx = ralloc_context(NULL);
- replacements =
- hash_table_ctor(0, hash_table_var_hash, hash_table_pointer_compare);
-}
-
-output_read_remover::~output_read_remover()
-{
- hash_table_dtor(replacements);
- ralloc_free(mem_ctx);
-}
-
-ir_visitor_status
-output_read_remover::visit(ir_dereference_variable *ir)
-{
- if (ir->var->data.mode != ir_var_shader_out)
- return visit_continue;
- if (stage == MESA_SHADER_TESS_CTRL)
- return visit_continue;
-
- ir_variable *temp = (ir_variable *) hash_table_find(replacements, ir->var);
-
- /* If we don't have an existing temporary, create one. */
- if (temp == NULL) {
- void *var_ctx = ralloc_parent(ir->var);
- temp = new(var_ctx) ir_variable(ir->var->type, ir->var->name,
- ir_var_temporary);
- hash_table_insert(replacements, temp, ir->var);
- ir->var->insert_after(temp);
- }
-
- /* Update the dereference to use the temporary */
- ir->var = temp;
-
- return visit_continue;
-}
-
-/**
- * Create an assignment to copy a temporary value back to the actual output.
- */
-static ir_assignment *
-copy(void *ctx, ir_variable *output, ir_variable *temp)
-{
- ir_dereference_variable *lhs = new(ctx) ir_dereference_variable(output);
- ir_dereference_variable *rhs = new(ctx) ir_dereference_variable(temp);
- return new(ctx) ir_assignment(lhs, rhs);
-}
-
-/** Insert a copy-back assignment before a "return" statement or a call to
- * EmitVertex().
- */ -static void -emit_return_copy(const void *key, void *data, void *closure) -{ - ir_return *ir = (ir_return *) closure; - ir->insert_before(copy(ir, (ir_variable *) key, (ir_variable *) data)); -} - -/** Insert a copy-back assignment at the end of the main() function */ -static void -emit_main_copy(const void *key, void *data, void *closure) -{ - ir_function_signature *sig = (ir_function_signature *) closure; - sig->body.push_tail(copy(sig, (ir_variable *) key, (ir_variable *) data)); -} - -ir_visitor_status -output_read_remover::visit_leave(ir_return *ir) -{ - hash_table_call_foreach(replacements, emit_return_copy, ir); - return visit_continue; -} - -ir_visitor_status -output_read_remover::visit_leave(ir_emit_vertex *ir) -{ - hash_table_call_foreach(replacements, emit_return_copy, ir); - hash_table_clear(replacements); - return visit_continue; -} - -ir_visitor_status -output_read_remover::visit_leave(ir_function_signature *sig) -{ - if (strcmp(sig->function_name(), "main") != 0) - return visit_continue; - - hash_table_call_foreach(replacements, emit_main_copy, sig); - return visit_continue; -} - -void -lower_output_reads(unsigned stage, exec_list *instructions) -{ - output_read_remover v(stage); - visit_list_elements(&v, instructions); -} diff --git a/src/glsl/lower_packed_varyings.cpp b/src/glsl/lower_packed_varyings.cpp deleted file mode 100644 index 8d1eb1725d5..00000000000 --- a/src/glsl/lower_packed_varyings.cpp +++ /dev/null @@ -1,749 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_varyings_to_packed.cpp - * - * This lowering pass generates GLSL code that manually packs varyings into - * vec4 slots, for the benefit of back-ends that don't support packed varyings - * natively. - * - * For example, the following shader: - * - * out mat3x2 foo; // location=4, location_frac=0 - * out vec3 bar[2]; // location=5, location_frac=2 - * - * main() - * { - * ... - * } - * - * Is rewritten to: - * - * mat3x2 foo; - * vec3 bar[2]; - * out vec4 packed4; // location=4, location_frac=0 - * out vec4 packed5; // location=5, location_frac=0 - * out vec4 packed6; // location=6, location_frac=0 - * - * main() - * { - * ... 
- * packed4.xy = foo[0]; - * packed4.zw = foo[1]; - * packed5.xy = foo[2]; - * packed5.zw = bar[0].xy; - * packed6.x = bar[0].z; - * packed6.yzw = bar[1]; - * } - * - * This lowering pass properly handles "double parking" of a varying vector - * across two varying slots. For example, in the code above, two of the - * components of bar[0] are stored in packed5, and the remaining component is - * stored in packed6. - * - * Note that in theory, the extra instructions may cause some loss of - * performance. However, hopefully in most cases the performance loss will - * either be absorbed by a later optimization pass, or it will be offset by - * memory bandwidth savings (because fewer varyings are used). - * - * This lowering pass also packs flat floats, ints, and uints together, by - * using ivec4 as the base type of flat "varyings", and using appropriate - * casts to convert floats and uints into ints. - * - * This lowering pass also handles varyings whose type is a struct or an array - * of struct. Structs are packed in order and with no gaps, so there may be a - * performance penalty due to structure elements being double-parked. - * - * Lowering of geometry shader inputs is slightly more complex, since geometry - * inputs are always arrays, so we need to lower arrays to arrays. For - * example, the following input: - * - * in struct Foo { - * float f; - * vec3 v; - * vec2 a[2]; - * } arr[3]; // location=4, location_frac=0 - * - * Would get lowered like this if it occurred in a fragment shader: - * - * struct Foo { - * float f; - * vec3 v; - * vec2 a[2]; - * } arr[3]; - * in vec4 packed4; // location=4, location_frac=0 - * in vec4 packed5; // location=5, location_frac=0 - * in vec4 packed6; // location=6, location_frac=0 - * in vec4 packed7; // location=7, location_frac=0 - * in vec4 packed8; // location=8, location_frac=0 - * in vec4 packed9; // location=9, location_frac=0 - * - * main() - * { - * arr[0].f = packed4.x; - * arr[0].v = packed4.yzw; - * arr[0].a[0] = packed5.xy; - * arr[0].a[1] = packed5.zw; - * arr[1].f = packed6.x; - * arr[1].v = packed6.yzw; - * arr[1].a[0] = packed7.xy; - * arr[1].a[1] = packed7.zw; - * arr[2].f = packed8.x; - * arr[2].v = packed8.yzw; - * arr[2].a[0] = packed9.xy; - * arr[2].a[1] = packed9.zw; - * ... - * } - * - * But it would get lowered like this if it occurred in a geometry shader: - * - * struct Foo { - * float f; - * vec3 v; - * vec2 a[2]; - * } arr[3]; - * in vec4 packed4[3]; // location=4, location_frac=0 - * in vec4 packed5[3]; // location=5, location_frac=0 - * - * main() - * { - * arr[0].f = packed4[0].x; - * arr[0].v = packed4[0].yzw; - * arr[0].a[0] = packed5[0].xy; - * arr[0].a[1] = packed5[0].zw; - * arr[1].f = packed4[1].x; - * arr[1].v = packed4[1].yzw; - * arr[1].a[0] = packed5[1].xy; - * arr[1].a[1] = packed5[1].zw; - * arr[2].f = packed4[2].x; - * arr[2].v = packed4[2].yzw; - * arr[2].a[0] = packed5[2].xy; - * arr[2].a[1] = packed5[2].zw; - * ... - * } - */ - -#include "glsl_symbol_table.h" -#include "ir.h" -#include "ir_builder.h" -#include "ir_optimization.h" -#include "program/prog_instruction.h" - -using namespace ir_builder; - -namespace { - -/** - * Visitor that performs varying packing. For each varying declared in the - * shader, this visitor determines whether it needs to be packed. If so, it - * demotes it to an ordinary global, creates new packed varyings, and - * generates assignments to convert between the original varying and the - * packed varying. 
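A minimal standalone sketch of the "fine location" arithmetic this pass relies on: locations are counted in floats rather than vec4s, so a varying at (location, location_frac) starts at fine location location * 4 + location_frac. It walks the mat3x2 from the example above; the value of VARYING_SLOT_VAR0 is an assumption for illustration only.

#include <cstdio>

int main()
{
    const unsigned VARYING_SLOT_VAR0 = 31; /* assumed value, illustration only */

    /* mat3x2 foo at relative location 4, component 0: three vec2 columns,
     * each advancing the fine location by two floats. */
    unsigned fine = (VARYING_SLOT_VAR0 + 4) * 4 + 0;

    for (unsigned col = 0; col < 3; col++) {
        printf("foo[%u] -> slot %u, components %u-%u\n",
               col, fine / 4, fine % 4, fine % 4 + 1);
        fine += 2;
    }
    return 0;
}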
- */ -class lower_packed_varyings_visitor -{ -public: - lower_packed_varyings_visitor(void *mem_ctx, unsigned locations_used, - ir_variable_mode mode, - unsigned gs_input_vertices, - exec_list *out_instructions, - exec_list *out_variables); - - void run(struct gl_shader *shader); - -private: - void bitwise_assign_pack(ir_rvalue *lhs, ir_rvalue *rhs); - void bitwise_assign_unpack(ir_rvalue *lhs, ir_rvalue *rhs); - unsigned lower_rvalue(ir_rvalue *rvalue, unsigned fine_location, - ir_variable *unpacked_var, const char *name, - bool gs_input_toplevel, unsigned vertex_index); - unsigned lower_arraylike(ir_rvalue *rvalue, unsigned array_size, - unsigned fine_location, - ir_variable *unpacked_var, const char *name, - bool gs_input_toplevel, unsigned vertex_index); - ir_dereference *get_packed_varying_deref(unsigned location, - ir_variable *unpacked_var, - const char *name, - unsigned vertex_index); - bool needs_lowering(ir_variable *var); - - /** - * Memory context used to allocate new instructions for the shader. - */ - void * const mem_ctx; - - /** - * Number of generic varying slots which are used by this shader. This is - * used to allocate temporary intermediate data structures. If any varying - * used by this shader has a location greater than or equal to - * VARYING_SLOT_VAR0 + locations_used, an assertion will fire. - */ - const unsigned locations_used; - - /** - * Array of pointers to the packed varyings that have been created for each - * generic varying slot. NULL entries in this array indicate varying slots - * for which a packed varying has not been created yet. - */ - ir_variable **packed_varyings; - - /** - * Type of varying which is being lowered in this pass (either - * ir_var_shader_in or ir_var_shader_out). - */ - const ir_variable_mode mode; - - /** - * If we are currently lowering geometry shader inputs, the number of input - * vertices the geometry shader accepts. Otherwise zero. - */ - const unsigned gs_input_vertices; - - /** - * Exec list into which the visitor should insert the packing instructions. - * Caller provides this list; it should insert the instructions into the - * appropriate place in the shader once the visitor has finished running. - */ - exec_list *out_instructions; - - /** - * Exec list into which the visitor should insert any new variables. - */ - exec_list *out_variables; -}; - -} /* anonymous namespace */ - -lower_packed_varyings_visitor::lower_packed_varyings_visitor( - void *mem_ctx, unsigned locations_used, ir_variable_mode mode, - unsigned gs_input_vertices, exec_list *out_instructions, - exec_list *out_variables) - : mem_ctx(mem_ctx), - locations_used(locations_used), - packed_varyings((ir_variable **) - rzalloc_array_size(mem_ctx, sizeof(*packed_varyings), - locations_used)), - mode(mode), - gs_input_vertices(gs_input_vertices), - out_instructions(out_instructions), - out_variables(out_variables) -{ -} - -void -lower_packed_varyings_visitor::run(struct gl_shader *shader) -{ - foreach_in_list(ir_instruction, node, shader->ir) { - ir_variable *var = node->as_variable(); - if (var == NULL) - continue; - - if (var->data.mode != this->mode || - var->data.location < VARYING_SLOT_VAR0 || - !this->needs_lowering(var)) - continue; - - /* This lowering pass is only capable of packing floats and ints - * together when their interpolation mode is "flat". Therefore, to be - * safe, caller should ensure that integral varyings always use flat - * interpolation, even when this is not required by GLSL. 
- */ - assert(var->data.interpolation == INTERP_QUALIFIER_FLAT || - !var->type->contains_integer()); - - /* Clone the variable for program resource list before - * it gets modified and lost. - */ - if (!shader->packed_varyings) - shader->packed_varyings = new (shader) exec_list; - - shader->packed_varyings->push_tail(var->clone(shader, NULL)); - - /* Change the old varying into an ordinary global. */ - assert(var->data.mode != ir_var_temporary); - var->data.mode = ir_var_auto; - - /* Create a reference to the old varying. */ - ir_dereference_variable *deref - = new(this->mem_ctx) ir_dereference_variable(var); - - /* Recursively pack or unpack it. */ - this->lower_rvalue(deref, var->data.location * 4 + var->data.location_frac, var, - var->name, this->gs_input_vertices != 0, 0); - } -} - -#define SWIZZLE_ZWZW MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W) - -/** - * Make an ir_assignment from \c rhs to \c lhs, performing appropriate - * bitcasts if necessary to match up types. - * - * This function is called when packing varyings. - */ -void -lower_packed_varyings_visitor::bitwise_assign_pack(ir_rvalue *lhs, - ir_rvalue *rhs) -{ - if (lhs->type->base_type != rhs->type->base_type) { - /* Since we only mix types in flat varyings, and we always store flat - * varyings as type ivec4, we need only produce conversions from (uint - * or float) to int. - */ - assert(lhs->type->base_type == GLSL_TYPE_INT); - switch (rhs->type->base_type) { - case GLSL_TYPE_UINT: - rhs = new(this->mem_ctx) - ir_expression(ir_unop_u2i, lhs->type, rhs); - break; - case GLSL_TYPE_FLOAT: - rhs = new(this->mem_ctx) - ir_expression(ir_unop_bitcast_f2i, lhs->type, rhs); - break; - case GLSL_TYPE_DOUBLE: - assert(rhs->type->vector_elements <= 2); - if (rhs->type->vector_elements == 2) { - ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "pack", ir_var_temporary); - - assert(lhs->type->vector_elements == 4); - this->out_variables->push_tail(t); - this->out_instructions->push_tail( - assign(t, u2i(expr(ir_unop_unpack_double_2x32, swizzle_x(rhs->clone(mem_ctx, NULL)))), 0x3)); - this->out_instructions->push_tail( - assign(t, u2i(expr(ir_unop_unpack_double_2x32, swizzle_y(rhs))), 0xc)); - rhs = deref(t).val; - } else { - rhs = u2i(expr(ir_unop_unpack_double_2x32, rhs)); - } - break; - default: - assert(!"Unexpected type conversion while lowering varyings"); - break; - } - } - this->out_instructions->push_tail(new (this->mem_ctx) ir_assignment(lhs, rhs)); -} - - -/** - * Make an ir_assignment from \c rhs to \c lhs, performing appropriate - * bitcasts if necessary to match up types. - * - * This function is called when unpacking varyings. - */ -void -lower_packed_varyings_visitor::bitwise_assign_unpack(ir_rvalue *lhs, - ir_rvalue *rhs) -{ - if (lhs->type->base_type != rhs->type->base_type) { - /* Since we only mix types in flat varyings, and we always store flat - * varyings as type ivec4, we need only produce conversions from int to - * (uint or float). 
- */ - assert(rhs->type->base_type == GLSL_TYPE_INT); - switch (lhs->type->base_type) { - case GLSL_TYPE_UINT: - rhs = new(this->mem_ctx) - ir_expression(ir_unop_i2u, lhs->type, rhs); - break; - case GLSL_TYPE_FLOAT: - rhs = new(this->mem_ctx) - ir_expression(ir_unop_bitcast_i2f, lhs->type, rhs); - break; - case GLSL_TYPE_DOUBLE: - assert(lhs->type->vector_elements <= 2); - if (lhs->type->vector_elements == 2) { - ir_variable *t = new(mem_ctx) ir_variable(lhs->type, "unpack", ir_var_temporary); - assert(rhs->type->vector_elements == 4); - this->out_variables->push_tail(t); - this->out_instructions->push_tail( - assign(t, expr(ir_unop_pack_double_2x32, i2u(swizzle_xy(rhs->clone(mem_ctx, NULL)))), 0x1)); - this->out_instructions->push_tail( - assign(t, expr(ir_unop_pack_double_2x32, i2u(swizzle(rhs->clone(mem_ctx, NULL), SWIZZLE_ZWZW, 2))), 0x2)); - rhs = deref(t).val; - } else { - rhs = expr(ir_unop_pack_double_2x32, i2u(rhs)); - } - break; - default: - assert(!"Unexpected type conversion while lowering varyings"); - break; - } - } - this->out_instructions->push_tail(new(this->mem_ctx) ir_assignment(lhs, rhs)); -} - - -/** - * Recursively pack or unpack the given varying (or portion of a varying) by - * traversing all of its constituent vectors. - * - * \param fine_location is the location where the first constituent vector - * should be packed--the word "fine" indicates that this location is expressed - * in multiples of a float, rather than multiples of a vec4 as is used - * elsewhere in Mesa. - * - * \param gs_input_toplevel should be set to true if we are lowering geometry - * shader inputs, and we are currently lowering the whole input variable - * (i.e. we are lowering the array whose index selects the vertex). - * - * \param vertex_index: if we are lowering geometry shader inputs, and the - * level of the array that we are currently lowering is *not* the top level, - * then this indicates which vertex we are currently lowering. Otherwise it - * is ignored. - * - * \return the location where the next constituent vector (after this one) - * should be packed. - */ -unsigned -lower_packed_varyings_visitor::lower_rvalue(ir_rvalue *rvalue, - unsigned fine_location, - ir_variable *unpacked_var, - const char *name, - bool gs_input_toplevel, - unsigned vertex_index) -{ - unsigned dmul = rvalue->type->is_double() ? 2 : 1; - /* When gs_input_toplevel is set, we should be looking at a geometry shader - * input array. - */ - assert(!gs_input_toplevel || rvalue->type->is_array()); - - if (rvalue->type->is_record()) { - for (unsigned i = 0; i < rvalue->type->length; i++) { - if (i != 0) - rvalue = rvalue->clone(this->mem_ctx, NULL); - const char *field_name = rvalue->type->fields.structure[i].name; - ir_dereference_record *dereference_record = new(this->mem_ctx) - ir_dereference_record(rvalue, field_name); - char *deref_name - = ralloc_asprintf(this->mem_ctx, "%s.%s", name, field_name); - fine_location = this->lower_rvalue(dereference_record, fine_location, - unpacked_var, deref_name, false, - vertex_index); - } - return fine_location; - } else if (rvalue->type->is_array()) { - /* Arrays are packed/unpacked by considering each array element in - * sequence. - */ - return this->lower_arraylike(rvalue, rvalue->type->array_size(), - fine_location, unpacked_var, name, - gs_input_toplevel, vertex_index); - } else if (rvalue->type->is_matrix()) { - /* Matrices are packed/unpacked by considering each column vector in - * sequence. 
- */ - return this->lower_arraylike(rvalue, rvalue->type->matrix_columns, - fine_location, unpacked_var, name, - false, vertex_index); - } else if (rvalue->type->vector_elements * dmul + - fine_location % 4 > 4) { - /* This vector is going to be "double parked" across two varying slots, - * so handle it as two separate assignments. For doubles, a dvec3/dvec4 - * can end up being spread over 3 slots. However the second splitting - * will happen later, here we just always want to split into 2. - */ - unsigned left_components, right_components; - unsigned left_swizzle_values[4] = { 0, 0, 0, 0 }; - unsigned right_swizzle_values[4] = { 0, 0, 0, 0 }; - char left_swizzle_name[4] = { 0, 0, 0, 0 }; - char right_swizzle_name[4] = { 0, 0, 0, 0 }; - - left_components = 4 - fine_location % 4; - if (rvalue->type->is_double()) { - /* We might actually end up with 0 left components! */ - left_components /= 2; - } - right_components = rvalue->type->vector_elements - left_components; - - for (unsigned i = 0; i < left_components; i++) { - left_swizzle_values[i] = i; - left_swizzle_name[i] = "xyzw"[i]; - } - for (unsigned i = 0; i < right_components; i++) { - right_swizzle_values[i] = i + left_components; - right_swizzle_name[i] = "xyzw"[i + left_components]; - } - ir_swizzle *left_swizzle = new(this->mem_ctx) - ir_swizzle(rvalue, left_swizzle_values, left_components); - ir_swizzle *right_swizzle = new(this->mem_ctx) - ir_swizzle(rvalue->clone(this->mem_ctx, NULL), right_swizzle_values, - right_components); - char *left_name - = ralloc_asprintf(this->mem_ctx, "%s.%s", name, left_swizzle_name); - char *right_name - = ralloc_asprintf(this->mem_ctx, "%s.%s", name, right_swizzle_name); - if (left_components) - fine_location = this->lower_rvalue(left_swizzle, fine_location, - unpacked_var, left_name, false, - vertex_index); - else - /* Top up the fine location to the next slot */ - fine_location++; - return this->lower_rvalue(right_swizzle, fine_location, unpacked_var, - right_name, false, vertex_index); - } else { - /* No special handling is necessary; pack the rvalue into the - * varying. - */ - unsigned swizzle_values[4] = { 0, 0, 0, 0 }; - unsigned components = rvalue->type->vector_elements * dmul; - unsigned location = fine_location / 4; - unsigned location_frac = fine_location % 4; - for (unsigned i = 0; i < components; ++i) - swizzle_values[i] = i + location_frac; - ir_dereference *packed_deref = - this->get_packed_varying_deref(location, unpacked_var, name, - vertex_index); - ir_swizzle *swizzle = new(this->mem_ctx) - ir_swizzle(packed_deref, swizzle_values, components); - if (this->mode == ir_var_shader_out) { - this->bitwise_assign_pack(swizzle, rvalue); - } else { - this->bitwise_assign_unpack(rvalue, swizzle); - } - return fine_location + components; - } -} - -/** - * Recursively pack or unpack a varying for which we need to iterate over its - * constituent elements, accessing each one using an ir_dereference_array. - * This takes care of both arrays and matrices, since ir_dereference_array - * treats a matrix like an array of its column vectors. - * - * \param gs_input_toplevel should be set to true if we are lowering geometry - * shader inputs, and we are currently lowering the whole input variable - * (i.e. we are lowering the array whose index selects the vertex). - * - * \param vertex_index: if we are lowering geometry shader inputs, and the - * level of the array that we are currently lowering is *not* the top level, - * then this indicates which vertex we are currently lowering. 
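A sketch of the split that lower_rvalue() performs when a vector straddles a slot boundary ("double parking"); the numbers below reproduce the bar[0] example from the file comment, a vec3 parked at slot 5, component 2.

#include <cstdio>

int main()
{
    unsigned fine_location = 22;  /* slot 5, component 2 */
    unsigned vector_elements = 3; /* a vec3 */

    unsigned left = 4 - fine_location % 4;   /* components that still fit */
    unsigned right = vector_elements - left; /* components spilling over  */

    printf("left:  %u components in slot %u starting at component %u\n",
           left, fine_location / 4, fine_location % 4);
    printf("right: %u component(s) in slot %u starting at component 0\n",
           right, fine_location / 4 + 1);
    return 0;
}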
Otherwise it - * is ignored. - */ -unsigned -lower_packed_varyings_visitor::lower_arraylike(ir_rvalue *rvalue, - unsigned array_size, - unsigned fine_location, - ir_variable *unpacked_var, - const char *name, - bool gs_input_toplevel, - unsigned vertex_index) -{ - for (unsigned i = 0; i < array_size; i++) { - if (i != 0) - rvalue = rvalue->clone(this->mem_ctx, NULL); - ir_constant *constant = new(this->mem_ctx) ir_constant(i); - ir_dereference_array *dereference_array = new(this->mem_ctx) - ir_dereference_array(rvalue, constant); - if (gs_input_toplevel) { - /* Geometry shader inputs are a special case. Instead of storing - * each element of the array at a different location, all elements - * are at the same location, but with a different vertex index. - */ - (void) this->lower_rvalue(dereference_array, fine_location, - unpacked_var, name, false, i); - } else { - char *subscripted_name - = ralloc_asprintf(this->mem_ctx, "%s[%d]", name, i); - fine_location = - this->lower_rvalue(dereference_array, fine_location, - unpacked_var, subscripted_name, - false, vertex_index); - } - } - return fine_location; -} - -/** - * Retrieve the packed varying corresponding to the given varying location. - * If no packed varying has been created for the given varying location yet, - * create it and add it to the shader before returning it. - * - * The newly created varying inherits its interpolation parameters from \c - * unpacked_var. Its base type is ivec4 if we are lowering a flat varying, - * vec4 otherwise. - * - * \param vertex_index: if we are lowering geometry shader inputs, then this - * indicates which vertex we are currently lowering. Otherwise it is ignored. - */ -ir_dereference * -lower_packed_varyings_visitor::get_packed_varying_deref( - unsigned location, ir_variable *unpacked_var, const char *name, - unsigned vertex_index) -{ - unsigned slot = location - VARYING_SLOT_VAR0; - assert(slot < locations_used); - if (this->packed_varyings[slot] == NULL) { - char *packed_name = ralloc_asprintf(this->mem_ctx, "packed:%s", name); - const glsl_type *packed_type; - if (unpacked_var->data.interpolation == INTERP_QUALIFIER_FLAT) - packed_type = glsl_type::ivec4_type; - else - packed_type = glsl_type::vec4_type; - if (this->gs_input_vertices != 0) { - packed_type = - glsl_type::get_array_instance(packed_type, - this->gs_input_vertices); - } - ir_variable *packed_var = new(this->mem_ctx) - ir_variable(packed_type, packed_name, this->mode); - if (this->gs_input_vertices != 0) { - /* Prevent update_array_sizes() from messing with the size of the - * array. - */ - packed_var->data.max_array_access = this->gs_input_vertices - 1; - } - packed_var->data.centroid = unpacked_var->data.centroid; - packed_var->data.sample = unpacked_var->data.sample; - packed_var->data.patch = unpacked_var->data.patch; - packed_var->data.interpolation = unpacked_var->data.interpolation; - packed_var->data.location = location; - packed_var->data.precision = unpacked_var->data.precision; - packed_var->data.always_active_io = unpacked_var->data.always_active_io; - unpacked_var->insert_before(packed_var); - this->packed_varyings[slot] = packed_var; - } else { - /* For geometry shader inputs, only update the packed variable name the - * first time we visit each component. 
- */ - if (this->gs_input_vertices == 0 || vertex_index == 0) { - ralloc_asprintf_append((char **) &this->packed_varyings[slot]->name, - ",%s", name); - } - } - - ir_dereference *deref = new(this->mem_ctx) - ir_dereference_variable(this->packed_varyings[slot]); - if (this->gs_input_vertices != 0) { - /* When lowering GS inputs, the packed variable is an array, so we need - * to dereference it using vertex_index. - */ - ir_constant *constant = new(this->mem_ctx) ir_constant(vertex_index); - deref = new(this->mem_ctx) ir_dereference_array(deref, constant); - } - return deref; -} - -bool -lower_packed_varyings_visitor::needs_lowering(ir_variable *var) -{ - /* Things composed of vec4's and varyings with explicitly assigned - * locations don't need lowering. Everything else does. - */ - if (var->data.explicit_location) - return false; - - const glsl_type *type = var->type->without_array(); - if (type->vector_elements == 4 && !type->is_double()) - return false; - return true; -} - - -/** - * Visitor that splices varying packing code before every use of EmitVertex() - * in a geometry shader. - */ -class lower_packed_varyings_gs_splicer : public ir_hierarchical_visitor -{ -public: - explicit lower_packed_varyings_gs_splicer(void *mem_ctx, - const exec_list *instructions); - - virtual ir_visitor_status visit_leave(ir_emit_vertex *ev); - -private: - /** - * Memory context used to allocate new instructions for the shader. - */ - void * const mem_ctx; - - /** - * Instructions that should be spliced into place before each EmitVertex() - * call. - */ - const exec_list *instructions; -}; - - -lower_packed_varyings_gs_splicer::lower_packed_varyings_gs_splicer( - void *mem_ctx, const exec_list *instructions) - : mem_ctx(mem_ctx), instructions(instructions) -{ -} - - -ir_visitor_status -lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev) -{ - foreach_in_list(ir_instruction, ir, this->instructions) { - ev->insert_before(ir->clone(this->mem_ctx, NULL)); - } - return visit_continue; -} - - -void -lower_packed_varyings(void *mem_ctx, unsigned locations_used, - ir_variable_mode mode, unsigned gs_input_vertices, - gl_shader *shader) -{ - exec_list *instructions = shader->ir; - ir_function *main_func = shader->symbols->get_function("main"); - exec_list void_parameters; - ir_function_signature *main_func_sig - = main_func->matching_signature(NULL, &void_parameters, false); - exec_list new_instructions, new_variables; - lower_packed_varyings_visitor visitor(mem_ctx, locations_used, mode, - gs_input_vertices, - &new_instructions, - &new_variables); - visitor.run(shader); - if (mode == ir_var_shader_out) { - if (shader->Stage == MESA_SHADER_GEOMETRY) { - /* For geometry shaders, outputs need to be lowered before each call - * to EmitVertex() - */ - lower_packed_varyings_gs_splicer splicer(mem_ctx, &new_instructions); - - /* Add all the variables in first. 
*/ - main_func_sig->body.head->insert_before(&new_variables); - - /* Now update all the EmitVertex instances */ - splicer.run(instructions); - } else { - /* For other shader types, outputs need to be lowered at the end of - * main() - */ - main_func_sig->body.append_list(&new_variables); - main_func_sig->body.append_list(&new_instructions); - } - } else { - /* Shader inputs need to be lowered at the beginning of main() */ - main_func_sig->body.head->insert_before(&new_instructions); - main_func_sig->body.head->insert_before(&new_variables); - } -} diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp deleted file mode 100644 index 7f18238bc6e..00000000000 --- a/src/glsl/lower_packing_builtins.cpp +++ /dev/null @@ -1,1412 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include "ir.h" -#include "ir_builder.h" -#include "ir_optimization.h" -#include "ir_rvalue_visitor.h" - -namespace { - -using namespace ir_builder; - -/** - * A visitor that lowers built-in floating-point pack/unpack expressions - * such as packSnorm2x16. - */ -class lower_packing_builtins_visitor : public ir_rvalue_visitor { -public: - /** - * \param op_mask is a bitmask of `enum lower_packing_builtins_op` - */ - explicit lower_packing_builtins_visitor(int op_mask) - : op_mask(op_mask), - progress(false) - { - /* Mutually exclusive options.
*/ - assert(!((op_mask & LOWER_PACK_HALF_2x16) && - (op_mask & LOWER_PACK_HALF_2x16_TO_SPLIT))); - - assert(!((op_mask & LOWER_UNPACK_HALF_2x16) && - (op_mask & LOWER_UNPACK_HALF_2x16_TO_SPLIT))); - - factory.instructions = &factory_instructions; - } - - virtual ~lower_packing_builtins_visitor() - { - assert(factory_instructions.is_empty()); - } - - bool get_progress() { return progress; } - - void handle_rvalue(ir_rvalue **rvalue) - { - if (!*rvalue) - return; - - ir_expression *expr = (*rvalue)->as_expression(); - if (!expr) - return; - - enum lower_packing_builtins_op lowering_op = - choose_lowering_op(expr->operation); - - if (lowering_op == LOWER_PACK_UNPACK_NONE) - return; - - setup_factory(ralloc_parent(expr)); - - ir_rvalue *op0 = expr->operands[0]; - ralloc_steal(factory.mem_ctx, op0); - - switch (lowering_op) { - case LOWER_PACK_SNORM_2x16: - *rvalue = lower_pack_snorm_2x16(op0); - break; - case LOWER_PACK_SNORM_4x8: - *rvalue = lower_pack_snorm_4x8(op0); - break; - case LOWER_PACK_UNORM_2x16: - *rvalue = lower_pack_unorm_2x16(op0); - break; - case LOWER_PACK_UNORM_4x8: - *rvalue = lower_pack_unorm_4x8(op0); - break; - case LOWER_PACK_HALF_2x16: - *rvalue = lower_pack_half_2x16(op0); - break; - case LOWER_PACK_HALF_2x16_TO_SPLIT: - *rvalue = split_pack_half_2x16(op0); - break; - case LOWER_UNPACK_SNORM_2x16: - *rvalue = lower_unpack_snorm_2x16(op0); - break; - case LOWER_UNPACK_SNORM_4x8: - *rvalue = lower_unpack_snorm_4x8(op0); - break; - case LOWER_UNPACK_UNORM_2x16: - *rvalue = lower_unpack_unorm_2x16(op0); - break; - case LOWER_UNPACK_UNORM_4x8: - *rvalue = lower_unpack_unorm_4x8(op0); - break; - case LOWER_UNPACK_HALF_2x16: - *rvalue = lower_unpack_half_2x16(op0); - break; - case LOWER_UNPACK_HALF_2x16_TO_SPLIT: - *rvalue = split_unpack_half_2x16(op0); - break; - case LOWER_PACK_UNPACK_NONE: - case LOWER_PACK_USE_BFI: - case LOWER_PACK_USE_BFE: - assert(!"not reached"); - break; - } - - teardown_factory(); - progress = true; - } - -private: - const int op_mask; - bool progress; - ir_factory factory; - exec_list factory_instructions; - - /** - * Determine the needed lowering operation by filtering \a expr_op - * through \ref op_mask. - */ - enum lower_packing_builtins_op - choose_lowering_op(ir_expression_operation expr_op) - { - /* C++ regards int and enum as fundamentally different types. - * So, we can't simply return from each case; we must cast the return - * value. 
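A minimal sketch of the int-vs-enum point made in the comment above: in C++, masking enum flags yields an int, which cannot be implicitly converted back, so the result must be cast to the enum type explicitly. The enum here is a stand-in, not the real lower_packing_builtins_op.

enum op { OP_NONE = 0, OP_A = 1, OP_B = 2 };

static op choose(int mask, op wanted)
{
    int result = mask & wanted;     /* int; returning it directly won't compile */
    return static_cast<op>(result); /* explicit cast back to the enum type */
}

int main()
{
    return choose(OP_A | OP_B, OP_A) == OP_A ? 0 : 1;
}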
- */ - int result; - - switch (expr_op) { - case ir_unop_pack_snorm_2x16: - result = op_mask & LOWER_PACK_SNORM_2x16; - break; - case ir_unop_pack_snorm_4x8: - result = op_mask & LOWER_PACK_SNORM_4x8; - break; - case ir_unop_pack_unorm_2x16: - result = op_mask & LOWER_PACK_UNORM_2x16; - break; - case ir_unop_pack_unorm_4x8: - result = op_mask & LOWER_PACK_UNORM_4x8; - break; - case ir_unop_pack_half_2x16: - result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT); - break; - case ir_unop_unpack_snorm_2x16: - result = op_mask & LOWER_UNPACK_SNORM_2x16; - break; - case ir_unop_unpack_snorm_4x8: - result = op_mask & LOWER_UNPACK_SNORM_4x8; - break; - case ir_unop_unpack_unorm_2x16: - result = op_mask & LOWER_UNPACK_UNORM_2x16; - break; - case ir_unop_unpack_unorm_4x8: - result = op_mask & LOWER_UNPACK_UNORM_4x8; - break; - case ir_unop_unpack_half_2x16: - result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT); - break; - default: - result = LOWER_PACK_UNPACK_NONE; - break; - } - - return static_cast<enum lower_packing_builtins_op>(result); - } - - void - setup_factory(void *mem_ctx) - { - assert(factory.mem_ctx == NULL); - assert(factory.instructions->is_empty()); - - factory.mem_ctx = mem_ctx; - } - - void - teardown_factory() - { - base_ir->insert_before(factory.instructions); - assert(factory.instructions->is_empty()); - factory.mem_ctx = NULL; - } - - template <typename T> - ir_constant* - constant(T x) - { - return factory.constant(x); - } - - /** - * \brief Pack two uint16's into a single uint32. - * - * Interpret the given uvec2 as a uint16 pair. Pack the pair into a uint32 - * where the least significant bits specify the first element of the pair. - * Return the uint32. - */ - ir_rvalue* - pack_uvec2_to_uint(ir_rvalue *uvec2_rval) - { - assert(uvec2_rval->type == glsl_type::uvec2_type); - - /* uvec2 u = UVEC2_RVAL; */ - ir_variable *u = factory.make_temp(glsl_type::uvec2_type, - "tmp_pack_uvec2_to_uint"); - factory.emit(assign(u, uvec2_rval)); - - if (op_mask & LOWER_PACK_USE_BFI) { - return bitfield_insert(bit_and(swizzle_x(u), constant(0xffffu)), - swizzle_y(u), - constant(16u), - constant(16u)); - } - - /* return (u.y << 16) | (u.x & 0xffff); */ - return bit_or(lshift(swizzle_y(u), constant(16u)), - bit_and(swizzle_x(u), constant(0xffffu))); - } - - /** - * \brief Pack four uint8's into a single uint32. - * - * Interpret the given uvec4 as a uint8 4-tuple. Pack the 4-tuple into a - * uint32 where the least significant bits specify the first element of the - * 4-tuple. Return the uint32. - */ - ir_rvalue* - pack_uvec4_to_uint(ir_rvalue *uvec4_rval) - { - assert(uvec4_rval->type == glsl_type::uvec4_type); - - ir_variable *u = factory.make_temp(glsl_type::uvec4_type, - "tmp_pack_uvec4_to_uint"); - - if (op_mask & LOWER_PACK_USE_BFI) { - /* uvec4 u = UVEC4_RVAL; */ - factory.emit(assign(u, uvec4_rval)); - - return bitfield_insert(bitfield_insert( - bitfield_insert( - bit_and(swizzle_x(u), constant(0xffu)), - swizzle_y(u), constant(8u), constant(8u)), - swizzle_z(u), constant(16u), constant(8u)), - swizzle_w(u), constant(24u), constant(8u)); - } - - /* uvec4 u = UVEC4_RVAL & 0xff */ - factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); - - /* return (u.w << 24) | (u.z << 16) | (u.y << 8) | u.x; */ - return bit_or(bit_or(lshift(swizzle_w(u), constant(24u)), - lshift(swizzle_z(u), constant(16u))), - bit_or(lshift(swizzle_y(u), constant(8u)), - swizzle_x(u))); - } - - /** - * \brief Unpack a uint32 into two uint16's.
- * - * Interpret the given uint32 as a uint16 pair where the uint32's least - * significant bits specify the pair's first element. Return the uint16 - * pair as a uvec2. - */ - ir_rvalue* - unpack_uint_to_uvec2(ir_rvalue *uint_rval) - { - assert(uint_rval->type == glsl_type::uint_type); - - /* uint u = UINT_RVAL; */ - ir_variable *u = factory.make_temp(glsl_type::uint_type, - "tmp_unpack_uint_to_uvec2_u"); - factory.emit(assign(u, uint_rval)); - - /* uvec2 u2; */ - ir_variable *u2 = factory.make_temp(glsl_type::uvec2_type, - "tmp_unpack_uint_to_uvec2_u2"); - - /* u2.x = u & 0xffffu; */ - factory.emit(assign(u2, bit_and(u, constant(0xffffu)), WRITEMASK_X)); - - /* u2.y = u >> 16u; */ - factory.emit(assign(u2, rshift(u, constant(16u)), WRITEMASK_Y)); - - return deref(u2).val; - } - - /** - * \brief Unpack a uint32 into two int16's. - * - * Specifically each 16-bit value is sign-extended to the full width of an - * int32 on return. - */ - ir_rvalue * - unpack_uint_to_ivec2(ir_rvalue *uint_rval) - { - assert(uint_rval->type == glsl_type::uint_type); - - if (!(op_mask & LOWER_PACK_USE_BFE)) { - return rshift(lshift(u2i(unpack_uint_to_uvec2(uint_rval)), - constant(16u)), - constant(16u)); - } - - ir_variable *i = factory.make_temp(glsl_type::int_type, - "tmp_unpack_uint_to_ivec2_i"); - factory.emit(assign(i, u2i(uint_rval))); - - /* ivec2 i2; */ - ir_variable *i2 = factory.make_temp(glsl_type::ivec2_type, - "tmp_unpack_uint_to_ivec2_i2"); - - factory.emit(assign(i2, bitfield_extract(i, constant(0), constant(16)), - WRITEMASK_X)); - factory.emit(assign(i2, bitfield_extract(i, constant(16), constant(16)), - WRITEMASK_Y)); - - return deref(i2).val; - } - - /** - * \brief Unpack a uint32 into four uint8's. - * - * Interpret the given uint32 as a uint8 4-tuple where the uint32's least - * significant bits specify the 4-tuple's first element. Return the uint8 - * 4-tuple as a uvec4. - */ - ir_rvalue* - unpack_uint_to_uvec4(ir_rvalue *uint_rval) - { - assert(uint_rval->type == glsl_type::uint_type); - - /* uint u = UINT_RVAL; */ - ir_variable *u = factory.make_temp(glsl_type::uint_type, - "tmp_unpack_uint_to_uvec4_u"); - factory.emit(assign(u, uint_rval)); - - /* uvec4 u4; */ - ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, - "tmp_unpack_uint_to_uvec4_u4"); - - /* u4.x = u & 0xffu; */ - factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); - - if (op_mask & LOWER_PACK_USE_BFE) { - /* u4.y = bitfield_extract(u, 8, 8); */ - factory.emit(assign(u4, bitfield_extract(u, constant(8u), constant(8u)), - WRITEMASK_Y)); - - /* u4.z = bitfield_extract(u, 16, 8); */ - factory.emit(assign(u4, bitfield_extract(u, constant(16u), constant(8u)), - WRITEMASK_Z)); - } else { - /* u4.y = (u >> 8u) & 0xffu; */ - factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), - constant(0xffu)), WRITEMASK_Y)); - - /* u4.z = (u >> 16u) & 0xffu; */ - factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), - constant(0xffu)), WRITEMASK_Z)); - } - - /* u4.w = (u >> 24u) */ - factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); - - return deref(u4).val; - } - - /** - * \brief Unpack a uint32 into four int8's. - * - * Specifically each 8-bit value is sign-extended to the full width of an - * int32 on return. 
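A CPU-side sketch of the shift-based sign extension used by the non-BFE path above: move each 16-bit half into the top of an int32, then arithmetic-shift it back down. Illustrative only; it assumes the usual two's-complement arithmetic right shift for signed values.

#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t packed = 0x0005ffffu; /* low half: -1, high half: 5 */

    int32_t x = (int32_t)(packed << 16) >> 16; /* sign-extend bits 0-15  */
    int32_t y = (int32_t) packed       >> 16;  /* sign-extend bits 16-31 */

    printf("x = %d, y = %d\n", x, y); /* prints x = -1, y = 5 */
    return 0;
}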
- */ - ir_rvalue * - unpack_uint_to_ivec4(ir_rvalue *uint_rval) - { - assert(uint_rval->type == glsl_type::uint_type); - - if (!(op_mask & LOWER_PACK_USE_BFE)) { - return rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), - constant(24u)), - constant(24u)); - } - - ir_variable *i = factory.make_temp(glsl_type::int_type, - "tmp_unpack_uint_to_ivec4_i"); - factory.emit(assign(i, u2i(uint_rval))); - - /* ivec4 i4; */ - ir_variable *i4 = factory.make_temp(glsl_type::ivec4_type, - "tmp_unpack_uint_to_ivec4_i4"); - - factory.emit(assign(i4, bitfield_extract(i, constant(0), constant(8)), - WRITEMASK_X)); - factory.emit(assign(i4, bitfield_extract(i, constant(8), constant(8)), - WRITEMASK_Y)); - factory.emit(assign(i4, bitfield_extract(i, constant(16), constant(8)), - WRITEMASK_Z)); - factory.emit(assign(i4, bitfield_extract(i, constant(24), constant(8)), - WRITEMASK_W)); - - return deref(i4).val; - } - - /** - * \brief Lower a packSnorm2x16 expression. - * - * \param vec2_rval is packSnorm2x16's input - * \return packSnorm2x16's output as a uint rvalue - */ - ir_rvalue* - lower_pack_snorm_2x16(ir_rvalue *vec2_rval) - { - /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: - * - * highp uint packSnorm2x16(vec2 v) - * -------------------------------- - * First, converts each component of the normalized floating-point value - * v into 16-bit integer values. Then, the results are packed into the - * returned 32-bit unsigned integer. - * - * The conversion for component c of v to fixed point is done as - * follows: - * - * packSnorm2x16: round(clamp(c, -1, +1) * 32767.0) - * - * The first component of the vector will be written to the least - * significant bits of the output; the last component will be written to - * the most significant bits. - * - * This function generates IR that approximates the following pseudo-GLSL: - * - * return pack_uvec2_to_uint( - * uvec2(ivec2( - * round(clamp(VEC2_RVALUE, -1.0f, 1.0f) * 32767.0f)))); - * - * It is necessary to first convert the vec2 to ivec2 rather than directly - * converting vec2 to uvec2 because the latter conversion is undefined. - * From page 56 (62 of pdf) of the GLSL ES 3.00 spec: "It is undefined to - * convert a negative floating point value to an uint". - */ - assert(vec2_rval->type == glsl_type::vec2_type); - - ir_rvalue *result = pack_uvec2_to_uint( - i2u(f2i(round_even(mul(clamp(vec2_rval, - constant(-1.0f), - constant(1.0f)), - constant(32767.0f)))))); - - assert(result->type == glsl_type::uint_type); - return result; - } - - /** - * \brief Lower a packSnorm4x8 expression. - * - * \param vec4_rval is packSnorm4x8's input - * \return packSnorm4x8's output as a uint rvalue - */ - ir_rvalue* - lower_pack_snorm_4x8(ir_rvalue *vec4_rval) - { - /* From page 137 (143 of pdf) of the GLSL 4.30 spec: - * - * highp uint packSnorm4x8(vec4 v) - * ------------------------------- - * First, converts each component of the normalized floating-point value - * v into 8-bit integer values. Then, the results are packed into the - * returned 32-bit unsigned integer. - * - * The conversion for component c of v to fixed point is done as - * follows: - * - * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) - * - * The first component of the vector will be written to the least - * significant bits of the output; the last component will be written to - * the most significant bits. 
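A rough CPU reference of the packSnorm formula quoted above, round(clamp(c, -1, +1) * 32767.0) for the 2x16 case, with the first component in the least significant bits. Illustrative only; std::lrintf rounds to nearest even in the default rounding mode, standing in for round_to_even.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

static uint32_t pack_snorm_2x16_ref(float x, float y)
{
    auto convert = [](float c) -> uint16_t {
        float clamped = std::min(std::max(c, -1.0f), 1.0f);
        return (uint16_t)(int16_t)std::lrintf(clamped * 32767.0f);
    };
    return (uint32_t)convert(x) | ((uint32_t)convert(y) << 16);
}

int main()
{
    printf("0x%08x\n", pack_snorm_2x16_ref(-1.0f, 0.5f)); /* 0x40008001 */
    return 0;
}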
- * - * This function generates IR that approximates the following pseudo-GLSL: - * - * return pack_uvec4_to_uint( - * uvec4(ivec4( - * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); - * - * It is necessary to first convert the vec4 to ivec4 rather than directly - * converting vec4 to uvec4 because the latter conversion is undefined. - * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to - * convert a negative floating point value to an uint". - */ - assert(vec4_rval->type == glsl_type::vec4_type); - - ir_rvalue *result = pack_uvec4_to_uint( - i2u(f2i(round_even(mul(clamp(vec4_rval, - constant(-1.0f), - constant(1.0f)), - constant(127.0f)))))); - - assert(result->type == glsl_type::uint_type); - return result; - } - - /** - * \brief Lower an unpackSnorm2x16 expression. - * - * \param uint_rval is unpackSnorm2x16's input - * \return unpackSnorm2x16's output as a vec2 rvalue - */ - ir_rvalue* - lower_unpack_snorm_2x16(ir_rvalue *uint_rval) - { - /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: - * - * highp vec2 unpackSnorm2x16 (highp uint p) - * ----------------------------------------- - * First, unpacks a single 32-bit unsigned integer p into a pair of - * 16-bit unsigned integers. Then, each component is converted to - * a normalized floating-point value to generate the returned - * two-component vector. - * - * The conversion for unpacked fixed-point value f to floating point is - * done as follows: - * - * unpackSnorm2x16: clamp(f / 32767.0, -1,+1) - * - * The first component of the returned vector will be extracted from the - * least significant bits of the input; the last component will be - * extracted from the most significant bits. - * - * This function generates IR that approximates the following pseudo-GLSL: - * - * return clamp( - * ((ivec2(unpack_uint_to_uvec2(UINT_RVALUE)) << 16) >> 16) / 32767.0f, - * -1.0f, 1.0f); - * - * The above IR may appear unnecessarily complex, but the intermediate - * conversion to ivec2 and the bit shifts are necessary to correctly unpack - * negative floats. - * - * To see why, consider packing and then unpacking vec2(-1.0, 0.0). - * packSnorm2x16 encodes -1.0 as the int16 0x8001. During unpacking, we - * place that int16 into an int32, which results in the *positive* integer - * 0x00008001. The int16's sign bit becomes, in the int32, the rather - * unimportant bit 16. We must now extend the int16's sign bit into bits - * 17-32, which is accomplished by left-shifting then right-shifting. - */ - - assert(uint_rval->type == glsl_type::uint_type); - - ir_rvalue *result = - clamp(div(i2f(unpack_uint_to_ivec2(uint_rval)), - constant(32767.0f)), - constant(-1.0f), - constant(1.0f)); - - assert(result->type == glsl_type::vec2_type); - return result; - } - - /** - * \brief Lower an unpackSnorm4x8 expression. - * - * \param uint_rval is unpackSnorm4x8's input - * \return unpackSnorm4x8's output as a vec4 rvalue - */ - ir_rvalue* - lower_unpack_snorm_4x8(ir_rvalue *uint_rval) - { - /* From page 137 (143 of pdf) of the GLSL 4.30 spec: - * - * highp vec4 unpackSnorm4x8 (highp uint p) - * ---------------------------------------- - * First, unpacks a single 32-bit unsigned integer p into four - * 8-bit unsigned integers. Then, each component is converted to - * a normalized floating-point value to generate the returned - * four-component vector.
- * - * The conversion for unpacked fixed-point value f to floating point is - * done as follows: - * - * unpackSnorm4x8: clamp(f / 127.0, -1, +1) - * - * The first component of the returned vector will be extracted from the - * least significant bits of the input; the last component will be - * extracted from the most significant bits. - * - * This function generates IR that approximates the following pseudo-GLSL: - * - * return clamp( - * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, - * -1.0f, 1.0f); - * - * The above IR may appear unnecessarily complex, but the intermediate - * conversion to ivec4 and the bit shifts are necessary to correctly unpack - * negative floats. - * - * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, - * 0.0). packSnorm4x8 encodes -1.0 as the int8 0x81. During unpacking, we - * place that int8 into an int32, which results in the *positive* integer - * 0x00000081. The int8's sign bit becomes, in the int32, the rather - * unimportant bit 8. We must now extend the int8's sign bit into bits - * 9-32, which is accomplished by left-shifting then right-shifting. - */ - - assert(uint_rval->type == glsl_type::uint_type); - - ir_rvalue *result = - clamp(div(i2f(unpack_uint_to_ivec4(uint_rval)), - constant(127.0f)), - constant(-1.0f), - constant(1.0f)); - - assert(result->type == glsl_type::vec4_type); - return result; - } - - /** - * \brief Lower a packUnorm2x16 expression. - * - * \param vec2_rval is packUnorm2x16's input - * \return packUnorm2x16's output as a uint rvalue - */ - ir_rvalue* - lower_pack_unorm_2x16(ir_rvalue *vec2_rval) - { - /* From page 88 (94 of pdf) of the GLSL ES 3.00 spec: - * - * highp uint packUnorm2x16 (vec2 v) - * --------------------------------- - * First, converts each component of the normalized floating-point value - * v into 16-bit integer values. Then, the results are packed into the - * returned 32-bit unsigned integer. - * - * The conversion for component c of v to fixed point is done as - * follows: - * - * packUnorm2x16: round(clamp(c, 0, +1) * 65535.0) - * - * The first component of the vector will be written to the least - * significant bits of the output; the last component will be written to - * the most significant bits. - * - * This function generates IR that approximates the following pseudo-GLSL: - * - * return pack_uvec2_to_uint(uvec2( - * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 65535.0f))); - * - * Here it is safe to directly convert the vec2 to uvec2 because the vec2 - * has been clamped to a non-negative range. - */ - - assert(vec2_rval->type == glsl_type::vec2_type); - - ir_rvalue *result = pack_uvec2_to_uint( - f2u(round_even(mul(saturate(vec2_rval), constant(65535.0f))))); - - assert(result->type == glsl_type::uint_type); - return result; - } - - /** - * \brief Lower a packUnorm4x8 expression. - * - * \param vec4_rval is packUnorm4x8's input - * \return packUnorm4x8's output as a uint rvalue - */ - ir_rvalue* - lower_pack_unorm_4x8(ir_rvalue *vec4_rval) - { - /* From page 137 (143 of pdf) of the GLSL 4.30 spec: - * - * highp uint packUnorm4x8 (vec4 v) - * -------------------------------- - * First, converts each component of the normalized floating-point value - * v into 8-bit integer values. Then, the results are packed into the - * returned 32-bit unsigned integer.
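For contrast with the snorm paths, a sketch of the packUnorm2x16 formula: after clamping to [0, 1] the value is non-negative, so the float may be converted straight to uint, which is exactly why the unorm lowerings skip the detour through int. Illustrative only, with std::lrintf standing in for round_to_even.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

static uint32_t pack_unorm_2x16_ref(float x, float y)
{
    auto convert = [](float c) -> uint32_t {
        float clamped = std::min(std::max(c, 0.0f), 1.0f); /* saturate */
        return (uint32_t)std::lrintf(clamped * 65535.0f);
    };
    return convert(x) | (convert(y) << 16);
}

int main()
{
    printf("0x%08x\n", pack_unorm_2x16_ref(0.0f, 1.0f)); /* 0xffff0000 */
    return 0;
}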
- * - * The conversion for component c of v to fixed point is done as - * follows: - * - * packUnorm4x8: round(clamp(c, 0, +1) * 255.0) - * - * The first component of the vector will be written to the least - * significant bits of the output; the last component will be written to - * the most significant bits. - * - * This function generates IR that approximates the following pseudo-GLSL: - * - * return pack_uvec4_to_uint(uvec4( - * round(clamp(VEC4_RVALUE, 0.0f, 1.0f) * 255.0f))); - * - * Here it is safe to directly convert the vec4 to uvec4 because the vec4 - * has been clamped to a non-negative range. - */ - - assert(vec4_rval->type == glsl_type::vec4_type); - - ir_rvalue *result = pack_uvec4_to_uint( - f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); - - assert(result->type == glsl_type::uint_type); - return result; - } - - /** - * \brief Lower an unpackUnorm2x16 expression. - * - * \param uint_rval is unpackUnorm2x16's input - * \return unpackUnorm2x16's output as a vec2 rvalue - */ - ir_rvalue* - lower_unpack_unorm_2x16(ir_rvalue *uint_rval) - { - /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: - * - * highp vec2 unpackUnorm2x16 (highp uint p) - * ----------------------------------------- - * First, unpacks a single 32-bit unsigned integer p into a pair of - * 16-bit unsigned integers. Then, each component is converted to - * a normalized floating-point value to generate the returned - * two-component vector. - * - * The conversion for unpacked fixed-point value f to floating point is - * done as follows: - * - * unpackUnorm2x16: f / 65535.0 - * - * The first component of the returned vector will be extracted from the - * least significant bits of the input; the last component will be - * extracted from the most significant bits. - * - * This function generates IR that approximates the following pseudo-GLSL: - * - * return vec2(unpack_uint_to_uvec2(UINT_RVALUE)) / 65535.0; - */ - - assert(uint_rval->type == glsl_type::uint_type); - - ir_rvalue *result = div(u2f(unpack_uint_to_uvec2(uint_rval)), - constant(65535.0f)); - - assert(result->type == glsl_type::vec2_type); - return result; - } - - /** - * \brief Lower an unpackUnorm4x8 expression. - * - * \param uint_rval is unpackUnorm4x8's input - * \return unpackUnorm4x8's output as a vec4 rvalue - */ - ir_rvalue* - lower_unpack_unorm_4x8(ir_rvalue *uint_rval) - { - /* From page 137 (143 of pdf) of the GLSL 4.30 spec: - * - * highp vec4 unpackUnorm4x8 (highp uint p) - * ---------------------------------------- - * First, unpacks a single 32-bit unsigned integer p into four - * 8-bit unsigned integers. Then, each component is converted to - * a normalized floating-point value to generate the returned - * four-component vector. - * - * The conversion for unpacked fixed-point value f to floating point is - * done as follows: - * - * unpackUnorm4x8: f / 255.0 - * - * The first component of the returned vector will be extracted from the - * least significant bits of the input; the last component will be - * extracted from the most significant bits. - * - * This function generates IR that approximates the following pseudo-GLSL: - * - * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0; - */ - - assert(uint_rval->type == glsl_type::uint_type); - - ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)), - constant(255.0f)); - - assert(result->type == glsl_type::vec4_type); - return result; - } - - /** - * \brief Lower the component-wise calculation of packHalf2x16.
- * - * \param f_rval is one component of packHalf2x16's input - * \param e_rval is the unshifted exponent bits of f_rval - * \param m_rval is the unshifted mantissa bits of f_rval - * - * \return a uint rvalue that encodes a float16 in its lower 16 bits - */ - ir_rvalue* - pack_half_1x16_nosign(ir_rvalue *f_rval, - ir_rvalue *e_rval, - ir_rvalue *m_rval) - { - assert(e_rval->type == glsl_type::uint_type); - assert(m_rval->type == glsl_type::uint_type); - - /* uint u16; */ - ir_variable *u16 = factory.make_temp(glsl_type::uint_type, - "tmp_pack_half_1x16_u16"); - - /* float f = FLOAT_RVAL; */ - ir_variable *f = factory.make_temp(glsl_type::float_type, - "tmp_pack_half_1x16_f"); - factory.emit(assign(f, f_rval)); - - /* uint e = E_RVAL; */ - ir_variable *e = factory.make_temp(glsl_type::uint_type, - "tmp_pack_half_1x16_e"); - factory.emit(assign(e, e_rval)); - - /* uint m = M_RVAL; */ - ir_variable *m = factory.make_temp(glsl_type::uint_type, - "tmp_pack_half_1x16_m"); - factory.emit(assign(m, m_rval)); - - /* Preliminaries - * ------------- - * - * For a float16, the bit layout is: - * - * sign: 15 - * exponent: 10:14 - * mantissa: 0:9 - * - * Let f16 be a float16 value. The sign, exponent, and mantissa - * determine its value thus: - * - * if e16 = 0 and m16 = 0, then zero: (-1)^s16 * 0 (1) - * if e16 = 0 and m16 != 0, then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10) (2) - * if 0 < e16 < 31, then normal: (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3) - * if e16 = 31 and m16 = 0, then infinite: (-1)^s16 * inf (4) - * if e16 = 31 and m16 != 0, then NaN (5) - * - * where 0 <= m16 < 2^10. - * - * For a float32, the bit layout is: - * - * sign: 31 - * exponent: 23:30 - * mantissa: 0:22 - * - * Let f32 be a float32 value. The sign, exponent, and mantissa - * determine its value thus: - * - * if e32 = 0 and m32 = 0, then zero: (-1)^s * 0 (10) - * if e32 = 0 and m32 != 0, then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23) (11) - * if 0 < e32 < 255, then normal: (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12) - * if e32 = 255 and m32 = 0, then infinite: (-1)^s * inf (13) - * if e32 = 255 and m32 != 0, then NaN (14) - * - * where 0 <= m32 < 2^23. - * - * The minimum and maximum normal float16 values are - * - * min_norm16 = 2^(1 - 15) * (1 + 0 / 2^10) = 2^(-14) (20) - * max_norm16 = 2^(30 - 15) * (1 + 1023 / 2^10) (21) - * - * The step at max_norm16 is - * - * max_step16 = 2^5 (22) - * - * Observe that the float16 boundary values in equations 20-21 lie in the - * range of normal float32 values. - * - * - * Rounding Behavior - * ----------------- - * Not all float32 values can be exactly represented as a float16. We - * round all such intermediate float32 values to the nearest float16; if - * the float32 is exactly between two float16 values, we round to the one - * with an even mantissa. This rounding behavior has several benefits: - * - * - It has no sign bias. - * - * - It reproduces the behavior of real hardware: opcode F32TO16 in Intel's - * GPU ISA. - * - * - By reproducing the behavior of the GPU (at least on Intel hardware), - * compile-time evaluation of constant packHalf2x16 GLSL expressions will - * result in the same value as if the expression were executed on the - * GPU. - * - * Calculation - * ----------- - * Our task is to compute s16, e16, m16 given f32. Since this function - * ignores the sign bit, assume that s32 = s16 = 0. There are several - * cases to consider. - */ - - factory.emit( - - /* Case 1) f32 is NaN - * - * The resultant f16 will also be NaN.
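A CPU sketch of the three-way case analysis developed above for one float32 -> float16 component (sign handled separately, NaN first, as in the pass). Illustrative only; std::lrintf stands in for round_to_even.

#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstring>

static uint16_t pack_half_1x16_nosign_ref(float f)
{
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    uint32_t e = bits & 0x7f800000u; /* unshifted float32 exponent */
    uint32_t m = bits & 0x007fffffu; /* unshifted float32 mantissa */

    if (e == (255u << 23) && m != 0)      /* case 1: NaN */
        return 0x7fff;
    if (e < (113u << 23))                 /* case 2: [0, min_norm16) */
        return (uint16_t)std::lrintf(std::fabs(f) * (float)(1 << 24));
    if (e < (143u << 23))                 /* case 3: normal float16 range */
        return (uint16_t)(((e - (112u << 23)) >> 13) +
                          (uint32_t)std::lrintf((float)m / (float)(1 << 13)));
    return 31u << 10;                     /* case 4: infinity */
}

int main()
{
    printf("0x%04x 0x%04x\n",
           pack_half_1x16_nosign_ref(1.0f),      /* 0x3c00 */
           pack_half_1x16_nosign_ref(65536.0f)); /* 0x7c00 */
    return 0;
}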
- */ - - /* if (e32 == 255 && m32 != 0) { */ - if_tree(logic_and(equal(e, constant(0xffu << 23u)), - logic_not(equal(m, constant(0u)))), - - assign(u16, constant(0x7fffu)), - - /* Case 2) f32 lies in the range [0, min_norm16). - * - * The resultant float16 will be either zero, subnormal, or normal. - * - * Solving - * - * f32 = min_norm16 (30) - * - * gives - * - * e32 = 113 and m32 = 0 (31) - * - * Therefore this case occurs if and only if - * - * e32 < 113 (32) - */ - - /* } else if (e32 < 113) { */ - if_tree(less(e, constant(113u << 23u)), - - /* u16 = uint(round_to_even(abs(f32) * float(1u << 24u))); */ - assign(u16, f2u(round_even(mul(expr(ir_unop_abs, f), - constant((float) (1 << 24)))))), - - /* Case 3) f32 lies in the range - * [min_norm16, max_norm16 + max_step16). - * - * The resultant float16 will be either normal or infinite. - * - * Solving - * - * f32 = max_norm16 + max_step16 (40) - * = 2^15 * (1 + 1023 / 2^10) + 2^5 (41) - * = 2^16 (42) - * gives - * - * e32 = 143 and m32 = 0 (43) - * - * We already solved the boundary condition f32 = min_norm16 above - * in equation 31. Therefore this case occurs if and only if - * - * 113 <= e32 and e32 < 143 - */ - - /* } else if (e32 < 143) { */ - if_tree(less(e, constant(143u << 23u)), - - /* The addition below handles the case where the mantissa rounds - * up to 1024 and bumps the exponent. - * - * u16 = ((e - (112u << 23u)) >> 13u) - * + round_to_even((float(m) / (1u << 13u)); - */ - assign(u16, add(rshift(sub(e, constant(112u << 23u)), - constant(13u)), - f2u(round_even( - div(u2f(m), constant((float) (1 << 13))))))), - - /* Case 4) f32 lies in the range [max_norm16 + max_step16, inf]. - * - * The resultant float16 will be infinite. - * - * The cases above caught all float32 values in the range - * [0, max_norm16 + max_step16), so this is the fall-through case. - */ - - /* } else { */ - - assign(u16, constant(31u << 10u)))))); - - /* } */ - - return deref(u16).val; - } - - /** - * \brief Lower a packHalf2x16 expression. - * - * \param vec2_rval is packHalf2x16's input - * \return packHalf2x16's output as a uint rvalue - */ - ir_rvalue* - lower_pack_half_2x16(ir_rvalue *vec2_rval) - { - /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec: - * - * highp uint packHalf2x16 (mediump vec2 v) - * ---------------------------------------- - * Returns an unsigned integer obtained by converting the components of - * a two-component floating-point vector to the 16-bit floating-point - * representation found in the OpenGL ES Specification, and then packing - * these two 16-bit integers into a 32-bit unsigned integer. - * - * The first vector component specifies the 16 least- significant bits - * of the result; the second component specifies the 16 most-significant - * bits. - */ - - assert(vec2_rval->type == glsl_type::vec2_type); - - /* vec2 f = VEC2_RVAL; */ - ir_variable *f = factory.make_temp(glsl_type::vec2_type, - "tmp_pack_half_2x16_f"); - factory.emit(assign(f, vec2_rval)); - - /* uvec2 f32 = bitcast_f2u(f); */ - ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, - "tmp_pack_half_2x16_f32"); - factory.emit(assign(f32, expr(ir_unop_bitcast_f2u, f))); - - /* uvec2 f16; */ - ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, - "tmp_pack_half_2x16_f16"); - - /* Get f32's unshifted exponent bits. 
- * - * uvec2 e = f32 & 0x7f800000u; - */ - ir_variable *e = factory.make_temp(glsl_type::uvec2_type, - "tmp_pack_half_2x16_e"); - factory.emit(assign(e, bit_and(f32, constant(0x7f800000u)))); - - /* Get f32's unshifted mantissa bits. - * - * uvec2 m = f32 & 0x007fffffu; - */ - ir_variable *m = factory.make_temp(glsl_type::uvec2_type, - "tmp_pack_half_2x16_m"); - factory.emit(assign(m, bit_and(f32, constant(0x007fffffu)))); - - /* Set f16's exponent and mantissa bits. - * - * f16.x = pack_half_1x16_nosign(e.x, m.x); - * f16.y = pack_half_1x16_nosign(e.y, m.y); - */ - factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_x(f), - swizzle_x(e), - swizzle_x(m)), - WRITEMASK_X)); - factory.emit(assign(f16, pack_half_1x16_nosign(swizzle_y(f), - swizzle_y(e), - swizzle_y(m)), - WRITEMASK_Y)); - - /* Set f16's sign bits. - * - * f16 |= (f32 & (1u << 31u)) >> 16u; - */ - factory.emit( - assign(f16, bit_or(f16, - rshift(bit_and(f32, constant(1u << 31u)), - constant(16u))))); - - - /* return (f16.y << 16u) | f16.x; */ - ir_rvalue *result = bit_or(lshift(swizzle_y(f16), - constant(16u)), - swizzle_x(f16)); - - assert(result->type == glsl_type::uint_type); - return result; - } - - /** - * \brief Split packHalf2x16's vec2 operand into two floats. - * - * \param vec2_rval is packHalf2x16's input - * \return a uint rvalue - * - * Some code generators, such as the i965 fragment shader, require that all - * vector expressions be lowered to a sequence of scalar expressions. - * However, packHalf2x16 cannot be scalarized by the same mechanism as - * a true vector operation because its input and output have a differing - * number of vector components. - * - * This method scalarizes packHalf2x16 by transforming it from a unary - * operation having vector input to a binary operation having scalar input. - * That is, it transforms - * - * packHalf2x16(VEC2_RVAL); - * - * into - * - * vec2 v = VEC2_RVAL; - * return packHalf2x16_split(v.x, v.y); - */ - ir_rvalue* - split_pack_half_2x16(ir_rvalue *vec2_rval) - { - assert(vec2_rval->type == glsl_type::vec2_type); - - ir_variable *v = factory.make_temp(glsl_type::vec2_type, - "tmp_split_pack_half_2x16_v"); - factory.emit(assign(v, vec2_rval)); - - return expr(ir_binop_pack_half_2x16_split, swizzle_x(v), swizzle_y(v)); - } - - /** - * \brief Lower the component-wise calculation of unpackHalf2x16. - * - * Given a uint that encodes a float16 in its lower 16 bits, this function - * returns a uint that encodes a float32 with the same value. The sign bit - * of the float16 is ignored. - * - * \param e_rval is the unshifted exponent bits of a float16 - * \param m_rval is the unshifted mantissa bits of a float16 - * \return a uint rvalue that encodes a float32 - */ - ir_rvalue* - unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval) - { - assert(e_rval->type == glsl_type::uint_type); - assert(m_rval->type == glsl_type::uint_type); - - /* uint u32; */ - ir_variable *u32 = factory.make_temp(glsl_type::uint_type, - "tmp_unpack_half_1x16_u32"); - - /* uint e = E_RVAL; */ - ir_variable *e = factory.make_temp(glsl_type::uint_type, - "tmp_unpack_half_1x16_e"); - factory.emit(assign(e, e_rval)); - - /* uint m = M_RVAL; */ - ir_variable *m = factory.make_temp(glsl_type::uint_type, - "tmp_unpack_half_1x16_m"); - factory.emit(assign(m, m_rval)); - - /* Preliminaries - * ------------- - * - * For a float16, the bit layout is: - * - * sign: 15 - * exponent: 10:14 - * mantissa: 0:9 - * - * Let f16 be a float16 value.
-
-   /**
-    * \brief Lower the component-wise calculation of unpackHalf2x16.
-    *
-    * Given a uint that encodes a float16 in its lower 16 bits, this function
-    * returns a uint that encodes a float32 with the same value. The sign bit
-    * of the float16 is ignored.
-    *
-    * \param e_rval is the unshifted exponent bits of a float16
-    * \param m_rval is the unshifted mantissa bits of a float16
-    * \return a uint rvalue that encodes a float32
-    */
-   ir_rvalue*
-   unpack_half_1x16_nosign(ir_rvalue *e_rval, ir_rvalue *m_rval)
-   {
-      assert(e_rval->type == glsl_type::uint_type);
-      assert(m_rval->type == glsl_type::uint_type);
-
-      /* uint u32; */
-      ir_variable *u32 = factory.make_temp(glsl_type::uint_type,
-                                           "tmp_unpack_half_1x16_u32");
-
-      /* uint e = E_RVAL; */
-      ir_variable *e = factory.make_temp(glsl_type::uint_type,
-                                         "tmp_unpack_half_1x16_e");
-      factory.emit(assign(e, e_rval));
-
-      /* uint m = M_RVAL; */
-      ir_variable *m = factory.make_temp(glsl_type::uint_type,
-                                         "tmp_unpack_half_1x16_m");
-      factory.emit(assign(m, m_rval));
-
-      /* Preliminaries
-       * -------------
-       *
-       * For a float16, the bit layout is:
-       *
-       *   sign:     15
-       *   exponent: 10:14
-       *   mantissa: 0:9
-       *
-       * Let f16 be a float16 value. The sign, exponent, and mantissa
-       * determine its value thus:
-       *
-       *   if e16 = 0 and m16 = 0,   then zero:      (-1)^s16 * 0                               (1)
-       *   if e16 = 0 and m16 != 0,  then subnormal: (-1)^s16 * 2^(e16 - 14) * (m16 / 2^10)     (2)
-       *   if 0 < e16 < 31,          then normal:    (-1)^s16 * 2^(e16 - 15) * (1 + m16 / 2^10) (3)
-       *   if e16 = 31 and m16 = 0,  then infinite:  (-1)^s16 * inf                             (4)
-       *   if e16 = 31 and m16 != 0, then NaN                                                   (5)
-       *
-       * where 0 <= m16 < 2^10.
-       *
-       * For a float32, the bit layout is:
-       *
-       *   sign:     31
-       *   exponent: 23:30
-       *   mantissa: 0:22
-       *
-       * Let f32 be a float32 value. The sign, exponent, and mantissa
-       * determine its value thus:
-       *
-       *   if e32 = 0 and m32 = 0,    then zero:      (-1)^s * 0                                (10)
-       *   if e32 = 0 and m32 != 0,   then subnormal: (-1)^s * 2^(e32 - 126) * (m32 / 2^23)     (11)
-       *   if 0 < e32 < 255,          then normal:    (-1)^s * 2^(e32 - 127) * (1 + m32 / 2^23) (12)
-       *   if e32 = 255 and m32 = 0,  then infinite:  (-1)^s * inf                              (13)
-       *   if e32 = 255 and m32 != 0, then NaN                                                  (14)
-       *
-       * where 0 <= m32 < 2^23.
-       *
-       * Calculation
-       * -----------
-       * Our task is to compute s32, e32, m32 given f16. Since this function
-       * ignores the sign bit, assume that s32 = s16 = 0. There are several
-       * cases to consider.
-       */
-
-      factory.emit(
-
-         /* Case 1) f16 is zero or subnormal.
-          *
-          * The simplest method of calculating f32 in this case is
-          *
-          *     f32 = f16                                         (20)
-          *         = 2^(-14) * (m16 / 2^10)                      (21)
-          *         = m16 / 2^24                                  (22)
-          */
-
-         /* if (e16 == 0) { */
-         if_tree(equal(e, constant(0u)),
-
-            /* u32 = bitcast_f2u(float(m) / float(1 << 24)); */
-            assign(u32, expr(ir_unop_bitcast_f2u,
-                             div(u2f(m), constant((float)(1 << 24))))),
-
-         /* Case 2) f16 is normal.
-          *
-          * The equation
-          *
-          *     f32 = f16                                         (30)
-          *     2^(e32 - 127) * (1 + m32 / 2^23) =                (31)
-          *        2^(e16 - 15) * (1 + m16 / 2^10)
-          *
-          * can be decomposed into two
-          *
-          *     2^(e32 - 127) = 2^(e16 - 15)                      (32)
-          *     1 + m32 / 2^23 = 1 + m16 / 2^10                   (33)
-          *
-          * which solve to
-          *
-          *     e32 = e16 + 112                                   (34)
-          *     m32 = m16 * 2^13                                  (35)
-          */
-
-         /* } else if (e16 < 31) { */
-         if_tree(less(e, constant(31u << 10u)),
-
-            /* u32 = ((e + (112 << 10)) | m) << 13; */
-            assign(u32, lshift(bit_or(add(e, constant(112u << 10u)), m),
-                               constant(13u))),
-
-         /* Case 3) f16 is infinite. */
-         if_tree(equal(m, constant(0u)),
-
-            assign(u32, constant(255u << 23u)),
-
-         /* Case 4) f16 is NaN. */
-         /* } else { */
-
-            assign(u32, constant(0x7fffffffu))))));
-
-         /* } */
-
-      return deref(u32).val;
-   }
-
-   /**
-    * \brief Lower an unpackHalf2x16 expression.
-    *
-    * \param uint_rval is unpackHalf2x16's input
-    * \return unpackHalf2x16's output as a vec2 rvalue
-    */
-   ir_rvalue*
-   lower_unpack_half_2x16(ir_rvalue *uint_rval)
-   {
-      /* From page 89 (95 of pdf) of the GLSL ES 3.00 spec:
-       *
-       *    mediump vec2 unpackHalf2x16 (highp uint v)
-       *    ------------------------------------------
-       *    Returns a two-component floating-point vector with components
-       *    obtained by unpacking a 32-bit unsigned integer into a pair of 16-bit
-       *    values, interpreting those values as 16-bit floating-point numbers
-       *    according to the OpenGL ES Specification, and converting them to
-       *    32-bit floating-point values.
-       *
-       *    The first component of the vector is obtained from the
-       *    16 least-significant bits of v; the second component is obtained
-       *    from the 16 most-significant bits of v.
- */ - assert(uint_rval->type == glsl_type::uint_type); - - /* uint u = RVALUE; - * uvec2 f16 = uvec2(u.x & 0xffff, u.y >> 16); - */ - ir_variable *f16 = factory.make_temp(glsl_type::uvec2_type, - "tmp_unpack_half_2x16_f16"); - factory.emit(assign(f16, unpack_uint_to_uvec2(uint_rval))); - - /* uvec2 f32; */ - ir_variable *f32 = factory.make_temp(glsl_type::uvec2_type, - "tmp_unpack_half_2x16_f32"); - - /* Get f16's unshifted exponent bits. - * - * uvec2 e = f16 & 0x7c00u; - */ - ir_variable *e = factory.make_temp(glsl_type::uvec2_type, - "tmp_unpack_half_2x16_e"); - factory.emit(assign(e, bit_and(f16, constant(0x7c00u)))); - - /* Get f16's unshifted mantissa bits. - * - * uvec2 m = f16 & 0x03ffu; - */ - ir_variable *m = factory.make_temp(glsl_type::uvec2_type, - "tmp_unpack_half_2x16_m"); - factory.emit(assign(m, bit_and(f16, constant(0x03ffu)))); - - /* Set f32's exponent and mantissa bits. - * - * f32.x = unpack_half_1x16_nosign(e.x, m.x); - * f32.y = unpack_half_1x16_nosign(e.y, m.y); - */ - factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_x(e), - swizzle_x(m)), - WRITEMASK_X)); - factory.emit(assign(f32, unpack_half_1x16_nosign(swizzle_y(e), - swizzle_y(m)), - WRITEMASK_Y)); - - /* Set f32's sign bit. - * - * f32 |= (f16 & 0x8000u) << 16u; - */ - factory.emit(assign(f32, bit_or(f32, - lshift(bit_and(f16, - constant(0x8000u)), - constant(16u))))); - - /* return bitcast_u2f(f32); */ - ir_rvalue *result = expr(ir_unop_bitcast_u2f, f32); - assert(result->type == glsl_type::vec2_type); - return result; - } - - /** - * \brief Split unpackHalf2x16 into two operations. - * - * \param uint_rval is unpackHalf2x16's input - * \return a vec2 rvalue - * - * Some code generators, such as the i965 fragment shader, require that all - * vector expressions be lowered to a sequence of scalar expressions. - * However, unpackHalf2x16 cannot be scalarized by the same method as - * a true vector operation because the number of components of its input - * and output differ. - * - * This method scalarizes unpackHalf2x16 by transforming it from a single - * operation having vec2 output to a pair of operations each having float - * output. That is, it transforms - * - * unpackHalf2x16(UINT_RVAL) - * - * into - * - * uint u = UINT_RVAL; - * vec2 v; - * - * v.x = unpackHalf2x16_split_x(u); - * v.y = unpackHalf2x16_split_y(u); - * - * return v; - */ - ir_rvalue* - split_unpack_half_2x16(ir_rvalue *uint_rval) - { - assert(uint_rval->type == glsl_type::uint_type); - - /* uint u = uint_rval; */ - ir_variable *u = factory.make_temp(glsl_type::uint_type, - "tmp_split_unpack_half_2x16_u"); - factory.emit(assign(u, uint_rval)); - - /* vec2 v; */ - ir_variable *v = factory.make_temp(glsl_type::vec2_type, - "tmp_split_unpack_half_2x16_v"); - - /* v.x = unpack_half_2x16_split_x(u); */ - factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_x, u), - WRITEMASK_X)); - - /* v.y = unpack_half_2x16_split_y(u); */ - factory.emit(assign(v, expr(ir_unop_unpack_half_2x16_split_y, u), - WRITEMASK_Y)); - - return deref(v).val; - } -}; - -} // namespace anonymous - -/** - * \brief Lower the builtin packing functions. - * - * \param op_mask is a bitmask of `enum lower_packing_builtins_op`. 
- */ -bool -lower_packing_builtins(exec_list *instructions, int op_mask) -{ - lower_packing_builtins_visitor v(op_mask); - visit_list_elements(&v, instructions, true); - return v.get_progress(); -} diff --git a/src/glsl/lower_shared_reference.cpp b/src/glsl/lower_shared_reference.cpp deleted file mode 100644 index 533cd9202f4..00000000000 --- a/src/glsl/lower_shared_reference.cpp +++ /dev/null @@ -1,496 +0,0 @@ -/* - * Copyright (c) 2015 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_shared_reference.cpp - * - * IR lower pass to replace dereferences of compute shader shared variables - * with intrinsic function calls. - * - * This relieves drivers of the responsibility of allocating space for the - * shared variables in the shared memory region. 
- */ - -#include "lower_buffer_access.h" -#include "ir_builder.h" -#include "main/macros.h" -#include "util/list.h" -#include "glsl_parser_extras.h" - -using namespace ir_builder; - -namespace { - -struct var_offset { - struct list_head node; - const ir_variable *var; - unsigned offset; -}; - -class lower_shared_reference_visitor : - public lower_buffer_access::lower_buffer_access { -public: - - lower_shared_reference_visitor(struct gl_shader *shader) - : list_ctx(ralloc_context(NULL)), shader(shader), shared_size(0u) - { - list_inithead(&var_offsets); - } - - ~lower_shared_reference_visitor() - { - ralloc_free(list_ctx); - } - - enum { - shared_load_access, - shared_store_access, - shared_atomic_access, - } buffer_access_type; - - void insert_buffer_access(void *mem_ctx, ir_dereference *deref, - const glsl_type *type, ir_rvalue *offset, - unsigned mask, int channel); - - void handle_rvalue(ir_rvalue **rvalue); - ir_visitor_status visit_enter(ir_assignment *ir); - void handle_assignment(ir_assignment *ir); - - ir_call *lower_shared_atomic_intrinsic(ir_call *ir); - ir_call *check_for_shared_atomic_intrinsic(ir_call *ir); - ir_visitor_status visit_enter(ir_call *ir); - - unsigned get_shared_offset(const ir_variable *); - - ir_call *shared_load(void *mem_ctx, const struct glsl_type *type, - ir_rvalue *offset); - ir_call *shared_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, - unsigned write_mask); - - void *list_ctx; - struct gl_shader *shader; - struct list_head var_offsets; - unsigned shared_size; - bool progress; -}; - -unsigned -lower_shared_reference_visitor::get_shared_offset(const ir_variable *var) -{ - list_for_each_entry(var_offset, var_entry, &var_offsets, node) { - if (var_entry->var == var) - return var_entry->offset; - } - - struct var_offset *new_entry = rzalloc(list_ctx, struct var_offset); - list_add(&new_entry->node, &var_offsets); - new_entry->var = var; - - unsigned var_align = var->type->std430_base_alignment(false); - new_entry->offset = glsl_align(shared_size, var_align); - - unsigned var_size = var->type->std430_size(false); - shared_size = new_entry->offset + var_size; - - return new_entry->offset; -} - -void -lower_shared_reference_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - if (!*rvalue) - return; - - ir_dereference *deref = (*rvalue)->as_dereference(); - if (!deref) - return; - - ir_variable *var = deref->variable_referenced(); - if (!var || var->data.mode != ir_var_shader_shared) - return; - - buffer_access_type = shared_load_access; - - void *mem_ctx = ralloc_parent(shader->ir); - - ir_rvalue *offset = NULL; - unsigned const_offset = get_shared_offset(var); - bool row_major; - int matrix_columns; - assert(var->get_interface_type() == NULL); - const unsigned packing = GLSL_INTERFACE_PACKING_STD430; - - setup_buffer_access(mem_ctx, var, deref, - &offset, &const_offset, - &row_major, &matrix_columns, packing); - - /* Now that we've calculated the offset to the start of the - * dereference, walk over the type and emit loads into a temporary. 
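The offset bookkeeping in get_shared_offset() above is plain power-of-two alignment: each shared variable is placed at the running shared_size rounded up to its std430 base alignment. A standalone sketch of that rule (hypothetical helper, with the std430 sizes and alignments filled in by hand):

    #include <cassert>

    /* Round `offset` up to `alignment`, which must be a power of two. */
    static unsigned align_to(unsigned offset, unsigned alignment)
    {
       assert(alignment != 0 && (alignment & (alignment - 1)) == 0);
       return (offset + alignment - 1) & ~(alignment - 1);
    }

    /* e.g. shared float a; shared vec3 b; shared float c;
     *   a: size 4,  base alignment 4   -> offset align_to(0, 4)  = 0,  shared_size = 4
     *   b: size 12, base alignment 16  -> offset align_to(4, 16) = 16, shared_size = 28
     *   c: size 4,  base alignment 4   -> offset align_to(28, 4) = 28, shared_size = 32
     */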
- */ - const glsl_type *type = (*rvalue)->type; - ir_variable *load_var = new(mem_ctx) ir_variable(type, - "shared_load_temp", - ir_var_temporary); - base_ir->insert_before(load_var); - - ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, - "shared_load_temp_offset", - ir_var_temporary); - base_ir->insert_before(load_offset); - base_ir->insert_before(assign(load_offset, offset)); - - deref = new(mem_ctx) ir_dereference_variable(load_var); - - emit_access(mem_ctx, false, deref, load_offset, const_offset, row_major, - matrix_columns, packing, 0); - - *rvalue = deref; - - progress = true; -} - -void -lower_shared_reference_visitor::handle_assignment(ir_assignment *ir) -{ - if (!ir || !ir->lhs) - return; - - ir_rvalue *rvalue = ir->lhs->as_rvalue(); - if (!rvalue) - return; - - ir_dereference *deref = ir->lhs->as_dereference(); - if (!deref) - return; - - ir_variable *var = ir->lhs->variable_referenced(); - if (!var || var->data.mode != ir_var_shader_shared) - return; - - buffer_access_type = shared_store_access; - - /* We have a write to a shared variable, so declare a temporary and rewrite - * the assignment so that the temporary is the LHS. - */ - void *mem_ctx = ralloc_parent(shader->ir); - - const glsl_type *type = rvalue->type; - ir_variable *store_var = new(mem_ctx) ir_variable(type, - "shared_store_temp", - ir_var_temporary); - base_ir->insert_before(store_var); - ir->lhs = new(mem_ctx) ir_dereference_variable(store_var); - - ir_rvalue *offset = NULL; - unsigned const_offset = get_shared_offset(var); - bool row_major; - int matrix_columns; - assert(var->get_interface_type() == NULL); - const unsigned packing = GLSL_INTERFACE_PACKING_STD430; - - setup_buffer_access(mem_ctx, var, deref, - &offset, &const_offset, - &row_major, &matrix_columns, packing); - - deref = new(mem_ctx) ir_dereference_variable(store_var); - - ir_variable *store_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, - "shared_store_temp_offset", - ir_var_temporary); - base_ir->insert_before(store_offset); - base_ir->insert_before(assign(store_offset, offset)); - - /* Now we have to write the value assigned to the temporary back to memory */ - emit_access(mem_ctx, true, deref, store_offset, const_offset, row_major, - matrix_columns, packing, ir->write_mask); - - progress = true; -} - -ir_visitor_status -lower_shared_reference_visitor::visit_enter(ir_assignment *ir) -{ - handle_assignment(ir); - return rvalue_visit(ir); -} - -void -lower_shared_reference_visitor::insert_buffer_access(void *mem_ctx, - ir_dereference *deref, - const glsl_type *type, - ir_rvalue *offset, - unsigned mask, - int channel) -{ - if (buffer_access_type == shared_store_access) { - ir_call *store = shared_store(mem_ctx, deref, offset, mask); - base_ir->insert_after(store); - } else { - ir_call *load = shared_load(mem_ctx, type, offset); - base_ir->insert_before(load); - ir_rvalue *value = load->return_deref->as_rvalue()->clone(mem_ctx, NULL); - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - value)); - } -} - -static bool -compute_shader_enabled(const _mesa_glsl_parse_state *state) -{ - return state->stage == MESA_SHADER_COMPUTE; -} - -ir_call * -lower_shared_reference_visitor::shared_store(void *mem_ctx, - ir_rvalue *deref, - ir_rvalue *offset, - unsigned write_mask) -{ - exec_list sig_params; - - ir_variable *offset_ref = new(mem_ctx) - ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); - sig_params.push_tail(offset_ref); - - ir_variable *val_ref = new(mem_ctx) - ir_variable(deref->type, 
"value" , ir_var_function_in); - sig_params.push_tail(val_ref); - - ir_variable *writemask_ref = new(mem_ctx) - ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); - sig_params.push_tail(writemask_ref); - - ir_function_signature *sig = new(mem_ctx) - ir_function_signature(glsl_type::void_type, compute_shader_enabled); - assert(sig); - sig->replace_parameters(&sig_params); - sig->is_intrinsic = true; - - ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_shared"); - f->add_signature(sig); - - exec_list call_params; - call_params.push_tail(offset->clone(mem_ctx, NULL)); - call_params.push_tail(deref->clone(mem_ctx, NULL)); - call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); - return new(mem_ctx) ir_call(sig, NULL, &call_params); -} - -ir_call * -lower_shared_reference_visitor::shared_load(void *mem_ctx, - const struct glsl_type *type, - ir_rvalue *offset) -{ - exec_list sig_params; - - ir_variable *offset_ref = new(mem_ctx) - ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); - sig_params.push_tail(offset_ref); - - ir_function_signature *sig = - new(mem_ctx) ir_function_signature(type, compute_shader_enabled); - assert(sig); - sig->replace_parameters(&sig_params); - sig->is_intrinsic = true; - - ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_shared"); - f->add_signature(sig); - - ir_variable *result = new(mem_ctx) - ir_variable(type, "shared_load_result", ir_var_temporary); - base_ir->insert_before(result); - ir_dereference_variable *deref_result = new(mem_ctx) - ir_dereference_variable(result); - - exec_list call_params; - call_params.push_tail(offset->clone(mem_ctx, NULL)); - - return new(mem_ctx) ir_call(sig, deref_result, &call_params); -} - -/* Lowers the intrinsic call to a new internal intrinsic that swaps the access - * to the shared variable in the first parameter by an offset. This involves - * creating the new internal intrinsic (i.e. the new function signature). - */ -ir_call * -lower_shared_reference_visitor::lower_shared_atomic_intrinsic(ir_call *ir) -{ - /* Shared atomics usually have 2 parameters, the shared variable and an - * integer argument. The exception is CompSwap, that has an additional - * integer parameter. 
- */ - int param_count = ir->actual_parameters.length(); - assert(param_count == 2 || param_count == 3); - - /* First argument must be a scalar integer shared variable */ - exec_node *param = ir->actual_parameters.get_head(); - ir_instruction *inst = (ir_instruction *) param; - assert(inst->ir_type == ir_type_dereference_variable || - inst->ir_type == ir_type_dereference_array || - inst->ir_type == ir_type_dereference_record || - inst->ir_type == ir_type_swizzle); - - ir_rvalue *deref = (ir_rvalue *) inst; - assert(deref->type->is_scalar() && deref->type->is_integer()); - - ir_variable *var = deref->variable_referenced(); - assert(var); - - /* Compute the offset to the start if the dereference - */ - void *mem_ctx = ralloc_parent(shader->ir); - - ir_rvalue *offset = NULL; - unsigned const_offset = get_shared_offset(var); - bool row_major; - int matrix_columns; - assert(var->get_interface_type() == NULL); - const unsigned packing = GLSL_INTERFACE_PACKING_STD430; - buffer_access_type = shared_atomic_access; - - setup_buffer_access(mem_ctx, var, deref, - &offset, &const_offset, - &row_major, &matrix_columns, packing); - - assert(offset); - assert(!row_major); - assert(matrix_columns == 1); - - ir_rvalue *deref_offset = - add(offset, new(mem_ctx) ir_constant(const_offset)); - - /* Create the new internal function signature that will take an offset - * instead of a shared variable - */ - exec_list sig_params; - ir_variable *sig_param = new(mem_ctx) - ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); - sig_params.push_tail(sig_param); - - const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? - glsl_type::int_type : glsl_type::uint_type; - sig_param = new(mem_ctx) - ir_variable(type, "data1", ir_var_function_in); - sig_params.push_tail(sig_param); - - if (param_count == 3) { - sig_param = new(mem_ctx) - ir_variable(type, "data2", ir_var_function_in); - sig_params.push_tail(sig_param); - } - - ir_function_signature *sig = - new(mem_ctx) ir_function_signature(deref->type, - compute_shader_enabled); - assert(sig); - sig->replace_parameters(&sig_params); - sig->is_intrinsic = true; - - char func_name[64]; - sprintf(func_name, "%s_shared", ir->callee_name()); - ir_function *f = new(mem_ctx) ir_function(func_name); - f->add_signature(sig); - - /* Now, create the call to the internal intrinsic */ - exec_list call_params; - call_params.push_tail(deref_offset); - param = ir->actual_parameters.get_head()->get_next(); - ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); - call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); - if (param_count == 3) { - param = param->get_next(); - param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); - call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); - } - ir_dereference_variable *return_deref = - ir->return_deref->clone(mem_ctx, NULL); - return new(mem_ctx) ir_call(sig, return_deref, &call_params); -} - -ir_call * -lower_shared_reference_visitor::check_for_shared_atomic_intrinsic(ir_call *ir) -{ - exec_list& params = ir->actual_parameters; - - if (params.length() < 2 || params.length() > 3) - return ir; - - ir_rvalue *rvalue = - ((ir_instruction *) params.get_head())->as_rvalue(); - if (!rvalue) - return ir; - - ir_variable *var = rvalue->variable_referenced(); - if (!var || var->data.mode != ir_var_shader_shared) - return ir; - - const char *callee = ir->callee_name(); - if (!strcmp("__intrinsic_atomic_add", callee) || - !strcmp("__intrinsic_atomic_min", callee) || - !strcmp("__intrinsic_atomic_max", 
callee) ||
-       !strcmp("__intrinsic_atomic_and", callee) ||
-       !strcmp("__intrinsic_atomic_or", callee) ||
-       !strcmp("__intrinsic_atomic_xor", callee) ||
-       !strcmp("__intrinsic_atomic_exchange", callee) ||
-       !strcmp("__intrinsic_atomic_comp_swap", callee)) {
-      return lower_shared_atomic_intrinsic(ir);
-   }
-
-   return ir;
-}
-
-ir_visitor_status
-lower_shared_reference_visitor::visit_enter(ir_call *ir)
-{
-   ir_call *new_ir = check_for_shared_atomic_intrinsic(ir);
-   if (new_ir != ir) {
-      progress = true;
-      base_ir->replace_with(new_ir);
-      return visit_continue_with_parent;
-   }
-
-   return rvalue_visit(ir);
-}
-
-} /* unnamed namespace */
-
-void
-lower_shared_reference(struct gl_shader *shader, unsigned *shared_size)
-{
-   if (shader->Stage != MESA_SHADER_COMPUTE)
-      return;
-
-   lower_shared_reference_visitor v(shader);
-
-   /* Loop over the instructions lowering references, because taking a deref
-    * of a shared variable array using a shared variable dereference as the
-    * index will produce a collection of instructions, all of which have
-    * cloned shared variable dereferences for that array index.
-    */
-   do {
-      v.progress = false;
-      visit_list_elements(&v, shader->ir);
-   } while (v.progress);
-
-   *shared_size = v.shared_size;
-}
diff --git a/src/glsl/lower_subroutine.cpp b/src/glsl/lower_subroutine.cpp
deleted file mode 100644
index e80c1be768a..00000000000
--- a/src/glsl/lower_subroutine.cpp
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright © 2015 Red Hat
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file lower_subroutine.cpp
- *
- * Lowers subroutines to an if ladder.
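As a rough C++ analogue (hypothetical, for illustration only), the pass turns an indirect call through a subroutine uniform into a chain of guarded direct calls:

    int impl_a(int x) { return x + 1; }   /* stand-ins for the subroutine */
    int impl_b(int x) { return x * 2; }   /* implementations              */

    int dispatch(int subroutine_index, int x)
    {
       /* The ladder is built from the last subroutine to the first, so the
        * first declared subroutine ends up as the outermost test. */
       if (subroutine_index == 0)
          return impl_a(x);
       else if (subroutine_index == 1)
          return impl_b(x);
       return 0;   /* unreachable for a valid index */
    }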
- */
-
-#include "compiler/glsl_types.h"
-#include "glsl_parser_extras.h"
-#include "ir.h"
-#include "ir_builder.h"
-
-using namespace ir_builder;
-namespace {
-
-class lower_subroutine_visitor : public ir_hierarchical_visitor {
-public:
-   lower_subroutine_visitor(struct _mesa_glsl_parse_state *state)
-      : state(state)
-   {
-      this->progress = false;
-   }
-
-   ir_visitor_status visit_leave(ir_call *);
-   ir_call *call_clone(ir_call *call, ir_function_signature *callee);
-   bool progress;
-   struct _mesa_glsl_parse_state *state;
-};
-
-}
-
-bool
-lower_subroutine(exec_list *instructions, struct _mesa_glsl_parse_state *state)
-{
-   lower_subroutine_visitor v(state);
-   visit_list_elements(&v, instructions);
-   return v.progress;
-}
-
-ir_call *
-lower_subroutine_visitor::call_clone(ir_call *call, ir_function_signature *callee)
-{
-   void *mem_ctx = ralloc_parent(call);
-   ir_dereference_variable *new_return_ref = NULL;
-   if (call->return_deref != NULL)
-      new_return_ref = call->return_deref->clone(mem_ctx, NULL);
-
-   exec_list new_parameters;
-
-   foreach_in_list(ir_instruction, ir, &call->actual_parameters) {
-      new_parameters.push_tail(ir->clone(mem_ctx, NULL));
-   }
-
-   return new(mem_ctx) ir_call(callee, new_return_ref, &new_parameters);
-}
-
-ir_visitor_status
-lower_subroutine_visitor::visit_leave(ir_call *ir)
-{
-   if (!ir->sub_var)
-      return visit_continue;
-
-   void *mem_ctx = ralloc_parent(ir);
-   ir_if *last_branch = NULL;
-
-   for (int s = this->state->num_subroutines - 1; s >= 0; s--) {
-      ir_rvalue *var;
-      ir_constant *lc = new(mem_ctx) ir_constant(s);
-      ir_function *fn = this->state->subroutines[s];
-      bool is_compat = false;
-
-      for (int i = 0; i < fn->num_subroutine_types; i++) {
-         if (ir->sub_var->type->without_array() == fn->subroutine_types[i]) {
-            is_compat = true;
-            break;
-         }
-      }
-      if (is_compat == false)
-         continue;
-
-      if (ir->array_idx != NULL)
-         var = ir->array_idx->clone(mem_ctx, NULL);
-      else
-         var = new(mem_ctx) ir_dereference_variable(ir->sub_var);
-
-      ir_function_signature *sub_sig =
-         fn->exact_matching_signature(this->state,
-                                      &ir->actual_parameters);
-
-      ir_call *new_call = call_clone(ir, sub_sig);
-      if (!last_branch)
-         last_branch = if_tree(equal(subr_to_int(var), lc), new_call);
-      else
-         last_branch = if_tree(equal(subr_to_int(var), lc), new_call, last_branch);
-   }
-   if (last_branch)
-      ir->insert_before(last_branch);
-   ir->remove();
-
-   return visit_continue;
-}
diff --git a/src/glsl/lower_tess_level.cpp b/src/glsl/lower_tess_level.cpp
deleted file mode 100644
index bed2553222f..00000000000
--- a/src/glsl/lower_tess_level.cpp
+++ /dev/null
@@ -1,459 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file lower_tess_level.cpp
- *
- * This pass accounts for the difference between the way gl_TessLevelOuter
- * and gl_TessLevelInner are declared in standard GLSL (as arrays of
- * floats) and the way they are frequently implemented in hardware (as a
- * vec4 and a vec2).
- *
- * The declaration of gl_TessLevel* is replaced with a declaration
- * of gl_TessLevel*MESA, and any references to gl_TessLevel* are
- * translated to refer to gl_TessLevel*MESA with the appropriate
- * swizzling of array indices. For instance:
- *
- *    gl_TessLevelOuter[i]
- *
- * is translated into:
- *
- *    gl_TessLevelOuterMESA[i]
- *
- * Since some hardware may not internally represent gl_TessLevel* as a vec4
- * and a vec2, this lowering pass is optional. To enable it, set the
- * LowerTessLevel flag in gl_shader_compiler_options to true.
- */
-
-#include "glsl_symbol_table.h"
-#include "ir_rvalue_visitor.h"
-#include "ir.h"
-#include "program/prog_instruction.h" /* For WRITEMASK_* */
-
-namespace {
-
-class lower_tess_level_visitor : public ir_rvalue_visitor {
-public:
-   explicit lower_tess_level_visitor(gl_shader_stage shader_stage)
-      : progress(false), old_tess_level_outer_var(NULL),
-        old_tess_level_inner_var(NULL), new_tess_level_outer_var(NULL),
-        new_tess_level_inner_var(NULL), shader_stage(shader_stage)
-   {
-   }
-
-   virtual ir_visitor_status visit(ir_variable *);
-   bool is_tess_level_array(ir_rvalue *ir);
-   ir_rvalue *lower_tess_level_array(ir_rvalue *ir);
-   virtual ir_visitor_status visit_leave(ir_assignment *);
-   void visit_new_assignment(ir_assignment *ir);
-   virtual ir_visitor_status visit_leave(ir_call *);
-
-   virtual void handle_rvalue(ir_rvalue **rvalue);
-
-   void fix_lhs(ir_assignment *);
-
-   bool progress;
-
-   /**
-    * Pointer to the declaration of gl_TessLevel*, if found.
-    */
-   ir_variable *old_tess_level_outer_var;
-   ir_variable *old_tess_level_inner_var;
-
-   /**
-    * Pointer to the newly-created gl_TessLevel*MESA variables.
-    */
-   ir_variable *new_tess_level_outer_var;
-   ir_variable *new_tess_level_inner_var;
-
-   /**
-    * Type of shader we are compiling (e.g. MESA_SHADER_TESS_CTRL)
-    */
-   const gl_shader_stage shader_stage;
-};
-
-} /* anonymous namespace */
-
-/**
- * Replace any declaration of gl_TessLevel* as an array of floats with a
- * declaration of gl_TessLevel*MESA as a vec4 or vec2.
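Concretely, the renaming amounts to the following (illustrative before/after, not actual shader source):

    /* before:  out float gl_TessLevelOuter[4];   out float gl_TessLevelInner[2];
     * after:   out vec4  gl_TessLevelOuterMESA;  out vec2  gl_TessLevelInnerMESA;
     */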
- */ -ir_visitor_status -lower_tess_level_visitor::visit(ir_variable *ir) -{ - if ((!ir->name) || - ((strcmp(ir->name, "gl_TessLevelInner") != 0) && - (strcmp(ir->name, "gl_TessLevelOuter") != 0))) - return visit_continue; - - assert (ir->type->is_array()); - - if (strcmp(ir->name, "gl_TessLevelOuter") == 0) { - if (this->old_tess_level_outer_var) - return visit_continue; - - old_tess_level_outer_var = ir; - assert(ir->type->fields.array == glsl_type::float_type); - - /* Clone the old var so that we inherit all of its properties */ - new_tess_level_outer_var = ir->clone(ralloc_parent(ir), NULL); - - /* And change the properties that we need to change */ - new_tess_level_outer_var->name = ralloc_strdup(new_tess_level_outer_var, - "gl_TessLevelOuterMESA"); - new_tess_level_outer_var->type = glsl_type::vec4_type; - new_tess_level_outer_var->data.max_array_access = 0; - - ir->replace_with(new_tess_level_outer_var); - } else if (strcmp(ir->name, "gl_TessLevelInner") == 0) { - if (this->old_tess_level_inner_var) - return visit_continue; - - old_tess_level_inner_var = ir; - assert(ir->type->fields.array == glsl_type::float_type); - - /* Clone the old var so that we inherit all of its properties */ - new_tess_level_inner_var = ir->clone(ralloc_parent(ir), NULL); - - /* And change the properties that we need to change */ - new_tess_level_inner_var->name = ralloc_strdup(new_tess_level_inner_var, - "gl_TessLevelInnerMESA"); - new_tess_level_inner_var->type = glsl_type::vec2_type; - new_tess_level_inner_var->data.max_array_access = 0; - - ir->replace_with(new_tess_level_inner_var); - } else { - assert(0); - } - - this->progress = true; - - return visit_continue; -} - - -/** - * Determine whether the given rvalue describes an array of floats that - * needs to be lowered to a vec4; that is, determine whether it - * matches one of the following patterns: - * - * - gl_TessLevelOuter - * - gl_TessLevelInner - */ -bool -lower_tess_level_visitor::is_tess_level_array(ir_rvalue *ir) -{ - if (!ir->type->is_array()) - return false; - if (ir->type->fields.array != glsl_type::float_type) - return false; - - if (this->old_tess_level_outer_var) { - if (ir->variable_referenced() == this->old_tess_level_outer_var) - return true; - } - if (this->old_tess_level_inner_var) { - if (ir->variable_referenced() == this->old_tess_level_inner_var) - return true; - } - return false; -} - - -/** - * If the given ir satisfies is_tess_level_array(), return new ir - * representing its lowered equivalent. That is, map: - * - * - gl_TessLevelOuter => gl_TessLevelOuterMESA - * - gl_TessLevelInner => gl_TessLevelInnerMESA - * - * Otherwise return NULL. 
- */ -ir_rvalue * -lower_tess_level_visitor::lower_tess_level_array(ir_rvalue *ir) -{ - if (!ir->type->is_array()) - return NULL; - if (ir->type->fields.array != glsl_type::float_type) - return NULL; - - ir_variable **new_var = NULL; - - if (this->old_tess_level_outer_var) { - if (ir->variable_referenced() == this->old_tess_level_outer_var) - new_var = &this->new_tess_level_outer_var; - } - if (this->old_tess_level_inner_var) { - if (ir->variable_referenced() == this->old_tess_level_inner_var) - new_var = &this->new_tess_level_inner_var; - } - - if (new_var == NULL) - return NULL; - - assert(ir->as_dereference_variable()); - return new(ralloc_parent(ir)) ir_dereference_variable(*new_var); -} - - -void -lower_tess_level_visitor::handle_rvalue(ir_rvalue **rv) -{ - if (*rv == NULL) - return; - - ir_dereference_array *const array_deref = (*rv)->as_dereference_array(); - if (array_deref == NULL) - return; - - /* Replace any expression that indexes one of the floats in gl_TessLevel* - * with an expression that indexes into one of the vec4's - * gl_TessLevel*MESA and accesses the appropriate component. - */ - ir_rvalue *lowered_vec4 = - this->lower_tess_level_array(array_deref->array); - if (lowered_vec4 != NULL) { - this->progress = true; - void *mem_ctx = ralloc_parent(array_deref); - - ir_expression *const expr = - new(mem_ctx) ir_expression(ir_binop_vector_extract, - lowered_vec4, - array_deref->array_index); - - *rv = expr; - } -} - -void -lower_tess_level_visitor::fix_lhs(ir_assignment *ir) -{ - if (ir->lhs->ir_type != ir_type_expression) - return; - void *mem_ctx = ralloc_parent(ir); - ir_expression *const expr = (ir_expression *) ir->lhs; - - /* The expression must be of the form: - * - * (vector_extract gl_TessLevel*MESA, j). - */ - assert(expr->operation == ir_binop_vector_extract); - assert(expr->operands[0]->ir_type == ir_type_dereference_variable); - assert((expr->operands[0]->type == glsl_type::vec4_type) || - (expr->operands[0]->type == glsl_type::vec2_type)); - - ir_dereference *const new_lhs = (ir_dereference *) expr->operands[0]; - - ir_constant *old_index_constant = expr->operands[1]->constant_expression_value(); - if (!old_index_constant) { - ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, - expr->operands[0]->type, - new_lhs->clone(mem_ctx, NULL), - ir->rhs, - expr->operands[1]); - } - ir->set_lhs(new_lhs); - - if (old_index_constant) { - /* gl_TessLevel* is being accessed via a constant index. Don't bother - * creating a vector insert op. Just use a write mask. - */ - ir->write_mask = 1 << old_index_constant->get_int_component(0); - } else { - ir->write_mask = (1 << expr->operands[0]->type->vector_elements) - 1; - } -} - -/** - * Replace any assignment having a gl_TessLevel* (undereferenced) as - * its LHS or RHS with a sequence of assignments, one for each component of - * the array. Each of these assignments is lowered to refer to - * gl_TessLevel*MESA as appropriate. - */ -ir_visitor_status -lower_tess_level_visitor::visit_leave(ir_assignment *ir) -{ - /* First invoke the base class visitor. This causes handle_rvalue() to be - * called on ir->rhs and ir->condition. - */ - ir_rvalue_visitor::visit_leave(ir); - - if (this->is_tess_level_array(ir->lhs) || - this->is_tess_level_array(ir->rhs)) { - /* LHS or RHS of the assignment is the entire gl_TessLevel* array. 
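When the array index is a compile-time constant, fix_lhs() above collapses the vector_extract into a plain write mask instead of building a vector_insert. A minimal sketch of that rule (hypothetical helper, not Mesa API):

    /* Constant index i on the old float[] becomes a one-component write mask
     * on the new vec4/vec2, e.g. gl_TessLevelOuter[2] -> .z, mask 1 << 2. */
    static unsigned writemask_for_constant_index(unsigned i)
    {
       return 1u << i;
    }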
- * Since we are reshaping gl_TessLevel* from an array of floats to a
-       * vec4 or vec2, this isn't going to work as a bulk assignment anymore,
-       * so unroll it to element-by-element assignments and lower each of
-       * them.
-       *
-       * Note: to unroll into element-by-element assignments, we need to make
-       * clones of the LHS and RHS. This is safe because expressions and
-       * l-values are side-effect free.
-       */
-      void *ctx = ralloc_parent(ir);
-      int array_size = ir->lhs->type->array_size();
-      for (int i = 0; i < array_size; ++i) {
-         ir_dereference_array *new_lhs = new(ctx) ir_dereference_array(
-            ir->lhs->clone(ctx, NULL), new(ctx) ir_constant(i));
-         ir_dereference_array *new_rhs = new(ctx) ir_dereference_array(
-            ir->rhs->clone(ctx, NULL), new(ctx) ir_constant(i));
-         this->handle_rvalue((ir_rvalue **) &new_rhs);
-
-         /* Handle the LHS after creating the new assignment. This must
-          * happen in this order because handle_rvalue may replace the old LHS
-          * with an ir_expression of ir_binop_vector_extract. Since this is
-          * not a valid l-value, this will cause an assertion in the
-          * ir_assignment constructor to fail.
-          *
-          * If this occurs, replace the mangled LHS with a dereference of the
-          * vector, and replace the RHS with an ir_triop_vector_insert.
-          */
-         ir_assignment *const assign = new(ctx) ir_assignment(new_lhs, new_rhs);
-         this->handle_rvalue((ir_rvalue **) &assign->lhs);
-         this->fix_lhs(assign);
-
-         this->base_ir->insert_before(assign);
-      }
-      ir->remove();
-
-      return visit_continue;
-   }
-
-   /* Handle the LHS as if it were an r-value. Normally
-    * rvalue_visit(ir_assignment *) only visits the RHS, but we need to lower
-    * expressions in the LHS as well.
-    *
-    * This may cause the LHS to get replaced with an ir_expression of
-    * ir_binop_vector_extract. If this occurs, replace it with a dereference
-    * of the vector, and replace the RHS with an ir_triop_vector_insert.
-    */
-   handle_rvalue((ir_rvalue **)&ir->lhs);
-   this->fix_lhs(ir);
-
-   return rvalue_visit(ir);
-}
-
-
-/**
- * Set up base_ir properly and call visit_leave() on a newly created
- * ir_assignment node. This is used in cases where we have to insert an
- * ir_assignment in a place where we know the hierarchical visitor won't see
- * it.
- */
-void
-lower_tess_level_visitor::visit_new_assignment(ir_assignment *ir)
-{
-   ir_instruction *old_base_ir = this->base_ir;
-   this->base_ir = ir;
-   ir->accept(this);
-   this->base_ir = old_base_ir;
-}
-
-
-/**
- * If a gl_TessLevel* variable appears as an argument in an ir_call
- * expression, replace it with a temporary variable, and make sure the ir_call
- * is preceded and/or followed by assignments that copy the contents of the
- * temporary variable to and/or from gl_TessLevel*. Each of these
- * assignments is then lowered to refer to gl_TessLevel*MESA.
- */
-ir_visitor_status
-lower_tess_level_visitor::visit_leave(ir_call *ir)
-{
-   void *ctx = ralloc_parent(ir);
-
-   const exec_node *formal_param_node = ir->callee->parameters.head;
-   const exec_node *actual_param_node = ir->actual_parameters.head;
-   while (!actual_param_node->is_tail_sentinel()) {
-      ir_variable *formal_param = (ir_variable *) formal_param_node;
-      ir_rvalue *actual_param = (ir_rvalue *) actual_param_node;
-
-      /* Advance formal_param_node and actual_param_node now so that we can
-       * safely replace actual_param with another node, if necessary, below.
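The effect on a call site, sketched as an illustrative before/after (rendered as comments, not actual shader source):

    /* before:  foo(gl_TessLevelOuter);
     * after:   float temp_tess_level[4];
     *          temp_tess_level = gl_TessLevelOuter;   // copy in, then lowered
     *          foo(temp_tess_level);
     *          gl_TessLevelOuter = temp_tess_level;   // only for out/inout
     */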
- */ - formal_param_node = formal_param_node->next; - actual_param_node = actual_param_node->next; - - if (!this->is_tess_level_array(actual_param)) - continue; - - /* User is trying to pass a whole gl_TessLevel* array to a function - * call. Since we are reshaping gl_TessLevel* from an array of floats - * to a vec4, this isn't going to work anymore, so use a temporary - * array instead. - */ - ir_variable *temp = new(ctx) ir_variable( - actual_param->type, "temp_tess_level", ir_var_temporary); - this->base_ir->insert_before(temp); - actual_param->replace_with( - new(ctx) ir_dereference_variable(temp)); - if (formal_param->data.mode == ir_var_function_in - || formal_param->data.mode == ir_var_function_inout) { - /* Copy from gl_TessLevel* to the temporary before the call. - * Since we are going to insert this copy before the current - * instruction, we need to visit it afterwards to make sure it - * gets lowered. - */ - ir_assignment *new_assignment = new(ctx) ir_assignment( - new(ctx) ir_dereference_variable(temp), - actual_param->clone(ctx, NULL)); - this->base_ir->insert_before(new_assignment); - this->visit_new_assignment(new_assignment); - } - if (formal_param->data.mode == ir_var_function_out - || formal_param->data.mode == ir_var_function_inout) { - /* Copy from the temporary to gl_TessLevel* after the call. - * Since visit_list_elements() has already decided which - * instruction it's going to visit next, we need to visit - * afterwards to make sure it gets lowered. - */ - ir_assignment *new_assignment = new(ctx) ir_assignment( - actual_param->clone(ctx, NULL), - new(ctx) ir_dereference_variable(temp)); - this->base_ir->insert_after(new_assignment); - this->visit_new_assignment(new_assignment); - } - } - - return rvalue_visit(ir); -} - - -bool -lower_tess_level(gl_shader *shader) -{ - if ((shader->Stage != MESA_SHADER_TESS_CTRL) && - (shader->Stage != MESA_SHADER_TESS_EVAL)) - return false; - - lower_tess_level_visitor v(shader->Stage); - - visit_list_elements(&v, shader->ir); - - if (v.new_tess_level_outer_var) - shader->symbols->add_variable(v.new_tess_level_outer_var); - if (v.new_tess_level_inner_var) - shader->symbols->add_variable(v.new_tess_level_inner_var); - - return v.progress; -} diff --git a/src/glsl/lower_texture_projection.cpp b/src/glsl/lower_texture_projection.cpp deleted file mode 100644 index 95df106d93f..00000000000 --- a/src/glsl/lower_texture_projection.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_texture_projection.cpp - * - * IR lower pass to perform the division of texture coordinates by the texture - * projector if present. - * - * Many GPUs have a texture sampling opcode that takes the projector - * and does the divide internally, thus the presence of the projector - * in the IR. For GPUs that don't, this saves the driver needing the - * logic for handling the divide. - * - * \author Eric Anholt - */ - -#include "ir.h" - -namespace { - -class lower_texture_projection_visitor : public ir_hierarchical_visitor { -public: - lower_texture_projection_visitor() - { - progress = false; - } - - ir_visitor_status visit_leave(ir_texture *ir); - - bool progress; -}; - -} /* anonymous namespace */ - -ir_visitor_status -lower_texture_projection_visitor::visit_leave(ir_texture *ir) -{ - if (!ir->projector) - return visit_continue; - - void *mem_ctx = ralloc_parent(ir); - - ir_variable *var = new(mem_ctx) ir_variable(ir->projector->type, - "projector", ir_var_temporary); - base_ir->insert_before(var); - ir_dereference *deref = new(mem_ctx) ir_dereference_variable(var); - ir_expression *expr = new(mem_ctx) ir_expression(ir_unop_rcp, - ir->projector->type, - ir->projector, - NULL); - ir_assignment *assign = new(mem_ctx) ir_assignment(deref, expr, NULL); - base_ir->insert_before(assign); - - deref = new(mem_ctx) ir_dereference_variable(var); - ir->coordinate = new(mem_ctx) ir_expression(ir_binop_mul, - ir->coordinate->type, - ir->coordinate, - deref); - - if (ir->shadow_comparitor) { - deref = new(mem_ctx) ir_dereference_variable(var); - ir->shadow_comparitor = new(mem_ctx) ir_expression(ir_binop_mul, - ir->shadow_comparitor->type, - ir->shadow_comparitor, - deref); - } - - ir->projector = NULL; - - progress = true; - return visit_continue; -} - -bool -do_lower_texture_projection(exec_list *instructions) -{ - lower_texture_projection_visitor v; - - visit_list_elements(&v, instructions); - - return v.progress; -} diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp deleted file mode 100644 index a172054bac8..00000000000 --- a/src/glsl/lower_ubo_reference.cpp +++ /dev/null @@ -1,1042 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_ubo_reference.cpp - * - * IR lower pass to replace dereferences of variables in a uniform - * buffer object with usage of ir_binop_ubo_load expressions, each of - * which can read data up to the size of a vec4. - * - * This relieves drivers of the responsibility to deal with tricky UBO - * layout issues like std140 structures and row_major matrices on - * their own. - */ - -#include "lower_buffer_access.h" -#include "ir_builder.h" -#include "main/macros.h" -#include "glsl_parser_extras.h" - -using namespace ir_builder; - -namespace { -class lower_ubo_reference_visitor : - public lower_buffer_access::lower_buffer_access { -public: - lower_ubo_reference_visitor(struct gl_shader *shader) - : shader(shader) - { - } - - void handle_rvalue(ir_rvalue **rvalue); - ir_visitor_status visit_enter(ir_assignment *ir); - - void setup_for_load_or_store(void *mem_ctx, - ir_variable *var, - ir_rvalue *deref, - ir_rvalue **offset, - unsigned *const_offset, - bool *row_major, - int *matrix_columns, - unsigned packing); - ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, - ir_rvalue *offset); - ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, - ir_rvalue *offset); - - bool check_for_buffer_array_copy(ir_assignment *ir); - bool check_for_buffer_struct_copy(ir_assignment *ir); - void check_for_ssbo_store(ir_assignment *ir); - void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, - ir_variable *write_var, unsigned write_mask); - ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, - unsigned write_mask); - - enum { - ubo_load_access, - ssbo_load_access, - ssbo_store_access, - ssbo_unsized_array_length_access, - ssbo_atomic_access, - } buffer_access_type; - - void insert_buffer_access(void *mem_ctx, ir_dereference *deref, - const glsl_type *type, ir_rvalue *offset, - unsigned mask, int channel); - - ir_visitor_status visit_enter(class ir_expression *); - ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr); - void check_ssbo_unsized_array_length_expression(class ir_expression *); - void check_ssbo_unsized_array_length_assignment(ir_assignment *ir); - - ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, - ir_dereference *, - ir_variable *); - ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); - - unsigned calculate_unsized_array_stride(ir_dereference *deref, - unsigned packing); - - ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir); - ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); - ir_visitor_status visit_enter(ir_call *ir); - - struct gl_shader *shader; - struct gl_uniform_buffer_variable *ubo_var; - ir_rvalue *uniform_block; - bool progress; -}; - -/** - * Determine the name of the interface block field - * - * This is the name of the specific member as it would appear in the - * \c gl_uniform_buffer_variable::Name field in the shader's - * \c UniformBlocks array. 
- */ -static const char * -interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, - ir_rvalue **nonconst_block_index) -{ - *nonconst_block_index = NULL; - char *name_copy = NULL; - size_t base_length = 0; - - /* Loop back through the IR until we find the uniform block */ - ir_rvalue *ir = d; - while (ir != NULL) { - switch (ir->ir_type) { - case ir_type_dereference_variable: { - /* Exit loop */ - ir = NULL; - break; - } - - case ir_type_dereference_record: { - ir_dereference_record *r = (ir_dereference_record *) ir; - ir = r->record->as_dereference(); - - /* If we got here it means any previous array subscripts belong to - * block members and not the block itself so skip over them in the - * next pass. - */ - d = ir; - break; - } - - case ir_type_dereference_array: { - ir_dereference_array *a = (ir_dereference_array *) ir; - ir = a->array->as_dereference(); - break; - } - - case ir_type_swizzle: { - ir_swizzle *s = (ir_swizzle *) ir; - ir = s->val->as_dereference(); - /* Skip swizzle in the next pass */ - d = ir; - break; - } - - default: - assert(!"Should not get here."); - break; - } - } - - while (d != NULL) { - switch (d->ir_type) { - case ir_type_dereference_variable: { - ir_dereference_variable *v = (ir_dereference_variable *) d; - if (name_copy != NULL && - v->var->is_interface_instance() && - v->var->type->is_array()) { - return name_copy; - } else { - *nonconst_block_index = NULL; - return base_name; - } - - break; - } - - case ir_type_dereference_array: { - ir_dereference_array *a = (ir_dereference_array *) d; - size_t new_length; - - if (name_copy == NULL) { - name_copy = ralloc_strdup(mem_ctx, base_name); - base_length = strlen(name_copy); - } - - /* For arrays of arrays we start at the innermost array and work our - * way out so we need to insert the subscript at the base of the - * name string rather than just attaching it to the end. 
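For example (a hypothetical walk-through of this rewrite), an interface block instance array declared as b[2][3] and indexed as b[i][j] is resolved in two steps:

    /* step 1 (subscript j):  name "b"    -> "b[0]"
     *                        nonconst_block_index = j
     * step 2 (subscript i):  name "b[0]" -> "b[0][0]"
     *                        nonconst_block_index = j + i * 3
     * where 3 is arrays_of_arrays_size() of the inner array type, so the
     * linearized block index is i * 3 + j.
     */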
- */ - new_length = base_length; - ir_constant *const_index = a->array_index->as_constant(); - char *end = ralloc_strdup(NULL, &name_copy[new_length]); - if (!const_index) { - ir_rvalue *array_index = a->array_index; - if (array_index->type != glsl_type::uint_type) - array_index = i2u(array_index); - - if (a->array->type->is_array() && - a->array->type->fields.array->is_array()) { - ir_constant *base_size = new(mem_ctx) - ir_constant(a->array->type->fields.array->arrays_of_arrays_size()); - array_index = mul(array_index, base_size); - } - - if (*nonconst_block_index) { - *nonconst_block_index = add(*nonconst_block_index, array_index); - } else { - *nonconst_block_index = array_index; - } - - ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s", - end); - } else { - ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s", - const_index->get_uint_component(0), - end); - } - ralloc_free(end); - - d = a->array->as_dereference(); - - break; - } - - default: - assert(!"Should not get here."); - break; - } - } - - assert(!"Should not get here."); - return NULL; -} - -void -lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, - ir_variable *var, - ir_rvalue *deref, - ir_rvalue **offset, - unsigned *const_offset, - bool *row_major, - int *matrix_columns, - unsigned packing) -{ - /* Determine the name of the interface block */ - ir_rvalue *nonconst_block_index; - const char *const field_name = - interface_field_name(mem_ctx, (char *) var->get_interface_type()->name, - deref, &nonconst_block_index); - - /* Locate the block by interface name */ - unsigned num_blocks; - struct gl_uniform_block **blocks; - if (this->buffer_access_type != ubo_load_access) { - num_blocks = shader->NumShaderStorageBlocks; - blocks = shader->ShaderStorageBlocks; - } else { - num_blocks = shader->NumUniformBlocks; - blocks = shader->UniformBlocks; - } - this->uniform_block = NULL; - for (unsigned i = 0; i < num_blocks; i++) { - if (strcmp(field_name, blocks[i]->Name) == 0) { - - ir_constant *index = new(mem_ctx) ir_constant(i); - - if (nonconst_block_index) { - this->uniform_block = add(nonconst_block_index, index); - } else { - this->uniform_block = index; - } - - this->ubo_var = var->is_interface_instance() - ? &blocks[i]->Uniforms[0] : &blocks[i]->Uniforms[var->data.location]; - - break; - } - } - - assert(this->uniform_block); - - *const_offset = ubo_var->Offset; - - setup_buffer_access(mem_ctx, var, deref, offset, const_offset, row_major, - matrix_columns, packing); -} - -void -lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - if (!*rvalue) - return; - - ir_dereference *deref = (*rvalue)->as_dereference(); - if (!deref) - return; - - ir_variable *var = deref->variable_referenced(); - if (!var || !var->is_in_buffer_block()) - return; - - void *mem_ctx = ralloc_parent(shader->ir); - - ir_rvalue *offset = NULL; - unsigned const_offset; - bool row_major; - int matrix_columns; - unsigned packing = var->get_interface_type()->interface_packing; - - this->buffer_access_type = - var->is_in_shader_storage_block() ? - ssbo_load_access : ubo_load_access; - - /* Compute the offset to the start if the dereference as well as other - * information we need to configure the write - */ - setup_for_load_or_store(mem_ctx, var, deref, - &offset, &const_offset, - &row_major, &matrix_columns, - packing); - assert(offset); - - /* Now that we've calculated the offset to the start of the - * dereference, walk over the type and emit loads into a temporary. 
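The generated sequence has roughly this shape (illustrative; emit_access() splits larger types so that each ubo_load result is at most vec4-sized):

    /* ubo_load_temp_offset = <computed byte offset>;
     * ubo_load_temp        = ubo_load(block_index, ubo_load_temp_offset);
     * <original rvalue>    = ubo_load_temp;
     */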
- */ - const glsl_type *type = (*rvalue)->type; - ir_variable *load_var = new(mem_ctx) ir_variable(type, - "ubo_load_temp", - ir_var_temporary); - base_ir->insert_before(load_var); - - ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, - "ubo_load_temp_offset", - ir_var_temporary); - base_ir->insert_before(load_offset); - base_ir->insert_before(assign(load_offset, offset)); - - deref = new(mem_ctx) ir_dereference_variable(load_var); - emit_access(mem_ctx, false, deref, load_offset, const_offset, - row_major, matrix_columns, packing, 0); - *rvalue = deref; - - progress = true; -} - -ir_expression * -lower_ubo_reference_visitor::ubo_load(void *mem_ctx, - const glsl_type *type, - ir_rvalue *offset) -{ - ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); - return new(mem_ctx) - ir_expression(ir_binop_ubo_load, - type, - block_ref, - offset); - -} - -static bool -shader_storage_buffer_object(const _mesa_glsl_parse_state *state) -{ - return state->ARB_shader_storage_buffer_object_enable; -} - -ir_call * -lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, - ir_rvalue *deref, - ir_rvalue *offset, - unsigned write_mask) -{ - exec_list sig_params; - - ir_variable *block_ref = new(mem_ctx) - ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); - sig_params.push_tail(block_ref); - - ir_variable *offset_ref = new(mem_ctx) - ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); - sig_params.push_tail(offset_ref); - - ir_variable *val_ref = new(mem_ctx) - ir_variable(deref->type, "value" , ir_var_function_in); - sig_params.push_tail(val_ref); - - ir_variable *writemask_ref = new(mem_ctx) - ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); - sig_params.push_tail(writemask_ref); - - ir_function_signature *sig = new(mem_ctx) - ir_function_signature(glsl_type::void_type, shader_storage_buffer_object); - assert(sig); - sig->replace_parameters(&sig_params); - sig->is_intrinsic = true; - - ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo"); - f->add_signature(sig); - - exec_list call_params; - call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); - call_params.push_tail(offset->clone(mem_ctx, NULL)); - call_params.push_tail(deref->clone(mem_ctx, NULL)); - call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); - return new(mem_ctx) ir_call(sig, NULL, &call_params); -} - -ir_call * -lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, - const struct glsl_type *type, - ir_rvalue *offset) -{ - exec_list sig_params; - - ir_variable *block_ref = new(mem_ctx) - ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); - sig_params.push_tail(block_ref); - - ir_variable *offset_ref = new(mem_ctx) - ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); - sig_params.push_tail(offset_ref); - - ir_function_signature *sig = - new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object); - assert(sig); - sig->replace_parameters(&sig_params); - sig->is_intrinsic = true; - - ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo"); - f->add_signature(sig); - - ir_variable *result = new(mem_ctx) - ir_variable(type, "ssbo_load_result", ir_var_temporary); - base_ir->insert_before(result); - ir_dereference_variable *deref_result = new(mem_ctx) - ir_dereference_variable(result); - - exec_list call_params; - call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); - call_params.push_tail(offset->clone(mem_ctx, NULL)); - - return new(mem_ctx) ir_call(sig, 
deref_result, &call_params); -} - -void -lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx, - ir_dereference *deref, - const glsl_type *type, - ir_rvalue *offset, - unsigned mask, - int channel) -{ - switch (this->buffer_access_type) { - case ubo_load_access: - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), - ubo_load(mem_ctx, type, offset), - mask)); - break; - case ssbo_load_access: { - ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset); - base_ir->insert_before(load_ssbo); - ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); - ir_assignment *assignment = - assign(deref->clone(mem_ctx, NULL), value, mask); - base_ir->insert_before(assignment); - break; - } - case ssbo_store_access: - if (channel >= 0) { - base_ir->insert_after(ssbo_store(mem_ctx, - swizzle(deref, channel, 1), - offset, 1)); - } else { - base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask)); - } - break; - default: - unreachable("invalid buffer_access_type in insert_buffer_access"); - } -} - -void -lower_ubo_reference_visitor::write_to_memory(void *mem_ctx, - ir_dereference *deref, - ir_variable *var, - ir_variable *write_var, - unsigned write_mask) -{ - ir_rvalue *offset = NULL; - unsigned const_offset; - bool row_major; - int matrix_columns; - unsigned packing = var->get_interface_type()->interface_packing; - - this->buffer_access_type = ssbo_store_access; - - /* Compute the offset to the start if the dereference as well as other - * information we need to configure the write - */ - setup_for_load_or_store(mem_ctx, var, deref, - &offset, &const_offset, - &row_major, &matrix_columns, - packing); - assert(offset); - - /* Now emit writes from the temporary to memory */ - ir_variable *write_offset = - new(mem_ctx) ir_variable(glsl_type::uint_type, - "ssbo_store_temp_offset", - ir_var_temporary); - - base_ir->insert_before(write_offset); - base_ir->insert_before(assign(write_offset, offset)); - - deref = new(mem_ctx) ir_dereference_variable(write_var); - emit_access(mem_ctx, true, deref, write_offset, const_offset, - row_major, matrix_columns, packing, write_mask); -} - -ir_visitor_status -lower_ubo_reference_visitor::visit_enter(ir_expression *ir) -{ - check_ssbo_unsized_array_length_expression(ir); - return rvalue_visit(ir); -} - -ir_expression * -lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr) -{ - if (expr->operation != - ir_expression_operation(ir_unop_ssbo_unsized_array_length)) - return NULL; - - ir_rvalue *rvalue = expr->operands[0]->as_rvalue(); - if (!rvalue || - !rvalue->type->is_array() || !rvalue->type->is_unsized_array()) - return NULL; - - ir_dereference *deref = expr->operands[0]->as_dereference(); - if (!deref) - return NULL; - - ir_variable *var = expr->operands[0]->variable_referenced(); - if (!var || !var->is_in_shader_storage_block()) - return NULL; - return process_ssbo_unsized_array_length(&rvalue, deref, var); -} - -void -lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir) -{ - if (ir->operation == - ir_expression_operation(ir_unop_ssbo_unsized_array_length)) { - /* Don't replace this unop if it is found alone. It is going to be - * removed by the optimization passes or replaced if it is part of - * an ir_assignment or another ir_expression. 
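- *
- * For example (illustrative), in "b = a.length()" the unop is the RHS
- * of an ir_assignment and is rewritten by
- * check_ssbo_unsized_array_length_assignment() instead.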
- */ - return; - } - - for (unsigned i = 0; i < ir->get_num_operands(); i++) { - if (ir->operands[i]->ir_type != ir_type_expression) - continue; - ir_expression *expr = (ir_expression *) ir->operands[i]; - ir_expression *temp = calculate_ssbo_unsized_array_length(expr); - if (!temp) - continue; - - delete expr; - ir->operands[i] = temp; - } -} - -void -lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir) -{ - if (!ir->rhs || ir->rhs->ir_type != ir_type_expression) - return; - - ir_expression *expr = (ir_expression *) ir->rhs; - ir_expression *temp = calculate_ssbo_unsized_array_length(expr); - if (!temp) - return; - - delete expr; - ir->rhs = temp; - return; -} - -ir_expression * -lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx) -{ - ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); - return new(mem_ctx) ir_expression(ir_unop_get_buffer_size, - glsl_type::int_type, - block_ref); -} - -unsigned -lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref, - unsigned packing) -{ - unsigned array_stride = 0; - - switch (deref->ir_type) { - case ir_type_dereference_variable: - { - ir_dereference_variable *deref_var = (ir_dereference_variable *)deref; - const struct glsl_type *unsized_array_type = NULL; - /* An unsized array can be sized by other lowering passes, so pick - * the first field of the array which has the data type of the unsized - * array. - */ - unsized_array_type = deref_var->var->type->fields.array; - - /* Whether or not the field is row-major (because it might be a - * bvec2 or something) does not affect the array itself. We need - * to know whether an array element in its entirety is row-major. - */ - const bool array_row_major = - is_dereferenced_thing_row_major(deref_var); - - if (packing == GLSL_INTERFACE_PACKING_STD430) { - array_stride = unsized_array_type->std430_array_stride(array_row_major); - } else { - array_stride = unsized_array_type->std140_size(array_row_major); - array_stride = glsl_align(array_stride, 16); - } - break; - } - case ir_type_dereference_record: - { - ir_dereference_record *deref_record = (ir_dereference_record *) deref; - ir_dereference *interface_deref = - deref_record->record->as_dereference(); - assert(interface_deref != NULL); - const struct glsl_type *interface_type = interface_deref->type; - unsigned record_length = interface_type->length; - /* Unsized array is always the last element of the interface */ - const struct glsl_type *unsized_array_type = - interface_type->fields.structure[record_length - 1].type->fields.array; - - const bool array_row_major = - is_dereferenced_thing_row_major(deref_record); - - if (packing == GLSL_INTERFACE_PACKING_STD430) { - array_stride = unsized_array_type->std430_array_stride(array_row_major); - } else { - array_stride = unsized_array_type->std140_size(array_row_major); - array_stride = glsl_align(array_stride, 16); - } - break; - } - default: - unreachable("Unsupported dereference type"); - } - return array_stride; -} - -ir_expression * -lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue, - ir_dereference *deref, - ir_variable *var) -{ - void *mem_ctx = ralloc_parent(*rvalue); - - ir_rvalue *base_offset = NULL; - unsigned const_offset; - bool row_major; - int matrix_columns; - unsigned packing = var->get_interface_type()->interface_packing; - int unsized_array_stride = calculate_unsized_array_stride(deref, packing); - - this->buffer_access_type = ssbo_unsized_array_length_access; - 
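-   /* Worked example (illustrative): for a trailing "float data[];"
-    * member, calculate_unsized_array_stride() above yields a stride of 4
-    * under std430, while std140 pads it to glsl_align(4, 16) == 16.
-    */
-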
- /* Compute the offset to the start if the dereference as well as other - * information we need to calculate the length. - */ - setup_for_load_or_store(mem_ctx, var, deref, - &base_offset, &const_offset, - &row_major, &matrix_columns, - packing); - /* array.length() = - * max((buffer_object_size - offset_of_array) / stride_of_array, 0) - */ - ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx); - - ir_expression *offset_of_array = new(mem_ctx) - ir_expression(ir_binop_add, base_offset, - new(mem_ctx) ir_constant(const_offset)); - ir_expression *offset_of_array_int = new(mem_ctx) - ir_expression(ir_unop_u2i, offset_of_array); - - ir_expression *sub = new(mem_ctx) - ir_expression(ir_binop_sub, buffer_size, offset_of_array_int); - ir_expression *div = new(mem_ctx) - ir_expression(ir_binop_div, sub, - new(mem_ctx) ir_constant(unsized_array_stride)); - ir_expression *max = new(mem_ctx) - ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0)); - - return max; -} - -void -lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) -{ - if (!ir || !ir->lhs) - return; - - ir_rvalue *rvalue = ir->lhs->as_rvalue(); - if (!rvalue) - return; - - ir_dereference *deref = ir->lhs->as_dereference(); - if (!deref) - return; - - ir_variable *var = ir->lhs->variable_referenced(); - if (!var || !var->is_in_shader_storage_block()) - return; - - /* We have a write to a buffer variable, so declare a temporary and rewrite - * the assignment so that the temporary is the LHS. - */ - void *mem_ctx = ralloc_parent(shader->ir); - - const glsl_type *type = rvalue->type; - ir_variable *write_var = new(mem_ctx) ir_variable(type, - "ssbo_store_temp", - ir_var_temporary); - base_ir->insert_before(write_var); - ir->lhs = new(mem_ctx) ir_dereference_variable(write_var); - - /* Now we have to write the value assigned to the temporary back to memory */ - write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask); - progress = true; -} - -static bool -is_buffer_backed_variable(ir_variable *var) -{ - return var->is_in_buffer_block() || - var->data.mode == ir_var_shader_shared; -} - -bool -lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir) -{ - if (!ir || !ir->lhs || !ir->rhs) - return false; - - /* LHS and RHS must be arrays - * FIXME: arrays of arrays? - */ - if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array()) - return false; - - /* RHS must be a buffer-backed variable. This is what can cause the problem - * since it would lead to a series of loads that need to live until we - * see the writes to the LHS. 
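- *
- * For example (illustrative): if both sides are vec4[16] arrays and the
- * RHS lives in an SSBO, lowering the copy as a whole would emit 16 loads
- * followed by 16 stores; the per-element split below keeps at most one
- * element live at a time.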
- */ - ir_variable *rhs_var = ir->rhs->variable_referenced(); - if (!rhs_var || !is_buffer_backed_variable(rhs_var)) - return false; - - /* Split the array copy into individual element copies to reduce - * register pressure - */ - ir_dereference *rhs_deref = ir->rhs->as_dereference(); - if (!rhs_deref) - return false; - - ir_dereference *lhs_deref = ir->lhs->as_dereference(); - if (!lhs_deref) - return false; - - assert(lhs_deref->type->length == rhs_deref->type->length); - void *mem_ctx = ralloc_parent(shader->ir); - - for (unsigned i = 0; i < lhs_deref->type->length; i++) { - ir_dereference *lhs_i = - new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL), - new(mem_ctx) ir_constant(i)); - - ir_dereference *rhs_i = - new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL), - new(mem_ctx) ir_constant(i)); - ir->insert_after(assign(lhs_i, rhs_i)); - } - - ir->remove(); - progress = true; - return true; -} - -bool -lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir) -{ - if (!ir || !ir->lhs || !ir->rhs) - return false; - - /* LHS and RHS must be records */ - if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record()) - return false; - - /* RHS must be a buffer-backed variable. This is what can cause the problem - * since it would lead to a series of loads that need to live until we - * see the writes to the LHS. - */ - ir_variable *rhs_var = ir->rhs->variable_referenced(); - if (!rhs_var || !is_buffer_backed_variable(rhs_var)) - return false; - - /* Split the struct copy into individual element copies to reduce - * register pressure - */ - ir_dereference *rhs_deref = ir->rhs->as_dereference(); - if (!rhs_deref) - return false; - - ir_dereference *lhs_deref = ir->lhs->as_dereference(); - if (!lhs_deref) - return false; - - assert(lhs_deref->type->record_compare(rhs_deref->type)); - void *mem_ctx = ralloc_parent(shader->ir); - - for (unsigned i = 0; i < lhs_deref->type->length; i++) { - const char *field_name = lhs_deref->type->fields.structure[i].name; - ir_dereference *lhs_field = - new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL), - field_name); - ir_dereference *rhs_field = - new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL), - field_name); - ir->insert_after(assign(lhs_field, rhs_field)); - } - - ir->remove(); - progress = true; - return true; -} - -ir_visitor_status -lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) -{ - /* Array and struct copies could involve large amounts of load/store - * operations. To improve register pressure we want to special-case - * these and split them into individual element copies. - * This way we avoid emitting all the loads for the RHS first and - * all the writes for the LHS second and register usage is more - * efficient. - */ - if (check_for_buffer_array_copy(ir)) - return visit_continue_with_parent; - - if (check_for_buffer_struct_copy(ir)) - return visit_continue_with_parent; - - check_ssbo_unsized_array_length_assignment(ir); - check_for_ssbo_store(ir); - return rvalue_visit(ir); -} - -/* Lowers the intrinsic call to a new internal intrinsic that swaps the - * access to the buffer variable in the first parameter by an offset - * and block index. This involves creating the new internal intrinsic - * (i.e. the new function signature). - */ -ir_call * -lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) -{ - /* SSBO atomics usually have 2 parameters, the buffer variable and an - * integer argument. 
The exception is CompSwap, which has an additional
- * integer parameter.
- */
-   int param_count = ir->actual_parameters.length();
-   assert(param_count == 2 || param_count == 3);
-
-   /* First argument must be a scalar integer buffer variable */
-   exec_node *param = ir->actual_parameters.get_head();
-   ir_instruction *inst = (ir_instruction *) param;
-   assert(inst->ir_type == ir_type_dereference_variable ||
-          inst->ir_type == ir_type_dereference_array ||
-          inst->ir_type == ir_type_dereference_record ||
-          inst->ir_type == ir_type_swizzle);
-
-   ir_rvalue *deref = (ir_rvalue *) inst;
-   assert(deref->type->is_scalar() && deref->type->is_integer());
-
-   ir_variable *var = deref->variable_referenced();
-   assert(var);
-
-   /* Compute the offset to the start of the dereference and the
-    * block index
-    */
-   void *mem_ctx = ralloc_parent(shader->ir);
-
-   ir_rvalue *offset = NULL;
-   unsigned const_offset;
-   bool row_major;
-   int matrix_columns;
-   unsigned packing = var->get_interface_type()->interface_packing;
-
-   this->buffer_access_type = ssbo_atomic_access;
-
-   setup_for_load_or_store(mem_ctx, var, deref,
-                           &offset, &const_offset,
-                           &row_major, &matrix_columns,
-                           packing);
-   assert(offset);
-   assert(!row_major);
-   assert(matrix_columns == 1);
-
-   ir_rvalue *deref_offset =
-      add(offset, new(mem_ctx) ir_constant(const_offset));
-   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);
-
-   /* Create the new internal function signature that will take a block
-    * index and offset instead of a buffer variable
-    */
-   exec_list sig_params;
-   ir_variable *sig_param = new(mem_ctx)
-      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
-   sig_params.push_tail(sig_param);
-
-   sig_param = new(mem_ctx)
-      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
-   sig_params.push_tail(sig_param);
-
-   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
- glsl_type::int_type : glsl_type::uint_type; - sig_param = new(mem_ctx) - ir_variable(type, "data1", ir_var_function_in); - sig_params.push_tail(sig_param); - - if (param_count == 3) { - sig_param = new(mem_ctx) - ir_variable(type, "data2", ir_var_function_in); - sig_params.push_tail(sig_param); - } - - ir_function_signature *sig = - new(mem_ctx) ir_function_signature(deref->type, - shader_storage_buffer_object); - assert(sig); - sig->replace_parameters(&sig_params); - sig->is_intrinsic = true; - - char func_name[64]; - sprintf(func_name, "%s_ssbo", ir->callee_name()); - ir_function *f = new(mem_ctx) ir_function(func_name); - f->add_signature(sig); - - /* Now, create the call to the internal intrinsic */ - exec_list call_params; - call_params.push_tail(block_index); - call_params.push_tail(deref_offset); - param = ir->actual_parameters.get_head()->get_next(); - ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); - call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); - if (param_count == 3) { - param = param->get_next(); - param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); - call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); - } - ir_dereference_variable *return_deref = - ir->return_deref->clone(mem_ctx, NULL); - return new(mem_ctx) ir_call(sig, return_deref, &call_params); -} - -ir_call * -lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir) -{ - exec_list& params = ir->actual_parameters; - - if (params.length() < 2 || params.length() > 3) - return ir; - - ir_rvalue *rvalue = - ((ir_instruction *) params.get_head())->as_rvalue(); - if (!rvalue) - return ir; - - ir_variable *var = rvalue->variable_referenced(); - if (!var || !var->is_in_shader_storage_block()) - return ir; - - const char *callee = ir->callee_name(); - if (!strcmp("__intrinsic_atomic_add", callee) || - !strcmp("__intrinsic_atomic_min", callee) || - !strcmp("__intrinsic_atomic_max", callee) || - !strcmp("__intrinsic_atomic_and", callee) || - !strcmp("__intrinsic_atomic_or", callee) || - !strcmp("__intrinsic_atomic_xor", callee) || - !strcmp("__intrinsic_atomic_exchange", callee) || - !strcmp("__intrinsic_atomic_comp_swap", callee)) { - return lower_ssbo_atomic_intrinsic(ir); - } - - return ir; -} - - -ir_visitor_status -lower_ubo_reference_visitor::visit_enter(ir_call *ir) -{ - ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir); - if (new_ir != ir) { - progress = true; - base_ir->replace_with(new_ir); - return visit_continue_with_parent; - } - - return rvalue_visit(ir); -} - - -} /* unnamed namespace */ - -void -lower_ubo_reference(struct gl_shader *shader) -{ - lower_ubo_reference_visitor v(shader); - - /* Loop over the instructions lowering references, because we take - * a deref of a UBO array using a UBO dereference as the index will - * produce a collection of instructions all of which have cloned - * UBO dereferences for that array index. 
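- *
- * For example (illustrative), lowering "u.arr[w.idx]" where both "u" and
- * "w" are uniform blocks clones the "w.idx" dereference into the emitted
- * offset calculation; that clone is only lowered on a later iteration.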
- */
-   do {
-      v.progress = false;
-      visit_list_elements(&v, shader->ir);
-   } while (v.progress);
-}
diff --git a/src/glsl/lower_variable_index_to_cond_assign.cpp b/src/glsl/lower_variable_index_to_cond_assign.cpp
deleted file mode 100644
index 278d5450bfb..00000000000
--- a/src/glsl/lower_variable_index_to_cond_assign.cpp
+++ /dev/null
@@ -1,585 +0,0 @@
-/*
- * Copyright © 2010 Luca Barbieri
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file lower_variable_index_to_cond_assign.cpp
- *
- * Turns non-constant indexing into array types into a series of
- * conditional moves of each element into a temporary.
- *
- * Pre-DX10 GPUs often don't have a native way to do this operation,
- * and this works around that.
- *
- * The lowering process proceeds as follows.  Each non-constant index
- * found in an r-value is converted to a canonical form \c array[i].  Each
- * element of the array is conditionally assigned to a temporary by comparing
- * \c i to a constant index.  This is done by cloning the canonical form and
- * replacing all occurrences of \c i with a constant.  Each remaining
- * occurrence of the canonical form in the IR is replaced with a dereference
- * of the temporary variable.
- *
- * L-values with non-constant indices are handled similarly.  In this case,
- * the RHS of the assignment is assigned to a temporary.  The non-constant
- * index is replaced with the canonical form (just like for r-values).  The
- * temporary is conditionally assigned to each element of the canonical form
- * by comparing \c i with each index.  The same clone-and-replace scheme is
- * used.
- */
-
-#include "ir.h"
-#include "ir_rvalue_visitor.h"
-#include "ir_optimization.h"
-#include "compiler/glsl_types.h"
-#include "main/macros.h"
-
-/**
- * Generate a comparison value for a block of indices
- *
- * Lowering passes for non-constant indexing of arrays, matrices, or vectors
- * can use this to generate blocks of index comparison values.
- *
- * \param instructions  List where new instructions will be appended
- * \param index         \c ir_variable containing the desired index
- * \param base          Base value for this block of comparisons
- * \param components    Number of unique index values to compare.  This must
- *                      be in the range [1, 4].
- * \param mem_ctx       ralloc memory context to be used for all allocations.
- *
- * \returns
- * An \c ir_rvalue that \b must be cloned for each use in conditional
- * assignments, etc.
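- *
- * For base == 0 and components == 4 the generated code is roughly
- * (illustrative):
- *
- *    bvec4 dereference_condition = equal(ivec4(index), ivec4(0, 1, 2, 3));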
- */ -ir_rvalue * -compare_index_block(exec_list *instructions, ir_variable *index, - unsigned base, unsigned components, void *mem_ctx) -{ - ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index); - - assert(index->type->is_scalar()); - assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT); - assert(components >= 1 && components <= 4); - - if (components > 1) { - const ir_swizzle_mask m = { 0, 0, 0, 0, components, false }; - broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m); - } - - /* Compare the desired index value with the next block of four indices. - */ - ir_constant_data test_indices_data; - memset(&test_indices_data, 0, sizeof(test_indices_data)); - test_indices_data.i[0] = base; - test_indices_data.i[1] = base + 1; - test_indices_data.i[2] = base + 2; - test_indices_data.i[3] = base + 3; - - ir_constant *const test_indices = - new(mem_ctx) ir_constant(broadcast_index->type, - &test_indices_data); - - ir_rvalue *const condition_val = - new(mem_ctx) ir_expression(ir_binop_equal, - glsl_type::bvec(components), - broadcast_index, - test_indices); - - ir_variable *const condition = - new(mem_ctx) ir_variable(condition_val->type, - "dereference_condition", - ir_var_temporary); - instructions->push_tail(condition); - - ir_rvalue *const cond_deref = - new(mem_ctx) ir_dereference_variable(condition); - instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0)); - - return cond_deref; -} - -static inline bool -is_array_or_matrix(const ir_rvalue *ir) -{ - return (ir->type->is_array() || ir->type->is_matrix()); -} - -namespace { -/** - * Replace a dereference of a variable with a specified r-value - * - * Each time a dereference of the specified value is replaced, the r-value - * tree is cloned. - */ -class deref_replacer : public ir_rvalue_visitor { -public: - deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value) - : variable_to_replace(variable_to_replace), value(value), - progress(false) - { - assert(this->variable_to_replace != NULL); - assert(this->value != NULL); - } - - virtual void handle_rvalue(ir_rvalue **rvalue) - { - ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); - - if ((dv != NULL) && (dv->var == this->variable_to_replace)) { - this->progress = true; - *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); - } - } - - const ir_variable *variable_to_replace; - ir_rvalue *value; - bool progress; -}; - -/** - * Find a variable index dereference of an array in an rvalue tree - */ -class find_variable_index : public ir_hierarchical_visitor { -public: - find_variable_index() - : deref(NULL) - { - /* empty */ - } - - virtual ir_visitor_status visit_enter(ir_dereference_array *ir) - { - if (is_array_or_matrix(ir->array) - && (ir->array_index->as_constant() == NULL)) { - this->deref = ir; - return visit_stop; - } - - return visit_continue; - } - - /** - * First array dereference found in the tree that has a non-constant index. - */ - ir_dereference_array *deref; -}; - -struct assignment_generator -{ - ir_instruction* base_ir; - ir_dereference *rvalue; - ir_variable *old_index; - bool is_write; - unsigned int write_mask; - ir_variable* var; - - assignment_generator() - : base_ir(NULL), - rvalue(NULL), - old_index(NULL), - is_write(false), - write_mask(0), - var(NULL) - { - } - - void generate(unsigned i, ir_rvalue* condition, exec_list *list) const - { - /* Just clone the rest of the deref chain when trying to get at the - * underlying variable. 
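- *
- * For i == 2 this emits roughly (illustrative)
- * "if (condition) var = array[2];" for a read, or
- * "if (condition) array[2] = var;" for a write.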
- */ - void *mem_ctx = ralloc_parent(base_ir); - - /* Clone the old r-value in its entirety. Then replace any occurances of - * the old variable index with the new constant index. - */ - ir_dereference *element = this->rvalue->clone(mem_ctx, NULL); - ir_constant *const index = new(mem_ctx) ir_constant(i); - deref_replacer r(this->old_index, index); - element->accept(&r); - assert(r.progress); - - /* Generate a conditional assignment to (or from) the constant indexed - * array dereference. - */ - ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); - ir_assignment *const assignment = (is_write) - ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask) - : new(mem_ctx) ir_assignment(variable, element, condition); - - list->push_tail(assignment); - } -}; - -struct switch_generator -{ - /* make TFunction a template parameter if you need to use other generators */ - typedef assignment_generator TFunction; - const TFunction& generator; - - ir_variable* index; - unsigned linear_sequence_max_length; - unsigned condition_components; - - void *mem_ctx; - - switch_generator(const TFunction& generator, ir_variable *index, - unsigned linear_sequence_max_length, - unsigned condition_components) - : generator(generator), index(index), - linear_sequence_max_length(linear_sequence_max_length), - condition_components(condition_components) - { - this->mem_ctx = ralloc_parent(index); - } - - void linear_sequence(unsigned begin, unsigned end, exec_list *list) - { - if (begin == end) - return; - - /* If the array access is a read, read the first element of this subregion - * unconditionally. The remaining tests will possibly overwrite this - * value with one of the other array elements. - * - * This optimization cannot be done for writes because it will cause the - * first element of the subregion to be written possibly *in addition* to - * one of the other elements. - */ - unsigned first; - if (!this->generator.is_write) { - this->generator.generate(begin, 0, list); - first = begin + 1; - } else { - first = begin; - } - - for (unsigned i = first; i < end; i += 4) { - const unsigned comps = MIN2(condition_components, end - i); - - ir_rvalue *const cond_deref = - compare_index_block(list, index, i, comps, this->mem_ctx); - - if (comps == 1) { - this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL), - list); - } else { - for (unsigned j = 0; j < comps; j++) { - ir_rvalue *const cond_swiz = - new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL), - j, 0, 0, 0, 1); - - this->generator.generate(i + j, cond_swiz, list); - } - } - } - } - - void bisect(unsigned begin, unsigned end, exec_list *list) - { - unsigned middle = (begin + end) >> 1; - - assert(index->type->is_integer()); - - ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT) - ? 
new(this->mem_ctx) ir_constant((unsigned)middle) - : new(this->mem_ctx) ir_constant((int)middle); - - - ir_dereference_variable *deref = - new(this->mem_ctx) ir_dereference_variable(this->index); - - ir_expression *less = - new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type, - deref, middle_c); - - ir_if *if_less = new(this->mem_ctx) ir_if(less); - - generate(begin, middle, &if_less->then_instructions); - generate(middle, end, &if_less->else_instructions); - - list->push_tail(if_less); - } - - void generate(unsigned begin, unsigned end, exec_list *list) - { - unsigned length = end - begin; - if (length <= this->linear_sequence_max_length) - return linear_sequence(begin, end, list); - else - return bisect(begin, end, list); - } -}; - -/** - * Visitor class for replacing expressions with ir_constant values. - */ - -class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor { -public: - variable_index_to_cond_assign_visitor(gl_shader_stage stage, - bool lower_input, - bool lower_output, - bool lower_temp, - bool lower_uniform) - { - this->progress = false; - this->stage = stage; - this->lower_inputs = lower_input; - this->lower_outputs = lower_output; - this->lower_temps = lower_temp; - this->lower_uniforms = lower_uniform; - } - - bool progress; - - gl_shader_stage stage; - bool lower_inputs; - bool lower_outputs; - bool lower_temps; - bool lower_uniforms; - - bool storage_type_needs_lowering(ir_dereference_array *deref) const - { - /* If a variable isn't eventually the target of this dereference, then - * it must be a constant or some sort of anonymous temporary storage. - * - * FINISHME: Is this correct? Most drivers treat arrays of constants as - * FINISHME: uniforms. It seems like this should do the same. - */ - const ir_variable *const var = deref->array->variable_referenced(); - if (var == NULL) - return this->lower_temps; - - switch (var->data.mode) { - case ir_var_auto: - case ir_var_temporary: - return this->lower_temps; - - case ir_var_uniform: - case ir_var_shader_storage: - return this->lower_uniforms; - - case ir_var_shader_shared: - return false; - - case ir_var_function_in: - case ir_var_const_in: - return this->lower_temps; - - case ir_var_shader_in: - /* The input array size is unknown at compiler time for non-patch - * inputs in TCS and TES. The arrays are sized to - * the implementation-dependent limit "gl_MaxPatchVertices", but - * the real size is stored in the "gl_PatchVerticesIn" built-in - * uniform. - * - * The TCS input array size is specified by - * glPatchParameteri(GL_PATCH_VERTICES). - * - * The TES input array size is specified by the "vertices" output - * layout qualifier in TCS. - */ - if ((stage == MESA_SHADER_TESS_CTRL || - stage == MESA_SHADER_TESS_EVAL) && !var->data.patch) - return false; - return this->lower_inputs; - - case ir_var_function_out: - /* TCS non-patch outputs can only be indexed with "gl_InvocationID". - * Other expressions are not allowed. 
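- *
- * e.g. writing "gl_out[gl_InvocationID].gl_Position" is valid, while
- * indexing "gl_out" with any other expression is not.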
- */ - if (stage == MESA_SHADER_TESS_CTRL && !var->data.patch) - return false; - return this->lower_temps; - - case ir_var_shader_out: - return this->lower_outputs; - - case ir_var_function_inout: - return this->lower_temps; - } - - assert(!"Should not get here."); - return false; - } - - bool needs_lowering(ir_dereference_array *deref) const - { - if (deref == NULL || deref->array_index->as_constant() - || !is_array_or_matrix(deref->array)) - return false; - - return this->storage_type_needs_lowering(deref); - } - - ir_variable *convert_dereference_array(ir_dereference_array *orig_deref, - ir_assignment* orig_assign, - ir_dereference *orig_base) - { - assert(is_array_or_matrix(orig_deref->array)); - - const unsigned length = (orig_deref->array->type->is_array()) - ? orig_deref->array->type->length - : orig_deref->array->type->matrix_columns; - - void *const mem_ctx = ralloc_parent(base_ir); - - /* Temporary storage for either the result of the dereference of - * the array, or the RHS that's being assigned into the - * dereference of the array. - */ - ir_variable *var; - - if (orig_assign) { - var = new(mem_ctx) ir_variable(orig_assign->rhs->type, - "dereference_array_value", - ir_var_temporary); - base_ir->insert_before(var); - - ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var); - ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, - orig_assign->rhs, - NULL); - - base_ir->insert_before(assign); - } else { - var = new(mem_ctx) ir_variable(orig_deref->type, - "dereference_array_value", - ir_var_temporary); - base_ir->insert_before(var); - } - - /* Store the index to a temporary to avoid reusing its tree. */ - ir_variable *index = - new(mem_ctx) ir_variable(orig_deref->array_index->type, - "dereference_array_index", ir_var_temporary); - base_ir->insert_before(index); - - ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index); - ir_assignment *assign = - new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL); - base_ir->insert_before(assign); - - orig_deref->array_index = lhs->clone(mem_ctx, NULL); - - assignment_generator ag; - ag.rvalue = orig_base; - ag.base_ir = base_ir; - ag.old_index = index; - ag.var = var; - if (orig_assign) { - ag.is_write = true; - ag.write_mask = orig_assign->write_mask; - } else { - ag.is_write = false; - } - - switch_generator sg(ag, index, 4, 4); - - /* If the original assignment has a condition, respect that original - * condition! This is acomplished by wrapping the new conditional - * assignments in an if-statement that uses the original condition. - */ - if ((orig_assign != NULL) && (orig_assign->condition != NULL)) { - /* No need to clone the condition because the IR that it hangs on is - * going to be removed from the instruction sequence. 
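- *
- * (The caller removes the original assignment via ir->remove() right
- * after this conversion, so reusing its condition node is safe.)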
- */ - ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition); - - sg.generate(0, length, &if_stmt->then_instructions); - base_ir->insert_before(if_stmt); - } else { - exec_list list; - - sg.generate(0, length, &list); - base_ir->insert_before(&list); - } - - return var; - } - - virtual void handle_rvalue(ir_rvalue **pir) - { - if (this->in_assignee) - return; - - if (!*pir) - return; - - ir_dereference_array* orig_deref = (*pir)->as_dereference_array(); - if (needs_lowering(orig_deref)) { - ir_variable *var = - convert_dereference_array(orig_deref, NULL, orig_deref); - assert(var); - *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var); - this->progress = true; - } - } - - ir_visitor_status - visit_leave(ir_assignment *ir) - { - ir_rvalue_visitor::visit_leave(ir); - - find_variable_index f; - ir->lhs->accept(&f); - - if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) { - convert_dereference_array(f.deref, ir, ir->lhs); - ir->remove(); - this->progress = true; - } - - return visit_continue; - } -}; - -} /* anonymous namespace */ - -bool -lower_variable_index_to_cond_assign(gl_shader_stage stage, - exec_list *instructions, - bool lower_input, - bool lower_output, - bool lower_temp, - bool lower_uniform) -{ - variable_index_to_cond_assign_visitor v(stage, - lower_input, - lower_output, - lower_temp, - lower_uniform); - - /* Continue lowering until no progress is made. If there are multiple - * levels of indirection (e.g., non-constant indexing of array elements and - * matrix columns of an array of matrix), each pass will only lower one - * level of indirection. - */ - bool progress_ever = false; - do { - v.progress = false; - visit_list_elements(&v, instructions); - progress_ever = v.progress || progress_ever; - } while (v.progress); - - return progress_ever; -} diff --git a/src/glsl/lower_vec_index_to_cond_assign.cpp b/src/glsl/lower_vec_index_to_cond_assign.cpp deleted file mode 100644 index 784db085924..00000000000 --- a/src/glsl/lower_vec_index_to_cond_assign.cpp +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_vec_index_to_cond_assign.cpp - * - * Turns indexing into vector types to a series of conditional moves - * of each channel's swizzle into a temporary. - * - * Most GPUs don't have a native way to do this operation, and this - * works around that. 
For drivers using both this pass and - * ir_vec_index_to_swizzle, there's a risk that this pass will happen - * before sufficient constant folding to find that the array index is - * constant. However, we hope that other optimization passes, - * particularly constant folding of assignment conditions and copy - * propagation, will result in the same code in the end. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_optimization.h" -#include "compiler/glsl_types.h" - -namespace { - -/** - * Visitor class for replacing expressions with ir_constant values. - */ - -class ir_vec_index_to_cond_assign_visitor : public ir_hierarchical_visitor { -public: - ir_vec_index_to_cond_assign_visitor() - { - progress = false; - } - - ir_rvalue *convert_vec_index_to_cond_assign(void *mem_ctx, - ir_rvalue *orig_vector, - ir_rvalue *orig_index, - const glsl_type *type); - - ir_rvalue *convert_vector_extract_to_cond_assign(ir_rvalue *ir); - - virtual ir_visitor_status visit_enter(ir_expression *); - virtual ir_visitor_status visit_enter(ir_swizzle *); - virtual ir_visitor_status visit_leave(ir_assignment *); - virtual ir_visitor_status visit_enter(ir_return *); - virtual ir_visitor_status visit_enter(ir_call *); - virtual ir_visitor_status visit_enter(ir_if *); - - bool progress; -}; - -} /* anonymous namespace */ - -ir_rvalue * -ir_vec_index_to_cond_assign_visitor::convert_vec_index_to_cond_assign(void *mem_ctx, - ir_rvalue *orig_vector, - ir_rvalue *orig_index, - const glsl_type *type) -{ - ir_assignment *assign, *value_assign; - ir_variable *index, *var, *value; - ir_dereference *deref, *deref_value; - unsigned i; - - - exec_list list; - - /* Store the index to a temporary to avoid reusing its tree. */ - assert(orig_index->type == glsl_type::int_type || - orig_index->type == glsl_type::uint_type); - index = new(base_ir) ir_variable(orig_index->type, - "vec_index_tmp_i", - ir_var_temporary); - list.push_tail(index); - deref = new(base_ir) ir_dereference_variable(index); - assign = new(base_ir) ir_assignment(deref, orig_index, NULL); - list.push_tail(assign); - - /* Store the value inside a temp, thus avoiding matrixes duplication */ - value = new(base_ir) ir_variable(orig_vector->type, "vec_value_tmp", - ir_var_temporary); - list.push_tail(value); - deref_value = new(base_ir) ir_dereference_variable(value); - value_assign = new(base_ir) ir_assignment(deref_value, orig_vector); - list.push_tail(value_assign); - - /* Temporary where we store whichever value we swizzle out. */ - var = new(base_ir) ir_variable(type, "vec_index_tmp_v", - ir_var_temporary); - list.push_tail(var); - - /* Generate a single comparison condition "mask" for all of the components - * in the vector. - */ - ir_rvalue *const cond_deref = - compare_index_block(&list, index, 0, - orig_vector->type->vector_elements, - mem_ctx); - - /* Generate a conditional move of each vector element to the temp. */ - for (i = 0; i < orig_vector->type->vector_elements; i++) { - ir_rvalue *condition_swizzle = - new(base_ir) ir_swizzle(cond_deref->clone(mem_ctx, NULL), - i, 0, 0, 0, 1); - - /* Just clone the rest of the deref chain when trying to get at the - * underlying variable. - */ - ir_rvalue *swizzle = - new(base_ir) ir_swizzle(deref_value->clone(mem_ctx, NULL), - i, 0, 0, 0, 1); - - deref = new(base_ir) ir_dereference_variable(var); - assign = new(base_ir) ir_assignment(deref, swizzle, condition_swizzle); - list.push_tail(assign); - } - - /* Put all of the new instructions in the IR stream before the old - * instruction. 
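- *
- * The net effect for "x = v[i]" is roughly (illustrative):
- *
- *    int   vec_index_tmp_i = i;
- *    vec4  vec_value_tmp   = v;
- *    float vec_index_tmp_v;
- *    bvec4 c = equal(ivec4(vec_index_tmp_i), ivec4(0, 1, 2, 3));
- *    if (c.x) vec_index_tmp_v = vec_value_tmp.x;
- *    if (c.y) vec_index_tmp_v = vec_value_tmp.y;
- *    if (c.z) vec_index_tmp_v = vec_value_tmp.z;
- *    if (c.w) vec_index_tmp_v = vec_value_tmp.w;
- *    x = vec_index_tmp_v;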
- */ - base_ir->insert_before(&list); - - this->progress = true; - return new(base_ir) ir_dereference_variable(var); -} - -ir_rvalue * -ir_vec_index_to_cond_assign_visitor::convert_vector_extract_to_cond_assign(ir_rvalue *ir) -{ - ir_expression *const expr = ir->as_expression(); - - if (expr == NULL || expr->operation != ir_binop_vector_extract) - return ir; - - return convert_vec_index_to_cond_assign(ralloc_parent(ir), - expr->operands[0], - expr->operands[1], - ir->type); -} - -ir_visitor_status -ir_vec_index_to_cond_assign_visitor::visit_enter(ir_expression *ir) -{ - unsigned int i; - - for (i = 0; i < ir->get_num_operands(); i++) { - ir->operands[i] = convert_vector_extract_to_cond_assign(ir->operands[i]); - } - - return visit_continue; -} - -ir_visitor_status -ir_vec_index_to_cond_assign_visitor::visit_enter(ir_swizzle *ir) -{ - /* Can't be hit from normal GLSL, since you can't swizzle a scalar (which - * the result of indexing a vector is. But maybe at some point we'll end up - * using swizzling of scalars for vector construction. - */ - ir->val = convert_vector_extract_to_cond_assign(ir->val); - - return visit_continue; -} - -ir_visitor_status -ir_vec_index_to_cond_assign_visitor::visit_leave(ir_assignment *ir) -{ - ir->rhs = convert_vector_extract_to_cond_assign(ir->rhs); - - if (ir->condition) { - ir->condition = convert_vector_extract_to_cond_assign(ir->condition); - } - - return visit_continue; -} - -ir_visitor_status -ir_vec_index_to_cond_assign_visitor::visit_enter(ir_call *ir) -{ - foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { - ir_rvalue *new_param = convert_vector_extract_to_cond_assign(param); - - if (new_param != param) { - param->replace_with(new_param); - } - } - - return visit_continue; -} - -ir_visitor_status -ir_vec_index_to_cond_assign_visitor::visit_enter(ir_return *ir) -{ - if (ir->value) { - ir->value = convert_vector_extract_to_cond_assign(ir->value); - } - - return visit_continue; -} - -ir_visitor_status -ir_vec_index_to_cond_assign_visitor::visit_enter(ir_if *ir) -{ - ir->condition = convert_vector_extract_to_cond_assign(ir->condition); - - return visit_continue; -} - -bool -do_vec_index_to_cond_assign(exec_list *instructions) -{ - ir_vec_index_to_cond_assign_visitor v; - - visit_list_elements(&v, instructions); - - return v.progress; -} diff --git a/src/glsl/lower_vec_index_to_swizzle.cpp b/src/glsl/lower_vec_index_to_swizzle.cpp deleted file mode 100644 index 8b18e95509c..00000000000 --- a/src/glsl/lower_vec_index_to_swizzle.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_vec_index_to_swizzle.cpp - * - * Turns constant indexing into vector types to swizzles. This will - * let other swizzle-aware optimization passes catch these constructs, - * and codegen backends not have to worry about this case. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_optimization.h" -#include "compiler/glsl_types.h" -#include "main/macros.h" - -/** - * Visitor class for replacing expressions with ir_constant values. - */ - -namespace { - -class ir_vec_index_to_swizzle_visitor : public ir_hierarchical_visitor { -public: - ir_vec_index_to_swizzle_visitor() - { - progress = false; - } - - ir_rvalue *convert_vector_extract_to_swizzle(ir_rvalue *val); - - virtual ir_visitor_status visit_enter(ir_expression *); - virtual ir_visitor_status visit_enter(ir_swizzle *); - virtual ir_visitor_status visit_enter(ir_assignment *); - virtual ir_visitor_status visit_enter(ir_return *); - virtual ir_visitor_status visit_enter(ir_call *); - virtual ir_visitor_status visit_enter(ir_if *); - - bool progress; -}; - -} /* anonymous namespace */ - -ir_rvalue * -ir_vec_index_to_swizzle_visitor::convert_vector_extract_to_swizzle(ir_rvalue *ir) -{ - ir_expression *const expr = ir->as_expression(); - if (expr == NULL || expr->operation != ir_binop_vector_extract) - return ir; - - ir_constant *const idx = expr->operands[1]->constant_expression_value(); - if (idx == NULL) - return ir; - - void *ctx = ralloc_parent(ir); - this->progress = true; - - /* Page 40 of the GLSL 1.20 spec says: - * - * "When indexing with non-constant expressions, behavior is undefined - * if the index is negative, or greater than or equal to the size of - * the vector." - * - * The quoted spec text mentions non-constant expressions, but this code - * operates on constants. These constants are the result of non-constant - * expressions that have been optimized to constants. The common case here - * is a loop counter from an unrolled loop that is used to index a vector. - * - * The ir_swizzle constructor gets angry if the index is negative or too - * large. For simplicity sake, just clamp the index to [0, size-1]. - */ - const int i = CLAMP(idx->value.i[0], 0, - (int) expr->operands[0]->type->vector_elements - 1); - - return new(ctx) ir_swizzle(expr->operands[0], i, 0, 0, 0, 1); -} - -ir_visitor_status -ir_vec_index_to_swizzle_visitor::visit_enter(ir_expression *ir) -{ - unsigned int i; - - for (i = 0; i < ir->get_num_operands(); i++) { - ir->operands[i] = convert_vector_extract_to_swizzle(ir->operands[i]); - } - - return visit_continue; -} - -ir_visitor_status -ir_vec_index_to_swizzle_visitor::visit_enter(ir_swizzle *ir) -{ - /* Can't be hit from normal GLSL, since you can't swizzle a scalar (which - * the result of indexing a vector is. But maybe at some point we'll end up - * using swizzling of scalars for vector construction. 
- */ - ir->val = convert_vector_extract_to_swizzle(ir->val); - - return visit_continue; -} - -ir_visitor_status -ir_vec_index_to_swizzle_visitor::visit_enter(ir_assignment *ir) -{ - ir->rhs = convert_vector_extract_to_swizzle(ir->rhs); - - return visit_continue; -} - -ir_visitor_status -ir_vec_index_to_swizzle_visitor::visit_enter(ir_call *ir) -{ - foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { - ir_rvalue *new_param = convert_vector_extract_to_swizzle(param); - - if (new_param != param) { - param->replace_with(new_param); - } - } - - return visit_continue; -} - -ir_visitor_status -ir_vec_index_to_swizzle_visitor::visit_enter(ir_return *ir) -{ - if (ir->value) { - ir->value = convert_vector_extract_to_swizzle(ir->value); - } - - return visit_continue; -} - -ir_visitor_status -ir_vec_index_to_swizzle_visitor::visit_enter(ir_if *ir) -{ - ir->condition = convert_vector_extract_to_swizzle(ir->condition); - - return visit_continue; -} - -bool -do_vec_index_to_swizzle(exec_list *instructions) -{ - ir_vec_index_to_swizzle_visitor v; - - v.run(instructions); - - return v.progress; -} diff --git a/src/glsl/lower_vector.cpp b/src/glsl/lower_vector.cpp deleted file mode 100644 index a658410ae6f..00000000000 --- a/src/glsl/lower_vector.cpp +++ /dev/null @@ -1,228 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_vector.cpp - * IR lowering pass to remove some types of ir_quadop_vector - * - * \author Ian Romanick - */ - -#include "ir.h" -#include "ir_rvalue_visitor.h" - -namespace { - -class lower_vector_visitor : public ir_rvalue_visitor { -public: - lower_vector_visitor() : dont_lower_swz(false), progress(false) - { - /* empty */ - } - - void handle_rvalue(ir_rvalue **rvalue); - - /** - * Should SWZ-like expressions be lowered? - */ - bool dont_lower_swz; - - bool progress; -}; - -} /* anonymous namespace */ - -/** - * Determine if an IR expression tree looks like an extended swizzle - * - * Extended swizzles consist of access of a single vector source (with possible - * per component negation) and the constants -1, 0, or 1. - */ -bool -is_extended_swizzle(ir_expression *ir) -{ - /* Track any variables that are accessed by this expression. 
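- *
- * e.g. in vec4(v.x, -v.y, 0.0, 1.0) every operand must resolve to the
- * single source variable "v" or to one of the allowed constants.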
- */
-   ir_variable *var = NULL;
-
-   assert(ir->operation == ir_quadop_vector);
-
-   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
-      ir_rvalue *op = ir->operands[i];
-
-      while (op != NULL) {
-         switch (op->ir_type) {
-         case ir_type_constant: {
-            const ir_constant *const c = op->as_constant();
-
-            if (!c->is_one() && !c->is_zero() && !c->is_negative_one())
-               return false;
-
-            op = NULL;
-            break;
-         }
-
-         case ir_type_dereference_variable: {
-            ir_dereference_variable *const d = (ir_dereference_variable *) op;
-
-            if ((var != NULL) && (var != d->var))
-               return false;
-
-            var = d->var;
-            op = NULL;
-            break;
-         }
-
-         case ir_type_expression: {
-            ir_expression *const ex = (ir_expression *) op;
-
-            if (ex->operation != ir_unop_neg)
-               return false;
-
-            op = ex->operands[0];
-            break;
-         }
-
-         case ir_type_swizzle:
-            op = ((ir_swizzle *) op)->val;
-            break;
-
-         default:
-            return false;
-         }
-      }
-   }
-
-   return true;
-}
-
-void
-lower_vector_visitor::handle_rvalue(ir_rvalue **rvalue)
-{
-   if (!*rvalue)
-      return;
-
-   ir_expression *expr = (*rvalue)->as_expression();
-   if ((expr == NULL) || (expr->operation != ir_quadop_vector))
-      return;
-
-   if (this->dont_lower_swz && is_extended_swizzle(expr))
-      return;
-
-   /* FINISHME: Is this the right thing to use for the ralloc context?
-    */
-   void *const mem_ctx = expr;
-
-   assert(expr->type->vector_elements == expr->get_num_operands());
-
-   /* Generate a temporary with the same type as the ir_quadop_vector
-    * operation.
-    */
-   ir_variable *const temp =
-      new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary);
-
-   this->base_ir->insert_before(temp);
-
-   /* Counter of the number of components collected so far.
-    */
-   unsigned assigned;
-
-   /* Write-mask in the destination that receives the components counted by
-    * 'assigned'.
-    */
-   unsigned write_mask;
-
-   /* Generate up to four assignments to that variable.  Try to group
-    * component assignments together:
-    *
-    * - All constant components can be assigned at once.
-    * - All assignments of components from a single variable with the same
-    *   unary operator can be assigned at once.
-    */
-   ir_constant_data d = { { 0 } };
-
-   assigned = 0;
-   write_mask = 0;
-   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
-      const ir_constant *const c = expr->operands[i]->as_constant();
-
-      if (c == NULL)
-         continue;
-
-      switch (expr->type->base_type) {
-      case GLSL_TYPE_UINT:  d.u[assigned] = c->value.u[0]; break;
-      case GLSL_TYPE_INT:   d.i[assigned] = c->value.i[0]; break;
-      case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break;
-      case GLSL_TYPE_BOOL:  d.b[assigned] = c->value.b[0]; break;
-      default:              assert(!"Should not get here."); break;
-      }
-
-      write_mask |= (1U << i);
-      assigned++;
-   }
-
-   assert((write_mask == 0) == (assigned == 0));
-
-   /* If there were constant values, generate an assignment.
-    */
-   if (assigned > 0) {
-      ir_constant *const c =
-         new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type,
-                                                          assigned, 1),
-                                  &d);
-      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
-      ir_assignment *const assign =
-         new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask);
-
-      this->base_ir->insert_before(assign);
-   }
-
-   /* FINISHME: This should try to coalesce assignments.
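- * For instance, several single-component copies from the same source
- * variable could be merged into one assignment with a combined
- * write mask.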
- */ - for (unsigned i = 0; i < expr->type->vector_elements; i++) { - if (expr->operands[i]->ir_type == ir_type_constant) - continue; - - ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp); - ir_assignment *const assign = - new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i)); - - this->base_ir->insert_before(assign); - assigned++; - } - - assert(assigned == expr->type->vector_elements); - - *rvalue = new(mem_ctx) ir_dereference_variable(temp); - this->progress = true; -} - -bool -lower_quadop_vector(exec_list *instructions, bool dont_lower_swz) -{ - lower_vector_visitor v; - - v.dont_lower_swz = dont_lower_swz; - visit_list_elements(&v, instructions); - - return v.progress; -} diff --git a/src/glsl/lower_vector_derefs.cpp b/src/glsl/lower_vector_derefs.cpp deleted file mode 100644 index 4a5d6f0da4c..00000000000 --- a/src/glsl/lower_vector_derefs.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ -#include "ir.h" -#include "ir_builder.h" -#include "ir_rvalue_visitor.h" -#include "ir_optimization.h" - -using namespace ir_builder; - -namespace { - -class vector_deref_visitor : public ir_rvalue_enter_visitor { -public: - vector_deref_visitor() - : progress(false) - { - } - - virtual ~vector_deref_visitor() - { - } - - virtual void handle_rvalue(ir_rvalue **rv); - virtual ir_visitor_status visit_enter(ir_assignment *ir); - - bool progress; -}; - -} /* anonymous namespace */ - -ir_visitor_status -vector_deref_visitor::visit_enter(ir_assignment *ir) -{ - if (!ir->lhs || ir->lhs->ir_type != ir_type_dereference_array) - return ir_rvalue_enter_visitor::visit_enter(ir); - - ir_dereference_array *const deref = (ir_dereference_array *) ir->lhs; - if (!deref->array->type->is_vector()) - return ir_rvalue_enter_visitor::visit_enter(ir); - - ir_dereference *const new_lhs = (ir_dereference *) deref->array; - ir->set_lhs(new_lhs); - - ir_constant *old_index_constant = deref->array_index->constant_expression_value(); - void *mem_ctx = ralloc_parent(ir); - if (!old_index_constant) { - ir->rhs = new(mem_ctx) ir_expression(ir_triop_vector_insert, - new_lhs->type, - new_lhs->clone(mem_ctx, NULL), - ir->rhs, - deref->array_index); - ir->write_mask = (1 << new_lhs->type->vector_elements) - 1; - } else { - ir->write_mask = 1 << old_index_constant->get_int_component(0); - } - - return ir_rvalue_enter_visitor::visit_enter(ir); -} - -void -vector_deref_visitor::handle_rvalue(ir_rvalue **rv) -{ - if (*rv == NULL || (*rv)->ir_type != ir_type_dereference_array) - return; - - ir_dereference_array *const deref = (ir_dereference_array *) *rv; - if (!deref->array->type->is_vector()) - return; - - void *mem_ctx = ralloc_parent(deref); - *rv = new(mem_ctx) ir_expression(ir_binop_vector_extract, - deref->array, - deref->array_index); -} - -bool -lower_vector_derefs(gl_shader *shader) -{ - vector_deref_visitor v; - - visit_list_elements(&v, shader->ir); - - return v.progress; -} diff --git a/src/glsl/lower_vector_insert.cpp b/src/glsl/lower_vector_insert.cpp deleted file mode 100644 index 26d31b03c12..00000000000 --- a/src/glsl/lower_vector_insert.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ -#include "ir.h" -#include "ir_builder.h" -#include "ir_rvalue_visitor.h" -#include "ir_optimization.h" - -using namespace ir_builder; - -namespace { - -class vector_insert_visitor : public ir_rvalue_visitor { -public: - vector_insert_visitor(bool lower_nonconstant_index) - : progress(false), lower_nonconstant_index(lower_nonconstant_index) - { - factory.instructions = &factory_instructions; - } - - virtual ~vector_insert_visitor() - { - assert(factory_instructions.is_empty()); - } - - virtual void handle_rvalue(ir_rvalue **rv); - - ir_factory factory; - exec_list factory_instructions; - bool progress; - bool lower_nonconstant_index; -}; - -} /* anonymous namespace */ - -void -vector_insert_visitor::handle_rvalue(ir_rvalue **rv) -{ - if (*rv == NULL || (*rv)->ir_type != ir_type_expression) - return; - - ir_expression *const expr = (ir_expression *) *rv; - - if (likely(expr->operation != ir_triop_vector_insert)) - return; - - factory.mem_ctx = ralloc_parent(expr); - - ir_constant *const idx = expr->operands[2]->constant_expression_value(); - if (idx != NULL) { - /* Replace (vector_insert (vec) (scalar) (index)) with a dereference of - * a new temporary. The new temporary gets assigned as - * - * t = vec - * t.mask = scalar - * - * where mask is the component selected by index. - */ - ir_variable *const temp = - factory.make_temp(expr->operands[0]->type, "vec_tmp"); - - const int mask = 1 << idx->value.i[0]; - - factory.emit(assign(temp, expr->operands[0])); - factory.emit(assign(temp, expr->operands[1], mask)); - - this->progress = true; - *rv = new(factory.mem_ctx) ir_dereference_variable(temp); - } else if (this->lower_nonconstant_index) { - /* Replace (vector_insert (vec) (scalar) (index)) with a dereference of - * a new temporary. The new temporary gets assigned as - * - * t = vec - * if (index == 0) - * t.x = scalar - * if (index == 1) - * t.y = scalar - * if (index == 2) - * t.z = scalar - * if (index == 3) - * t.w = scalar - */ - ir_variable *const temp = - factory.make_temp(expr->operands[0]->type, "vec_tmp"); - - ir_variable *const src_temp = - factory.make_temp(expr->operands[1]->type, "src_temp"); - - factory.emit(assign(temp, expr->operands[0])); - factory.emit(assign(src_temp, expr->operands[1])); - - assert(expr->operands[2]->type == glsl_type::int_type || - expr->operands[2]->type == glsl_type::uint_type); - - for (unsigned i = 0; i < expr->type->vector_elements; i++) { - ir_constant *const cmp_index = - ir_constant::zero(factory.mem_ctx, expr->operands[2]->type); - cmp_index->value.u[0] = i; - - ir_variable *const cmp_result = - factory.make_temp(glsl_type::bool_type, "index_condition"); - - factory.emit(assign(cmp_result, - equal(expr->operands[2]->clone(factory.mem_ctx, - NULL), - cmp_index))); - - factory.emit(if_tree(cmp_result, - assign(temp, src_temp, WRITEMASK_X << i))); - } - - this->progress = true; - *rv = new(factory.mem_ctx) ir_dereference_variable(temp); - } - - base_ir->insert_before(factory.instructions); -} - -bool -lower_vector_insert(exec_list *instructions, bool lower_nonconstant_index) -{ - vector_insert_visitor v(lower_nonconstant_index); - - visit_list_elements(&v, instructions); - - return v.progress; -} diff --git a/src/glsl/lower_vertex_id.cpp b/src/glsl/lower_vertex_id.cpp deleted file mode 100644 index 3da7a2f1b3b..00000000000 --- a/src/glsl/lower_vertex_id.cpp +++ /dev/null @@ -1,144 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and 
associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file lower_vertex_id.cpp - * - * There exists hardware, such as i965, that does not implement the OpenGL - * semantic for gl_VertexID. Instead, that hardware does not include the - * value of basevertex in the gl_VertexID value. To implement the OpenGL - * semantic, we'll have to convert gl_Vertex_ID to - * gl_VertexIDMESA+gl_BaseVertexMESA. - */ - -#include "glsl_symbol_table.h" -#include "ir_hierarchical_visitor.h" -#include "ir.h" -#include "ir_builder.h" -#include "linker.h" -#include "program/prog_statevars.h" - -namespace { - -class lower_vertex_id_visitor : public ir_hierarchical_visitor { -public: - explicit lower_vertex_id_visitor(ir_function_signature *main_sig, - exec_list *ir_list) - : progress(false), VertexID(NULL), gl_VertexID(NULL), - gl_BaseVertex(NULL), main_sig(main_sig), ir_list(ir_list) - { - foreach_in_list(ir_instruction, ir, ir_list) { - ir_variable *const var = ir->as_variable(); - - if (var != NULL && var->data.mode == ir_var_system_value && - var->data.location == SYSTEM_VALUE_BASE_VERTEX) { - gl_BaseVertex = var; - break; - } - } - } - - virtual ir_visitor_status visit(ir_dereference_variable *); - - bool progress; - -private: - ir_variable *VertexID; - ir_variable *gl_VertexID; - ir_variable *gl_BaseVertex; - - ir_function_signature *main_sig; - exec_list *ir_list; -}; - -} /* anonymous namespace */ - -ir_visitor_status -lower_vertex_id_visitor::visit(ir_dereference_variable *ir) -{ - if (ir->var->data.mode != ir_var_system_value || - ir->var->data.location != SYSTEM_VALUE_VERTEX_ID) - return visit_continue; - - if (VertexID == NULL) { - const glsl_type *const int_t = glsl_type::int_type; - void *const mem_ctx = ralloc_parent(ir); - - VertexID = new(mem_ctx) ir_variable(int_t, "__VertexID", - ir_var_temporary); - ir_list->push_head(VertexID); - - gl_VertexID = new(mem_ctx) ir_variable(int_t, "gl_VertexIDMESA", - ir_var_system_value); - gl_VertexID->data.how_declared = ir_var_declared_implicitly; - gl_VertexID->data.read_only = true; - gl_VertexID->data.location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; - gl_VertexID->data.explicit_location = true; - gl_VertexID->data.explicit_index = 0; - ir_list->push_head(gl_VertexID); - - if (gl_BaseVertex == NULL) { - gl_BaseVertex = new(mem_ctx) ir_variable(int_t, "gl_BaseVertex", - ir_var_system_value); - gl_BaseVertex->data.how_declared = ir_var_declared_implicitly; - gl_BaseVertex->data.read_only = true; - gl_BaseVertex->data.location = SYSTEM_VALUE_BASE_VERTEX; - gl_BaseVertex->data.explicit_location = true; - 
gl_BaseVertex->data.explicit_index = 0;
-         ir_list->push_head(gl_BaseVertex);
-      }
-
-      ir_instruction *const inst =
-         ir_builder::assign(VertexID,
-                            ir_builder::add(gl_VertexID, gl_BaseVertex));
-
-      main_sig->body.push_head(inst);
-   }
-
-   ir->var = VertexID;
-   progress = true;
-
-   return visit_continue;
-}
-
-bool
-lower_vertex_id(gl_shader *shader)
-{
-   /* gl_VertexID only exists in the vertex shader.
-    */
-   if (shader->Stage != MESA_SHADER_VERTEX)
-      return false;
-
-   ir_function_signature *const main_sig =
-      _mesa_get_main_function_signature(shader);
-   if (main_sig == NULL) {
-      assert(main_sig != NULL);
-      return false;
-   }
-
-   lower_vertex_id_visitor v(main_sig, shader->ir);
-
-   v.run(shader->ir);
-
-   return v.progress;
-}
diff --git a/src/glsl/main.cpp b/src/glsl/main.cpp
deleted file mode 100644
index df93a013ede..00000000000
--- a/src/glsl/main.cpp
+++ /dev/null
@@ -1,431 +0,0 @@
-/*
- * Copyright © 2008, 2009 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include <getopt.h>
-
-/** @file main.cpp
- *
- * This file is the main() routine and scaffolding for producing
- * builtin_compiler (which doesn't include builtins itself and is used
- * to generate the profile information for builtin_function.cpp), and
- * for glsl_compiler (which does include builtins and can be used to
- * offline compile GLSL code and examine the resulting GLSL IR).
- */
-
-#include "ast.h"
-#include "glsl_parser_extras.h"
-#include "ir_optimization.h"
-#include "program.h"
-#include "program/hash_table.h"
-#include "loop_analysis.h"
-#include "standalone_scaffolding.h"
-
-static int glsl_version = 330;
-
-static void
-initialize_context(struct gl_context *ctx, gl_api api)
-{
-   initialize_context_to_defaults(ctx, api);
-
-   /* The standalone compiler needs to claim support for almost
-    * everything in order to compile the built-in functions.
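In shader terms, the gl_VertexID lowering deleted above boils down to a single add inserted at the head of main(); every later read of gl_VertexID is redirected to the temporary. A plain-C++ sketch of that data flow (the values are hypothetical stand-ins for the hardware-provided system values):

    #include <cstdio>

    // Stand-ins for the two system values the hardware really supplies.
    static const int gl_VertexIDMESA = 7;   // zero-based vertex index
    static const int gl_BaseVertexMESA = 5; // basevertex of the draw call

    int main() {
       // Inserted once at the top of main() by the pass:
       const int VertexID = gl_VertexIDMESA + gl_BaseVertexMESA;

       // Every former read of gl_VertexID now reads the temporary:
       std::printf("gl_VertexID = %d\n", VertexID); // prints 12
       return 0;
    }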
- */ - ctx->Const.GLSLVersion = glsl_version; - ctx->Extensions.ARB_ES3_compatibility = true; - ctx->Const.MaxComputeWorkGroupCount[0] = 65535; - ctx->Const.MaxComputeWorkGroupCount[1] = 65535; - ctx->Const.MaxComputeWorkGroupCount[2] = 65535; - ctx->Const.MaxComputeWorkGroupSize[0] = 1024; - ctx->Const.MaxComputeWorkGroupSize[1] = 1024; - ctx->Const.MaxComputeWorkGroupSize[2] = 64; - ctx->Const.MaxComputeWorkGroupInvocations = 1024; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */ - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */ - - switch (ctx->Const.GLSLVersion) { - case 100: - ctx->Const.MaxClipPlanes = 0; - ctx->Const.MaxCombinedTextureImageUnits = 8; - ctx->Const.MaxDrawBuffers = 2; - ctx->Const.MinProgramTexelOffset = 0; - ctx->Const.MaxProgramTexelOffset = 0; - ctx->Const.MaxLights = 0; - ctx->Const.MaxTextureCoordUnits = 0; - ctx->Const.MaxTextureUnits = 8; - - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 8; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 128 * 4; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; - - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = - ctx->Const.MaxCombinedTextureImageUnits; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 16 * 4; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ - - ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4; - break; - case 110: - case 120: - ctx->Const.MaxClipPlanes = 6; - ctx->Const.MaxCombinedTextureImageUnits = 2; - ctx->Const.MaxDrawBuffers = 1; - ctx->Const.MinProgramTexelOffset = 0; - ctx->Const.MaxProgramTexelOffset = 0; - ctx->Const.MaxLights = 8; - ctx->Const.MaxTextureCoordUnits = 2; - ctx->Const.MaxTextureUnits = 2; - - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; - - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = - ctx->Const.MaxCombinedTextureImageUnits; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ - - ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4; - break; - case 130: - case 140: - ctx->Const.MaxClipPlanes = 8; - ctx->Const.MaxCombinedTextureImageUnits = 16; - ctx->Const.MaxDrawBuffers = 8; - ctx->Const.MinProgramTexelOffset = -8; - ctx->Const.MaxProgramTexelOffset = 7; - ctx->Const.MaxLights = 8; - ctx->Const.MaxTextureCoordUnits = 8; - ctx->Const.MaxTextureUnits = 2; - - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; - 
ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64; - - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ - - ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents / 4; - break; - case 150: - case 330: - ctx->Const.MaxClipPlanes = 8; - ctx->Const.MaxDrawBuffers = 8; - ctx->Const.MinProgramTexelOffset = -8; - ctx->Const.MaxProgramTexelOffset = 7; - ctx->Const.MaxLights = 8; - ctx->Const.MaxTextureCoordUnits = 8; - ctx->Const.MaxTextureUnits = 2; - - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 64; - - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits = 16; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxUniformComponents = 1024; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxInputComponents = - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents; - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents = 128; - - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 1024; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = - ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxOutputComponents; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ - - ctx->Const.MaxCombinedTextureImageUnits = - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits - + ctx->Const.Program[MESA_SHADER_GEOMETRY].MaxTextureImageUnits - + ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; - - ctx->Const.MaxGeometryOutputVertices = 256; - ctx->Const.MaxGeometryTotalOutputComponents = 1024; - - ctx->Const.MaxVarying = 60 / 4; - break; - case 300: - ctx->Const.MaxClipPlanes = 8; - ctx->Const.MaxCombinedTextureImageUnits = 32; - ctx->Const.MaxDrawBuffers = 4; - ctx->Const.MinProgramTexelOffset = -8; - ctx->Const.MaxProgramTexelOffset = 7; - ctx->Const.MaxLights = 0; - ctx->Const.MaxTextureCoordUnits = 0; - ctx->Const.MaxTextureUnits = 0; - - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 16; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 1024; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxInputComponents = 0; /* not used */ - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 16 * 4; - - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 16; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 224; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 15 * 4; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxOutputComponents = 0; /* not used */ - - ctx->Const.MaxVarying = ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents / 4; - break; - } - - ctx->Const.GenerateTemporaryNames = true; - ctx->Const.MaxPatchVertices = 32; - - ctx->Driver.NewShader = 
_mesa_new_shader;
-}
-
-/* Returned string will have 'ctx' as its ralloc owner. */
-static char *
-load_text_file(void *ctx, const char *file_name)
-{
-   char *text = NULL;
-   size_t size;
-   size_t total_read = 0;
-   FILE *fp = fopen(file_name, "rb");
-
-   if (!fp) {
-      return NULL;
-   }
-
-   fseek(fp, 0L, SEEK_END);
-   size = ftell(fp);
-   fseek(fp, 0L, SEEK_SET);
-
-   text = (char *) ralloc_size(ctx, size + 1);
-   if (text != NULL) {
-      do {
-         size_t bytes = fread(text + total_read,
-                              1, size - total_read, fp);
-         if (bytes < size - total_read) {
-            free(text);
-            text = NULL;
-            goto error;
-         }
-
-         if (bytes == 0) {
-            break;
-         }
-
-         total_read += bytes;
-      } while (total_read < size);
-
-      text[total_read] = '\0';
-error:;
-   }
-
-   fclose(fp);
-
-   return text;
-}
-
-int dump_ast = 0;
-int dump_hir = 0;
-int dump_lir = 0;
-int do_link = 0;
-
-const struct option compiler_opts[] = {
-   { "dump-ast", no_argument, &dump_ast, 1 },
-   { "dump-hir", no_argument, &dump_hir, 1 },
-   { "dump-lir", no_argument, &dump_lir, 1 },
-   { "link", no_argument, &do_link, 1 },
-   { "version", required_argument, NULL, 'v' },
-   { NULL, 0, NULL, 0 }
-};
-
-/**
- * \brief Print proper usage and exit with failure.
- */
-void
-usage_fail(const char *name)
-{
-
-   const char *header =
-      "usage: %s [options] <file.vert | file.tesc | file.tese | file.geom | file.frag | file.comp>\n"
-      "\n"
-      "Possible options are:\n";
-   printf(header, name);
-   for (const struct option *o = compiler_opts; o->name != 0; ++o) {
-      printf(" --%s\n", o->name);
-   }
-   exit(EXIT_FAILURE);
-}
-
-
-void
-compile_shader(struct gl_context *ctx, struct gl_shader *shader)
-{
-   struct _mesa_glsl_parse_state *state =
-      new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader);
-
-   _mesa_glsl_compile_shader(ctx, shader, dump_ast, dump_hir);
-
-   /* Print out the resulting IR */
-   if (!state->error && dump_lir) {
-      _mesa_print_ir(stdout, shader->ir, state);
-   }
-
-   return;
-}
-
-int
-main(int argc, char **argv)
-{
-   int status = EXIT_SUCCESS;
-   struct gl_context local_ctx;
-   struct gl_context *ctx = &local_ctx;
-   bool glsl_es = false;
-
-   int c;
-   int idx = 0;
-   while ((c = getopt_long(argc, argv, "", compiler_opts, &idx)) != -1) {
-      switch (c) {
-      case 'v':
-         glsl_version = strtol(optarg, NULL, 10);
-         switch (glsl_version) {
-         case 100:
-         case 300:
-            glsl_es = true;
-            break;
-         case 110:
-         case 120:
-         case 130:
-         case 140:
-         case 150:
-         case 330:
-            glsl_es = false;
-            break;
-         default:
-            fprintf(stderr, "Unrecognized GLSL version `%s'\n", optarg);
-            usage_fail(argv[0]);
-            break;
-         }
-         break;
-      default:
-         break;
-      }
-   }
-
-
-   if (argc <= optind)
-      usage_fail(argv[0]);
-
-   initialize_context(ctx, (glsl_es) ?
API_OPENGLES2 : API_OPENGL_COMPAT); - - struct gl_shader_program *whole_program; - - whole_program = rzalloc (NULL, struct gl_shader_program); - assert(whole_program != NULL); - whole_program->InfoLog = ralloc_strdup(whole_program, ""); - - /* Created just to avoid segmentation faults */ - whole_program->AttributeBindings = new string_to_uint_map; - whole_program->FragDataBindings = new string_to_uint_map; - whole_program->FragDataIndexBindings = new string_to_uint_map; - - for (/* empty */; argc > optind; optind++) { - whole_program->Shaders = - reralloc(whole_program, whole_program->Shaders, - struct gl_shader *, whole_program->NumShaders + 1); - assert(whole_program->Shaders != NULL); - - struct gl_shader *shader = rzalloc(whole_program, gl_shader); - - whole_program->Shaders[whole_program->NumShaders] = shader; - whole_program->NumShaders++; - - const unsigned len = strlen(argv[optind]); - if (len < 6) - usage_fail(argv[0]); - - const char *const ext = & argv[optind][len - 5]; - if (strncmp(".vert", ext, 5) == 0 || strncmp(".glsl", ext, 5) == 0) - shader->Type = GL_VERTEX_SHADER; - else if (strncmp(".tesc", ext, 5) == 0) - shader->Type = GL_TESS_CONTROL_SHADER; - else if (strncmp(".tese", ext, 5) == 0) - shader->Type = GL_TESS_EVALUATION_SHADER; - else if (strncmp(".geom", ext, 5) == 0) - shader->Type = GL_GEOMETRY_SHADER; - else if (strncmp(".frag", ext, 5) == 0) - shader->Type = GL_FRAGMENT_SHADER; - else if (strncmp(".comp", ext, 5) == 0) - shader->Type = GL_COMPUTE_SHADER; - else - usage_fail(argv[0]); - shader->Stage = _mesa_shader_enum_to_shader_stage(shader->Type); - - shader->Source = load_text_file(whole_program, argv[optind]); - if (shader->Source == NULL) { - printf("File \"%s\" does not exist.\n", argv[optind]); - exit(EXIT_FAILURE); - } - - compile_shader(ctx, shader); - - if (strlen(shader->InfoLog) > 0) - printf("Info log for %s:\n%s\n", argv[optind], shader->InfoLog); - - if (!shader->CompileStatus) { - status = EXIT_FAILURE; - break; - } - } - - if ((status == EXIT_SUCCESS) && do_link) { - _mesa_clear_shader_program_data(whole_program); - - link_shaders(ctx, whole_program); - status = (whole_program->LinkStatus) ? EXIT_SUCCESS : EXIT_FAILURE; - - if (strlen(whole_program->InfoLog) > 0) - printf("Info log for linking:\n%s\n", whole_program->InfoLog); - } - - for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) - ralloc_free(whole_program->_LinkedShaders[i]); - - delete whole_program->AttributeBindings; - delete whole_program->FragDataBindings; - delete whole_program->FragDataIndexBindings; - - ralloc_free(whole_program); - _mesa_glsl_release_types(); - _mesa_glsl_release_builtin_functions(); - - return status; -} diff --git a/src/glsl/opt_algebraic.cpp b/src/glsl/opt_algebraic.cpp deleted file mode 100644 index 1e58062cb0d..00000000000 --- a/src/glsl/opt_algebraic.cpp +++ /dev/null @@ -1,984 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_algebraic.cpp
- *
- * Takes advantage of association, commutativity, and other algebraic
- * properties to simplify expressions.
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_rvalue_visitor.h"
-#include "ir_optimization.h"
-#include "ir_builder.h"
-#include "compiler/glsl_types.h"
-
-using namespace ir_builder;
-
-namespace {
-
-/**
- * Visitor class for replacing expressions with ir_constant values.
- */
-
-class ir_algebraic_visitor : public ir_rvalue_visitor {
-public:
-   ir_algebraic_visitor(bool native_integers,
-                        const struct gl_shader_compiler_options *options)
-      : options(options)
-   {
-      this->progress = false;
-      this->mem_ctx = NULL;
-      this->native_integers = native_integers;
-   }
-
-   virtual ~ir_algebraic_visitor()
-   {
-   }
-
-   ir_rvalue *handle_expression(ir_expression *ir);
-   void handle_rvalue(ir_rvalue **rvalue);
-   bool reassociate_constant(ir_expression *ir1,
-                             int const_index,
-                             ir_constant *constant,
-                             ir_expression *ir2);
-   void reassociate_operands(ir_expression *ir1,
-                             int op1,
-                             ir_expression *ir2,
-                             int op2);
-   ir_rvalue *swizzle_if_required(ir_expression *expr,
-                                  ir_rvalue *operand);
-
-   const struct gl_shader_compiler_options *options;
-   void *mem_ctx;
-
-   bool native_integers;
-   bool progress;
-};
-
-} /* unnamed namespace */
-
-static inline bool
-is_vec_zero(ir_constant *ir)
-{
-   return (ir == NULL) ? false : ir->is_zero();
-}
-
-static inline bool
-is_vec_one(ir_constant *ir)
-{
-   return (ir == NULL) ? false : ir->is_one();
-}
-
-static inline bool
-is_vec_two(ir_constant *ir)
-{
-   return (ir == NULL) ? false : ir->is_value(2.0, 2);
-}
-
-static inline bool
-is_vec_four(ir_constant *ir)
-{
-   return (ir == NULL) ? false : ir->is_value(4.0, 4);
-}
-
-static inline bool
-is_vec_negative_one(ir_constant *ir)
-{
-   return (ir == NULL) ? false : ir->is_negative_one();
-}
-
-static inline bool
-is_valid_vec_const(ir_constant *ir)
-{
-   if (ir == NULL)
-      return false;
-
-   if (!ir->type->is_scalar() && !ir->type->is_vector())
-      return false;
-
-   return true;
-}
-
-static inline bool
-is_less_than_one(ir_constant *ir)
-{
-   assert(ir->type->base_type == GLSL_TYPE_FLOAT);
-
-   if (!is_valid_vec_const(ir))
-      return false;
-
-   unsigned component = 0;
-   for (int c = 0; c < ir->type->vector_elements; c++) {
-      if (ir->get_float_component(c) < 1.0f)
-         component++;
-   }
-
-   return (component == ir->type->vector_elements);
-}
-
-static inline bool
-is_greater_than_zero(ir_constant *ir)
-{
-   assert(ir->type->base_type == GLSL_TYPE_FLOAT);
-
-   if (!is_valid_vec_const(ir))
-      return false;
-
-   unsigned component = 0;
-   for (int c = 0; c < ir->type->vector_elements; c++) {
-      if (ir->get_float_component(c) > 0.0f)
-         component++;
-   }
-
-   return (component == ir->type->vector_elements);
-}
-
-static void
-update_type(ir_expression *ir)
-{
-   if (ir->operands[0]->type->is_vector())
-      ir->type = ir->operands[0]->type;
-   else
-      ir->type = ir->operands[1]->type;
-}
-
-/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
-static ir_expression *
-try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
-{
-   if (expr0 && expr0->operation == ir_binop_add &&
-       expr0->type->is_float() &&
-       expr1 && expr1->operation == ir_binop_add &&
-       expr1->type->is_float()) {
-      ir_swizzle *x = expr0->operands[0]->as_swizzle();
-      ir_swizzle *y = expr0->operands[1]->as_swizzle();
-      ir_swizzle *z = expr1->operands[0]->as_swizzle();
-      ir_swizzle *w = expr1->operands[1]->as_swizzle();
-
-      if (!x || x->mask.num_components != 1 ||
-          !y || y->mask.num_components != 1 ||
-          !z || z->mask.num_components != 1 ||
-          !w || w->mask.num_components != 1) {
-         return NULL;
-      }
-
-      bool swiz_seen[4] = {false, false, false, false};
-      swiz_seen[x->mask.x] = true;
-      swiz_seen[y->mask.x] = true;
-      swiz_seen[z->mask.x] = true;
-      swiz_seen[w->mask.x] = true;
-
-      if (!swiz_seen[0] || !swiz_seen[1] ||
-          !swiz_seen[2] || !swiz_seen[3]) {
-         return NULL;
-      }
-
-      if (x->val->equals(y->val) &&
-          x->val->equals(z->val) &&
-          x->val->equals(w->val)) {
-         return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4));
-      }
-   }
-   return NULL;
-}
-
-void
-ir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
-                                           int op1,
-                                           ir_expression *ir2,
-                                           int op2)
-{
-   ir_rvalue *temp = ir2->operands[op2];
-   ir2->operands[op2] = ir1->operands[op1];
-   ir1->operands[op1] = temp;
-
-   /* Update the type of ir2. The type of ir1 won't have changed --
-    * base types matched, and at least one of the operands of the 2
-    * binops is still a vector if any of them were.
-    */
-   update_type(ir2);
-
-   this->progress = true;
-}
-
-/**
- * Reassociates a constant down a tree of adds or multiplies.
- *
- * Consider (2 * (a * (b * 0.5))). We want to end up with a * b.
- */
-bool
-ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
-                                           ir_constant *constant,
-                                           ir_expression *ir2)
-{
-   if (!ir2 || ir1->operation != ir2->operation)
-      return false;
-
-   /* Don't want to even think about matrices.
*/ - if (ir1->operands[0]->type->is_matrix() || - ir1->operands[1]->type->is_matrix() || - ir2->operands[0]->type->is_matrix() || - ir2->operands[1]->type->is_matrix()) - return false; - - ir_constant *ir2_const[2]; - ir2_const[0] = ir2->operands[0]->constant_expression_value(); - ir2_const[1] = ir2->operands[1]->constant_expression_value(); - - if (ir2_const[0] && ir2_const[1]) - return false; - - if (ir2_const[0]) { - reassociate_operands(ir1, const_index, ir2, 1); - return true; - } else if (ir2_const[1]) { - reassociate_operands(ir1, const_index, ir2, 0); - return true; - } - - if (reassociate_constant(ir1, const_index, constant, - ir2->operands[0]->as_expression())) { - update_type(ir2); - return true; - } - - if (reassociate_constant(ir1, const_index, constant, - ir2->operands[1]->as_expression())) { - update_type(ir2); - return true; - } - - return false; -} - -/* When eliminating an expression and just returning one of its operands, - * we may need to swizzle that operand out to a vector if the expression was - * vector type. - */ -ir_rvalue * -ir_algebraic_visitor::swizzle_if_required(ir_expression *expr, - ir_rvalue *operand) -{ - if (expr->type->is_vector() && operand->type->is_scalar()) { - return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0, - expr->type->vector_elements); - } else - return operand; -} - -ir_rvalue * -ir_algebraic_visitor::handle_expression(ir_expression *ir) -{ - ir_constant *op_const[4] = {NULL, NULL, NULL, NULL}; - ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL}; - unsigned int i; - - if (ir->operation == ir_binop_mul && - ir->operands[0]->type->is_matrix() && - ir->operands[1]->type->is_vector()) { - ir_expression *matrix_mul = ir->operands[0]->as_expression(); - - if (matrix_mul && matrix_mul->operation == ir_binop_mul && - matrix_mul->operands[0]->type->is_matrix() && - matrix_mul->operands[1]->type->is_matrix()) { - - return mul(matrix_mul->operands[0], - mul(matrix_mul->operands[1], ir->operands[1])); - } - } - - assert(ir->get_num_operands() <= 4); - for (i = 0; i < ir->get_num_operands(); i++) { - if (ir->operands[i]->type->is_matrix()) - return ir; - - op_const[i] = ir->operands[i]->constant_expression_value(); - op_expr[i] = ir->operands[i]->as_expression(); - } - - if (this->mem_ctx == NULL) - this->mem_ctx = ralloc_parent(ir); - - switch (ir->operation) { - case ir_unop_bit_not: - if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not) - return op_expr[0]->operands[0]; - break; - - case ir_unop_abs: - if (op_expr[0] == NULL) - break; - - switch (op_expr[0]->operation) { - case ir_unop_abs: - case ir_unop_neg: - return abs(op_expr[0]->operands[0]); - default: - break; - } - break; - - case ir_unop_neg: - if (op_expr[0] == NULL) - break; - - if (op_expr[0]->operation == ir_unop_neg) { - return op_expr[0]->operands[0]; - } - break; - - case ir_unop_exp: - if (op_expr[0] == NULL) - break; - - if (op_expr[0]->operation == ir_unop_log) { - return op_expr[0]->operands[0]; - } - break; - - case ir_unop_log: - if (op_expr[0] == NULL) - break; - - if (op_expr[0]->operation == ir_unop_exp) { - return op_expr[0]->operands[0]; - } - break; - - case ir_unop_exp2: - if (op_expr[0] == NULL) - break; - - if (op_expr[0]->operation == ir_unop_log2) { - return op_expr[0]->operands[0]; - } - - if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) { - for (int log2_pos = 0; log2_pos < 2; log2_pos++) { - ir_expression *log2_expr = - op_expr[0]->operands[log2_pos]->as_expression(); - - if (log2_expr && log2_expr->operation == ir_unop_log2) { - return 
new(mem_ctx) ir_expression(ir_binop_pow, - ir->type, - log2_expr->operands[0], - op_expr[0]->operands[1 - log2_pos]); - } - } - } - break; - - case ir_unop_log2: - if (op_expr[0] == NULL) - break; - - if (op_expr[0]->operation == ir_unop_exp2) { - return op_expr[0]->operands[0]; - } - break; - - case ir_unop_f2i: - case ir_unop_f2u: - if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) { - return new(mem_ctx) ir_expression(ir->operation, - ir->type, - op_expr[0]->operands[0]); - } - break; - - case ir_unop_logic_not: { - enum ir_expression_operation new_op = ir_unop_logic_not; - - if (op_expr[0] == NULL) - break; - - switch (op_expr[0]->operation) { - case ir_binop_less: new_op = ir_binop_gequal; break; - case ir_binop_greater: new_op = ir_binop_lequal; break; - case ir_binop_lequal: new_op = ir_binop_greater; break; - case ir_binop_gequal: new_op = ir_binop_less; break; - case ir_binop_equal: new_op = ir_binop_nequal; break; - case ir_binop_nequal: new_op = ir_binop_equal; break; - case ir_binop_all_equal: new_op = ir_binop_any_nequal; break; - case ir_binop_any_nequal: new_op = ir_binop_all_equal; break; - - default: - /* The default case handler is here to silence a warning from GCC. - */ - break; - } - - if (new_op != ir_unop_logic_not) { - return new(mem_ctx) ir_expression(new_op, - ir->type, - op_expr[0]->operands[0], - op_expr[0]->operands[1]); - } - - break; - } - - case ir_unop_saturate: - if (op_expr[0] && op_expr[0]->operation == ir_binop_add) { - ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression(); - ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression(); - - if (b2f_0 && b2f_0->operation == ir_unop_b2f && - b2f_1 && b2f_1->operation == ir_unop_b2f) { - return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0])); - } - } - break; - - case ir_binop_add: - if (is_vec_zero(op_const[0])) - return ir->operands[1]; - if (is_vec_zero(op_const[1])) - return ir->operands[0]; - - /* Reassociate addition of constants so that we can do constant - * folding. - */ - if (op_const[0] && !op_const[1]) - reassociate_constant(ir, 0, op_const[0], op_expr[1]); - if (op_const[1] && !op_const[0]) - reassociate_constant(ir, 1, op_const[1], op_expr[0]); - - /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ - if (options->OptimizeForAOS) { - ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1], - mem_ctx); - if (expr) - return expr; - } - - /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a). - * - * (-x + y) * a + x - * (x * -a) + (y * a) + x - * x + (x * -a) + (y * a) - * x * (1 - a) + y * a - * lrp(x, y, a) - */ - for (int mul_pos = 0; mul_pos < 2; mul_pos++) { - ir_expression *mul = op_expr[mul_pos]; - - if (!mul || mul->operation != ir_binop_mul) - continue; - - /* Multiply found on one of the operands. Now check for an - * inner addition operation. - */ - for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) { - ir_expression *inner_add = - mul->operands[inner_add_pos]->as_expression(); - - if (!inner_add || inner_add->operation != ir_binop_add) - continue; - - /* Inner addition found on one of the operands. Now check for - * one of the operands of the inner addition to be the negative - * of x_operand. 
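The lrp-recognition comment above is doing algebra; the chain it relies on is (-x + y) * a + x = x * (1 - a) + y * a = lrp(x, y, a). A quick numeric check of that identity, as a self-contained sketch (lrp here is written out the way GLSL defines mix()):

    #include <cassert>
    #include <cmath>

    // lrp(x, y, a) = x * (1 - a) + y * a
    static float lrp(float x, float y, float a) {
       return x * (1.0f - a) + y * a;
    }

    int main() {
       const float x = 2.0f, y = 10.0f, a = 0.25f;
       const float open_coded = (-x + y) * a + x;   // 8 * 0.25 + 2 = 4
       assert(std::fabs(open_coded - lrp(x, y, a)) < 1e-6f);
       return 0;
    }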
- */ - for (int neg_pos = 0; neg_pos < 2; neg_pos++) { - ir_expression *neg = - inner_add->operands[neg_pos]->as_expression(); - - if (!neg || neg->operation != ir_unop_neg) - continue; - - ir_rvalue *x_operand = ir->operands[1 - mul_pos]; - - if (!neg->operands[0]->equals(x_operand)) - continue; - - ir_rvalue *y_operand = inner_add->operands[1 - neg_pos]; - ir_rvalue *a_operand = mul->operands[1 - inner_add_pos]; - - if (x_operand->type != y_operand->type || - x_operand->type != a_operand->type) - continue; - - return lrp(x_operand, y_operand, a_operand); - } - } - } - - break; - - case ir_binop_sub: - if (is_vec_zero(op_const[0])) - return neg(ir->operands[1]); - if (is_vec_zero(op_const[1])) - return ir->operands[0]; - break; - - case ir_binop_mul: - if (is_vec_one(op_const[0])) - return ir->operands[1]; - if (is_vec_one(op_const[1])) - return ir->operands[0]; - - if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) - return ir_constant::zero(ir, ir->type); - - if (is_vec_negative_one(op_const[0])) - return neg(ir->operands[1]); - if (is_vec_negative_one(op_const[1])) - return neg(ir->operands[0]); - - if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f && - op_expr[1] && op_expr[1]->operation == ir_unop_b2f) { - return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0])); - } - - /* Reassociate multiplication of constants so that we can do - * constant folding. - */ - if (op_const[0] && !op_const[1]) - reassociate_constant(ir, 0, op_const[0], op_expr[1]); - if (op_const[1] && !op_const[0]) - reassociate_constant(ir, 1, op_const[1], op_expr[0]); - - /* Optimizes - * - * (mul (floor (add (abs x) 0.5) (sign x))) - * - * into - * - * (trunc (add x (mul (sign x) 0.5))) - */ - for (int i = 0; i < 2; i++) { - ir_expression *sign_expr = ir->operands[i]->as_expression(); - ir_expression *floor_expr = ir->operands[1 - i]->as_expression(); - - if (!sign_expr || sign_expr->operation != ir_unop_sign || - !floor_expr || floor_expr->operation != ir_unop_floor) - continue; - - ir_expression *add_expr = floor_expr->operands[0]->as_expression(); - if (!add_expr || add_expr->operation != ir_binop_add) - continue; - - for (int j = 0; j < 2; j++) { - ir_expression *abs_expr = add_expr->operands[j]->as_expression(); - if (!abs_expr || abs_expr->operation != ir_unop_abs) - continue; - - ir_constant *point_five = add_expr->operands[1 - j]->as_constant(); - if (!point_five || !point_five->is_value(0.5, 0)) - continue; - - if (abs_expr->operands[0]->equals(sign_expr->operands[0])) { - return trunc(add(abs_expr->operands[0], - mul(sign_expr, point_five))); - } - } - } - break; - - case ir_binop_div: - if (is_vec_one(op_const[0]) && ( - ir->type->base_type == GLSL_TYPE_FLOAT || - ir->type->base_type == GLSL_TYPE_DOUBLE)) { - return new(mem_ctx) ir_expression(ir_unop_rcp, - ir->operands[1]->type, - ir->operands[1], - NULL); - } - if (is_vec_one(op_const[1])) - return ir->operands[0]; - break; - - case ir_binop_dot: - if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) - return ir_constant::zero(mem_ctx, ir->type); - - for (int i = 0; i < 2; i++) { - if (!op_const[i]) - continue; - - unsigned components[4] = { 0 }, count = 0; - - for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) { - if (op_const[i]->is_zero()) - continue; - - components[count] = c; - count++; - } - - /* No channels had zero values; bail. */ - if (count >= op_const[i]->type->vector_elements) - break; - - ir_expression_operation op = count == 1 ? 
- ir_binop_mul : ir_binop_dot; - - /* Swizzle both operands to remove the channels that were zero. */ - return new(mem_ctx) - ir_expression(op, ir->type, - new(mem_ctx) ir_swizzle(ir->operands[0], - components, count), - new(mem_ctx) ir_swizzle(ir->operands[1], - components, count)); - } - break; - - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_equal: - case ir_binop_nequal: - for (int add_pos = 0; add_pos < 2; add_pos++) { - ir_expression *add = op_expr[add_pos]; - - if (!add || add->operation != ir_binop_add) - continue; - - ir_constant *zero = op_const[1 - add_pos]; - if (!is_vec_zero(zero)) - continue; - - /* Depending of the zero position we want to optimize - * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y) - */ - if (add_pos == 1) { - return new(mem_ctx) ir_expression(ir->operation, - neg(add->operands[0]), - add->operands[1]); - } else { - return new(mem_ctx) ir_expression(ir->operation, - add->operands[0], - neg(add->operands[1])); - } - } - break; - - case ir_binop_all_equal: - case ir_binop_any_nequal: - if (ir->operands[0]->type->is_scalar() && - ir->operands[1]->type->is_scalar()) - return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal - ? ir_binop_equal : ir_binop_nequal, - ir->operands[0], - ir->operands[1]); - break; - - case ir_binop_rshift: - case ir_binop_lshift: - /* 0 >> x == 0 */ - if (is_vec_zero(op_const[0])) - return ir->operands[0]; - /* x >> 0 == x */ - if (is_vec_zero(op_const[1])) - return ir->operands[0]; - break; - - case ir_binop_logic_and: - if (is_vec_one(op_const[0])) { - return ir->operands[1]; - } else if (is_vec_one(op_const[1])) { - return ir->operands[0]; - } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { - return ir_constant::zero(mem_ctx, ir->type); - } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && - op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { - /* De Morgan's Law: - * (not A) and (not B) === not (A or B) - */ - return logic_not(logic_or(op_expr[0]->operands[0], - op_expr[1]->operands[0])); - } else if (ir->operands[0]->equals(ir->operands[1])) { - /* (a && a) == a */ - return ir->operands[0]; - } - break; - - case ir_binop_logic_xor: - if (is_vec_zero(op_const[0])) { - return ir->operands[1]; - } else if (is_vec_zero(op_const[1])) { - return ir->operands[0]; - } else if (is_vec_one(op_const[0])) { - return logic_not(ir->operands[1]); - } else if (is_vec_one(op_const[1])) { - return logic_not(ir->operands[0]); - } else if (ir->operands[0]->equals(ir->operands[1])) { - /* (a ^^ a) == false */ - return ir_constant::zero(mem_ctx, ir->type); - } - break; - - case ir_binop_logic_or: - if (is_vec_zero(op_const[0])) { - return ir->operands[1]; - } else if (is_vec_zero(op_const[1])) { - return ir->operands[0]; - } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) { - ir_constant_data data; - - for (unsigned i = 0; i < 16; i++) - data.b[i] = true; - - return new(mem_ctx) ir_constant(ir->type, &data); - } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && - op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { - /* De Morgan's Law: - * (not A) or (not B) === not (A and B) - */ - return logic_not(logic_and(op_expr[0]->operands[0], - op_expr[1]->operands[0])); - } else if (ir->operands[0]->equals(ir->operands[1])) { - /* (a || a) == a */ - return ir->operands[0]; - } - break; - - case ir_binop_pow: - /* 1^x == 1 */ - if (is_vec_one(op_const[0])) - return op_const[0]; - - /* x^1 
== x */ - if (is_vec_one(op_const[1])) - return ir->operands[0]; - - /* pow(2,x) == exp2(x) */ - if (is_vec_two(op_const[0])) - return expr(ir_unop_exp2, ir->operands[1]); - - if (is_vec_two(op_const[1])) { - ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", - ir_var_temporary); - base_ir->insert_before(x); - base_ir->insert_before(assign(x, ir->operands[0])); - return mul(x, x); - } - - if (is_vec_four(op_const[1])) { - ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", - ir_var_temporary); - base_ir->insert_before(x); - base_ir->insert_before(assign(x, ir->operands[0])); - - ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type, - "squared", - ir_var_temporary); - base_ir->insert_before(squared); - base_ir->insert_before(assign(squared, mul(x, x))); - return mul(squared, squared); - } - - break; - - case ir_binop_min: - case ir_binop_max: - if (ir->type->base_type != GLSL_TYPE_FLOAT || options->EmitNoSat) - break; - - /* Replace min(max) operations and its commutative combinations with - * a saturate operation - */ - for (int op = 0; op < 2; op++) { - ir_expression *inner_expr = op_expr[op]; - ir_constant *outer_const = op_const[1 - op]; - ir_expression_operation op_cond = (ir->operation == ir_binop_max) ? - ir_binop_min : ir_binop_max; - - if (!inner_expr || !outer_const || (inner_expr->operation != op_cond)) - continue; - - /* One of these has to be a constant */ - if (!inner_expr->operands[0]->as_constant() && - !inner_expr->operands[1]->as_constant()) - break; - - /* Found a min(max) combination. Now try to see if its operands - * meet our conditions that we can do just a single saturate operation - */ - for (int minmax_op = 0; minmax_op < 2; minmax_op++) { - ir_rvalue *x = inner_expr->operands[minmax_op]; - ir_rvalue *y = inner_expr->operands[1 - minmax_op]; - - ir_constant *inner_const = y->as_constant(); - if (!inner_const) - continue; - - /* min(max(x, 0.0), 1.0) is sat(x) */ - if (ir->operation == ir_binop_min && - inner_const->is_zero() && - outer_const->is_one()) - return saturate(x); - - /* max(min(x, 1.0), 0.0) is sat(x) */ - if (ir->operation == ir_binop_max && - inner_const->is_one() && - outer_const->is_zero()) - return saturate(x); - - /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */ - if (ir->operation == ir_binop_min && - inner_const->is_zero() && - is_less_than_one(outer_const)) - return saturate(expr(ir_binop_min, x, outer_const)); - - /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */ - if (ir->operation == ir_binop_max && - is_less_than_one(inner_const) && - outer_const->is_zero()) - return saturate(expr(ir_binop_min, x, inner_const)); - - /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */ - if (ir->operation == ir_binop_max && - inner_const->is_one() && - is_greater_than_zero(outer_const)) - return saturate(expr(ir_binop_max, x, outer_const)); - - /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */ - if (ir->operation == ir_binop_min && - is_greater_than_zero(inner_const) && - outer_const->is_one()) - return saturate(expr(ir_binop_max, x, inner_const)); - } - } - - break; - - case ir_unop_rcp: - if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp) - return op_expr[0]->operands[0]; - - if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 || - op_expr[0]->operation == ir_unop_exp)) { - return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type, - neg(op_expr[0]->operands[0])); - } - - /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at - * its IR level, so we can 
always apply this transformation. - */ - if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq) - return sqrt(op_expr[0]->operands[0]); - - /* As far as we know, all backends are OK with rsq. */ - if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) { - return rsq(op_expr[0]->operands[0]); - } - - break; - - case ir_triop_fma: - /* Operands are op0 * op1 + op2. */ - if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { - return ir->operands[2]; - } else if (is_vec_zero(op_const[2])) { - return mul(ir->operands[0], ir->operands[1]); - } else if (is_vec_one(op_const[0])) { - return add(ir->operands[1], ir->operands[2]); - } else if (is_vec_one(op_const[1])) { - return add(ir->operands[0], ir->operands[2]); - } - break; - - case ir_triop_lrp: - /* Operands are (x, y, a). */ - if (is_vec_zero(op_const[2])) { - return ir->operands[0]; - } else if (is_vec_one(op_const[2])) { - return ir->operands[1]; - } else if (ir->operands[0]->equals(ir->operands[1])) { - return ir->operands[0]; - } else if (is_vec_zero(op_const[0])) { - return mul(ir->operands[1], ir->operands[2]); - } else if (is_vec_zero(op_const[1])) { - unsigned op2_components = ir->operands[2]->type->vector_elements; - ir_constant *one; - - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - one = new(mem_ctx) ir_constant(1.0f, op2_components); - break; - case GLSL_TYPE_DOUBLE: - one = new(mem_ctx) ir_constant(1.0, op2_components); - break; - default: - one = NULL; - unreachable("unexpected type"); - } - - return mul(ir->operands[0], add(one, neg(ir->operands[2]))); - } - break; - - case ir_triop_csel: - if (is_vec_one(op_const[0])) - return ir->operands[1]; - if (is_vec_zero(op_const[0])) - return ir->operands[2]; - break; - - default: - break; - } - - return ir; -} - -void -ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - if (!*rvalue) - return; - - ir_expression *expr = (*rvalue)->as_expression(); - if (!expr || expr->operation == ir_quadop_vector) - return; - - ir_rvalue *new_rvalue = handle_expression(expr); - if (new_rvalue == *rvalue) - return; - - /* If the expr used to be some vec OP scalar returning a vector, and the - * optimization gave us back a scalar, we still need to turn it into a - * vector. - */ - *rvalue = swizzle_if_required(expr, new_rvalue); - - this->progress = true; -} - -bool -do_algebraic(exec_list *instructions, bool native_integers, - const struct gl_shader_compiler_options *options) -{ - ir_algebraic_visitor v(native_integers, options); - - visit_list_elements(&v, instructions); - - return v.progress; -} diff --git a/src/glsl/opt_array_splitting.cpp b/src/glsl/opt_array_splitting.cpp deleted file mode 100644 index cceec6b6431..00000000000 --- a/src/glsl/opt_array_splitting.cpp +++ /dev/null @@ -1,408 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_array_splitting.cpp - * - * If an array is always dereferenced with a constant index, then - * split it apart into its elements, making it more amenable to other - * optimization passes. - * - * This skips uniform/varying arrays, which would need careful - * handling due to their ir->location fields tying them to the GL API - * and other shader stages. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_rvalue_visitor.h" -#include "compiler/glsl_types.h" - -static bool debug = false; - -namespace { - -namespace opt_array_splitting { - -class variable_entry : public exec_node -{ -public: - variable_entry(ir_variable *var) - { - this->var = var; - this->split = true; - this->declaration = false; - this->components = NULL; - this->mem_ctx = NULL; - if (var->type->is_array()) - this->size = var->type->length; - else - this->size = var->type->matrix_columns; - } - - ir_variable *var; /* The key: the variable's pointer. */ - unsigned size; /* array length or matrix columns */ - - /** Whether this array should be split or not. */ - bool split; - - /* If the variable had a decl we can work with in the instruction - * stream. We can't do splitting on function arguments, which - * don't get this variable set. - */ - bool declaration; - - ir_variable **components; - - /** ralloc_parent(this->var) -- the shader's talloc context. */ - void *mem_ctx; -}; - -} /* namespace */ - -using namespace opt_array_splitting; - -/** - * This class does a walk over the tree, coming up with the set of - * variables that could be split by looking to see if they are arrays - * that are only ever constant-index dereferenced. - */ -class ir_array_reference_visitor : public ir_hierarchical_visitor { -public: - ir_array_reference_visitor(void) - { - this->mem_ctx = ralloc_context(NULL); - this->variable_list.make_empty(); - } - - ~ir_array_reference_visitor(void) - { - ralloc_free(mem_ctx); - } - - bool get_split_list(exec_list *instructions, bool linked); - - virtual ir_visitor_status visit(ir_variable *); - virtual ir_visitor_status visit(ir_dereference_variable *); - virtual ir_visitor_status visit_enter(ir_dereference_array *); - virtual ir_visitor_status visit_enter(ir_function_signature *); - - variable_entry *get_variable_entry(ir_variable *var); - - /* List of variable_entry */ - exec_list variable_list; - - void *mem_ctx; -}; - -} /* namespace */ - -variable_entry * -ir_array_reference_visitor::get_variable_entry(ir_variable *var) -{ - assert(var); - - if (var->data.mode != ir_var_auto && - var->data.mode != ir_var_temporary) - return NULL; - - if (!(var->type->is_array() || var->type->is_matrix())) - return NULL; - - /* If the array hasn't been sized yet, we can't split it. After - * linking, this should be resolved. 
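As the file comment above says, only arrays (and matrices) whose every dereference uses a constant index qualify for splitting; each element then becomes an independent variable. The payoff is easiest to see in a hypothetical before/after, written here as ordinary C++ rather than GLSL IR:

    #include <cstdio>

    int main() {
       float x = 1.0f, y = 2.0f;

       // Before: float a[2]; a[0] = x; a[1] = y; out = a[0] + a[1];
       // After splitting, each element is its own scalar, which copy
       // propagation and dead-code elimination can then chew through:
       float a_0 = x;   // was a[0]
       float a_1 = y;   // was a[1]
       std::printf("%g\n", a_0 + a_1);
       return 0;
    }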
- */ - if (var->type->is_unsized_array()) - return NULL; - - foreach_in_list(variable_entry, entry, &this->variable_list) { - if (entry->var == var) - return entry; - } - - variable_entry *entry = new(mem_ctx) variable_entry(var); - this->variable_list.push_tail(entry); - return entry; -} - - -ir_visitor_status -ir_array_reference_visitor::visit(ir_variable *ir) -{ - variable_entry *entry = this->get_variable_entry(ir); - - if (entry) - entry->declaration = true; - - return visit_continue; -} - -ir_visitor_status -ir_array_reference_visitor::visit(ir_dereference_variable *ir) -{ - variable_entry *entry = this->get_variable_entry(ir->var); - - /* If we made it to here without seeing an ir_dereference_array, - * then the dereference of this array didn't have a constant index - * (see the visit_continue_with_parent below), so we can't split - * the variable. - */ - if (entry) - entry->split = false; - - return visit_continue; -} - -ir_visitor_status -ir_array_reference_visitor::visit_enter(ir_dereference_array *ir) -{ - ir_dereference_variable *deref = ir->array->as_dereference_variable(); - if (!deref) - return visit_continue; - - variable_entry *entry = this->get_variable_entry(deref->var); - - /* If the access to the array has a variable index, we wouldn't - * know which split variable this dereference should go to. - */ - if (entry && !ir->array_index->as_constant()) - entry->split = false; - - /* If the index is also array dereference, visit index. */ - if (ir->array_index->as_dereference_array()) - visit_enter(ir->array_index->as_dereference_array()); - - return visit_continue_with_parent; -} - -ir_visitor_status -ir_array_reference_visitor::visit_enter(ir_function_signature *ir) -{ - /* We don't have logic for array-splitting function arguments, - * so just look at the body instructions and not the parameter - * declarations. - */ - visit_list_elements(this, &ir->body); - return visit_continue_with_parent; -} - -bool -ir_array_reference_visitor::get_split_list(exec_list *instructions, - bool linked) -{ - visit_list_elements(this, instructions); - - /* If the shaders aren't linked yet, we can't mess with global - * declarations, which need to be matched by name across shaders. - */ - if (!linked) { - foreach_in_list(ir_instruction, node, instructions) { - ir_variable *var = node->as_variable(); - if (var) { - variable_entry *entry = get_variable_entry(var); - if (entry) - entry->remove(); - } - } - } - - /* Trim out variables we found that we can't split. */ - foreach_in_list_safe(variable_entry, entry, &variable_list) { - if (debug) { - printf("array %s@%p: decl %d, split %d\n", - entry->var->name, (void *) entry->var, entry->declaration, - entry->split); - } - - if (!(entry->declaration && entry->split)) { - entry->remove(); - } - } - - return !variable_list.is_empty(); -} - -/** - * This class rewrites the dereferences of arrays that have been split - * to use the newly created ir_variables for each component. 
- */ -class ir_array_splitting_visitor : public ir_rvalue_visitor { -public: - ir_array_splitting_visitor(exec_list *vars) - { - this->variable_list = vars; - } - - virtual ~ir_array_splitting_visitor() - { - } - - virtual ir_visitor_status visit_leave(ir_assignment *); - - void split_deref(ir_dereference **deref); - void handle_rvalue(ir_rvalue **rvalue); - variable_entry *get_splitting_entry(ir_variable *var); - - exec_list *variable_list; -}; - -variable_entry * -ir_array_splitting_visitor::get_splitting_entry(ir_variable *var) -{ - assert(var); - - foreach_in_list(variable_entry, entry, this->variable_list) { - if (entry->var == var) { - return entry; - } - } - - return NULL; -} - -void -ir_array_splitting_visitor::split_deref(ir_dereference **deref) -{ - ir_dereference_array *deref_array = (*deref)->as_dereference_array(); - if (!deref_array) - return; - - ir_dereference_variable *deref_var = deref_array->array->as_dereference_variable(); - if (!deref_var) - return; - ir_variable *var = deref_var->var; - - variable_entry *entry = get_splitting_entry(var); - if (!entry) - return; - - ir_constant *constant = deref_array->array_index->as_constant(); - assert(constant); - - if (constant->value.i[0] >= 0 && constant->value.i[0] < (int)entry->size) { - *deref = new(entry->mem_ctx) - ir_dereference_variable(entry->components[constant->value.i[0]]); - } else { - /* There was a constant array access beyond the end of the - * array. This might have happened due to constant folding - * after the initial parse. This produces an undefined value, - * but shouldn't crash. Just give them an uninitialized - * variable. - */ - ir_variable *temp = new(entry->mem_ctx) ir_variable(deref_array->type, - "undef", - ir_var_temporary); - entry->components[0]->insert_before(temp); - *deref = new(entry->mem_ctx) ir_dereference_variable(temp); - } -} - -void -ir_array_splitting_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - if (!*rvalue) - return; - - ir_dereference *deref = (*rvalue)->as_dereference(); - - if (!deref) - return; - - split_deref(&deref); - *rvalue = deref; -} - -ir_visitor_status -ir_array_splitting_visitor::visit_leave(ir_assignment *ir) -{ - /* The normal rvalue visitor skips the LHS of assignments, but we - * need to process those just the same. - */ - ir_rvalue *lhs = ir->lhs; - - handle_rvalue(&lhs); - ir->lhs = lhs->as_dereference(); - - ir->lhs->accept(this); - - handle_rvalue(&ir->rhs); - ir->rhs->accept(this); - - if (ir->condition) { - handle_rvalue(&ir->condition); - ir->condition->accept(this); - } - - return visit_continue; -} - -bool -optimize_split_arrays(exec_list *instructions, bool linked) -{ - ir_array_reference_visitor refs; - if (!refs.get_split_list(instructions, linked)) - return false; - - void *mem_ctx = ralloc_context(NULL); - - /* Replace the decls of the arrays to be split with their split - * components. 
- */
- foreach_in_list(variable_entry, entry, &refs.variable_list) {
- const struct glsl_type *type = entry->var->type;
- const struct glsl_type *subtype;
-
- if (type->is_matrix())
- subtype = type->column_type();
- else
- subtype = type->fields.array;
-
- entry->mem_ctx = ralloc_parent(entry->var);
-
- entry->components = ralloc_array(mem_ctx,
- ir_variable *,
- entry->size);
-
- for (unsigned int i = 0; i < entry->size; i++) {
- const char *name = ralloc_asprintf(mem_ctx, "%s_%d",
- entry->var->name, i);
-
- entry->components[i] =
- new(entry->mem_ctx) ir_variable(subtype, name, ir_var_temporary);
- entry->var->insert_before(entry->components[i]);
- }
-
- entry->var->remove();
- }
-
- ir_array_splitting_visitor split(&refs.variable_list);
- visit_list_elements(&split, instructions);
-
- if (debug)
- _mesa_print_ir(stdout, instructions, NULL);
-
- ralloc_free(mem_ctx);
-
- return true;
-
-}
diff --git a/src/glsl/opt_conditional_discard.cpp b/src/glsl/opt_conditional_discard.cpp
deleted file mode 100644
index 1ca8803f643..00000000000
--- a/src/glsl/opt_conditional_discard.cpp
+++ /dev/null
@@ -1,81 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_conditional_discard.cpp
- *
- * Replace
- *
- * if (cond) discard;
- *
- * with
- *
- * (discard <condition>)
- */
-
-#include "compiler/glsl_types.h"
-#include "ir.h"
-
-namespace {
-
-class opt_conditional_discard_visitor : public ir_hierarchical_visitor {
-public:
- opt_conditional_discard_visitor()
- {
- progress = false;
- }
-
- ir_visitor_status visit_leave(ir_if *);
-
- bool progress;
-};
-
-} /* anonymous namespace */
-
-bool
-opt_conditional_discard(exec_list *instructions)
-{
- opt_conditional_discard_visitor v;
- v.run(instructions);
- return v.progress;
-}
-
-ir_visitor_status
-opt_conditional_discard_visitor::visit_leave(ir_if *ir)
-{
- /* Look for "if (...) discard" with no else clause or extra statements. */
- if (ir->then_instructions.is_empty() ||
- !ir->then_instructions.head->next->is_tail_sentinel() ||
- !((ir_instruction *) ir->then_instructions.head)->as_discard() ||
- !ir->else_instructions.is_empty())
- return visit_continue;
-
- /* Move the condition and replace the ir_if with the ir_discard.
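- *
- * As a sketch (variable names are illustrative only), the rewrite
- * turns
- *
- *    if (alpha < 0.5) discard;
- *
- * into a single ir_discard whose condition field holds the original
- * if-condition, i.e. (discard <condition>) in the notation above.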
*/
- ir_discard *discard = (ir_discard *) ir->then_instructions.head;
- discard->condition = ir->condition;
- ir->replace_with(discard);
-
- progress = true;
-
- return visit_continue;
-}
diff --git a/src/glsl/opt_constant_folding.cpp b/src/glsl/opt_constant_folding.cpp
deleted file mode 100644
index 150a17b2af6..00000000000
--- a/src/glsl/opt_constant_folding.cpp
+++ /dev/null
@@ -1,190 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_constant_folding.cpp
- * Replace constant-valued expressions with references to constant values.
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_rvalue_visitor.h"
-#include "ir_optimization.h"
-#include "compiler/glsl_types.h"
-
-namespace {
-
-/**
- * Visitor class for replacing expressions with ir_constant values.
- */
-
-class ir_constant_folding_visitor : public ir_rvalue_visitor {
-public:
- ir_constant_folding_visitor()
- {
- this->progress = false;
- }
-
- virtual ~ir_constant_folding_visitor()
- {
- /* empty */
- }
-
- virtual ir_visitor_status visit_enter(ir_discard *ir);
- virtual ir_visitor_status visit_enter(ir_assignment *ir);
- virtual ir_visitor_status visit_enter(ir_call *ir);
-
- virtual void handle_rvalue(ir_rvalue **rvalue);
-
- bool progress;
-};
-
-} /* unnamed namespace */
-
-void
-ir_constant_folding_visitor::handle_rvalue(ir_rvalue **rvalue)
-{
- if (*rvalue == NULL || (*rvalue)->ir_type == ir_type_constant)
- return;
-
- /* Note that we do rvalue visiting on leaving. So if an
- * expression has a non-constant operand, no need to go looking
- * down it to find if it's constant. This cuts the time of this
- * pass down drastically.
- */
- ir_expression *expr = (*rvalue)->as_expression();
- if (expr) {
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- if (!expr->operands[i]->as_constant())
- return;
- }
- }
-
- /* Ditto for swizzles.
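- *
- * (E.g., a sketch: "v.xy" folds only if the swizzled value "v" is
- * itself an ir_constant; otherwise there is nothing to evaluate.)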
*/
- ir_swizzle *swiz = (*rvalue)->as_swizzle();
- if (swiz && !swiz->val->as_constant())
- return;
-
- ir_constant *constant = (*rvalue)->constant_expression_value();
- if (constant) {
- *rvalue = constant;
- this->progress = true;
- } else {
- (*rvalue)->accept(this);
- }
-}
-
-ir_visitor_status
-ir_constant_folding_visitor::visit_enter(ir_discard *ir)
-{
- if (ir->condition) {
- ir->condition->accept(this);
- handle_rvalue(&ir->condition);
-
- ir_constant *const_val = ir->condition->as_constant();
- /* If the condition is constant, either remove the condition or
- * remove the never-executed discard.
- */
- if (const_val) {
- if (const_val->value.b[0])
- ir->condition = NULL;
- else
- ir->remove();
- this->progress = true;
- }
- }
-
- return visit_continue_with_parent;
-}
-
-ir_visitor_status
-ir_constant_folding_visitor::visit_enter(ir_assignment *ir)
-{
- ir->rhs->accept(this);
- handle_rvalue(&ir->rhs);
-
- if (ir->condition) {
- ir->condition->accept(this);
- handle_rvalue(&ir->condition);
-
- ir_constant *const_val = ir->condition->as_constant();
- /* If the condition is constant, either remove the condition or
- * remove the never-executed assignment.
- */
- if (const_val) {
- if (const_val->value.b[0])
- ir->condition = NULL;
- else
- ir->remove();
- this->progress = true;
- }
- }
-
- /* Don't descend into the LHS because we want it to stay as a
- * variable dereference. FINISHME: We probably should, though, to
- * get at array indices.
- */
- return visit_continue_with_parent;
-}
-
-ir_visitor_status
-ir_constant_folding_visitor::visit_enter(ir_call *ir)
-{
- /* Attempt to constant fold parameters */
- foreach_two_lists(formal_node, &ir->callee->parameters,
- actual_node, &ir->actual_parameters) {
- ir_rvalue *param_rval = (ir_rvalue *) actual_node;
- ir_variable *sig_param = (ir_variable *) formal_node;
-
- if (sig_param->data.mode == ir_var_function_in
- || sig_param->data.mode == ir_var_const_in) {
- ir_rvalue *new_param = param_rval;
-
- handle_rvalue(&new_param);
- if (new_param != param_rval) {
- param_rval->replace_with(new_param);
- }
- }
- }
-
- /* Next, see if the call can be replaced with an assignment of a constant */
- ir_constant *const_val = ir->constant_expression_value();
-
- if (const_val != NULL) {
- ir_assignment *assignment =
- new(ralloc_parent(ir)) ir_assignment(ir->return_deref, const_val);
- ir->replace_with(assignment);
- }
-
- return visit_continue_with_parent;
-}
-
-bool
-do_constant_folding(exec_list *instructions)
-{
- ir_constant_folding_visitor constant_folding;
-
- visit_list_elements(&constant_folding, instructions);
-
- return constant_folding.progress;
-}
diff --git a/src/glsl/opt_constant_propagation.cpp b/src/glsl/opt_constant_propagation.cpp
deleted file mode 100644
index 416ba16a3c5..00000000000
--- a/src/glsl/opt_constant_propagation.cpp
+++ /dev/null
@@ -1,524 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * 
Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_constant_propagation.cpp
- *
- * Tracks assignments of constants to channels of variables, and
- * replaces usage of those constant channels with direct usage of the
- * constants.
- *
- * This can lead to constant folding and algebraic optimizations in
- * those later expressions, while causing no increase in instruction
- * count (due to constants being generally free to load from a
- * constant push buffer or as instruction immediate values) and
- * possibly reducing register pressure.
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_rvalue_visitor.h"
-#include "ir_basic_block.h"
-#include "ir_optimization.h"
-#include "compiler/glsl_types.h"
-#include "util/hash_table.h"
-
-namespace {
-
-class acp_entry : public exec_node
-{
-public:
- acp_entry(ir_variable *var, unsigned write_mask, ir_constant *constant)
- {
- assert(var);
- assert(constant);
- this->var = var;
- this->write_mask = write_mask;
- this->constant = constant;
- this->initial_values = write_mask;
- }
-
- acp_entry(const acp_entry *src)
- {
- this->var = src->var;
- this->write_mask = src->write_mask;
- this->constant = src->constant;
- this->initial_values = src->initial_values;
- }
-
- ir_variable *var;
- ir_constant *constant;
- unsigned write_mask;
-
- /** Mask of values initially available in the constant. */
- unsigned initial_values;
-};
-
-
-class kill_entry : public exec_node
-{
-public:
- kill_entry(ir_variable *var, unsigned write_mask)
- {
- assert(var);
- this->var = var;
- this->write_mask = write_mask;
- }
-
- ir_variable *var;
- unsigned write_mask;
-};
-
-class ir_constant_propagation_visitor : public ir_rvalue_visitor {
-public:
- ir_constant_propagation_visitor()
- {
- progress = false;
- killed_all = false;
- mem_ctx = ralloc_context(0);
- this->acp = new(mem_ctx) exec_list;
- this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- }
- ~ir_constant_propagation_visitor()
- {
- ralloc_free(mem_ctx);
- }
-
- virtual ir_visitor_status visit_enter(class ir_loop *);
- virtual ir_visitor_status visit_enter(class ir_function_signature *);
- virtual ir_visitor_status visit_enter(class ir_function *);
- virtual ir_visitor_status visit_leave(class ir_assignment *);
- virtual ir_visitor_status visit_enter(class ir_call *);
- virtual ir_visitor_status visit_enter(class ir_if *);
-
- void add_constant(ir_assignment *ir);
- void constant_folding(ir_rvalue **rvalue);
- void constant_propagation(ir_rvalue **rvalue);
- void kill(ir_variable *ir, unsigned write_mask);
- void handle_if_block(exec_list *instructions);
- void handle_rvalue(ir_rvalue **rvalue);
-
- /** List of acp_entry: The available constants to propagate */
- exec_list *acp;
-
- /**
- * List of kill_entry: The masks of variables whose values were
- * killed in this block.
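- *
- * (Sketch with illustrative names: after "v.xy = f();" inside an if
- * branch, this table maps v to a kill_entry with write_mask 0x3, so
- * the enclosing block can invalidate those two channels afterwards.)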
- */
- hash_table *kills;
-
- bool progress;
-
- bool killed_all;
-
- void *mem_ctx;
-};
-
-
-void
-ir_constant_propagation_visitor::constant_folding(ir_rvalue **rvalue) {
-
- if (*rvalue == NULL || (*rvalue)->ir_type == ir_type_constant)
- return;
-
- /* Note that we visit rvalues on leaving. So if an expression has a
- * non-constant operand, no need to go looking down it to find if it's
- * constant. This cuts the time of this pass down drastically.
- */
- ir_expression *expr = (*rvalue)->as_expression();
- if (expr) {
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- if (!expr->operands[i]->as_constant())
- return;
- }
- }
-
- /* Ditto for swizzles. */
- ir_swizzle *swiz = (*rvalue)->as_swizzle();
- if (swiz && !swiz->val->as_constant())
- return;
-
- ir_constant *constant = (*rvalue)->constant_expression_value();
- if (constant) {
- *rvalue = constant;
- this->progress = true;
- }
-}
-
-void
-ir_constant_propagation_visitor::constant_propagation(ir_rvalue **rvalue) {
-
- if (this->in_assignee || !*rvalue)
- return;
-
- const glsl_type *type = (*rvalue)->type;
- if (!type->is_scalar() && !type->is_vector())
- return;
-
- ir_swizzle *swiz = NULL;
- ir_dereference_variable *deref = (*rvalue)->as_dereference_variable();
- if (!deref) {
- swiz = (*rvalue)->as_swizzle();
- if (!swiz)
- return;
-
- deref = swiz->val->as_dereference_variable();
- if (!deref)
- return;
- }
-
- ir_constant_data data;
- memset(&data, 0, sizeof(data));
-
- for (unsigned int i = 0; i < type->components(); i++) {
- int channel;
- acp_entry *found = NULL;
-
- if (swiz) {
- switch (i) {
- case 0: channel = swiz->mask.x; break;
- case 1: channel = swiz->mask.y; break;
- case 2: channel = swiz->mask.z; break;
- case 3: channel = swiz->mask.w; break;
- default: assert(!"shouldn't be reached"); channel = 0; break;
- }
- } else {
- channel = i;
- }
-
- foreach_in_list(acp_entry, entry, this->acp) {
- if (entry->var == deref->var && entry->write_mask & (1 << channel)) {
- found = entry;
- break;
- }
- }
-
- if (!found)
- return;
-
- int rhs_channel = 0;
- for (int j = 0; j < 4; j++) {
- if (j == channel)
- break;
- if (found->initial_values & (1 << j))
- rhs_channel++;
- }
-
- switch (type->base_type) {
- case GLSL_TYPE_FLOAT:
- data.f[i] = found->constant->value.f[rhs_channel];
- break;
- case GLSL_TYPE_DOUBLE:
- data.d[i] = found->constant->value.d[rhs_channel];
- break;
- case GLSL_TYPE_INT:
- data.i[i] = found->constant->value.i[rhs_channel];
- break;
- case GLSL_TYPE_UINT:
- data.u[i] = found->constant->value.u[rhs_channel];
- break;
- case GLSL_TYPE_BOOL:
- data.b[i] = found->constant->value.b[rhs_channel];
- break;
- default:
- assert(!"not reached");
- break;
- }
- }
-
- *rvalue = new(ralloc_parent(deref)) ir_constant(type, &data);
- this->progress = true;
-}
-
-void
-ir_constant_propagation_visitor::handle_rvalue(ir_rvalue **rvalue)
-{
- constant_propagation(rvalue);
- constant_folding(rvalue);
-}
-
-ir_visitor_status
-ir_constant_propagation_visitor::visit_enter(ir_function_signature *ir)
-{
- /* Treat entry into a function signature as a completely separate
- * block. Any instructions at global scope will be shuffled into
- * main() at link time, so they're irrelevant to us.
- */
- exec_list *orig_acp = this->acp;
- hash_table *orig_kills = this->kills;
- bool orig_killed_all = this->killed_all;
-
- this->acp = new(mem_ctx) exec_list;
- this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- this->killed_all = false;
-
- visit_list_elements(this, &ir->body);
-
- this->kills = orig_kills;
- this->acp = orig_acp;
- this->killed_all = orig_killed_all;
-
- return visit_continue_with_parent;
-}
-
-ir_visitor_status
-ir_constant_propagation_visitor::visit_leave(ir_assignment *ir)
-{
- constant_folding(&ir->rhs);
-
- if (this->in_assignee)
- return visit_continue;
-
- unsigned kill_mask = ir->write_mask;
- if (ir->lhs->as_dereference_array()) {
- /* The LHS of the assignment uses an array indexing operator (e.g. v[i]
- * = ...;). Since we only try to constant propagate vectors and
- * scalars, this means that either (a) array indexing is being used to
- * select a vector component, or (b) the variable in question is neither
- * a scalar nor a vector, so we don't care about it. In the former case,
- * we want to kill the whole vector, since in general we can't predict
- * which vector component will be selected by array indexing. In the
- * latter case, it doesn't matter what we do, so go ahead and kill the
- * whole variable anyway.
- *
- * Note that if the array index is constant (e.g. v[2] = ...;), we could
- * in principle be smarter, but we don't need to, because a future
- * optimization pass will convert it to a simple assignment with the
- * correct mask.
- */
- kill_mask = ~0;
- }
- kill(ir->lhs->variable_referenced(), kill_mask);
-
- add_constant(ir);
-
- return visit_continue;
-}
-
-ir_visitor_status
-ir_constant_propagation_visitor::visit_enter(ir_function *ir)
-{
- (void) ir;
- return visit_continue;
-}
-
-ir_visitor_status
-ir_constant_propagation_visitor::visit_enter(ir_call *ir)
-{
- /* Do constant propagation on call parameters, but skip any out params */
- foreach_two_lists(formal_node, &ir->callee->parameters,
- actual_node, &ir->actual_parameters) {
- ir_variable *sig_param = (ir_variable *) formal_node;
- ir_rvalue *param = (ir_rvalue *) actual_node;
- if (sig_param->data.mode != ir_var_function_out
- && sig_param->data.mode != ir_var_function_inout) {
- ir_rvalue *new_param = param;
- handle_rvalue(&new_param);
- if (new_param != param)
- param->replace_with(new_param);
- else
- param->accept(this);
- }
- }
-
- /* Since we're unlinked, we don't (necessarily) know the side effects of
- * this call. So kill all copies.
- */
- acp->make_empty();
- this->killed_all = true;
-
- return visit_continue_with_parent;
-}
-
-void
-ir_constant_propagation_visitor::handle_if_block(exec_list *instructions)
-{
- exec_list *orig_acp = this->acp;
- hash_table *orig_kills = this->kills;
- bool orig_killed_all = this->killed_all;
-
- this->acp = new(mem_ctx) exec_list;
- this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- this->killed_all = false;
-
- /* Populate the initial acp with a copy of the original */
- foreach_in_list(acp_entry, a, orig_acp) {
- this->acp->push_tail(new(this->mem_ctx) acp_entry(a));
- }
-
- visit_list_elements(this, instructions);
-
- if (this->killed_all) {
- orig_acp->make_empty();
- }
-
- hash_table *new_kills = this->kills;
- this->kills = orig_kills;
- this->acp = orig_acp;
- this->killed_all = this->killed_all || orig_killed_all;
-
- hash_entry *htk;
- hash_table_foreach(new_kills, htk) {
- kill_entry *k = (kill_entry *) htk->data;
- kill(k->var, k->write_mask);
- }
-}
-
-ir_visitor_status
-ir_constant_propagation_visitor::visit_enter(ir_if *ir)
-{
- ir->condition->accept(this);
- handle_rvalue(&ir->condition);
-
- handle_if_block(&ir->then_instructions);
- handle_if_block(&ir->else_instructions);
-
- /* handle_if_block() already descended into the children. */
- return visit_continue_with_parent;
-}
-
-ir_visitor_status
-ir_constant_propagation_visitor::visit_enter(ir_loop *ir)
-{
- exec_list *orig_acp = this->acp;
- hash_table *orig_kills = this->kills;
- bool orig_killed_all = this->killed_all;
-
- /* FINISHME: For now, the initial acp for loops is totally empty.
- * We could go through once, then go through again with the acp
- * cloned minus the killed entries after the first run through.
- */
- this->acp = new(mem_ctx) exec_list;
- this->kills = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- this->killed_all = false;
-
- visit_list_elements(this, &ir->body_instructions);
-
- if (this->killed_all) {
- orig_acp->make_empty();
- }
-
- hash_table *new_kills = this->kills;
- this->kills = orig_kills;
- this->acp = orig_acp;
- this->killed_all = this->killed_all || orig_killed_all;
-
- hash_entry *htk;
- hash_table_foreach(new_kills, htk) {
- kill_entry *k = (kill_entry *) htk->data;
- kill(k->var, k->write_mask);
- }
-
- /* already descended into the children. */
- return visit_continue_with_parent;
-}
-
-void
-ir_constant_propagation_visitor::kill(ir_variable *var, unsigned write_mask)
-{
- assert(var != NULL);
-
- /* We don't track non-vectors. */
- if (!var->type->is_vector() && !var->type->is_scalar())
- return;
-
- /* Remove any entries currently in the ACP for this kill. */
- foreach_in_list_safe(acp_entry, entry, this->acp) {
- if (entry->var == var) {
- entry->write_mask &= ~write_mask;
- if (entry->write_mask == 0)
- entry->remove();
- }
- }
-
- /* Add this writemask of the variable to the list of killed
- * variables in this block.
- */
- hash_entry *kill_hash_entry = _mesa_hash_table_search(this->kills, var);
- if (kill_hash_entry) {
- kill_entry *entry = (kill_entry *) kill_hash_entry->data;
- entry->write_mask |= write_mask;
- return;
- }
- /* Not already in the list. Make new entry. */
- _mesa_hash_table_insert(this->kills, var,
- new(this->mem_ctx) kill_entry(var, write_mask));
-}
-
-/**
- * Adds an entry to the available constant list if it's a plain assignment
- * of a constant to a variable.
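- *
- * (Sketch with illustrative names: "v.xy = vec2(1.0, 2.0);" records
- * an acp_entry for v with write_mask 0x3; a later read of v.y in the
- * same block can then be rewritten to the immediate 2.0 by
- * constant_propagation() above.)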
- */
-void
-ir_constant_propagation_visitor::add_constant(ir_assignment *ir)
-{
- acp_entry *entry;
-
- if (ir->condition)
- return;
-
- if (!ir->write_mask)
- return;
-
- ir_dereference_variable *deref = ir->lhs->as_dereference_variable();
- ir_constant *constant = ir->rhs->as_constant();
-
- if (!deref || !constant)
- return;
-
- /* Only do constant propagation on vectors. Constant matrices,
- * arrays, or structures would require more work elsewhere.
- */
- if (!deref->var->type->is_vector() && !deref->var->type->is_scalar())
- return;
-
- /* We can't do constant propagation on buffer variables: since the
- * underlying memory storage is shared across multiple threads, we can't
- * be sure that the variable value isn't modified between this
- * assignment and the next instruction where its value is read.
- */
- if (deref->var->data.mode == ir_var_shader_storage ||
- deref->var->data.mode == ir_var_shader_shared)
- return;
-
- entry = new(this->mem_ctx) acp_entry(deref->var, ir->write_mask, constant);
- this->acp->push_tail(entry);
-}
-
-} /* unnamed namespace */
-
-/**
- * Does a constant propagation pass on the code present in the instruction stream.
- */
-bool
-do_constant_propagation(exec_list *instructions)
-{
- ir_constant_propagation_visitor v;
-
- visit_list_elements(&v, instructions);
-
- return v.progress;
-}
diff --git a/src/glsl/opt_constant_variable.cpp b/src/glsl/opt_constant_variable.cpp
deleted file mode 100644
index 3ddb12904c7..00000000000
--- a/src/glsl/opt_constant_variable.cpp
+++ /dev/null
@@ -1,218 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_constant_variable.cpp
- *
- * Marks variables assigned a single constant value over the course
- * of the program as constant.
- *
- * The goal here is to trigger further constant folding and then dead
- * code elimination. This is common with vector/matrix constructors
- * and calls to builtin functions.
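- *
- * (Sketch with illustrative names: if "vec2 a = vec2(0.0);" is the
- * only write to a, the pass sets a->constant_value, and subsequent
- * folding and dead-code passes can then remove a entirely.)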
- */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_optimization.h" -#include "compiler/glsl_types.h" -#include "util/hash_table.h" - -namespace { - -struct assignment_entry { - int assignment_count; - ir_variable *var; - ir_constant *constval; - bool our_scope; -}; - -class ir_constant_variable_visitor : public ir_hierarchical_visitor { -public: - virtual ir_visitor_status visit_enter(ir_dereference_variable *); - virtual ir_visitor_status visit(ir_variable *); - virtual ir_visitor_status visit_enter(ir_assignment *); - virtual ir_visitor_status visit_enter(ir_call *); - - struct hash_table *ht; -}; - -} /* unnamed namespace */ - -static struct assignment_entry * -get_assignment_entry(ir_variable *var, struct hash_table *ht) -{ - struct hash_entry *hte = _mesa_hash_table_search(ht, var); - struct assignment_entry *entry; - - if (hte) { - entry = (struct assignment_entry *) hte->data; - } else { - entry = (struct assignment_entry *) calloc(1, sizeof(*entry)); - entry->var = var; - _mesa_hash_table_insert(ht, var, entry); - } - - return entry; -} - -ir_visitor_status -ir_constant_variable_visitor::visit(ir_variable *ir) -{ - struct assignment_entry *entry = get_assignment_entry(ir, this->ht); - entry->our_scope = true; - return visit_continue; -} - -/* Skip derefs of variables so that we can detect declarations. */ -ir_visitor_status -ir_constant_variable_visitor::visit_enter(ir_dereference_variable *ir) -{ - (void)ir; - return visit_continue_with_parent; -} - -ir_visitor_status -ir_constant_variable_visitor::visit_enter(ir_assignment *ir) -{ - ir_constant *constval; - struct assignment_entry *entry; - - entry = get_assignment_entry(ir->lhs->variable_referenced(), this->ht); - assert(entry); - entry->assignment_count++; - - /* If it's already constant, don't do the work. */ - if (entry->var->constant_value) - return visit_continue; - - /* OK, now find if we actually have all the right conditions for - * this to be a constant value assigned to the var. - */ - if (ir->condition) - return visit_continue; - - ir_variable *var = ir->whole_variable_written(); - if (!var) - return visit_continue; - - /* Ignore buffer variables, since the underlying storage is shared - * and we can't be sure that this variable won't be written by another - * thread. - */ - if (var->data.mode == ir_var_shader_storage || - var->data.mode == ir_var_shader_shared) - return visit_continue; - - constval = ir->rhs->constant_expression_value(); - if (!constval) - return visit_continue; - - /* Mark this entry as having a constant assignment (if the - * assignment count doesn't go >1). do_constant_variable will fix - * up the variable with the constant value later. 
- */
- entry->constval = constval;
-
- return visit_continue;
-}
-
-ir_visitor_status
-ir_constant_variable_visitor::visit_enter(ir_call *ir)
-{
- /* Mark any out parameters as assigned to */
- foreach_two_lists(formal_node, &ir->callee->parameters,
- actual_node, &ir->actual_parameters) {
- ir_rvalue *param_rval = (ir_rvalue *) actual_node;
- ir_variable *param = (ir_variable *) formal_node;
-
- if (param->data.mode == ir_var_function_out ||
- param->data.mode == ir_var_function_inout) {
- ir_variable *var = param_rval->variable_referenced();
- struct assignment_entry *entry;
-
- assert(var);
- entry = get_assignment_entry(var, this->ht);
- entry->assignment_count++;
- }
- }
-
- /* Mark the return storage as having been assigned to */
- if (ir->return_deref != NULL) {
- ir_variable *var = ir->return_deref->variable_referenced();
- struct assignment_entry *entry;
-
- assert(var);
- entry = get_assignment_entry(var, this->ht);
- entry->assignment_count++;
- }
-
- return visit_continue;
-}
-
-/**
- * Does a constant-variable pass on the code present in the instruction stream.
- */
-bool
-do_constant_variable(exec_list *instructions)
-{
- bool progress = false;
- ir_constant_variable_visitor v;
-
- v.ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
- _mesa_key_pointer_equal);
- v.run(instructions);
-
- struct hash_entry *hte;
- hash_table_foreach(v.ht, hte) {
- struct assignment_entry *entry = (struct assignment_entry *) hte->data;
-
- if (entry->assignment_count == 1 && entry->constval && entry->our_scope) {
- entry->var->constant_value = entry->constval;
- progress = true;
- }
- hte->data = NULL;
- free(entry);
- }
- _mesa_hash_table_destroy(v.ht, NULL);
-
- return progress;
-}
-
-bool
-do_constant_variable_unlinked(exec_list *instructions)
-{
- bool progress = false;
-
- foreach_in_list(ir_instruction, ir, instructions) {
- ir_function *f = ir->as_function();
- if (f) {
- foreach_in_list(ir_function_signature, sig, &f->signatures) {
- if (do_constant_variable(&sig->body))
- progress = true;
- }
- }
- }
-
- return progress;
-}
diff --git a/src/glsl/opt_copy_propagation.cpp b/src/glsl/opt_copy_propagation.cpp
deleted file mode 100644
index 310708db868..00000000000
--- a/src/glsl/opt_copy_propagation.cpp
+++ /dev/null
@@ -1,352 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_copy_propagation.cpp
- *
- * Moves usage of recently-copied variables to the previous copy of
- * the variable.
- *
- * This should reduce the number of MOV instructions in the generated
- * programs unless copy propagation is also done on the LIR, and may
- * help anyway by triggering other optimizations that live in the HIR.
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_basic_block.h"
-#include "ir_optimization.h"
-#include "compiler/glsl_types.h"
-
-namespace {
-
-class acp_entry : public exec_node
-{
-public:
- acp_entry(ir_variable *lhs, ir_variable *rhs)
- {
- assert(lhs);
- assert(rhs);
- this->lhs = lhs;
- this->rhs = rhs;
- }
-
- ir_variable *lhs;
- ir_variable *rhs;
-};
-
-
-class kill_entry : public exec_node
-{
-public:
- kill_entry(ir_variable *var)
- {
- assert(var);
- this->var = var;
- }
-
- ir_variable *var;
-};
-
-class ir_copy_propagation_visitor : public ir_hierarchical_visitor {
-public:
- ir_copy_propagation_visitor()
- {
- progress = false;
- mem_ctx = ralloc_context(0);
- this->acp = new(mem_ctx) exec_list;
- this->kills = new(mem_ctx) exec_list;
- }
- ~ir_copy_propagation_visitor()
- {
- ralloc_free(mem_ctx);
- }
-
- virtual ir_visitor_status visit(class ir_dereference_variable *);
- virtual ir_visitor_status visit_enter(class ir_loop *);
- virtual ir_visitor_status visit_enter(class ir_function_signature *);
- virtual ir_visitor_status visit_enter(class ir_function *);
- virtual ir_visitor_status visit_leave(class ir_assignment *);
- virtual ir_visitor_status visit_enter(class ir_call *);
- virtual ir_visitor_status visit_enter(class ir_if *);
-
- void add_copy(ir_assignment *ir);
- void kill(ir_variable *ir);
- void handle_if_block(exec_list *instructions);
-
- /** List of acp_entry: The available copies to propagate */
- exec_list *acp;
- /**
- * List of kill_entry: The variables whose values were killed in this
- * block.
- */
- exec_list *kills;
-
- bool progress;
-
- bool killed_all;
-
- void *mem_ctx;
-};
-
-} /* unnamed namespace */
-
-ir_visitor_status
-ir_copy_propagation_visitor::visit_enter(ir_function_signature *ir)
-{
- /* Treat entry into a function signature as a completely separate
- * block. Any instructions at global scope will be shuffled into
- * main() at link time, so they're irrelevant to us.
- */
- exec_list *orig_acp = this->acp;
- exec_list *orig_kills = this->kills;
- bool orig_killed_all = this->killed_all;
-
- this->acp = new(mem_ctx) exec_list;
- this->kills = new(mem_ctx) exec_list;
- this->killed_all = false;
-
- visit_list_elements(this, &ir->body);
-
- ralloc_free(this->acp);
- ralloc_free(this->kills);
-
- this->kills = orig_kills;
- this->acp = orig_acp;
- this->killed_all = orig_killed_all;
-
- return visit_continue_with_parent;
-}
-
-ir_visitor_status
-ir_copy_propagation_visitor::visit_leave(ir_assignment *ir)
-{
- kill(ir->lhs->variable_referenced());
-
- add_copy(ir);
-
- return visit_continue;
-}
-
-ir_visitor_status
-ir_copy_propagation_visitor::visit_enter(ir_function *ir)
-{
- (void) ir;
- return visit_continue;
-}
-
-/**
- * Replaces dereferences of ACP LHS variables with ACP RHS variables.
- *
- * This is where the actual copy propagation occurs. Note that the
- * rewriting of ir_dereference means that the ir_dereference instance
- * must not be shared by multiple IR operations!
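- *
- * (Sketch with illustrative names: after "b = a;", the ACP holds the
- * pair (lhs = b, rhs = a), so a following "c = b + 1.0;" is rewritten
- * here into "c = a + 1.0;".)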
- */ -ir_visitor_status -ir_copy_propagation_visitor::visit(ir_dereference_variable *ir) -{ - if (this->in_assignee) - return visit_continue; - - ir_variable *var = ir->var; - - foreach_in_list(acp_entry, entry, this->acp) { - if (var == entry->lhs) { - ir->var = entry->rhs; - this->progress = true; - break; - } - } - - return visit_continue; -} - - -ir_visitor_status -ir_copy_propagation_visitor::visit_enter(ir_call *ir) -{ - /* Do copy propagation on call parameters, but skip any out params */ - foreach_two_lists(formal_node, &ir->callee->parameters, - actual_node, &ir->actual_parameters) { - ir_variable *sig_param = (ir_variable *) formal_node; - ir_rvalue *ir = (ir_rvalue *) actual_node; - if (sig_param->data.mode != ir_var_function_out - && sig_param->data.mode != ir_var_function_inout) { - ir->accept(this); - } - } - - /* Since we're unlinked, we don't (necessarily) know the side effects of - * this call. So kill all copies. - */ - acp->make_empty(); - this->killed_all = true; - - return visit_continue_with_parent; -} - -void -ir_copy_propagation_visitor::handle_if_block(exec_list *instructions) -{ - exec_list *orig_acp = this->acp; - exec_list *orig_kills = this->kills; - bool orig_killed_all = this->killed_all; - - this->acp = new(mem_ctx) exec_list; - this->kills = new(mem_ctx) exec_list; - this->killed_all = false; - - /* Populate the initial acp with a copy of the original */ - foreach_in_list(acp_entry, a, orig_acp) { - this->acp->push_tail(new(this->acp) acp_entry(a->lhs, a->rhs)); - } - - visit_list_elements(this, instructions); - - if (this->killed_all) { - orig_acp->make_empty(); - } - - exec_list *new_kills = this->kills; - this->kills = orig_kills; - ralloc_free(this->acp); - this->acp = orig_acp; - this->killed_all = this->killed_all || orig_killed_all; - - foreach_in_list(kill_entry, k, new_kills) { - kill(k->var); - } - - ralloc_free(new_kills); -} - -ir_visitor_status -ir_copy_propagation_visitor::visit_enter(ir_if *ir) -{ - ir->condition->accept(this); - - handle_if_block(&ir->then_instructions); - handle_if_block(&ir->else_instructions); - - /* handle_if_block() already descended into the children. */ - return visit_continue_with_parent; -} - -ir_visitor_status -ir_copy_propagation_visitor::visit_enter(ir_loop *ir) -{ - exec_list *orig_acp = this->acp; - exec_list *orig_kills = this->kills; - bool orig_killed_all = this->killed_all; - - /* FINISHME: For now, the initial acp for loops is totally empty. - * We could go through once, then go through again with the acp - * cloned minus the killed entries after the first run through. - */ - this->acp = new(mem_ctx) exec_list; - this->kills = new(mem_ctx) exec_list; - this->killed_all = false; - - visit_list_elements(this, &ir->body_instructions); - - if (this->killed_all) { - orig_acp->make_empty(); - } - - exec_list *new_kills = this->kills; - this->kills = orig_kills; - ralloc_free(this->acp); - this->acp = orig_acp; - this->killed_all = this->killed_all || orig_killed_all; - - foreach_in_list(kill_entry, k, new_kills) { - kill(k->var); - } - - ralloc_free(new_kills); - - /* already descended into the children. */ - return visit_continue_with_parent; -} - -void -ir_copy_propagation_visitor::kill(ir_variable *var) -{ - assert(var != NULL); - - /* Remove any entries currently in the ACP for this kill. */ - foreach_in_list_safe(acp_entry, entry, acp) { - if (entry->lhs == var || entry->rhs == var) { - entry->remove(); - } - } - - /* Add the LHS variable to the list of killed variables in this block. 
- */
- this->kills->push_tail(new(this->kills) kill_entry(var));
-}
-
-/**
- * Adds an entry to the available copy list if it's a plain assignment
- * of a variable to a variable.
- */
-void
-ir_copy_propagation_visitor::add_copy(ir_assignment *ir)
-{
- acp_entry *entry;
-
- if (ir->condition)
- return;
-
- ir_variable *lhs_var = ir->whole_variable_written();
- ir_variable *rhs_var = ir->rhs->whole_variable_referenced();
-
- if ((lhs_var != NULL) && (rhs_var != NULL)) {
- if (lhs_var == rhs_var) {
- /* This is a dumb assignment, but we've conveniently noticed
- * it here. Removing it now would mess up the loop iteration
- * calling us. Just flag it to not execute, and someone else
- * will clean up the mess.
- */
- ir->condition = new(ralloc_parent(ir)) ir_constant(false);
- this->progress = true;
- } else if (lhs_var->data.mode != ir_var_shader_storage &&
- lhs_var->data.mode != ir_var_shader_shared) {
- entry = new(this->acp) acp_entry(lhs_var, rhs_var);
- this->acp->push_tail(entry);
- }
- }
-}
-
-/**
- * Does a copy propagation pass on the code present in the instruction stream.
- */
-bool
-do_copy_propagation(exec_list *instructions)
-{
- ir_copy_propagation_visitor v;
-
- visit_list_elements(&v, instructions);
-
- return v.progress;
-}
diff --git a/src/glsl/opt_copy_propagation_elements.cpp b/src/glsl/opt_copy_propagation_elements.cpp
deleted file mode 100644
index a6791801943..00000000000
--- a/src/glsl/opt_copy_propagation_elements.cpp
+++ /dev/null
@@ -1,509 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_copy_propagation_elements.cpp
- *
- * Replaces usage of recently-copied components of variables with the
- * previous copy of the variable.
- *
- * This pass can be compared with opt_copy_propagation, which operates
- * on arbitrary whole-variable copies. However, in order to handle
- * the copy propagation of swizzled variables or writemasked writes,
- * we want to track things on a channel-wise basis. I found that
- * trying to mix the swizzled/writemasked support here with the
- * whole-variable stuff in opt_copy_propagation.cpp just made a mess,
- * so this is separate despite the ACP handling being somewhat
- * similar.
- *
- * This should reduce the number of MOV instructions in the generated
- * programs unless copy propagation is also done on the LIR, and may
- * help anyway by triggering other optimizations that live in the HIR.
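- *
- * (Sketch with illustrative names: after "v.xy = u.yx;", a read of
- * v.x can be rewritten to u.y and a read of v.y to u.x even though v
- * as a whole was never copied; the per-channel write_mask and swizzle
- * tracking below is what makes this possible.)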
- */ - -#include "ir.h" -#include "ir_rvalue_visitor.h" -#include "ir_basic_block.h" -#include "ir_optimization.h" -#include "compiler/glsl_types.h" - -static bool debug = false; - -namespace { - -class acp_entry : public exec_node -{ -public: - acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4]) - { - this->lhs = lhs; - this->rhs = rhs; - this->write_mask = write_mask; - memcpy(this->swizzle, swizzle, sizeof(this->swizzle)); - } - - acp_entry(acp_entry *a) - { - this->lhs = a->lhs; - this->rhs = a->rhs; - this->write_mask = a->write_mask; - memcpy(this->swizzle, a->swizzle, sizeof(this->swizzle)); - } - - ir_variable *lhs; - ir_variable *rhs; - unsigned int write_mask; - int swizzle[4]; -}; - - -class kill_entry : public exec_node -{ -public: - kill_entry(ir_variable *var, int write_mask) - { - this->var = var; - this->write_mask = write_mask; - } - - ir_variable *var; - unsigned int write_mask; -}; - -class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor { -public: - ir_copy_propagation_elements_visitor() - { - this->progress = false; - this->killed_all = false; - this->mem_ctx = ralloc_context(NULL); - this->shader_mem_ctx = NULL; - this->acp = new(mem_ctx) exec_list; - this->kills = new(mem_ctx) exec_list; - } - ~ir_copy_propagation_elements_visitor() - { - ralloc_free(mem_ctx); - } - - virtual ir_visitor_status visit_enter(class ir_loop *); - virtual ir_visitor_status visit_enter(class ir_function_signature *); - virtual ir_visitor_status visit_leave(class ir_assignment *); - virtual ir_visitor_status visit_enter(class ir_call *); - virtual ir_visitor_status visit_enter(class ir_if *); - virtual ir_visitor_status visit_leave(class ir_swizzle *); - - void handle_rvalue(ir_rvalue **rvalue); - - void add_copy(ir_assignment *ir); - void kill(kill_entry *k); - void handle_if_block(exec_list *instructions); - - /** List of acp_entry: The available copies to propagate */ - exec_list *acp; - /** - * List of kill_entry: The variables whose values were killed in this - * block. - */ - exec_list *kills; - - bool progress; - - bool killed_all; - - /* Context for our local data structures. */ - void *mem_ctx; - /* Context for allocating new shader nodes. */ - void *shader_mem_ctx; -}; - -} /* unnamed namespace */ - -ir_visitor_status -ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir) -{ - /* Treat entry into a function signature as a completely separate - * block. Any instructions at global scope will be shuffled into - * main() at link time, so they're irrelevant to us. 
- */
- exec_list *orig_acp = this->acp;
- exec_list *orig_kills = this->kills;
- bool orig_killed_all = this->killed_all;
-
- this->acp = new(mem_ctx) exec_list;
- this->kills = new(mem_ctx) exec_list;
- this->killed_all = false;
-
- visit_list_elements(this, &ir->body);
-
- ralloc_free(this->acp);
- ralloc_free(this->kills);
-
- this->kills = orig_kills;
- this->acp = orig_acp;
- this->killed_all = orig_killed_all;
-
- return visit_continue_with_parent;
-}
-
-ir_visitor_status
-ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
-{
- ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
- ir_variable *var = ir->lhs->variable_referenced();
-
- if (var->type->is_scalar() || var->type->is_vector()) {
- kill_entry *k;
-
- if (lhs)
- k = new(this->kills) kill_entry(var, ir->write_mask);
- else
- k = new(this->kills) kill_entry(var, ~0);
-
- kill(k);
- }
-
- add_copy(ir);
-
- return visit_continue;
-}
-
-ir_visitor_status
-ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *)
-{
- /* Don't visit the values of swizzles since they are handled while
- * visiting the swizzle itself.
- */
- return visit_continue;
-}
-
-/**
- * Replaces dereferences of ACP LHS variables with ACP RHS variables.
- *
- * This is where the actual copy propagation occurs. Note that the
- * rewriting of ir_dereference means that the ir_dereference instance
- * must not be shared by multiple IR operations!
- */
-void
-ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
-{
- int swizzle_chan[4];
- ir_dereference_variable *deref_var;
- ir_variable *source[4] = {NULL, NULL, NULL, NULL};
- int source_chan[4] = {0, 0, 0, 0};
- int chans;
- bool noop_swizzle = true;
-
- if (!*ir)
- return;
-
- ir_swizzle *swizzle = (*ir)->as_swizzle();
- if (swizzle) {
- deref_var = swizzle->val->as_dereference_variable();
- if (!deref_var)
- return;
-
- swizzle_chan[0] = swizzle->mask.x;
- swizzle_chan[1] = swizzle->mask.y;
- swizzle_chan[2] = swizzle->mask.z;
- swizzle_chan[3] = swizzle->mask.w;
- chans = swizzle->type->vector_elements;
- } else {
- deref_var = (*ir)->as_dereference_variable();
- if (!deref_var)
- return;
-
- swizzle_chan[0] = 0;
- swizzle_chan[1] = 1;
- swizzle_chan[2] = 2;
- swizzle_chan[3] = 3;
- chans = deref_var->type->vector_elements;
- }
-
- if (this->in_assignee)
- return;
-
- ir_variable *var = deref_var->var;
-
- /* Try to find ACP entries covering swizzle_chan[], hoping they're
- * the same source variable.
- */
- foreach_in_list(acp_entry, entry, this->acp) {
- if (var == entry->lhs) {
- for (int c = 0; c < chans; c++) {
- if (entry->write_mask & (1 << swizzle_chan[c])) {
- source[c] = entry->rhs;
- source_chan[c] = entry->swizzle[swizzle_chan[c]];
-
- if (source_chan[c] != swizzle_chan[c])
- noop_swizzle = false;
- }
- }
- }
- }
-
- /* Make sure all channels are copying from the same source variable. */
- if (!source[0])
- return;
- for (int c = 1; c < chans; c++) {
- if (source[c] != source[0])
- return;
- }
-
- if (!shader_mem_ctx)
- shader_mem_ctx = ralloc_parent(deref_var);
-
- /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
- * of itself, which would just be deleted by opt_noop_swizzle).
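- *
- * (E.g., rewriting a read of "v.x" into "v.x" again would churn the
- * IR and set progress without actually changing the program.)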
- */ - if (source[0] == var && noop_swizzle) - return; - - if (debug) { - printf("Copy propagation from:\n"); - (*ir)->print(); - } - - deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]); - *ir = new(shader_mem_ctx) ir_swizzle(deref_var, - source_chan[0], - source_chan[1], - source_chan[2], - source_chan[3], - chans); - progress = true; - - if (debug) { - printf("to:\n"); - (*ir)->print(); - printf("\n"); - } -} - - -ir_visitor_status -ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir) -{ - /* Do copy propagation on call parameters, but skip any out params */ - foreach_two_lists(formal_node, &ir->callee->parameters, - actual_node, &ir->actual_parameters) { - ir_variable *sig_param = (ir_variable *) formal_node; - ir_rvalue *ir = (ir_rvalue *) actual_node; - if (sig_param->data.mode != ir_var_function_out - && sig_param->data.mode != ir_var_function_inout) { - ir->accept(this); - } - } - - /* Since we're unlinked, we don't (necessarily) know the side effects of - * this call. So kill all copies. - */ - acp->make_empty(); - this->killed_all = true; - - return visit_continue_with_parent; -} - -void -ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions) -{ - exec_list *orig_acp = this->acp; - exec_list *orig_kills = this->kills; - bool orig_killed_all = this->killed_all; - - this->acp = new(mem_ctx) exec_list; - this->kills = new(mem_ctx) exec_list; - this->killed_all = false; - - /* Populate the initial acp with a copy of the original */ - foreach_in_list(acp_entry, a, orig_acp) { - this->acp->push_tail(new(this->acp) acp_entry(a)); - } - - visit_list_elements(this, instructions); - - if (this->killed_all) { - orig_acp->make_empty(); - } - - exec_list *new_kills = this->kills; - this->kills = orig_kills; - ralloc_free(this->acp); - this->acp = orig_acp; - this->killed_all = this->killed_all || orig_killed_all; - - /* Move the new kills into the parent block's list, removing them - * from the parent's ACP list in the process. - */ - foreach_in_list_safe(kill_entry, k, new_kills) { - kill(k); - } - - ralloc_free(new_kills); -} - -ir_visitor_status -ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir) -{ - ir->condition->accept(this); - - handle_if_block(&ir->then_instructions); - handle_if_block(&ir->else_instructions); - - /* handle_if_block() already descended into the children. */ - return visit_continue_with_parent; -} - -ir_visitor_status -ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir) -{ - exec_list *orig_acp = this->acp; - exec_list *orig_kills = this->kills; - bool orig_killed_all = this->killed_all; - - /* FINISHME: For now, the initial acp for loops is totally empty. - * We could go through once, then go through again with the acp - * cloned minus the killed entries after the first run through. - */ - this->acp = new(mem_ctx) exec_list; - this->kills = new(mem_ctx) exec_list; - this->killed_all = false; - - visit_list_elements(this, &ir->body_instructions); - - if (this->killed_all) { - orig_acp->make_empty(); - } - - exec_list *new_kills = this->kills; - this->kills = orig_kills; - ralloc_free(this->acp); - this->acp = orig_acp; - this->killed_all = this->killed_all || orig_killed_all; - - foreach_in_list_safe(kill_entry, k, new_kills) { - kill(k); - } - - ralloc_free(new_kills); - - /* already descended into the children. */ - return visit_continue_with_parent; -} - -/* Remove any entries currently in the ACP for this kill. 
*/ -void -ir_copy_propagation_elements_visitor::kill(kill_entry *k) -{ - foreach_in_list_safe(acp_entry, entry, acp) { - if (entry->lhs == k->var) { - entry->write_mask = entry->write_mask & ~k->write_mask; - if (entry->write_mask == 0) { - entry->remove(); - continue; - } - } - if (entry->rhs == k->var) { - entry->remove(); - } - } - - /* If we were on a list, remove ourselves before inserting */ - if (k->next) - k->remove(); - - ralloc_steal(this->kills, k); - this->kills->push_tail(k); -} - -/** - * Adds directly-copied channels between vector variables to the available - * copy propagation list. - */ -void -ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir) -{ - acp_entry *entry; - int orig_swizzle[4] = {0, 1, 2, 3}; - int swizzle[4]; - - if (ir->condition) - return; - - ir_dereference_variable *lhs = ir->lhs->as_dereference_variable(); - if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector())) - return; - - ir_dereference_variable *rhs = ir->rhs->as_dereference_variable(); - if (!rhs) { - ir_swizzle *swiz = ir->rhs->as_swizzle(); - if (!swiz) - return; - - rhs = swiz->val->as_dereference_variable(); - if (!rhs) - return; - - orig_swizzle[0] = swiz->mask.x; - orig_swizzle[1] = swiz->mask.y; - orig_swizzle[2] = swiz->mask.z; - orig_swizzle[3] = swiz->mask.w; - } - - /* Move the swizzle channels out to the positions they match in the - * destination. We don't want to have to rewrite the swizzle[] - * array every time we clear a bit of the write_mask. - */ - int j = 0; - for (int i = 0; i < 4; i++) { - if (ir->write_mask & (1 << i)) - swizzle[i] = orig_swizzle[j++]; - } - - int write_mask = ir->write_mask; - if (lhs->var == rhs->var) { - /* If this is a copy from the variable to itself, then we need - * to be sure not to include the updated channels from this - * instruction in the set of new source channels to be - * copy-propagated from. - */ - for (int i = 0; i < 4; i++) { - if (ir->write_mask & (1 << orig_swizzle[i])) - write_mask &= ~(1 << i); - } - } - - entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask, - swizzle); - this->acp->push_tail(entry); -} - -bool -do_copy_propagation_elements(exec_list *instructions) -{ - ir_copy_propagation_elements_visitor v; - - visit_list_elements(&v, instructions); - - return v.progress; -} diff --git a/src/glsl/opt_dead_builtin_variables.cpp b/src/glsl/opt_dead_builtin_variables.cpp deleted file mode 100644 index 03e578982b9..00000000000 --- a/src/glsl/opt_dead_builtin_variables.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_optimization.h"
-
-/**
- * Pre-linking, optimize unused built-in variables
- *
- * Uniforms, constants, system values, inputs (vertex shader only), and
- * outputs (fragment shader only) that are not used can be removed.
- */
-void
-optimize_dead_builtin_variables(exec_list *instructions,
- enum ir_variable_mode other)
-{
- foreach_in_list_safe(ir_variable, var, instructions) {
- if (var->ir_type != ir_type_variable || var->data.used)
- continue;
-
- if (var->data.mode != ir_var_uniform
- && var->data.mode != ir_var_auto
- && var->data.mode != ir_var_system_value
- && var->data.mode != other)
- continue;
-
- /* So that linker rules can later be enforced, we cannot eliminate
- * variables that were redeclared in the shader code.
- */
- if ((var->data.mode == other || var->data.mode == ir_var_system_value)
- && var->data.how_declared != ir_var_declared_implicitly)
- continue;
-
- if (!is_gl_identifier(var->name))
- continue;
-
- /* gl_ModelViewProjectionMatrix and gl_Vertex are special because they
- * are used by ftransform. No other built-in variable is used by a
- * built-in function. The forward declarations of these variables in
- * the built-in function shader do not have the "state slot"
- * information, so removing these variables from the user shader will
- * cause problems later.
- *
- * For compute shaders, gl_GlobalInvocationID has some dependencies, so
- * we avoid removing these dependencies.
- *
- * We also avoid removing gl_GlobalInvocationID at this stage because it
- * might be used by a linked shader. In this case it still needs to be
- * initialized by the main function.
- *
- *    gl_GlobalInvocationID =
- *       gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
- *
- * Similarly, we initialize gl_LocalInvocationIndex in the main function:
- *
- *    gl_LocalInvocationIndex =
- *       gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
- *       gl_LocalInvocationID.y * gl_WorkGroupSize.x +
- *       gl_LocalInvocationID.x;
- *
- * Matrix uniforms with "Transpose" are not eliminated because there's
- * an optimization pass that can turn references to the regular matrix
- * into references to the transpose matrix. Eliminating the transpose
- * matrix would cause that pass to generate references to undeclared
- * variables (thank you, ir_validate).
- *
- * It doesn't seem worth the effort to track when the transpose could be
- * eliminated (i.e., when the non-transpose was eliminated).
- */ - if (strcmp(var->name, "gl_ModelViewProjectionMatrix") == 0 - || strcmp(var->name, "gl_Vertex") == 0 - || strcmp(var->name, "gl_WorkGroupID") == 0 - || strcmp(var->name, "gl_WorkGroupSize") == 0 - || strcmp(var->name, "gl_LocalInvocationID") == 0 - || strcmp(var->name, "gl_GlobalInvocationID") == 0 - || strcmp(var->name, "gl_LocalInvocationIndex") == 0 - || strstr(var->name, "Transpose") != NULL) - continue; - - var->remove(); - } -} diff --git a/src/glsl/opt_dead_builtin_varyings.cpp b/src/glsl/opt_dead_builtin_varyings.cpp deleted file mode 100644 index 37bcbccf0c5..00000000000 --- a/src/glsl/opt_dead_builtin_varyings.cpp +++ /dev/null @@ -1,606 +0,0 @@ -/* - * Copyright © 2013 Marek Olšák - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_dead_builtin_varyings.cpp - * - * This eliminates the built-in shader outputs which are either not written - * at all or not used by the next stage. It also eliminates unused elements - * of gl_TexCoord inputs, which reduces the overall varying usage. - * The varyings handled here are the primary and secondary color, the fog, - * and the texture coordinates (gl_TexCoord). - * - * This pass is necessary, because the Mesa GLSL linker cannot eliminate - * built-in varyings like it eliminates user-defined varyings, because - * the built-in varyings have pre-assigned locations. Also, the elimination - * of unused gl_TexCoord elements requires its own lowering pass anyway. - * - * It's implemented by replacing all occurrences of dead varyings with - * temporary variables, which creates dead code. It is recommended to run - * a dead-code elimination pass after this. - * - * If any texture coordinate slots can be eliminated, the gl_TexCoord array is - * broken down into separate vec4 variables with locations equal to - * VARYING_SLOT_TEX0 + i. - * - * The same is done for the gl_FragData fragment shader output. - */ - -#include "main/core.h" /* for snprintf and ARRAY_SIZE */ -#include "ir.h" -#include "ir_rvalue_visitor.h" -#include "ir_optimization.h" -#include "ir_print_visitor.h" -#include "compiler/glsl_types.h" -#include "link_varyings.h" - -namespace { - -/** - * This obtains detailed information about built-in varyings from shader code. 
- */ -class varying_info_visitor : public ir_hierarchical_visitor { -public: - /* "mode" can be either ir_var_shader_in or ir_var_shader_out */ - varying_info_visitor(ir_variable_mode mode, bool find_frag_outputs = false) - : lower_texcoord_array(true), - texcoord_array(NULL), - texcoord_usage(0), - find_frag_outputs(find_frag_outputs), - lower_fragdata_array(true), - fragdata_array(NULL), - fragdata_usage(0), - color_usage(0), - tfeedback_color_usage(0), - fog(NULL), - has_fog(false), - tfeedback_has_fog(false), - mode(mode) - { - memset(color, 0, sizeof(color)); - memset(backcolor, 0, sizeof(backcolor)); - } - - virtual ir_visitor_status visit_enter(ir_dereference_array *ir) - { - ir_variable *var = ir->variable_referenced(); - - if (!var || var->data.mode != this->mode || !var->type->is_array()) - return visit_continue; - - if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) { - this->fragdata_array = var; - - ir_constant *index = ir->array_index->as_constant(); - if (index == NULL) { - /* This is variable indexing. */ - this->fragdata_usage |= (1 << var->type->array_size()) - 1; - this->lower_fragdata_array = false; - } - else { - this->fragdata_usage |= 1 << index->get_uint_component(0); - /* Don't lower fragdata array if the output variable - * is not a float variable (or float vector) because it will - * generate wrong register assignments because of different - * data types. - */ - if (var->type->gl_type != GL_FLOAT && - var->type->gl_type != GL_FLOAT_VEC2 && - var->type->gl_type != GL_FLOAT_VEC3 && - var->type->gl_type != GL_FLOAT_VEC4) - this->lower_fragdata_array = false; - } - - /* Don't visit the leaves of ir_dereference_array. */ - return visit_continue_with_parent; - } - - if (!this->find_frag_outputs && var->data.location == VARYING_SLOT_TEX0) { - this->texcoord_array = var; - - ir_constant *index = ir->array_index->as_constant(); - if (index == NULL) { - /* There is variable indexing, we can't lower the texcoord array. - */ - this->texcoord_usage |= (1 << var->type->array_size()) - 1; - this->lower_texcoord_array = false; - } - else { - this->texcoord_usage |= 1 << index->get_uint_component(0); - } - - /* Don't visit the leaves of ir_dereference_array. */ - return visit_continue_with_parent; - } - - return visit_continue; - } - - virtual ir_visitor_status visit(ir_dereference_variable *ir) - { - ir_variable *var = ir->variable_referenced(); - - if (var->data.mode != this->mode || !var->type->is_array()) - return visit_continue; - - if (this->find_frag_outputs && var->data.location == FRAG_RESULT_DATA0) { - /* This is a whole array dereference. */ - this->fragdata_usage |= (1 << var->type->array_size()) - 1; - this->lower_fragdata_array = false; - return visit_continue; - } - - if (!this->find_frag_outputs && var->data.location == VARYING_SLOT_TEX0) { - /* This is a whole array dereference like "gl_TexCoord = x;", - * there's probably no point in lowering that. - */ - this->texcoord_usage |= (1 << var->type->array_size()) - 1; - this->lower_texcoord_array = false; - } - return visit_continue; - } - - virtual ir_visitor_status visit(ir_variable *var) - { - if (var->data.mode != this->mode) - return visit_continue; - - /* Nothing to do here for fragment outputs. */ - if (this->find_frag_outputs) - return visit_continue; - - /* Handle colors and fog. 
*/ - switch (var->data.location) { - case VARYING_SLOT_COL0: - this->color[0] = var; - this->color_usage |= 1; - break; - case VARYING_SLOT_COL1: - this->color[1] = var; - this->color_usage |= 2; - break; - case VARYING_SLOT_BFC0: - this->backcolor[0] = var; - this->color_usage |= 1; - break; - case VARYING_SLOT_BFC1: - this->backcolor[1] = var; - this->color_usage |= 2; - break; - case VARYING_SLOT_FOGC: - this->fog = var; - this->has_fog = true; - break; - } - - return visit_continue; - } - - void get(exec_list *ir, - unsigned num_tfeedback_decls, - tfeedback_decl *tfeedback_decls) - { - /* Handle the transform feedback varyings. */ - for (unsigned i = 0; i < num_tfeedback_decls; i++) { - if (!tfeedback_decls[i].is_varying()) - continue; - - unsigned location = tfeedback_decls[i].get_location(); - - switch (location) { - case VARYING_SLOT_COL0: - case VARYING_SLOT_BFC0: - this->tfeedback_color_usage |= 1; - break; - case VARYING_SLOT_COL1: - case VARYING_SLOT_BFC1: - this->tfeedback_color_usage |= 2; - break; - case VARYING_SLOT_FOGC: - this->tfeedback_has_fog = true; - break; - default: - if (location >= VARYING_SLOT_TEX0 && - location <= VARYING_SLOT_TEX7) { - this->lower_texcoord_array = false; - } - } - } - - /* Process the shader. */ - visit_list_elements(this, ir); - - if (!this->texcoord_array) { - this->lower_texcoord_array = false; - } - if (!this->fragdata_array) { - this->lower_fragdata_array = false; - } - } - - bool lower_texcoord_array; - ir_variable *texcoord_array; - unsigned texcoord_usage; /* bitmask */ - - bool find_frag_outputs; /* false if it's looking for varyings */ - bool lower_fragdata_array; - ir_variable *fragdata_array; - unsigned fragdata_usage; /* bitmask */ - - ir_variable *color[2]; - ir_variable *backcolor[2]; - unsigned color_usage; /* bitmask */ - unsigned tfeedback_color_usage; /* bitmask */ - - ir_variable *fog; - bool has_fog; - bool tfeedback_has_fog; - - ir_variable_mode mode; -}; - - -/** - * This replaces unused varyings with temporary variables. - * - * If "ir" is the producer, the "external" usage should come from - * the consumer. It also works the other way around. If either one is - * missing, set the "external" usage to a full mask. - */ -class replace_varyings_visitor : public ir_rvalue_visitor { -public: - replace_varyings_visitor(struct gl_shader *sha, - const varying_info_visitor *info, - unsigned external_texcoord_usage, - unsigned external_color_usage, - bool external_has_fog) - : shader(sha), info(info), new_fog(NULL) - { - void *const ctx = shader->ir; - - memset(this->new_fragdata, 0, sizeof(this->new_fragdata)); - memset(this->new_texcoord, 0, sizeof(this->new_texcoord)); - memset(this->new_color, 0, sizeof(this->new_color)); - memset(this->new_backcolor, 0, sizeof(this->new_backcolor)); - - const char *mode_str = - info->mode == ir_var_shader_in ? "in" : "out"; - - /* Handle texcoord outputs. - * - * We're going to break down the gl_TexCoord array into separate - * variables. First, add declarations of the new variables all - * occurrences of gl_TexCoord will be replaced with. - */ - if (info->lower_texcoord_array) { - prepare_array(shader->ir, this->new_texcoord, - ARRAY_SIZE(this->new_texcoord), - VARYING_SLOT_TEX0, "TexCoord", mode_str, - info->texcoord_usage, external_texcoord_usage); - } - - /* Handle gl_FragData in the same way like gl_TexCoord. 
*/ - if (info->lower_fragdata_array) { - prepare_array(shader->ir, this->new_fragdata, - ARRAY_SIZE(this->new_fragdata), - FRAG_RESULT_DATA0, "FragData", mode_str, - info->fragdata_usage, (1 << MAX_DRAW_BUFFERS) - 1); - } - - /* Create dummy variables which will replace set-but-unused color and - * fog outputs. - */ - external_color_usage |= info->tfeedback_color_usage; - - for (int i = 0; i < 2; i++) { - char name[32]; - - if (!(external_color_usage & (1 << i))) { - if (info->color[i]) { - snprintf(name, 32, "gl_%s_FrontColor%i_dummy", mode_str, i); - this->new_color[i] = - new (ctx) ir_variable(glsl_type::vec4_type, name, - ir_var_temporary); - } - - if (info->backcolor[i]) { - snprintf(name, 32, "gl_%s_BackColor%i_dummy", mode_str, i); - this->new_backcolor[i] = - new (ctx) ir_variable(glsl_type::vec4_type, name, - ir_var_temporary); - } - } - } - - if (!external_has_fog && !info->tfeedback_has_fog && - info->fog) { - char name[32]; - - snprintf(name, 32, "gl_%s_FogFragCoord_dummy", mode_str); - this->new_fog = new (ctx) ir_variable(glsl_type::float_type, name, - ir_var_temporary); - } - - /* Now do the replacing. */ - visit_list_elements(this, shader->ir); - } - - void prepare_array(exec_list *ir, - ir_variable **new_var, - int max_elements, unsigned start_location, - const char *var_name, const char *mode_str, - unsigned usage, unsigned external_usage) - { - void *const ctx = ir; - - for (int i = max_elements-1; i >= 0; i--) { - if (usage & (1 << i)) { - char name[32]; - - if (!(external_usage & (1 << i))) { - /* This varying is unused in the next stage. Declare - * a temporary instead of an output. */ - snprintf(name, 32, "gl_%s_%s%i_dummy", mode_str, var_name, i); - new_var[i] = - new (ctx) ir_variable(glsl_type::vec4_type, name, - ir_var_temporary); - } - else { - snprintf(name, 32, "gl_%s_%s%i", mode_str, var_name, i); - new_var[i] = - new(ctx) ir_variable(glsl_type::vec4_type, name, - this->info->mode); - new_var[i]->data.location = start_location + i; - new_var[i]->data.explicit_location = true; - new_var[i]->data.explicit_index = 0; - } - - ir->head->insert_before(new_var[i]); - } - } - } - - virtual ir_visitor_status visit(ir_variable *var) - { - /* Remove the gl_TexCoord array. */ - if (this->info->lower_texcoord_array && - var == this->info->texcoord_array) { - var->remove(); - } - - /* Remove the gl_FragData array. */ - if (this->info->lower_fragdata_array && - var == this->info->fragdata_array) { - - /* Clone variable for program resource list before it is removed. */ - if (!shader->fragdata_arrays) - shader->fragdata_arrays = new (shader) exec_list; - - shader->fragdata_arrays->push_tail(var->clone(shader, NULL)); - - var->remove(); - } - - /* Replace set-but-unused color and fog outputs with dummy variables. */ - for (int i = 0; i < 2; i++) { - if (var == this->info->color[i] && this->new_color[i]) { - var->replace_with(this->new_color[i]); - } - if (var == this->info->backcolor[i] && - this->new_backcolor[i]) { - var->replace_with(this->new_backcolor[i]); - } - } - - if (var == this->info->fog && this->new_fog) { - var->replace_with(this->new_fog); - } - - return visit_continue; - } - - virtual void handle_rvalue(ir_rvalue **rvalue) - { - if (!*rvalue) - return; - - void *ctx = ralloc_parent(*rvalue); - - /* Replace an array dereference gl_TexCoord[i] with a single - * variable dereference representing gl_TexCoord[i]. 
- */ - if (this->info->lower_texcoord_array) { - /* gl_TexCoord[i] occurrence */ - ir_dereference_array *const da = (*rvalue)->as_dereference_array(); - - if (da && da->variable_referenced() == - this->info->texcoord_array) { - unsigned i = da->array_index->as_constant()->get_uint_component(0); - - *rvalue = new(ctx) ir_dereference_variable(this->new_texcoord[i]); - return; - } - } - - /* Same for gl_FragData. */ - if (this->info->lower_fragdata_array) { - /* gl_FragData[i] occurrence */ - ir_dereference_array *const da = (*rvalue)->as_dereference_array(); - - if (da && da->variable_referenced() == this->info->fragdata_array) { - unsigned i = da->array_index->as_constant()->get_uint_component(0); - - *rvalue = new(ctx) ir_dereference_variable(this->new_fragdata[i]); - return; - } - } - - /* Replace set-but-unused color and fog outputs with dummy variables. */ - ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); - if (!dv) - return; - - ir_variable *var = dv->variable_referenced(); - - for (int i = 0; i < 2; i++) { - if (var == this->info->color[i] && this->new_color[i]) { - *rvalue = new(ctx) ir_dereference_variable(this->new_color[i]); - return; - } - if (var == this->info->backcolor[i] && - this->new_backcolor[i]) { - *rvalue = new(ctx) ir_dereference_variable(this->new_backcolor[i]); - return; - } - } - - if (var == this->info->fog && this->new_fog) { - *rvalue = new(ctx) ir_dereference_variable(this->new_fog); - } - } - - virtual ir_visitor_status visit_leave(ir_assignment *ir) - { - handle_rvalue(&ir->rhs); - handle_rvalue(&ir->condition); - - /* We have to use set_lhs when changing the LHS of an assignment. */ - ir_rvalue *lhs = ir->lhs; - - handle_rvalue(&lhs); - if (lhs != ir->lhs) { - ir->set_lhs(lhs); - } - - return visit_continue; - } - -private: - struct gl_shader *shader; - const varying_info_visitor *info; - ir_variable *new_fragdata[MAX_DRAW_BUFFERS]; - ir_variable *new_texcoord[MAX_TEXTURE_COORD_UNITS]; - ir_variable *new_color[2]; - ir_variable *new_backcolor[2]; - ir_variable *new_fog; -}; - -} /* anonymous namespace */ - -static void -lower_texcoord_array(struct gl_shader *shader, const varying_info_visitor *info) -{ - replace_varyings_visitor(shader, info, - (1 << MAX_TEXTURE_COORD_UNITS) - 1, - 1 | 2, true); -} - -static void -lower_fragdata_array(struct gl_shader *shader) -{ - varying_info_visitor info(ir_var_shader_out, true); - info.get(shader->ir, 0, NULL); - - replace_varyings_visitor(shader, &info, 0, 0, 0); -} - - -void -do_dead_builtin_varyings(struct gl_context *ctx, - gl_shader *producer, gl_shader *consumer, - unsigned num_tfeedback_decls, - tfeedback_decl *tfeedback_decls) -{ - /* Lower the gl_FragData array to separate variables. */ - if (consumer && consumer->Stage == MESA_SHADER_FRAGMENT) { - lower_fragdata_array(consumer); - } - - /* Lowering of built-in varyings has no effect with the core context and - * GLES2, because they are not available there. - */ - if (ctx->API == API_OPENGL_CORE || - ctx->API == API_OPENGLES2) { - return; - } - - /* Information about built-in varyings. */ - varying_info_visitor producer_info(ir_var_shader_out); - varying_info_visitor consumer_info(ir_var_shader_in); - - if (producer) { - producer_info.get(producer->ir, num_tfeedback_decls, tfeedback_decls); - - if (!consumer) { - /* At least eliminate unused gl_TexCoord elements. 
*/ - if (producer_info.lower_texcoord_array) { - lower_texcoord_array(producer, &producer_info); - } - return; - } - } - - if (consumer) { - consumer_info.get(consumer->ir, 0, NULL); - - if (!producer) { - /* At least eliminate unused gl_TexCoord elements. */ - if (consumer_info.lower_texcoord_array) { - lower_texcoord_array(consumer, &consumer_info); - } - return; - } - } - - /* Eliminate the outputs unused by the consumer. */ - if (producer_info.lower_texcoord_array || - producer_info.color_usage || - producer_info.has_fog) { - replace_varyings_visitor(producer, - &producer_info, - consumer_info.texcoord_usage, - consumer_info.color_usage, - consumer_info.has_fog); - } - - /* The gl_TexCoord fragment shader inputs can be initialized - * by GL_COORD_REPLACE, so we can't eliminate them. - * - * This doesn't prevent elimination of the gl_TexCoord elements which - * are not read by the fragment shader. We want to eliminate those anyway. - */ - if (consumer->Stage == MESA_SHADER_FRAGMENT) { - producer_info.texcoord_usage = (1 << MAX_TEXTURE_COORD_UNITS) - 1; - } - - /* Eliminate the inputs uninitialized by the producer. */ - if (consumer_info.lower_texcoord_array || - consumer_info.color_usage || - consumer_info.has_fog) { - replace_varyings_visitor(consumer, - &consumer_info, - producer_info.texcoord_usage, - producer_info.color_usage, - producer_info.has_fog); - } -} diff --git a/src/glsl/opt_dead_code.cpp b/src/glsl/opt_dead_code.cpp deleted file mode 100644 index dbdb7de8bb8..00000000000 --- a/src/glsl/opt_dead_code.cpp +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_dead_code.cpp - * - * Eliminates dead assignments and variable declarations from the code. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_variable_refcount.h" -#include "compiler/glsl_types.h" -#include "util/hash_table.h" - -static bool debug = false; - -/** - * Do a dead code pass over instructions and everything that instructions - * references. - * - * Note that this will remove assignments to globals, so it is not suitable - * for usage on an unlinked instruction stream. 
- */
-bool
-do_dead_code(exec_list *instructions, bool uniform_locations_assigned)
-{
-   ir_variable_refcount_visitor v;
-   bool progress = false;
-
-   v.run(instructions);
-
-   struct hash_entry *e;
-   hash_table_foreach(v.ht, e) {
-      ir_variable_refcount_entry *entry = (ir_variable_refcount_entry *)e->data;
-
-      /* Since each assignment is a reference, the referenced count must be
-       * greater than or equal to the assignment count. If they are equal,
-       * then all of the references are assignments, and the variable is
-       * dead.
-       *
-       * Note that if the variable is neither assigned nor referenced, both
-       * counts will be zero and will be caught by the equality test.
-       */
-      assert(entry->referenced_count >= entry->assigned_count);
-
-      if (debug) {
-         printf("%s@%p: %d refs, %d assigns, %sdeclared in our scope\n",
-                entry->var->name, (void *) entry->var,
-                entry->referenced_count, entry->assigned_count,
-                entry->declaration ? "" : "not ");
-      }
-
-      if ((entry->referenced_count > entry->assigned_count)
-          || !entry->declaration)
-         continue;
-
-      /* Section 7.4.1 (Shader Interface Matching) of the OpenGL 4.5
-       * (Core Profile) spec says:
-       *
-       *    "With separable program objects, interfaces between shader
-       *    stages may involve the outputs from one program object and the
-       *    inputs from a second program object. For such interfaces, it is
-       *    not possible to detect mismatches at link time, because the
-       *    programs are linked separately. When each such program is
-       *    linked, all inputs or outputs interfacing with another program
-       *    stage are treated as active."
-       */
-      if (entry->var->data.always_active_io)
-         continue;
-
-      if (!entry->assign_list.is_empty()) {
-         /* Remove all the dead assignments to the variable we found.
-          * Don't do so if it's a shader or function output, though.
-          */
-         if (entry->var->data.mode != ir_var_function_out &&
-             entry->var->data.mode != ir_var_function_inout &&
-             entry->var->data.mode != ir_var_shader_out &&
-             entry->var->data.mode != ir_var_shader_storage) {
-
-            while (!entry->assign_list.is_empty()) {
-               struct assignment_entry *assignment_entry =
-                  exec_node_data(struct assignment_entry,
-                                 entry->assign_list.head, link);
-
-               assignment_entry->assign->remove();
-
-               if (debug) {
-                  printf("Removed assignment to %s@%p\n",
-                         entry->var->name, (void *) entry->var);
-               }
-
-               assignment_entry->link.remove();
-               free(assignment_entry);
-            }
-            progress = true;
-         }
-      }
-
-      if (entry->assign_list.is_empty()) {
-         /* If there are no assignments or references to the variable left,
-          * then we can remove its declaration.
-          */
-
-         /* uniform initializers are precious, and could get used by another
-          * stage. Also, once uniform locations have been assigned, the
-          * declaration cannot be deleted.
-          */
-         if (entry->var->data.mode == ir_var_uniform ||
-             entry->var->data.mode == ir_var_shader_storage) {
-            if (uniform_locations_assigned || entry->var->constant_initializer)
-               continue;
-
-            /* Section 2.11.6 (Uniform Variables) of the OpenGL ES 3.0.3 spec
-             * says:
-             *
-             *    "All members of a named uniform block declared with a
-             *    shared or std140 layout qualifier are considered active,
-             *    even if they are not referenced in any shader in the
-             *    program. The uniform block itself is also considered
-             *    active, even if no member of the block is referenced."
-             *
-             * If the variable is in a uniform block with one of those
-             * layouts, do not eliminate it.
-             */
-            if (entry->var->is_in_buffer_block()) {
-               if (entry->var->get_interface_type()->interface_packing !=
-                   GLSL_INTERFACE_PACKING_PACKED)
-                  continue;
-            }
-
-            if (entry->var->type->is_subroutine())
-               continue;
-         }
-
-         entry->var->remove();
-         progress = true;
-
-         if (debug) {
-            printf("Removed declaration of %s@%p\n",
-                   entry->var->name, (void *) entry->var);
-         }
-      }
-   }
-
-   return progress;
-}
-
-/**
- * Does a dead code pass on the functions present in the instruction stream.
- *
- * This is suitable for use while the program is not linked, as it will
- * ignore variable declarations (and the assignments to them) for variables
- * with global scope.
- */
-bool
-do_dead_code_unlinked(exec_list *instructions)
-{
-   bool progress = false;
-
-   foreach_in_list(ir_instruction, ir, instructions) {
-      ir_function *f = ir->as_function();
-      if (f) {
-         foreach_in_list(ir_function_signature, sig, &f->signatures) {
-            /* The setting of the uniform_locations_assigned flag here is
-             * irrelevant. If there is a uniform declaration encountered
-             * inside the body of the function, something has already gone
-             * terribly, terribly wrong.
-             */
-            if (do_dead_code(&sig->body, false))
-               progress = true;
-         }
-      }
-   }
-
-   return progress;
-}
diff --git a/src/glsl/opt_dead_code_local.cpp b/src/glsl/opt_dead_code_local.cpp
deleted file mode 100644
index d38fd2bf638..00000000000
--- a/src/glsl/opt_dead_code_local.cpp
+++ /dev/null
@@ -1,336 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_dead_code_local.cpp
- *
- * Eliminates local dead assignments from the code.
- *
- * This operates on basic blocks, tracking assignments and finding if
- * they're used before the variable is completely reassigned.
- *
- * Compare this to opt_dead_code.cpp, which operates globally looking
- * for assignments to variables that are never read.
- */
-
-#include "ir.h"
-#include "ir_basic_block.h"
-#include "ir_optimization.h"
-#include "compiler/glsl_types.h"
-
-static bool debug = false;
-
-namespace {
-
-class assignment_entry : public exec_node
-{
-public:
-   assignment_entry(ir_variable *lhs, ir_assignment *ir)
-   {
-      assert(lhs);
-      assert(ir);
-      this->lhs = lhs;
-      this->ir = ir;
-      this->unused = ir->write_mask;
-   }
-
-   ir_variable *lhs;
-   ir_assignment *ir;
-
-   /* bitmask of xyzw channels written that haven't been used so far. */
-   int unused;
-};
-
-class kill_for_derefs_visitor : public ir_hierarchical_visitor {
-public:
-   kill_for_derefs_visitor(exec_list *assignments)
-   {
-      this->assignments = assignments;
-   }
-
-   void use_channels(ir_variable *const var, int used)
-   {
-      foreach_in_list_safe(assignment_entry, entry, this->assignments) {
-         if (entry->lhs == var) {
-            if (var->type->is_scalar() || var->type->is_vector()) {
-               if (debug)
-                  printf("used %s (0x%01x - 0x%01x)\n", entry->lhs->name,
-                         entry->unused, used & 0xf);
-               entry->unused &= ~used;
-               if (!entry->unused)
-                  entry->remove();
-            } else {
-               if (debug)
-                  printf("used %s\n", entry->lhs->name);
-               entry->remove();
-            }
-         }
-      }
-   }
-
-   virtual ir_visitor_status visit(ir_dereference_variable *ir)
-   {
-      use_channels(ir->var, ~0);
-
-      return visit_continue;
-   }
-
-   virtual ir_visitor_status visit(ir_swizzle *ir)
-   {
-      ir_dereference_variable *deref = ir->val->as_dereference_variable();
-      if (!deref)
-         return visit_continue;
-
-      int used = 0;
-      used |= 1 << ir->mask.x;
-      used |= 1 << ir->mask.y;
-      used |= 1 << ir->mask.z;
-      used |= 1 << ir->mask.w;
-
-      use_channels(deref->var, used);
-
-      return visit_continue_with_parent;
-   }
-
-   virtual ir_visitor_status visit_leave(ir_emit_vertex *)
-   {
-      /* For the purpose of dead code elimination, emitting a vertex counts as
-       * "reading" all of the currently assigned output variables.
-       */
-      foreach_in_list_safe(assignment_entry, entry, this->assignments) {
-         if (entry->lhs->data.mode == ir_var_shader_out) {
-            if (debug)
-               printf("kill %s\n", entry->lhs->name);
-            entry->remove();
-         }
-      }
-
-      return visit_continue;
-   }
-
-private:
-   exec_list *assignments;
-};
-
-class array_index_visit : public ir_hierarchical_visitor {
-public:
-   array_index_visit(ir_hierarchical_visitor *v)
-   {
-      this->visitor = v;
-   }
-
-   virtual ir_visitor_status visit_enter(class ir_dereference_array *ir)
-   {
-      ir->array_index->accept(visitor);
-      return visit_continue;
-   }
-
-   static void run(ir_instruction *ir, ir_hierarchical_visitor *v)
-   {
-      array_index_visit top_visit(v);
-      ir->accept(& top_visit);
-   }
-
-   ir_hierarchical_visitor *visitor;
-};
-
-} /* unnamed namespace */
-
-/**
- * Processes an assignment: kills tracked entries whose values it reads,
- * removes earlier writes to the same channels that it proves dead, and
- * then adds the assignment itself to the tracking list.
- */
-static bool
-process_assignment(void *ctx, ir_assignment *ir, exec_list *assignments)
-{
-   ir_variable *var = NULL;
-   bool progress = false;
-   kill_for_derefs_visitor v(assignments);
-
-   /* Kill assignment entries for things used to produce this assignment. */
-   ir->rhs->accept(&v);
-   if (ir->condition) {
-      ir->condition->accept(&v);
-   }
-
-   /* Kill assignment entries used as array indices.
-    */
-   array_index_visit::run(ir->lhs, &v);
-   var = ir->lhs->variable_referenced();
-   assert(var);
-
-   /* Now, check if we did a whole-variable assignment. */
-   if (!ir->condition) {
-      ir_dereference_variable *deref_var = ir->lhs->as_dereference_variable();
-
-      /* If it's a vector type, we can do per-channel elimination of
-       * use of the RHS.
-       */
-      if (deref_var && (deref_var->var->type->is_scalar() ||
-                        deref_var->var->type->is_vector())) {
-
-         if (debug)
-            printf("looking for %s.0x%01x to remove\n", var->name,
-                   ir->write_mask);
-
-         foreach_in_list_safe(assignment_entry, entry, assignments) {
-            if (entry->lhs != var)
-               continue;
-
-            /* Skip if the assignment we're trying to eliminate isn't a plain
-             * variable deref.
-             */
-            if (entry->ir->lhs->ir_type != ir_type_dereference_variable)
-               continue;
-
-            int remove = entry->unused & ir->write_mask;
-            if (debug) {
-               printf("%s 0x%01x - 0x%01x = 0x%01x\n",
-                      var->name,
-                      entry->ir->write_mask,
-                      remove, entry->ir->write_mask & ~remove);
-            }
-            if (remove) {
-               progress = true;
-
-               if (debug) {
-                  printf("rewriting:\n  ");
-                  entry->ir->print();
-                  printf("\n");
-               }
-
-               entry->ir->write_mask &= ~remove;
-               entry->unused &= ~remove;
-               if (entry->ir->write_mask == 0) {
-                  /* Delete the dead assignment. */
-                  entry->ir->remove();
-                  entry->remove();
-               } else {
-                  void *mem_ctx = ralloc_parent(entry->ir);
-                  /* Reswizzle the RHS arguments according to the new
-                   * write_mask.
-                   */
-                  unsigned components[4];
-                  unsigned channels = 0;
-                  unsigned next = 0;
-
-                  for (int i = 0; i < 4; i++) {
-                     if ((entry->ir->write_mask | remove) & (1 << i)) {
-                        if (!(remove & (1 << i)))
-                           components[channels++] = next;
-                        next++;
-                     }
-                  }
-
-                  entry->ir->rhs = new(mem_ctx) ir_swizzle(entry->ir->rhs,
-                                                           components,
-                                                           channels);
-                  if (debug) {
-                     printf("to:\n  ");
-                     entry->ir->print();
-                     printf("\n");
-                  }
-               }
-            }
-         }
-      } else if (ir->whole_variable_written() != NULL) {
-         /* We did a whole-variable assignment. So, any instruction in
-          * the assignment list with the same LHS is dead.
-          */
-         if (debug)
-            printf("looking for %s to remove\n", var->name);
-         foreach_in_list_safe(assignment_entry, entry, assignments) {
-            if (entry->lhs == var) {
-               if (debug)
-                  printf("removing %s\n", var->name);
-               entry->ir->remove();
-               entry->remove();
-               progress = true;
-            }
-         }
-      }
-   }
-
-   /* Add this instruction to the assignment list available to be removed. */
-   assignment_entry *entry = new(ctx) assignment_entry(var, ir);
-   assignments->push_tail(entry);
-
-   if (debug) {
-      printf("add %s\n", var->name);
-
-      printf("current entries\n");
-      foreach_in_list(assignment_entry, entry, assignments) {
-         printf("    %s (0x%01x)\n", entry->lhs->name, entry->unused);
-      }
-   }
-
-   return progress;
-}
-
-static void
-dead_code_local_basic_block(ir_instruction *first,
-                            ir_instruction *last,
-                            void *data)
-{
-   ir_instruction *ir, *ir_next;
-   /* List of assignment_entry */
-   exec_list assignments;
-   bool *out_progress = (bool *)data;
-   bool progress = false;
-
-   void *ctx = ralloc_context(NULL);
-   /* Safe looping, since process_assignment() may remove instructions. */
-   for (ir = first, ir_next = (ir_instruction *)first->next;;
-        ir = ir_next, ir_next = (ir_instruction *)ir->next) {
-      ir_assignment *ir_assign = ir->as_assignment();
-
-      if (debug) {
-         ir->print();
-         printf("\n");
-      }
-
-      if (ir_assign) {
-         progress = process_assignment(ctx, ir_assign, &assignments) || progress;
-      } else {
-         kill_for_derefs_visitor kill(&assignments);
-         ir->accept(&kill);
-      }
-
-      if (ir == last)
-         break;
-   }
-   *out_progress = progress;
-   ralloc_free(ctx);
-}
-
-/**
- * Does a local dead code elimination pass on the code present in the
- * instruction stream.
- */ -bool -do_dead_code_local(exec_list *instructions) -{ - bool progress = false; - - call_for_basic_blocks(instructions, dead_code_local_basic_block, &progress); - - return progress; -} diff --git a/src/glsl/opt_dead_functions.cpp b/src/glsl/opt_dead_functions.cpp deleted file mode 100644 index 2e90b650fa8..00000000000 --- a/src/glsl/opt_dead_functions.cpp +++ /dev/null @@ -1,152 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_dead_functions.cpp - * - * Eliminates unused functions from the linked program. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_expression_flattening.h" -#include "compiler/glsl_types.h" - -namespace { - -class signature_entry : public exec_node -{ -public: - signature_entry(ir_function_signature *sig) - { - this->signature = sig; - this->used = false; - } - - ir_function_signature *signature; - bool used; -}; - -class ir_dead_functions_visitor : public ir_hierarchical_visitor { -public: - ir_dead_functions_visitor() - { - this->mem_ctx = ralloc_context(NULL); - } - - ~ir_dead_functions_visitor() - { - ralloc_free(this->mem_ctx); - } - - virtual ir_visitor_status visit_enter(ir_function_signature *); - virtual ir_visitor_status visit_enter(ir_call *); - - signature_entry *get_signature_entry(ir_function_signature *var); - - /* List of signature_entry */ - exec_list signature_list; - void *mem_ctx; -}; - -} /* unnamed namespace */ - -signature_entry * -ir_dead_functions_visitor::get_signature_entry(ir_function_signature *sig) -{ - foreach_in_list(signature_entry, entry, &this->signature_list) { - if (entry->signature == sig) - return entry; - } - - signature_entry *entry = new(mem_ctx) signature_entry(sig); - this->signature_list.push_tail(entry); - return entry; -} - - -ir_visitor_status -ir_dead_functions_visitor::visit_enter(ir_function_signature *ir) -{ - signature_entry *entry = this->get_signature_entry(ir); - - if (strcmp(ir->function_name(), "main") == 0) { - entry->used = true; - } - - - - return visit_continue; -} - - -ir_visitor_status -ir_dead_functions_visitor::visit_enter(ir_call *ir) -{ - signature_entry *entry = this->get_signature_entry(ir->callee); - - entry->used = true; - - return visit_continue; -} - -bool -do_dead_functions(exec_list *instructions) -{ - ir_dead_functions_visitor v; - bool progress = false; - - visit_list_elements(&v, instructions); - - /* Now that we've figured out which 
function signatures are used, remove - * the unused ones, and remove function definitions that have no more - * signatures. - */ - foreach_in_list_safe(signature_entry, entry, &v.signature_list) { - if (!entry->used) { - entry->signature->remove(); - delete entry->signature; - progress = true; - } - delete(entry); - } - - /* We don't just do this above when we nuked a signature because of - * const pointers. - */ - foreach_in_list_safe(ir_instruction, ir, instructions) { - ir_function *func = ir->as_function(); - - if (func && func->signatures.is_empty()) { - /* At this point (post-linking), the symbol table is no - * longer in use, so not removing the function from the - * symbol table should be OK. - */ - func->remove(); - delete func; - progress = true; - } - } - - return progress; -} diff --git a/src/glsl/opt_flatten_nested_if_blocks.cpp b/src/glsl/opt_flatten_nested_if_blocks.cpp deleted file mode 100644 index c702102045f..00000000000 --- a/src/glsl/opt_flatten_nested_if_blocks.cpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_flatten_nested_if_blocks.cpp - * - * Flattens nested if blocks such as: - * - * if (x) { - * if (y) { - * ... - * } - * } - * - * into a single if block with a combined condition: - * - * if (x && y) { - * ... - * } - */ - -#include "ir.h" -#include "ir_builder.h" - -using namespace ir_builder; - -namespace { - -class nested_if_flattener : public ir_hierarchical_visitor { -public: - nested_if_flattener() - { - progress = false; - } - - ir_visitor_status visit_leave(ir_if *); - ir_visitor_status visit_enter(ir_assignment *); - - bool progress; -}; - -} /* unnamed namespace */ - -/* We only care about the top level "if" instructions, so don't - * descend into expressions. - */ -ir_visitor_status -nested_if_flattener::visit_enter(ir_assignment *ir) -{ - (void) ir; - return visit_continue_with_parent; -} - -bool -opt_flatten_nested_if_blocks(exec_list *instructions) -{ - nested_if_flattener v; - - v.run(instructions); - return v.progress; -} - - -ir_visitor_status -nested_if_flattener::visit_leave(ir_if *ir) -{ - /* Only handle a single ir_if within the then clause of an ir_if. No extra - * instructions, no else clauses, nothing. 
- */ - if (ir->then_instructions.is_empty() || !ir->else_instructions.is_empty()) - return visit_continue; - - ir_if *inner = ((ir_instruction *) ir->then_instructions.head)->as_if(); - if (!inner || !inner->next->is_tail_sentinel() || - !inner->else_instructions.is_empty()) - return visit_continue; - - ir->condition = logic_and(ir->condition, inner->condition); - inner->then_instructions.move_nodes_to(&ir->then_instructions); - - progress = true; - return visit_continue; -} diff --git a/src/glsl/opt_flip_matrices.cpp b/src/glsl/opt_flip_matrices.cpp deleted file mode 100644 index 04c6170b845..00000000000 --- a/src/glsl/opt_flip_matrices.cpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_flip_matrices.cpp - * - * Convert (matrix * vector) operations to (vector * matrixTranspose), - * which can be done using dot products rather than multiplies and adds. - * On some hardware, this is more efficient. - * - * This currently only does the conversion for built-in matrices which - * already have transposed equivalents. Namely, gl_ModelViewProjectionMatrix - * and gl_TextureMatrix. 
- */ -#include "ir.h" -#include "ir_optimization.h" -#include "main/macros.h" - -namespace { -class matrix_flipper : public ir_hierarchical_visitor { -public: - matrix_flipper(exec_list *instructions) - { - progress = false; - mvp_transpose = NULL; - texmat_transpose = NULL; - - foreach_in_list(ir_instruction, ir, instructions) { - ir_variable *var = ir->as_variable(); - if (!var) - continue; - if (strcmp(var->name, "gl_ModelViewProjectionMatrixTranspose") == 0) - mvp_transpose = var; - if (strcmp(var->name, "gl_TextureMatrixTranspose") == 0) - texmat_transpose = var; - } - } - - ir_visitor_status visit_enter(ir_expression *ir); - - bool progress; - -private: - ir_variable *mvp_transpose; - ir_variable *texmat_transpose; -}; -} - -ir_visitor_status -matrix_flipper::visit_enter(ir_expression *ir) -{ - if (ir->operation != ir_binop_mul || - !ir->operands[0]->type->is_matrix() || - !ir->operands[1]->type->is_vector()) - return visit_continue; - - ir_variable *mat_var = ir->operands[0]->variable_referenced(); - if (!mat_var) - return visit_continue; - - if (mvp_transpose && - strcmp(mat_var->name, "gl_ModelViewProjectionMatrix") == 0) { -#ifndef NDEBUG - ir_dereference_variable *deref = ir->operands[0]->as_dereference_variable(); - assert(deref && deref->var == mat_var); -#endif - - void *mem_ctx = ralloc_parent(ir); - - ir->operands[0] = ir->operands[1]; - ir->operands[1] = new(mem_ctx) ir_dereference_variable(mvp_transpose); - - progress = true; - } else if (texmat_transpose && - strcmp(mat_var->name, "gl_TextureMatrix") == 0) { - ir_dereference_array *array_ref = ir->operands[0]->as_dereference_array(); - assert(array_ref != NULL); - ir_dereference_variable *var_ref = array_ref->array->as_dereference_variable(); - assert(var_ref && var_ref->var == mat_var); - - ir->operands[0] = ir->operands[1]; - ir->operands[1] = array_ref; - - var_ref->var = texmat_transpose; - - texmat_transpose->data.max_array_access = - MAX2(texmat_transpose->data.max_array_access, mat_var->data.max_array_access); - - progress = true; - } - - return visit_continue; -} - -bool -opt_flip_matrices(struct exec_list *instructions) -{ - matrix_flipper v(instructions); - - visit_list_elements(&v, instructions); - - return v.progress; -} diff --git a/src/glsl/opt_function_inlining.cpp b/src/glsl/opt_function_inlining.cpp deleted file mode 100644 index 19f5fae0a17..00000000000 --- a/src/glsl/opt_function_inlining.cpp +++ /dev/null @@ -1,360 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_function_inlining.cpp - * - * Replaces calls to functions with the body of the function. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_function_inlining.h" -#include "ir_expression_flattening.h" -#include "compiler/glsl_types.h" -#include "program/hash_table.h" - -static void -do_variable_replacement(exec_list *instructions, - ir_variable *orig, - ir_dereference *repl); - -namespace { - -class ir_function_inlining_visitor : public ir_hierarchical_visitor { -public: - ir_function_inlining_visitor() - { - progress = false; - } - - virtual ~ir_function_inlining_visitor() - { - /* empty */ - } - - virtual ir_visitor_status visit_enter(ir_expression *); - virtual ir_visitor_status visit_enter(ir_call *); - virtual ir_visitor_status visit_enter(ir_return *); - virtual ir_visitor_status visit_enter(ir_texture *); - virtual ir_visitor_status visit_enter(ir_swizzle *); - - bool progress; -}; - -} /* unnamed namespace */ - -bool -do_function_inlining(exec_list *instructions) -{ - ir_function_inlining_visitor v; - - v.run(instructions); - - return v.progress; -} - -static void -replace_return_with_assignment(ir_instruction *ir, void *data) -{ - void *ctx = ralloc_parent(ir); - ir_dereference *orig_deref = (ir_dereference *) data; - ir_return *ret = ir->as_return(); - - if (ret) { - if (ret->value) { - ir_rvalue *lhs = orig_deref->clone(ctx, NULL); - ret->replace_with(new(ctx) ir_assignment(lhs, ret->value, NULL)); - } else { - /* un-valued return has to be the last return, or we shouldn't - * have reached here. (see can_inline()). - */ - assert(ret->next->is_tail_sentinel()); - ret->remove(); - } - } -} - -void -ir_call::generate_inline(ir_instruction *next_ir) -{ - void *ctx = ralloc_parent(this); - ir_variable **parameters; - unsigned num_parameters; - int i; - struct hash_table *ht; - - ht = hash_table_ctor(0, hash_table_pointer_hash, hash_table_pointer_compare); - - num_parameters = this->callee->parameters.length(); - parameters = new ir_variable *[num_parameters]; - - /* Generate the declarations for the parameters to our inlined code, - * and set up the mapping of real function body variables to ours. - */ - i = 0; - foreach_two_lists(formal_node, &this->callee->parameters, - actual_node, &this->actual_parameters) { - ir_variable *sig_param = (ir_variable *) formal_node; - ir_rvalue *param = (ir_rvalue *) actual_node; - - /* Generate a new variable for the parameter. */ - if (sig_param->type->contains_opaque()) { - /* For opaque types, we want the inlined variable references - * referencing the passed in variable, since that will have - * the location information, which an assignment of an opaque - * variable wouldn't. Fix it up below. - */ - parameters[i] = NULL; - } else { - parameters[i] = sig_param->clone(ctx, ht); - parameters[i]->data.mode = ir_var_auto; - - /* Remove the read-only decoration because we're going to write - * directly to this variable. If the cloned variable is left - * read-only and the inlined function is inside a loop, the loop - * analysis code will get confused. - */ - parameters[i]->data.read_only = false; - next_ir->insert_before(parameters[i]); - } - - /* Move the actual param into our param variable if it's an 'in' type. 
*/ - if (parameters[i] && (sig_param->data.mode == ir_var_function_in || - sig_param->data.mode == ir_var_const_in || - sig_param->data.mode == ir_var_function_inout)) { - ir_assignment *assign; - - assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]), - param, NULL); - next_ir->insert_before(assign); - } - - ++i; - } - - exec_list new_instructions; - - /* Generate the inlined body of the function to a new list */ - foreach_in_list(ir_instruction, ir, &callee->body) { - ir_instruction *new_ir = ir->clone(ctx, ht); - - new_instructions.push_tail(new_ir); - visit_tree(new_ir, replace_return_with_assignment, this->return_deref); - } - - /* If any opaque types were passed in, replace any deref of the - * opaque variable with a deref of the argument. - */ - foreach_two_lists(formal_node, &this->callee->parameters, - actual_node, &this->actual_parameters) { - ir_rvalue *const param = (ir_rvalue *) actual_node; - ir_variable *sig_param = (ir_variable *) formal_node; - - if (sig_param->type->contains_opaque()) { - ir_dereference *deref = param->as_dereference(); - - assert(deref); - do_variable_replacement(&new_instructions, sig_param, deref); - } - } - - /* Now push those new instructions in. */ - next_ir->insert_before(&new_instructions); - - /* Copy back the value of any 'out' parameters from the function body - * variables to our own. - */ - i = 0; - foreach_two_lists(formal_node, &this->callee->parameters, - actual_node, &this->actual_parameters) { - ir_rvalue *const param = (ir_rvalue *) actual_node; - const ir_variable *const sig_param = (ir_variable *) formal_node; - - /* Move our param variable into the actual param if it's an 'out' type. */ - if (parameters[i] && (sig_param->data.mode == ir_var_function_out || - sig_param->data.mode == ir_var_function_inout)) { - ir_assignment *assign; - - assign = new(ctx) ir_assignment(param->clone(ctx, NULL)->as_rvalue(), - new(ctx) ir_dereference_variable(parameters[i]), - NULL); - next_ir->insert_before(assign); - } - - ++i; - } - - delete [] parameters; - - hash_table_dtor(ht); -} - - -ir_visitor_status -ir_function_inlining_visitor::visit_enter(ir_expression *ir) -{ - (void) ir; - return visit_continue_with_parent; -} - - -ir_visitor_status -ir_function_inlining_visitor::visit_enter(ir_return *ir) -{ - (void) ir; - return visit_continue_with_parent; -} - - -ir_visitor_status -ir_function_inlining_visitor::visit_enter(ir_texture *ir) -{ - (void) ir; - return visit_continue_with_parent; -} - - -ir_visitor_status -ir_function_inlining_visitor::visit_enter(ir_swizzle *ir) -{ - (void) ir; - return visit_continue_with_parent; -} - - -ir_visitor_status -ir_function_inlining_visitor::visit_enter(ir_call *ir) -{ - if (can_inline(ir)) { - ir->generate_inline(ir); - ir->remove(); - this->progress = true; - } - - return visit_continue; -} - - -/** - * Replaces references to the "orig" variable with a clone of "repl." - * - * From the spec, opaque types can appear in the tree as function - * (non-out) parameters and as the result of array indexing and - * structure field selection. In our builtin implementation, they - * also appear in the sampler field of an ir_tex instruction. 
- */ - -class ir_variable_replacement_visitor : public ir_hierarchical_visitor { -public: - ir_variable_replacement_visitor(ir_variable *orig, ir_dereference *repl) - { - this->orig = orig; - this->repl = repl; - } - - virtual ~ir_variable_replacement_visitor() - { - } - - virtual ir_visitor_status visit_leave(ir_call *); - virtual ir_visitor_status visit_leave(ir_dereference_array *); - virtual ir_visitor_status visit_leave(ir_dereference_record *); - virtual ir_visitor_status visit_leave(ir_texture *); - - void replace_deref(ir_dereference **deref); - void replace_rvalue(ir_rvalue **rvalue); - - ir_variable *orig; - ir_dereference *repl; -}; - -void -ir_variable_replacement_visitor::replace_deref(ir_dereference **deref) -{ - ir_dereference_variable *deref_var = (*deref)->as_dereference_variable(); - if (deref_var && deref_var->var == this->orig) { - *deref = this->repl->clone(ralloc_parent(*deref), NULL); - } -} - -void -ir_variable_replacement_visitor::replace_rvalue(ir_rvalue **rvalue) -{ - if (!*rvalue) - return; - - ir_dereference *deref = (*rvalue)->as_dereference(); - - if (!deref) - return; - - replace_deref(&deref); - *rvalue = deref; -} - -ir_visitor_status -ir_variable_replacement_visitor::visit_leave(ir_texture *ir) -{ - replace_deref(&ir->sampler); - - return visit_continue; -} - -ir_visitor_status -ir_variable_replacement_visitor::visit_leave(ir_dereference_array *ir) -{ - replace_rvalue(&ir->array); - return visit_continue; -} - -ir_visitor_status -ir_variable_replacement_visitor::visit_leave(ir_dereference_record *ir) -{ - replace_rvalue(&ir->record); - return visit_continue; -} - -ir_visitor_status -ir_variable_replacement_visitor::visit_leave(ir_call *ir) -{ - foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { - ir_rvalue *new_param = param; - replace_rvalue(&new_param); - - if (new_param != param) { - param->replace_with(new_param); - } - } - return visit_continue; -} - -static void -do_variable_replacement(exec_list *instructions, - ir_variable *orig, - ir_dereference *repl) -{ - ir_variable_replacement_visitor v(orig, repl); - - visit_list_elements(&v, instructions); -} diff --git a/src/glsl/opt_if_simplification.cpp b/src/glsl/opt_if_simplification.cpp deleted file mode 100644 index e05f03190aa..00000000000 --- a/src/glsl/opt_if_simplification.cpp +++ /dev/null @@ -1,126 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -/** - * \file opt_if_simplification.cpp - * - * Moves constant branches of if statements out to the surrounding - * instruction stream, and inverts if conditionals to avoid empty - * "then" blocks. - */ - -#include "ir.h" - -namespace { - -class ir_if_simplification_visitor : public ir_hierarchical_visitor { -public: - ir_if_simplification_visitor() - { - this->made_progress = false; - } - - ir_visitor_status visit_leave(ir_if *); - ir_visitor_status visit_enter(ir_assignment *); - - bool made_progress; -}; - -} /* unnamed namespace */ - -/* We only care about the top level "if" instructions, so don't - * descend into expressions. - */ -ir_visitor_status -ir_if_simplification_visitor::visit_enter(ir_assignment *ir) -{ - (void) ir; - return visit_continue_with_parent; -} - -bool -do_if_simplification(exec_list *instructions) -{ - ir_if_simplification_visitor v; - - v.run(instructions); - return v.made_progress; -} - - -ir_visitor_status -ir_if_simplification_visitor::visit_leave(ir_if *ir) -{ - /* If the if statement has nothing on either side, remove it. */ - if (ir->then_instructions.is_empty() && - ir->else_instructions.is_empty()) { - ir->remove(); - this->made_progress = true; - return visit_continue; - } - - /* FINISHME: Ideally there would be a way to note that the condition results - * FINISHME: in a constant before processing both of the other subtrees. - * FINISHME: This can probably be done with some flags, but it would take - * FINISHME: some work to get right. - */ - ir_constant *condition_constant = ir->condition->constant_expression_value(); - if (condition_constant) { - /* Move the contents of the one branch of the conditional - * that matters out. - */ - if (condition_constant->value.b[0]) { - ir->insert_before(&ir->then_instructions); - } else { - ir->insert_before(&ir->else_instructions); - } - ir->remove(); - this->made_progress = true; - return visit_continue; - } - - /* Turn: - * - * if (cond) { - * } else { - * do_work(); - * } - * - * into : - * - * if (!cond) - * do_work(); - * - * which avoids control flow for "else" (which is usually more - * expensive than normal operations), and the "not" can usually be - * folded into the generation of "cond" anyway. - */ - if (ir->then_instructions.is_empty()) { - ir->condition = new(ralloc_parent(ir->condition)) - ir_expression(ir_unop_logic_not, ir->condition); - ir->else_instructions.move_nodes_to(&ir->then_instructions); - this->made_progress = true; - } - - return visit_continue; -} diff --git a/src/glsl/opt_minmax.cpp b/src/glsl/opt_minmax.cpp deleted file mode 100644 index 29482ee69de..00000000000 --- a/src/glsl/opt_minmax.cpp +++ /dev/null @@ -1,488 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_minmax.cpp - * - * Drop operands from an expression tree of only min/max operations if they - * can be proven to not contribute to the final result. - * - * The algorithm is similar to alpha-beta pruning on a minmax search. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_rvalue_visitor.h" -#include "ir_optimization.h" -#include "ir_builder.h" -#include "program/prog_instruction.h" -#include "compiler/glsl_types.h" -#include "main/macros.h" - -using namespace ir_builder; - -namespace { - -enum compare_components_result { - LESS, - LESS_OR_EQUAL, - EQUAL, - GREATER_OR_EQUAL, - GREATER, - MIXED -}; - -class minmax_range { -public: - minmax_range(ir_constant *low = NULL, ir_constant *high = NULL) - { - this->low = low; - this->high = high; - } - - /* low is the lower limit of the range, high is the higher limit. NULL on - * low means negative infinity (unlimited) and on high positive infinity - * (unlimited). Because of the two interpretations of the value NULL, - * arbitrary comparison between ir_constants is impossible. - */ - ir_constant *low; - ir_constant *high; -}; - -class ir_minmax_visitor : public ir_rvalue_enter_visitor { -public: - ir_minmax_visitor() - : progress(false) - { - } - - ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange); - - void handle_rvalue(ir_rvalue **rvalue); - - bool progress; -}; - -/* - * Returns LESS if all vector components of `a' are strictly lower than of `b', - * GREATER if all vector components of `a' are strictly greater than of `b', - * MIXED if some vector components of `a' are strictly lower than of `b' while - * others are strictly greater, or EQUAL otherwise. - */ -static enum compare_components_result -compare_components(ir_constant *a, ir_constant *b) -{ - assert(a != NULL); - assert(b != NULL); - - assert(a->type->base_type == b->type->base_type); - - unsigned a_inc = a->type->is_scalar() ? 0 : 1; - unsigned b_inc = b->type->is_scalar() ? 
0 : 1; - unsigned components = MAX2(a->type->components(), b->type->components()); - - bool foundless = false; - bool foundgreater = false; - bool foundequal = false; - - for (unsigned i = 0, c0 = 0, c1 = 0; - i < components; - c0 += a_inc, c1 += b_inc, ++i) { - switch (a->type->base_type) { - case GLSL_TYPE_UINT: - if (a->value.u[c0] < b->value.u[c1]) - foundless = true; - else if (a->value.u[c0] > b->value.u[c1]) - foundgreater = true; - else - foundequal = true; - break; - case GLSL_TYPE_INT: - if (a->value.i[c0] < b->value.i[c1]) - foundless = true; - else if (a->value.i[c0] > b->value.i[c1]) - foundgreater = true; - else - foundequal = true; - break; - case GLSL_TYPE_FLOAT: - if (a->value.f[c0] < b->value.f[c1]) - foundless = true; - else if (a->value.f[c0] > b->value.f[c1]) - foundgreater = true; - else - foundequal = true; - break; - case GLSL_TYPE_DOUBLE: - if (a->value.d[c0] < b->value.d[c1]) - foundless = true; - else if (a->value.d[c0] > b->value.d[c1]) - foundgreater = true; - else - foundequal = true; - break; - default: - unreachable("not reached"); - } - } - - if (foundless && foundgreater) { - /* Some components are strictly lower, others are strictly greater */ - return MIXED; - } - - if (foundequal) { - /* It is not mixed, but it is not strictly lower or greater */ - if (foundless) - return LESS_OR_EQUAL; - if (foundgreater) - return GREATER_OR_EQUAL; - return EQUAL; - } - - /* All components are strictly lower or strictly greater */ - return foundless ? LESS : GREATER; -} - -static ir_constant * -combine_constant(bool ismin, ir_constant *a, ir_constant *b) -{ - void *mem_ctx = ralloc_parent(a); - ir_constant *c = a->clone(mem_ctx, NULL); - for (unsigned i = 0; i < c->type->components(); i++) { - switch (c->type->base_type) { - case GLSL_TYPE_UINT: - if ((ismin && b->value.u[i] < c->value.u[i]) || - (!ismin && b->value.u[i] > c->value.u[i])) - c->value.u[i] = b->value.u[i]; - break; - case GLSL_TYPE_INT: - if ((ismin && b->value.i[i] < c->value.i[i]) || - (!ismin && b->value.i[i] > c->value.i[i])) - c->value.i[i] = b->value.i[i]; - break; - case GLSL_TYPE_FLOAT: - if ((ismin && b->value.f[i] < c->value.f[i]) || - (!ismin && b->value.f[i] > c->value.f[i])) - c->value.f[i] = b->value.f[i]; - break; - case GLSL_TYPE_DOUBLE: - if ((ismin && b->value.d[i] < c->value.d[i]) || - (!ismin && b->value.d[i] > c->value.d[i])) - c->value.d[i] = b->value.d[i]; - break; - default: - assert(!"not reached"); - } - } - return c; -} - -static ir_constant * -smaller_constant(ir_constant *a, ir_constant *b) -{ - assert(a != NULL); - assert(b != NULL); - - enum compare_components_result ret = compare_components(a, b); - if (ret == MIXED) - return combine_constant(true, a, b); - else if (ret < EQUAL) - return a; - else - return b; -} - -static ir_constant * -larger_constant(ir_constant *a, ir_constant *b) -{ - assert(a != NULL); - assert(b != NULL); - - enum compare_components_result ret = compare_components(a, b); - if (ret == MIXED) - return combine_constant(false, a, b); - else if (ret < EQUAL) - return b; - else - return a; -} - -/* Combines two ranges by doing an element-wise min() / max() depending on the - * operation. - */ -static minmax_range -combine_range(minmax_range r0, minmax_range r1, bool ismin) -{ - minmax_range ret; - - if (!r0.low) { - ret.low = ismin ? r0.low : r1.low; - } else if (!r1.low) { - ret.low = ismin ? r1.low : r0.low; - } else { - ret.low = ismin ? smaller_constant(r0.low, r1.low) : - larger_constant(r0.low, r1.low); - } - - if (!r0.high) { - ret.high = ismin ? 
r1.high : r0.high; - } else if (!r1.high) { - ret.high = ismin ? r0.high : r1.high; - } else { - ret.high = ismin ? smaller_constant(r0.high, r1.high) : - larger_constant(r0.high, r1.high); - } - - return ret; -} - -/* Returns a range so that lower limit is the larger of the two lower limits, - * and higher limit is the smaller of the two higher limits. - */ -static minmax_range -range_intersection(minmax_range r0, minmax_range r1) -{ - minmax_range ret; - - if (!r0.low) - ret.low = r1.low; - else if (!r1.low) - ret.low = r0.low; - else - ret.low = larger_constant(r0.low, r1.low); - - if (!r0.high) - ret.high = r1.high; - else if (!r1.high) - ret.high = r0.high; - else - ret.high = smaller_constant(r0.high, r1.high); - - return ret; -} - -static minmax_range -get_range(ir_rvalue *rval) -{ - ir_expression *expr = rval->as_expression(); - if (expr && (expr->operation == ir_binop_min || - expr->operation == ir_binop_max)) { - minmax_range r0 = get_range(expr->operands[0]); - minmax_range r1 = get_range(expr->operands[1]); - return combine_range(r0, r1, expr->operation == ir_binop_min); - } - - ir_constant *c = rval->as_constant(); - if (c) { - return minmax_range(c, c); - } - - return minmax_range(); -} - -/** - * Prunes a min/max expression considering the base range of the parent - * min/max expression. - * - * @param baserange the range that the parents of this min/max expression - * in the min/max tree will clamp its value to. - */ -ir_rvalue * -ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange) -{ - assert(expr->operation == ir_binop_min || - expr->operation == ir_binop_max); - - bool ismin = expr->operation == ir_binop_min; - minmax_range limits[2]; - - /* Recurse to get the ranges for each of the subtrees of this - * expression. We need to do this as a separate step because we need to - * know the ranges of each of the subtrees before we prune either one. - * Consider something like this: - * - * max - * / \ - * max max - * / \ / \ - * 3 a b 2 - * - * We would like to prune away the max on the bottom-right, but to do so - * we need to know the range of the expression on the left beforehand, - * and there's no guarantee that we will visit either subtree in a - * particular order. - */ - for (unsigned i = 0; i < 2; ++i) - limits[i] = get_range(expr->operands[i]); - - for (unsigned i = 0; i < 2; ++i) { - bool is_redundant = false; - - enum compare_components_result cr = LESS; - if (ismin) { - /* If this operand will always be greater than the other one, it's - * redundant. - */ - if (limits[i].low && limits[1 - i].high) { - cr = compare_components(limits[i].low, limits[1 - i].high); - if (cr >= EQUAL && cr != MIXED) - is_redundant = true; - } - /* If this operand is always greater than baserange, then even if - * it's smaller than the other one it'll get clamped, so it's - * redundant. - */ - if (!is_redundant && limits[i].low && baserange.high) { - cr = compare_components(limits[i].low, baserange.high); - if (cr >= EQUAL && cr != MIXED) - is_redundant = true; - } - } else { - /* If this operand will always be lower than the other one, it's - * redundant. - */ - if (limits[i].high && limits[1 - i].low) { - cr = compare_components(limits[i].high, limits[1 - i].low); - if (cr <= EQUAL) - is_redundant = true; - } - /* If this operand is always lower than baserange, then even if - * it's greater than the other one it'll get clamped, so it's - * redundant. 
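
A scalar worked example of this redundancy rule, sketched in plain C++ (Range and combine are simplified stand-ins for minmax_range and combine_range; the real pass works on vector ir_constants):

   #include <cassert>
   #include <cmath>

   struct Range {
      double lo = -INFINITY;   // a NULL "low" in the pass means unbounded below
      double hi =  INFINITY;   // a NULL "high" means unbounded above
   };

   // Range of min(a, b) / max(a, b), mirroring combine_range().
   static Range combine(Range a, Range b, bool ismin)
   {
      if (ismin)
         return { std::fmin(a.lo, b.lo), std::fmin(a.hi, b.hi) };
      return { std::fmax(a.lo, b.lo), std::fmax(a.hi, b.hi) };
   }

   int main()
   {
      // min(max(x, 3.0), 2.0): the inner max has range [3.0, +inf).
      Range x;                                  // unknown variable
      Range three{3.0, 3.0}, two{2.0, 2.0};
      Range inner = combine(x, three, false);   // [3.0, +inf)
      assert(inner.lo == 3.0);

      // The inner operand's lower bound already exceeds the other
      // operand's upper bound, so it can never win the min: it is
      // redundant and the expression folds to 2.0, exactly as
      // prune_expression() concludes.
      assert(inner.lo >= two.hi);
   }
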
- */ - if (!is_redundant && limits[i].high && baserange.low) { - cr = compare_components(limits[i].high, baserange.low); - if (cr <= EQUAL) - is_redundant = true; - } - } - - if (is_redundant) { - progress = true; - - /* Recurse if necessary. */ - ir_expression *op_expr = expr->operands[1 - i]->as_expression(); - if (op_expr && (op_expr->operation == ir_binop_min || - op_expr->operation == ir_binop_max)) { - return prune_expression(op_expr, baserange); - } - - return expr->operands[1 - i]; - } else if (cr == MIXED) { - /* If we have mixed vector operands, we can try to resolve the minmax - * expression by doing a component-wise minmax: - * - * min min - * / \ / \ - * min a ===> [1,1] a - * / \ - * [1,3] [3,1] - * - */ - ir_constant *a = expr->operands[0]->as_constant(); - ir_constant *b = expr->operands[1]->as_constant(); - if (a && b) - return combine_constant(ismin, a, b); - } - } - - /* Now recurse to operands giving them the proper baserange. The baserange - * to pass is the intersection of our baserange and the other operand's - * limit with one of the ranges unlimited. If we can't compute a valid - * intersection, we use the current baserange. - */ - for (unsigned i = 0; i < 2; ++i) { - ir_expression *op_expr = expr->operands[i]->as_expression(); - if (op_expr && (op_expr->operation == ir_binop_min || - op_expr->operation == ir_binop_max)) { - /* We can only compute a new baserange for this operand if we managed - * to compute a valid range for the other operand. - */ - if (ismin) - limits[1 - i].low = NULL; - else - limits[1 - i].high = NULL; - minmax_range base = range_intersection(limits[1 - i], baserange); - expr->operands[i] = prune_expression(op_expr, base); - } - } - - /* If we got here we could not discard any of the operands of the minmax - * expression, but we can still try to resolve the expression if both - * operands are constant. We do this after the loop above, to make sure - * that if our operands are minmax expressions we have tried to prune them - * first (hopefully reducing them to constants). - */ - ir_constant *a = expr->operands[0]->as_constant(); - ir_constant *b = expr->operands[1]->as_constant(); - if (a && b) - return combine_constant(ismin, a, b); - - return expr; -} - -static ir_rvalue * -swizzle_if_required(ir_expression *expr, ir_rvalue *rval) -{ - if (expr->type->is_vector() && rval->type->is_scalar()) { - return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements); - } else { - return rval; - } -} - -void -ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - if (!*rvalue) - return; - - ir_expression *expr = (*rvalue)->as_expression(); - if (!expr || (expr->operation != ir_binop_min && - expr->operation != ir_binop_max)) - return; - - ir_rvalue *new_rvalue = prune_expression(expr, minmax_range()); - if (new_rvalue == *rvalue) - return; - - /* If the expression type is a vector and the optimization leaves a scalar - * as the result, we need to turn it into a vector. 
- */ - *rvalue = swizzle_if_required(expr, new_rvalue); - - progress = true; -} - -} - -bool -do_minmax_prune(exec_list *instructions) -{ - ir_minmax_visitor v; - - visit_list_elements(&v, instructions); - - return v.progress; -} diff --git a/src/glsl/opt_noop_swizzle.cpp b/src/glsl/opt_noop_swizzle.cpp deleted file mode 100644 index 41890ab2b15..00000000000 --- a/src/glsl/opt_noop_swizzle.cpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_noop_swizzle.cpp - * - * If a swizzle doesn't change the order or count of components, then - * remove the swizzle so that other optimization passes see the value - * behind it. 
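
A standalone sketch of the identity-mask test implemented below (is_noop_swizzle and the mask array are illustrative stand-ins for the ir_swizzle mask fields):

   #include <cassert>

   // A swizzle is a no-op when component i reads source component i and
   // the component counts match (the pass checks type equality for that).
   static bool is_noop_swizzle(const unsigned mask[4], unsigned components)
   {
      for (unsigned i = 0; i < components; i++) {
         if (mask[i] != i)
            return false;
      }
      return true;
   }

   int main()
   {
      const unsigned xyzw[4] = {0, 1, 2, 3};   // v.xyzw -- removable
      const unsigned yx[4]   = {1, 0, 0, 0};   // v.yx   -- not removable
      assert(is_noop_swizzle(xyzw, 4));
      assert(!is_noop_swizzle(yx, 2));
   }
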
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_rvalue_visitor.h"
-#include "compiler/glsl_types.h"
-
-namespace {
-
-class ir_noop_swizzle_visitor : public ir_rvalue_visitor {
-public:
-   ir_noop_swizzle_visitor()
-   {
-      this->progress = false;
-   }
-
-   void handle_rvalue(ir_rvalue **rvalue);
-   bool progress;
-};
-
-} /* unnamed namespace */
-
-void
-ir_noop_swizzle_visitor::handle_rvalue(ir_rvalue **rvalue)
-{
-   if (!*rvalue)
-      return;
-
-   ir_swizzle *swiz = (*rvalue)->as_swizzle();
-   if (!swiz || swiz->type != swiz->val->type)
-      return;
-
-   int elems = swiz->val->type->vector_elements;
-   if (swiz->mask.x != 0)
-      return;
-   if (elems >= 2 && swiz->mask.y != 1)
-      return;
-   if (elems >= 3 && swiz->mask.z != 2)
-      return;
-   if (elems >= 4 && swiz->mask.w != 3)
-      return;
-
-   this->progress = true;
-   *rvalue = swiz->val;
-}
-
-bool
-do_noop_swizzle(exec_list *instructions)
-{
-   ir_noop_swizzle_visitor v;
-   visit_list_elements(&v, instructions);
-
-   return v.progress;
-}
diff --git a/src/glsl/opt_rebalance_tree.cpp b/src/glsl/opt_rebalance_tree.cpp
deleted file mode 100644
index 095f2d7d2f0..00000000000
--- a/src/glsl/opt_rebalance_tree.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Copyright © 2014 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_rebalance_tree.cpp
- *
- * Rebalances a reduction expression tree.
- *
- * For reduction operations (e.g., x + y + z + w) we generate an expression
- * tree like
- *
- *        +
- *       / \
- *      +   w
- *     / \
- *    +   z
- *   / \
- *  x   y
- *
- * which we can rebalance into
- *
- *       +
- *      / \
- *     /   \
- *    +     +
- *   / \   / \
- *  x   y z   w
- *
- * to get a better instruction scheduling.
- *
- * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
- * and Bette L. Warren.
- *
- * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
- * explanation of the tree_to_vine() (rightward rotation) and
- * vine_to_tree() (leftward rotation) algorithms.
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_rvalue_visitor.h"
-#include "ir_optimization.h"
-#include "main/macros.h" /* for MAX2 */
-
-/* The DSW algorithm generates a degenerate tree (really, a linked list) in
- * tree_to_vine(). We'd rather not leave a binary expression with only one
- * operand, so trivial modifications (the ternary operators below) are needed
- * to ensure that we only rotate around the ir_expression nodes of the tree.
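
A minimal standalone illustration of the same two-phase DSW scheme on a toy node with plain left/right pointers (Node and all names below are illustrative; the pass itself rotates through ir_expression operands):

   #include <cassert>

   struct Node {
      Node *left = nullptr, *right = nullptr;
   };

   // Phase 1: rightward rotations flatten the tree into a right-leaning
   // "vine" hanging off a pseudo-root; returns the number of real nodes.
   static unsigned tree_to_vine(Node *pseudo_root)
   {
      unsigned size = 0;
      Node *tail = pseudo_root;
      Node *rest = pseudo_root->right;
      while (rest) {
         if (!rest->left) {
            tail = rest;              // move the vine tail down one
            rest = rest->right;
            size++;
         } else {
            Node *temp = rest->left;  // rotate right around 'rest'
            rest->left = temp->right;
            temp->right = rest;
            rest = temp;
            tail->right = temp;
         }
      }
      return size;
   }

   // One sweep of leftward rotations, halving the vine's length.
   static void compress(Node *root, unsigned count)
   {
      Node *scan = root;
      for (unsigned i = 0; i < count; i++) {
         Node *child = scan->right;
         scan->right = child->right;
         scan = scan->right;
         child->right = scan->left;
         scan->left = child;
      }
   }

   // Phase 2: repeated compression turns the vine into a balanced tree.
   static void vine_to_tree(Node *root, unsigned size)
   {
      int n = (int) size - 1;
      for (int m = n / 2; m > 0; m = n / 2) {
         compress(root, m);
         n -= m + 1;
      }
   }

   int main()
   {
      // Build ((x + y) + z) + w as a left-leaning chain under a
      // pseudo-root, then rebalance it, mirroring handle_expression().
      Node x, y, z, w;
      Node a{&x, &y}, b{&a, &z}, c{&b, &w};
      Node pseudo_root{nullptr, &c};
      unsigned size = tree_to_vine(&pseudo_root);
      vine_to_tree(&pseudo_root, size);
      assert(pseudo_root.right != nullptr);   // new, balanced root
   }
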
- */ -static unsigned -tree_to_vine(ir_expression *root) -{ - unsigned size = 0; - ir_rvalue *vine_tail = root; - ir_rvalue *remainder = root->operands[1]; - - while (remainder != NULL) { - ir_expression *remainder_temp = remainder->as_expression(); - ir_expression *remainder_left = remainder_temp ? - remainder_temp->operands[0]->as_expression() : NULL; - - if (remainder_left == NULL) { - /* move vine_tail down one */ - vine_tail = remainder; - remainder = remainder->as_expression() ? - ((ir_expression *)remainder)->operands[1] : NULL; - size++; - } else { - /* rotate */ - ir_expression *tempptr = remainder_left; - ((ir_expression *)remainder)->operands[0] = tempptr->operands[1]; - tempptr->operands[1] = remainder; - remainder = tempptr; - ((ir_expression *)vine_tail)->operands[1] = tempptr; - } - } - - return size; -} - -static void -compression(ir_expression *root, unsigned count) -{ - ir_expression *scanner = root; - - for (unsigned i = 0; i < count; i++) { - ir_expression *child = (ir_expression *)scanner->operands[1]; - scanner->operands[1] = child->operands[1]; - scanner = (ir_expression *)scanner->operands[1]; - child->operands[1] = scanner->operands[0]; - scanner->operands[0] = child; - } -} - -static void -vine_to_tree(ir_expression *root, unsigned size) -{ - int n = size - 1; - for (int m = n / 2; m > 0; m = n / 2) { - compression(root, m); - n -= m + 1; - } -} - -namespace { - -class ir_rebalance_visitor : public ir_rvalue_enter_visitor { -public: - ir_rebalance_visitor() - { - progress = false; - } - - void handle_rvalue(ir_rvalue **rvalue); - - bool progress; -}; - -struct is_reduction_data { - ir_expression_operation operation; - const glsl_type *type; - unsigned num_expr; - bool is_reduction; - bool contains_constant; -}; - -} /* anonymous namespace */ - -static bool -is_reduction_operation(ir_expression_operation operation) -{ - switch (operation) { - case ir_binop_add: - case ir_binop_mul: - case ir_binop_bit_and: - case ir_binop_bit_xor: - case ir_binop_bit_or: - case ir_binop_logic_and: - case ir_binop_logic_xor: - case ir_binop_logic_or: - case ir_binop_min: - case ir_binop_max: - return true; - default: - return false; - } -} - -/* Note that this function does not attempt to recognize that reduction trees - * are already balanced. - * - * We return false from this function for a number of reasons other than an - * expression tree not being a mathematical reduction. Namely, - * - * - if the tree contains multiple constants that we may be able to combine. - * - if the tree contains matrices: - * - they might contain vec4's with many constant components that we can - * simplify after splitting. - * - applying the matrix chain ordering optimization is more than just - * balancing an expression tree. - * - if the tree contains operations on multiple types. - * - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c - * would trick the visiting pass. - */ -static void -is_reduction(ir_instruction *ir, void *data) -{ - struct is_reduction_data *ird = (struct is_reduction_data *)data; - if (!ird->is_reduction) - return; - - /* We don't want to balance a tree that contains multiple constants, since - * we'll be able to constant fold them if they're not in separate subtrees. - */ - if (ir->as_constant()) { - if (ird->contains_constant) { - ird->is_reduction = false; - } - ird->contains_constant = true; - return; - } - - /* Array/record dereferences have subtrees that are not part of the expr - * tree we're balancing. Skip trees containing them. 
- */ - if (ir->ir_type == ir_type_dereference_array || - ir->ir_type == ir_type_dereference_record) { - ird->is_reduction = false; - return; - } - - ir_expression *expr = ir->as_expression(); - if (!expr) - return; - - /* Non-constant matrices might still contain constant vec4 that we can - * constant fold once split up. Handling matrices will need some more - * work. - */ - if (expr->type->is_matrix() || - expr->operands[0]->type->is_matrix() || - (expr->operands[1] && expr->operands[1]->type->is_matrix())) { - ird->is_reduction = false; - return; - } - - if (ird->type != NULL && ird->type != expr->type) { - ird->is_reduction = false; - return; - } - ird->type = expr->type; - - ird->num_expr++; - if (is_reduction_operation(expr->operation)) { - if (ird->operation != 0 && ird->operation != expr->operation) - ird->is_reduction = false; - ird->operation = expr->operation; - } else { - ird->is_reduction = false; - } -} - -static ir_rvalue * -handle_expression(ir_expression *expr) -{ - struct is_reduction_data ird; - ird.operation = (ir_expression_operation)0; - ird.type = NULL; - ird.num_expr = 0; - ird.is_reduction = true; - ird.contains_constant = false; - - visit_tree(expr, is_reduction, (void *)&ird); - - if (ird.is_reduction && ird.num_expr > 2) { - ir_constant z = ir_constant(0.0f); - ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr); - - unsigned size = tree_to_vine(&pseudo_root); - vine_to_tree(&pseudo_root, size); - - expr = (ir_expression *)pseudo_root.operands[1]; - } - return expr; -} - -static void -update_types(ir_instruction *ir, void *) -{ - ir_expression *expr = ir->as_expression(); - if (!expr) - return; - - const glsl_type *const new_type = - glsl_type::get_instance(expr->type->base_type, - MAX2(expr->operands[0]->type->vector_elements, - expr->operands[1]->type->vector_elements), - 1); - assert(new_type != glsl_type::error_type); - expr->type = new_type; -} - -void -ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - if (!*rvalue) - return; - - ir_expression *expr = (*rvalue)->as_expression(); - if (!expr || !is_reduction_operation(expr->operation)) - return; - - ir_rvalue *new_rvalue = handle_expression(expr); - - /* If we failed to rebalance the tree (e.g., because it wasn't a reduction, - * or some other set of cases) new_rvalue will point to the same root as - * before. - * - * Similarly, if the tree rooted at *rvalue was a reduction and was already - * balanced, the algorithm will rearrange the tree but will ultimately - * return an identical tree, so this check will handle that as well and - * will not set progress = true. 
- */
-   if (new_rvalue == *rvalue)
-      return;
-
-   visit_tree(new_rvalue, NULL, NULL, update_types);
-
-   *rvalue = new_rvalue;
-   this->progress = true;
-}
-
-bool
-do_rebalance_tree(exec_list *instructions)
-{
-   ir_rebalance_visitor v;
-
-   v.run(instructions);
-
-   return v.progress;
-}
diff --git a/src/glsl/opt_redundant_jumps.cpp b/src/glsl/opt_redundant_jumps.cpp
deleted file mode 100644
index ee384d0f23c..00000000000
--- a/src/glsl/opt_redundant_jumps.cpp
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_redundant_jumps.cpp
- * Remove certain types of redundant jumps
- */
-
-#include "ir.h"
-
-namespace {
-
-class redundant_jumps_visitor : public ir_hierarchical_visitor {
-public:
-   redundant_jumps_visitor()
-   {
-      this->progress = false;
-   }
-
-   virtual ir_visitor_status visit_leave(ir_if *);
-   virtual ir_visitor_status visit_leave(ir_loop *);
-   virtual ir_visitor_status visit_enter(ir_assignment *);
-
-   bool progress;
-};
-
-} /* unnamed namespace */
-
-/* We only care about the top level instructions, so don't descend
- * into expressions.
- */
-ir_visitor_status
-redundant_jumps_visitor::visit_enter(ir_assignment *)
-{
-   return visit_continue_with_parent;
-}
-
-ir_visitor_status
-redundant_jumps_visitor::visit_leave(ir_if *ir)
-{
-   /* If the last instruction in both branches is a 'break' or a 'continue',
-    * pull it out of the branches and insert it after the if-statement.  Note
-    * that both must be the same type (either 'break' or 'continue').
-    */
-   ir_instruction *const last_then =
-      (ir_instruction *) ir->then_instructions.get_tail();
-   ir_instruction *const last_else =
-      (ir_instruction *) ir->else_instructions.get_tail();
-
-   if ((last_then == NULL) || (last_else == NULL))
-      return visit_continue;
-
-   if ((last_then->ir_type != ir_type_loop_jump)
-       || (last_else->ir_type != ir_type_loop_jump))
-      return visit_continue;
-
-   ir_loop_jump *const then_jump = (ir_loop_jump *) last_then;
-   ir_loop_jump *const else_jump = (ir_loop_jump *) last_else;
-
-   if (then_jump->mode != else_jump->mode)
-      return visit_continue;
-
-   then_jump->remove();
-   else_jump->remove();
-   this->progress = true;
-
-   ir->insert_after(then_jump);
-
-   /* If both branches of the if-statement are now empty, remove the
-    * if-statement.
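
A source-level sketch of the rewrite, with plain C++ loops standing in for the IR (before/after and the step counters are illustrative, not part of the pass):

   #include <cassert>

   static int before(bool cond, int n)
   {
      int steps = 0;
      while (n--) {
         steps++;
         if (cond) { steps += 1; break; }   // 'break' ends both branches...
         else      { steps += 2; break; }
      }
      return steps;
   }

   static int after(bool cond, int n)
   {
      int steps = 0;
      while (n--) {
         steps++;
         if (cond) { steps += 1; }          // ...so one copy is hoisted
         else      { steps += 2; }          //    after the if-statement
         break;
      }
      return steps;
   }

   int main()
   {
      for (bool c : {false, true})
         for (int n = 0; n < 3; n++)
            assert(before(c, n) == after(c, n));   // identical behavior
   }

If hoisting the jump empties both branches, the whole if-statement is removed as well.
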
- */ - if (ir->then_instructions.is_empty() && ir->else_instructions.is_empty()) - ir->remove(); - - return visit_continue; -} - - -ir_visitor_status -redundant_jumps_visitor::visit_leave(ir_loop *ir) -{ - /* If the last instruction of a loop body is a 'continue', remove it. - */ - ir_instruction *const last = - (ir_instruction *) ir->body_instructions.get_tail(); - - if (last && (last->ir_type == ir_type_loop_jump) - && (((ir_loop_jump *) last)->mode == ir_loop_jump::jump_continue)) { - last->remove(); - this->progress = true; - } - - return visit_continue; -} - - -bool -optimize_redundant_jumps(exec_list *instructions) -{ - redundant_jumps_visitor v; - - v.run(instructions); - return v.progress; -} diff --git a/src/glsl/opt_structure_splitting.cpp b/src/glsl/opt_structure_splitting.cpp deleted file mode 100644 index 0d18a2f7584..00000000000 --- a/src/glsl/opt_structure_splitting.cpp +++ /dev/null @@ -1,367 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_structure_splitting.cpp - * - * If a structure is only ever referenced by its components, then - * split those components out to individual variables so they can be - * handled normally by other optimization passes. - * - * This skips structures like uniforms, which need to be accessible as - * structures for their access by the GL. - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_rvalue_visitor.h" -#include "compiler/glsl_types.h" - -namespace { - -static bool debug = false; - -class variable_entry : public exec_node -{ -public: - variable_entry(ir_variable *var) - { - this->var = var; - this->whole_structure_access = 0; - this->declaration = false; - this->components = NULL; - this->mem_ctx = NULL; - } - - ir_variable *var; /* The key: the variable's pointer. */ - - /** Number of times the variable is referenced, including assignments. */ - unsigned whole_structure_access; - - /* If the variable had a decl we can work with in the instruction - * stream. We can't do splitting on function arguments, which - * don't get this variable set. - */ - bool declaration; - - ir_variable **components; - - /** ralloc_parent(this->var) -- the shader's ralloc context. 
*/ - void *mem_ctx; -}; - - -class ir_structure_reference_visitor : public ir_hierarchical_visitor { -public: - ir_structure_reference_visitor(void) - { - this->mem_ctx = ralloc_context(NULL); - this->variable_list.make_empty(); - } - - ~ir_structure_reference_visitor(void) - { - ralloc_free(mem_ctx); - } - - virtual ir_visitor_status visit(ir_variable *); - virtual ir_visitor_status visit(ir_dereference_variable *); - virtual ir_visitor_status visit_enter(ir_dereference_record *); - virtual ir_visitor_status visit_enter(ir_assignment *); - virtual ir_visitor_status visit_enter(ir_function_signature *); - - variable_entry *get_variable_entry(ir_variable *var); - - /* List of variable_entry */ - exec_list variable_list; - - void *mem_ctx; -}; - -variable_entry * -ir_structure_reference_visitor::get_variable_entry(ir_variable *var) -{ - assert(var); - - if (!var->type->is_record() || - var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage || - var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out) - return NULL; - - foreach_in_list(variable_entry, entry, &this->variable_list) { - if (entry->var == var) - return entry; - } - - variable_entry *entry = new(mem_ctx) variable_entry(var); - this->variable_list.push_tail(entry); - return entry; -} - - -ir_visitor_status -ir_structure_reference_visitor::visit(ir_variable *ir) -{ - variable_entry *entry = this->get_variable_entry(ir); - - if (entry) - entry->declaration = true; - - return visit_continue; -} - -ir_visitor_status -ir_structure_reference_visitor::visit(ir_dereference_variable *ir) -{ - ir_variable *const var = ir->variable_referenced(); - variable_entry *entry = this->get_variable_entry(var); - - if (entry) - entry->whole_structure_access++; - - return visit_continue; -} - -ir_visitor_status -ir_structure_reference_visitor::visit_enter(ir_dereference_record *ir) -{ - (void) ir; - /* Don't descend into the ir_dereference_variable below. */ - return visit_continue_with_parent; -} - -ir_visitor_status -ir_structure_reference_visitor::visit_enter(ir_assignment *ir) -{ - /* If there are no structure references yet, no need to bother with - * processing the expression tree. - */ - if (this->variable_list.is_empty()) - return visit_continue_with_parent; - - if (ir->lhs->as_dereference_variable() && - ir->rhs->as_dereference_variable() && - !ir->condition) { - /* We'll split copies of a structure to copies of components, so don't - * descend to the ir_dereference_variables. - */ - return visit_continue_with_parent; - } - return visit_continue; -} - -ir_visitor_status -ir_structure_reference_visitor::visit_enter(ir_function_signature *ir) -{ - /* We don't have logic for structure-splitting function arguments, - * so just look at the body instructions and not the parameter - * declarations. 
- */ - visit_list_elements(this, &ir->body); - return visit_continue_with_parent; -} - -class ir_structure_splitting_visitor : public ir_rvalue_visitor { -public: - ir_structure_splitting_visitor(exec_list *vars) - { - this->variable_list = vars; - } - - virtual ~ir_structure_splitting_visitor() - { - } - - virtual ir_visitor_status visit_leave(ir_assignment *); - - void split_deref(ir_dereference **deref); - void handle_rvalue(ir_rvalue **rvalue); - variable_entry *get_splitting_entry(ir_variable *var); - - exec_list *variable_list; -}; - -variable_entry * -ir_structure_splitting_visitor::get_splitting_entry(ir_variable *var) -{ - assert(var); - - if (!var->type->is_record()) - return NULL; - - foreach_in_list(variable_entry, entry, this->variable_list) { - if (entry->var == var) { - return entry; - } - } - - return NULL; -} - -void -ir_structure_splitting_visitor::split_deref(ir_dereference **deref) -{ - if ((*deref)->ir_type != ir_type_dereference_record) - return; - - ir_dereference_record *deref_record = (ir_dereference_record *)*deref; - ir_dereference_variable *deref_var = deref_record->record->as_dereference_variable(); - if (!deref_var) - return; - - variable_entry *entry = get_splitting_entry(deref_var->var); - if (!entry) - return; - - unsigned int i; - for (i = 0; i < entry->var->type->length; i++) { - if (strcmp(deref_record->field, - entry->var->type->fields.structure[i].name) == 0) - break; - } - assert(i != entry->var->type->length); - - *deref = new(entry->mem_ctx) ir_dereference_variable(entry->components[i]); -} - -void -ir_structure_splitting_visitor::handle_rvalue(ir_rvalue **rvalue) -{ - if (!*rvalue) - return; - - ir_dereference *deref = (*rvalue)->as_dereference(); - - if (!deref) - return; - - split_deref(&deref); - *rvalue = deref; -} - -ir_visitor_status -ir_structure_splitting_visitor::visit_leave(ir_assignment *ir) -{ - ir_dereference_variable *lhs_deref = ir->lhs->as_dereference_variable(); - ir_dereference_variable *rhs_deref = ir->rhs->as_dereference_variable(); - variable_entry *lhs_entry = lhs_deref ? get_splitting_entry(lhs_deref->var) : NULL; - variable_entry *rhs_entry = rhs_deref ? get_splitting_entry(rhs_deref->var) : NULL; - const glsl_type *type = ir->rhs->type; - - if ((lhs_entry || rhs_entry) && !ir->condition) { - for (unsigned int i = 0; i < type->length; i++) { - ir_dereference *new_lhs, *new_rhs; - void *mem_ctx = lhs_entry ? lhs_entry->mem_ctx : rhs_entry->mem_ctx; - - if (lhs_entry) { - new_lhs = new(mem_ctx) ir_dereference_variable(lhs_entry->components[i]); - } else { - new_lhs = new(mem_ctx) - ir_dereference_record(ir->lhs->clone(mem_ctx, NULL), - type->fields.structure[i].name); - } - - if (rhs_entry) { - new_rhs = new(mem_ctx) ir_dereference_variable(rhs_entry->components[i]); - } else { - new_rhs = new(mem_ctx) - ir_dereference_record(ir->rhs->clone(mem_ctx, NULL), - type->fields.structure[i].name); - } - - ir->insert_before(new(mem_ctx) ir_assignment(new_lhs, - new_rhs, - NULL)); - } - ir->remove(); - } else { - handle_rvalue(&ir->rhs); - split_deref(&ir->lhs); - } - - handle_rvalue(&ir->condition); - - return visit_continue; -} - -} /* unnamed namespace */ - -bool -do_structure_splitting(exec_list *instructions) -{ - ir_structure_reference_visitor refs; - - visit_list_elements(&refs, instructions); - - /* Trim out variables we can't split. 
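
A source-level sketch of the outcome, under illustrative names (the s_a / s_b scalars stand in for the components[] variables the pass creates):

   // A struct temporary that is only ever touched field-by-field, e.g.
   //
   //    struct S { float a; float b; };
   //    S s;
   //    s.a = 1.0;  s.b = s.a + 2.0;
   //
   // is rewritten into independent scalars
   //
   //    float s_a;  float s_b;
   //    s_a = 1.0;  s_b = s_a + 2.0;
   //
   // A variable qualifies only if it was declared in the instruction
   // stream (entry->declaration) and is never referenced as a whole
   // (entry->whole_structure_access == 0).  Plain struct-to-struct copies
   // do not count as whole references, since visit_leave(ir_assignment)
   // above expands them into one copy per field.
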
*/ - foreach_in_list_safe(variable_entry, entry, &refs.variable_list) { - if (debug) { - printf("structure %s@%p: decl %d, whole_access %d\n", - entry->var->name, (void *) entry->var, entry->declaration, - entry->whole_structure_access); - } - - if (!entry->declaration || entry->whole_structure_access) { - entry->remove(); - } - } - - if (refs.variable_list.is_empty()) - return false; - - void *mem_ctx = ralloc_context(NULL); - - /* Replace the decls of the structures to be split with their split - * components. - */ - foreach_in_list_safe(variable_entry, entry, &refs.variable_list) { - const struct glsl_type *type = entry->var->type; - - entry->mem_ctx = ralloc_parent(entry->var); - - entry->components = ralloc_array(mem_ctx, - ir_variable *, - type->length); - - for (unsigned int i = 0; i < entry->var->type->length; i++) { - const char *name = ralloc_asprintf(mem_ctx, "%s_%s", - entry->var->name, - type->fields.structure[i].name); - - entry->components[i] = - new(entry->mem_ctx) ir_variable(type->fields.structure[i].type, - name, - ir_var_temporary); - entry->var->insert_before(entry->components[i]); - } - - entry->var->remove(); - } - - ir_structure_splitting_visitor split(&refs.variable_list); - visit_list_elements(&split, instructions); - - ralloc_free(mem_ctx); - - return true; -} diff --git a/src/glsl/opt_swizzle_swizzle.cpp b/src/glsl/opt_swizzle_swizzle.cpp deleted file mode 100644 index 7285474b089..00000000000 --- a/src/glsl/opt_swizzle_swizzle.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_swizzle_swizzle.cpp - * - * Eliminates the second swizzle in a swizzle chain. 
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_optimization.h"
-#include "compiler/glsl_types.h"
-
-namespace {
-
-class ir_swizzle_swizzle_visitor : public ir_hierarchical_visitor {
-public:
-   ir_swizzle_swizzle_visitor()
-   {
-      progress = false;
-   }
-
-   virtual ir_visitor_status visit_enter(ir_swizzle *);
-
-   bool progress;
-};
-
-} /* unnamed namespace */
-
-ir_visitor_status
-ir_swizzle_swizzle_visitor::visit_enter(ir_swizzle *ir)
-{
-   int mask2[4];
-
-   ir_swizzle *swiz2 = ir->val->as_swizzle();
-   if (!swiz2)
-      return visit_continue;
-
-   memset(&mask2, 0, sizeof(mask2));
-   if (swiz2->mask.num_components >= 1)
-      mask2[0] = swiz2->mask.x;
-   if (swiz2->mask.num_components >= 2)
-      mask2[1] = swiz2->mask.y;
-   if (swiz2->mask.num_components >= 3)
-      mask2[2] = swiz2->mask.z;
-   if (swiz2->mask.num_components >= 4)
-      mask2[3] = swiz2->mask.w;
-
-   if (ir->mask.num_components >= 1)
-      ir->mask.x = mask2[ir->mask.x];
-   if (ir->mask.num_components >= 2)
-      ir->mask.y = mask2[ir->mask.y];
-   if (ir->mask.num_components >= 3)
-      ir->mask.z = mask2[ir->mask.z];
-   if (ir->mask.num_components >= 4)
-      ir->mask.w = mask2[ir->mask.w];
-
-   ir->val = swiz2->val;
-
-   this->progress = true;
-
-   return visit_continue;
-}
-
-/**
- * Does a swizzle-of-swizzle elimination pass on the code present in the
- * instruction stream.
- */
-bool
-do_swizzle_swizzle(exec_list *instructions)
-{
-   ir_swizzle_swizzle_visitor v;
-
-   v.run(instructions);
-
-   return v.progress;
-}
diff --git a/src/glsl/opt_tree_grafting.cpp b/src/glsl/opt_tree_grafting.cpp
deleted file mode 100644
index 83effb7424c..00000000000
--- a/src/glsl/opt_tree_grafting.cpp
+++ /dev/null
@@ -1,404 +0,0 @@
-/*
- * Copyright © 2010 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-/**
- * \file opt_tree_grafting.cpp
- *
- * Takes assignments to variables that are dereferenced only once and
- * pastes the RHS expression into where the variable is dereferenced.
- *
- * In the process of various operations like function inlining and
- * ternary op handling, we'll end up with our expression trees having
- * been chopped up into a series of assignments of short expressions
- * to temps.  Other passes like ir_algebraic.cpp would prefer to see
- * the deepest expression trees they can to try to optimize them.
- *
- * This is a lot like copy propagation.  In comparison, copy
- * propagation only acts on plain copies, not arbitrary expressions on
- * the RHS.  Generally, we wouldn't want to go pasting some
- * complicated expression everywhere it got used, though, so we don't
- * handle expressions in that pass.
- *
- * The hard part is making sure we don't move an expression across
- * some other assignments that would change the value of the
- * expression.  So we split this into two passes: First, find the
- * variables in our scope which are written to once and read once, and
- * then go through basic blocks seeing if we find an opportunity to
- * move those expressions safely.
- */
-
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_variable_refcount.h"
-#include "ir_basic_block.h"
-#include "ir_optimization.h"
-#include "compiler/glsl_types.h"
-
-namespace {
-
-static bool debug = false;
-
-class ir_tree_grafting_visitor : public ir_hierarchical_visitor {
-public:
-   ir_tree_grafting_visitor(ir_assignment *graft_assign,
-                            ir_variable *graft_var)
-   {
-      this->progress = false;
-      this->graft_assign = graft_assign;
-      this->graft_var = graft_var;
-   }
-
-   virtual ir_visitor_status visit_leave(class ir_assignment *);
-   virtual ir_visitor_status visit_enter(class ir_call *);
-   virtual ir_visitor_status visit_enter(class ir_expression *);
-   virtual ir_visitor_status visit_enter(class ir_function *);
-   virtual ir_visitor_status visit_enter(class ir_function_signature *);
-   virtual ir_visitor_status visit_enter(class ir_if *);
-   virtual ir_visitor_status visit_enter(class ir_loop *);
-   virtual ir_visitor_status visit_enter(class ir_swizzle *);
-   virtual ir_visitor_status visit_enter(class ir_texture *);
-
-   ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var);
-
-   bool do_graft(ir_rvalue **rvalue);
-
-   bool progress;
-   ir_variable *graft_var;
-   ir_assignment *graft_assign;
-};
-
-struct find_deref_info {
-   ir_variable *var;
-   bool found;
-};
-
-void
-dereferences_variable_callback(ir_instruction *ir, void *data)
-{
-   struct find_deref_info *info = (struct find_deref_info *)data;
-   ir_dereference_variable *deref = ir->as_dereference_variable();
-
-   if (deref && deref->var == info->var)
-      info->found = true;
-}
-
-static bool
-dereferences_variable(ir_instruction *ir, ir_variable *var)
-{
-   struct find_deref_info info;
-
-   info.var = var;
-   info.found = false;
-
-   visit_tree(ir, dereferences_variable_callback, &info);
-
-   return info.found;
-}
-
-bool
-ir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue)
-{
-   if (!*rvalue)
-      return false;
-
-   ir_dereference_variable *deref = (*rvalue)->as_dereference_variable();
-
-   if (!deref || deref->var != this->graft_var)
-      return false;
-
-   if (debug) {
-      fprintf(stderr, "GRAFTING:\n");
-      this->graft_assign->fprint(stderr);
-      fprintf(stderr, "\n");
-      fprintf(stderr, "TO:\n");
-      (*rvalue)->fprint(stderr);
-      fprintf(stderr, "\n");
-   }
-
-   this->graft_assign->remove();
-   *rvalue = this->graft_assign->rhs;
-
-   this->progress = true;
-   return true;
-}
-
-ir_visitor_status
-ir_tree_grafting_visitor::visit_enter(ir_loop *ir)
-{
-   (void)ir;
-   /* Do not traverse into the body of the loop since that is a
-    * different basic block.
-    */
-   return visit_stop;
-}
-
-/**
- * Check if we can continue grafting after writing to a variable.  If the
- * expression we're trying to graft references the variable, we must stop.
- *
- * \param ir   An instruction that writes to a variable.
- * \param var  The variable being updated.
- */ -ir_visitor_status -ir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var) -{ - if (dereferences_variable(this->graft_assign->rhs, var)) { - if (debug) { - fprintf(stderr, "graft killed by: "); - ir->fprint(stderr); - fprintf(stderr, "\n"); - } - return visit_stop; - } - - return visit_continue; -} - -ir_visitor_status -ir_tree_grafting_visitor::visit_leave(ir_assignment *ir) -{ - if (do_graft(&ir->rhs) || - do_graft(&ir->condition)) - return visit_stop; - - /* If this assignment updates a variable used in the assignment - * we're trying to graft, then we're done. - */ - return check_graft(ir, ir->lhs->variable_referenced()); -} - -ir_visitor_status -ir_tree_grafting_visitor::visit_enter(ir_function *ir) -{ - (void) ir; - return visit_continue_with_parent; -} - -ir_visitor_status -ir_tree_grafting_visitor::visit_enter(ir_function_signature *ir) -{ - (void)ir; - return visit_continue_with_parent; -} - -ir_visitor_status -ir_tree_grafting_visitor::visit_enter(ir_call *ir) -{ - foreach_two_lists(formal_node, &ir->callee->parameters, - actual_node, &ir->actual_parameters) { - ir_variable *sig_param = (ir_variable *) formal_node; - ir_rvalue *ir = (ir_rvalue *) actual_node; - ir_rvalue *new_ir = ir; - - if (sig_param->data.mode != ir_var_function_in - && sig_param->data.mode != ir_var_const_in) { - if (check_graft(ir, sig_param) == visit_stop) - return visit_stop; - continue; - } - - if (do_graft(&new_ir)) { - ir->replace_with(new_ir); - return visit_stop; - } - } - - if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop) - return visit_stop; - - return visit_continue; -} - -ir_visitor_status -ir_tree_grafting_visitor::visit_enter(ir_expression *ir) -{ - for (unsigned int i = 0; i < ir->get_num_operands(); i++) { - if (do_graft(&ir->operands[i])) - return visit_stop; - } - - return visit_continue; -} - -ir_visitor_status -ir_tree_grafting_visitor::visit_enter(ir_if *ir) -{ - if (do_graft(&ir->condition)) - return visit_stop; - - /* Do not traverse into the body of the if-statement since that is a - * different basic block. 
- */
-ir_visitor_status
-ir_tree_grafting_visitor::visit_enter(ir_swizzle *ir)
-{
-   if (do_graft(&ir->val))
-      return visit_stop;
-
-   return visit_continue;
-}
-
-ir_visitor_status
-ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
-{
-   if (do_graft(&ir->coordinate) ||
-       do_graft(&ir->projector) ||
-       do_graft(&ir->offset) ||
-       do_graft(&ir->shadow_comparitor))
-      return visit_stop;
-
-   switch (ir->op) {
-   case ir_tex:
-   case ir_lod:
-   case ir_query_levels:
-   case ir_texture_samples:
-   case ir_samples_identical:
-      break;
-   case ir_txb:
-      if (do_graft(&ir->lod_info.bias))
-         return visit_stop;
-      break;
-   case ir_txf:
-   case ir_txl:
-   case ir_txs:
-      if (do_graft(&ir->lod_info.lod))
-         return visit_stop;
-      break;
-   case ir_txf_ms:
-      if (do_graft(&ir->lod_info.sample_index))
-         return visit_stop;
-      break;
-   case ir_txd:
-      if (do_graft(&ir->lod_info.grad.dPdx) ||
-          do_graft(&ir->lod_info.grad.dPdy))
-         return visit_stop;
-      break;
-   case ir_tg4:
-      if (do_graft(&ir->lod_info.component))
-         return visit_stop;
-      break;
-   }
-
-   return visit_continue;
-}
-
-struct tree_grafting_info {
-   ir_variable_refcount_visitor *refs;
-   bool progress;
-};
-
-static bool
-try_tree_grafting(ir_assignment *start,
-                  ir_variable *lhs_var,
-                  ir_instruction *bb_last)
-{
-   ir_tree_grafting_visitor v(start, lhs_var);
-
-   if (debug) {
-      fprintf(stderr, "trying to graft: ");
-      lhs_var->fprint(stderr);
-      fprintf(stderr, "\n");
-   }
-
-   for (ir_instruction *ir = (ir_instruction *)start->next;
-        ir != bb_last->next;
-        ir = (ir_instruction *)ir->next) {
-
-      if (debug) {
-         fprintf(stderr, "- ");
-         ir->fprint(stderr);
-         fprintf(stderr, "\n");
-      }
-
-      ir_visitor_status s = ir->accept(&v);
-      if (s == visit_stop)
-         return v.progress;
-   }
-
-   return false;
-}
-
-static void
-tree_grafting_basic_block(ir_instruction *bb_first,
-                          ir_instruction *bb_last,
-                          void *data)
-{
-   struct tree_grafting_info *info = (struct tree_grafting_info *)data;
-   ir_instruction *ir, *next;
-
-   for (ir = bb_first, next = (ir_instruction *)ir->next;
-        ir != bb_last->next;
-        ir = next, next = (ir_instruction *)ir->next) {
-      ir_assignment *assign = ir->as_assignment();
-
-      if (!assign)
-         continue;
-
-      ir_variable *lhs_var = assign->whole_variable_written();
-      if (!lhs_var)
-         continue;
-
-      if (lhs_var->data.mode == ir_var_function_out ||
-          lhs_var->data.mode == ir_var_function_inout ||
-          lhs_var->data.mode == ir_var_shader_out ||
-          lhs_var->data.mode == ir_var_shader_storage)
-         continue;
-
-      ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
-
-      if (!entry->declaration ||
-          entry->assigned_count != 1 ||
-          entry->referenced_count != 2)
-         continue;
-
-      /* Found a possibly graftable assignment.  Now, walk through the
-       * rest of the BB seeing if the deref is here, and if nothing interfered with
-       * pasting its expression's values in between.
-       */
-      info->progress |= try_tree_grafting(assign, lhs_var, bb_last);
-   }
-}
-
-} /* unnamed namespace */
-
-/**
- * Does a tree-grafting pass on the code present in the instruction stream.
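
A source-level sketch of a single graft, with C++ temporaries standing in for IR variables (before/after are illustrative, not part of the pass):

   #include <cassert>

   static float before(float a, float b, float c)
   {
      float t = a * b;   // 't' is written once...
      return t + c;      // ...and read once, so it can be grafted
   }

   static float after(float a, float b, float c)
   {
      return a * b + c;  // RHS pasted into the use; 't' is gone
   }

   int main()
   {
      assert(before(2.0f, 3.0f, 4.0f) == after(2.0f, 3.0f, 4.0f));
   }
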
- */ -bool -do_tree_grafting(exec_list *instructions) -{ - ir_variable_refcount_visitor refs; - struct tree_grafting_info info; - - info.progress = false; - info.refs = &refs; - - visit_list_elements(info.refs, instructions); - - call_for_basic_blocks(instructions, tree_grafting_basic_block, &info); - - return info.progress; -} diff --git a/src/glsl/opt_vectorize.cpp b/src/glsl/opt_vectorize.cpp deleted file mode 100644 index 88318cd8a6e..00000000000 --- a/src/glsl/opt_vectorize.cpp +++ /dev/null @@ -1,407 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file opt_vectorize.cpp - * - * Combines scalar assignments of the same expression (modulo swizzle) to - * multiple channels of the same variable into a single vectorized expression - * and assignment. - * - * Many generated shaders contain scalarized code. That is, they contain - * - * r1.x = log2(v0.x); - * r1.y = log2(v0.y); - * r1.z = log2(v0.z); - * - * rather than - * - * r1.xyz = log2(v0.xyz); - * - * We look for consecutive assignments of the same expression (modulo swizzle) - * to each channel of the same variable. 
- * - * For instance, we want to convert these three scalar operations - * - * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0)))) - * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0)))) - * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0)))) - * - * into a single vector operation - * - * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0)))) - */ - -#include "ir.h" -#include "ir_visitor.h" -#include "ir_optimization.h" -#include "compiler/glsl_types.h" -#include "program/prog_instruction.h" - -namespace { - -class ir_vectorize_visitor : public ir_hierarchical_visitor { -public: - void clear() - { - assignment[0] = NULL; - assignment[1] = NULL; - assignment[2] = NULL; - assignment[3] = NULL; - current_assignment = NULL; - last_assignment = NULL; - channels = 0; - has_swizzle = false; - } - - ir_vectorize_visitor() - { - clear(); - progress = false; - } - - virtual ir_visitor_status visit_enter(ir_assignment *); - virtual ir_visitor_status visit_enter(ir_swizzle *); - virtual ir_visitor_status visit_enter(ir_dereference_array *); - virtual ir_visitor_status visit_enter(ir_expression *); - virtual ir_visitor_status visit_enter(ir_if *); - virtual ir_visitor_status visit_enter(ir_loop *); - virtual ir_visitor_status visit_enter(ir_texture *); - - virtual ir_visitor_status visit_leave(ir_assignment *); - - void try_vectorize(); - - ir_assignment *assignment[4]; - ir_assignment *current_assignment, *last_assignment; - unsigned channels; - bool has_swizzle; - - bool progress; -}; - -} /* unnamed namespace */ - -/** - * Rewrites the swizzles and types of a right-hand side of an assignment. - * - * From the example above, this function would be called (by visit_tree()) on - * the nodes of the tree (expression float log2 (swiz z (var_ref v0))), - * rewriting it into (expression vec3 log2 (swiz xyz (var_ref v0))). - * - * The function operates on ir_expressions (and its operands) and ir_swizzles. - * For expressions it sets a new type and swizzles any non-expression and non- - * swizzle scalar operands into appropriately sized vector arguments. For - * example, if combining - * - * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1)))) - * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1)))) - * - * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on - * (var_ref v1) such that the final result was - * - * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0)) - * (swiz xx (var_ref v1)))) - * - * For swizzles, it sets a new type, and if the variable being swizzled is a - * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the - * data parameter. If the swizzled variable is scalar, then the swizzle was - * added by an earlier call to rewrite_swizzle() on an expression, so the - * mask should not be modified. 
- */ -static void -rewrite_swizzle(ir_instruction *ir, void *data) -{ - ir_swizzle_mask *mask = (ir_swizzle_mask *)data; - - switch (ir->ir_type) { - case ir_type_swizzle: { - ir_swizzle *swz = (ir_swizzle *)ir; - if (swz->val->type->is_vector()) { - swz->mask = *mask; - } - swz->type = glsl_type::get_instance(swz->type->base_type, - mask->num_components, 1); - break; - } - case ir_type_expression: { - ir_expression *expr = (ir_expression *)ir; - expr->type = glsl_type::get_instance(expr->type->base_type, - mask->num_components, 1); - for (unsigned i = 0; i < 4; i++) { - if (expr->operands[i]) { - ir_rvalue *rval = expr->operands[i]->as_rvalue(); - if (rval && rval->type->is_scalar() && - !rval->as_expression() && !rval->as_swizzle()) { - expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0, - mask->num_components); - } - } - } - break; - } - default: - break; - } -} - -/** - * Attempt to vectorize the previously saved assignments, and clear them from - * consideration. - * - * If the assignments are able to be combined, it modifies in-place the last - * assignment seen to be an equivalent vector form of the scalar assignments. - * It then removes the other now obsolete scalar assignments. - */ -void -ir_vectorize_visitor::try_vectorize() -{ - if (this->last_assignment && this->channels > 1) { - ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0}; - - this->last_assignment->write_mask = 0; - - for (unsigned i = 0, j = 0; i < 4; i++) { - if (this->assignment[i]) { - this->last_assignment->write_mask |= 1 << i; - - if (this->assignment[i] != this->last_assignment) { - this->assignment[i]->remove(); - } - - switch (j) { - case 0: mask.x = i; break; - case 1: mask.y = i; break; - case 2: mask.z = i; break; - case 3: mask.w = i; break; - } - - j++; - } - } - - visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask); - - this->progress = true; - } - clear(); -} - -/** - * Returns whether the write mask is a single channel. - */ -static bool -single_channel_write_mask(unsigned write_mask) -{ - return write_mask != 0 && (write_mask & (write_mask - 1)) == 0; -} - -/** - * Translates single-channeled write mask to single-channeled swizzle. - */ -static unsigned -write_mask_to_swizzle(unsigned write_mask) -{ - switch (write_mask) { - case WRITEMASK_X: return SWIZZLE_X; - case WRITEMASK_Y: return SWIZZLE_Y; - case WRITEMASK_Z: return SWIZZLE_Z; - case WRITEMASK_W: return SWIZZLE_W; - } - unreachable("not reached"); -} - -/** - * Returns whether a single-channeled write mask matches a swizzle. - */ -static bool -write_mask_matches_swizzle(unsigned write_mask, - const ir_swizzle *swz) -{ - return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) || - (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) || - (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) || - (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W)); -} - -/** - * Upon entering an ir_assignment, attempt to vectorize the currently tracked - * assignments if the current assignment is not suitable. Keep a pointer to - * the current assignment. - */ -ir_visitor_status -ir_vectorize_visitor::visit_enter(ir_assignment *ir) -{ - ir_dereference *lhs = this->last_assignment != NULL ? - this->last_assignment->lhs : NULL; - ir_rvalue *rhs = this->last_assignment != NULL ? 
- this->last_assignment->rhs : NULL; - - if (ir->condition || - this->channels >= 4 || - !single_channel_write_mask(ir->write_mask) || - this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL || - (lhs && !ir->lhs->equals(lhs)) || - (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) { - try_vectorize(); - } - - this->current_assignment = ir; - - return visit_continue; -} - -/** - * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an - * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask - * matches the current assignment's write mask. - * - * If the write mask doesn't match the swizzle mask, remove the current - * assignment from further consideration. - */ -ir_visitor_status -ir_vectorize_visitor::visit_enter(ir_swizzle *ir) -{ - if (this->current_assignment) { - if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) { - this->has_swizzle = true; - } else { - this->current_assignment = NULL; - } - } - return visit_continue; -} - -/* Upon entering an ir_array_dereference, remove the current assignment from - * further consideration. Since the index of an array dereference must be - * scalar, we are not able to vectorize it. - * - * FINISHME: If all of the scalar indices are identical, we could vectorize. - */ -ir_visitor_status -ir_vectorize_visitor::visit_enter(ir_dereference_array *) -{ - this->current_assignment = NULL; - return visit_continue_with_parent; -} - -/** - * Upon entering an ir_expression, remove the current assignment from further - * consideration if the expression operates horizontally on vectors. - */ -ir_visitor_status -ir_vectorize_visitor::visit_enter(ir_expression *ir) -{ - if (ir->is_horizontal()) { - this->current_assignment = NULL; - return visit_continue_with_parent; - } - return visit_continue; -} - -/* Since there is no statement to visit between the "then" and "else" - * instructions, try to vectorize before, in between, and after them to avoid - * combining statements from different basic blocks. - */ -ir_visitor_status -ir_vectorize_visitor::visit_enter(ir_if *ir) -{ - try_vectorize(); - - visit_list_elements(this, &ir->then_instructions); - try_vectorize(); - - visit_list_elements(this, &ir->else_instructions); - try_vectorize(); - - return visit_continue_with_parent; -} - -/* Since there is no statement to visit between the instructions in the body of - * the loop and the instructions after it, try to vectorize before and after the - * body to avoid combining statements from different basic blocks. - */ -ir_visitor_status -ir_vectorize_visitor::visit_enter(ir_loop *ir) -{ - try_vectorize(); - - visit_list_elements(this, &ir->body_instructions); - try_vectorize(); - - return visit_continue_with_parent; -} - -/** - * Upon entering an ir_texture, remove the current assignment from - * further consideration. Vectorizing multiple texture lookups into one - * is wrong. - */ -ir_visitor_status -ir_vectorize_visitor::visit_enter(ir_texture *) -{ - this->current_assignment = NULL; - return visit_continue_with_parent; -} - -/** - * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if - * the swizzle mask(s) found were appropriate. Also save a pointer in - * ::last_assignment so that we can compare future assignments with it. - * - * Finally, clear ::current_assignment and ::has_swizzle.
- */ -ir_visitor_status -ir_vectorize_visitor::visit_leave(ir_assignment *ir) -{ - if (this->has_swizzle && this->current_assignment) { - assert(this->current_assignment == ir); - - unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask); - this->assignment[channel] = ir; - this->channels++; - - this->last_assignment = this->current_assignment; - } - this->current_assignment = NULL; - this->has_swizzle = false; - return visit_continue; -} - -/** - * Combines scalar assignments of the same expression (modulo swizzle) to - * multiple channels of the same variable into a single vectorized expression - * and assignment. - */ -bool -do_vectorize(exec_list *instructions) -{ - ir_vectorize_visitor v; - - v.run(instructions); - - /* Try to vectorize the last assignments seen. */ - v.try_vectorize(); - - return v.progress; -} diff --git a/src/glsl/program.h b/src/glsl/program.h deleted file mode 100644 index 64f54635f62..00000000000 --- a/src/glsl/program.h +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. - * Copyright (C) 2009 VMware, Inc. All Rights Reserved. - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - */ - -#include "main/core.h" - -#ifdef __cplusplus -extern "C" { -#endif - -extern void -_mesa_glsl_compile_shader(struct gl_context *ctx, struct gl_shader *shader, - bool dump_ast, bool dump_hir); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -extern void -link_shaders(struct gl_context *ctx, struct gl_shader_program *prog); - -extern void -build_program_resource_list(struct gl_shader_program *shProg); - -extern void -linker_error(struct gl_shader_program *prog, const char *fmt, ...) - PRINTFLIKE(2, 3); - -extern void -linker_warning(struct gl_shader_program *prog, const char *fmt, ...) 
- PRINTFLIKE(2, 3); - -extern long -parse_program_resource_name(const GLchar *name, - const GLchar **out_base_name_end); diff --git a/src/glsl/s_expression.cpp b/src/glsl/s_expression.cpp deleted file mode 100644 index f82e155a6b6..00000000000 --- a/src/glsl/s_expression.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#include -#include -#include -#include "s_expression.h" - -s_symbol::s_symbol(const char *str, size_t n) -{ - /* Assume the given string is already nul-terminated and in memory that - * will live as long as this node. - */ - assert(str[n] == '\0'); - this->str = str; -} - -s_list::s_list() -{ -} - -static void -skip_whitespace(const char *&src, char *&symbol_buffer) -{ - size_t n = strspn(src, " \v\t\r\n"); - src += n; - symbol_buffer += n; - /* Also skip Scheme-style comments: semi-colon 'til end of line */ - if (src[0] == ';') { - n = strcspn(src, "\n"); - src += n; - symbol_buffer += n; - skip_whitespace(src, symbol_buffer); - } -} - -static s_expression * -read_atom(void *ctx, const char *&src, char *&symbol_buffer) -{ - s_expression *expr = NULL; - - skip_whitespace(src, symbol_buffer); - - size_t n = strcspn(src, "( \v\t\r\n);"); - if (n == 0) - return NULL; // no atom - - // Check for the special symbol '+INF', which means +Infinity. Note: C99 - // requires strtof to parse '+INF' as +Infinity, but we still support some - // non-C99-compliant compilers (e.g. MSVC). - if (n == 4 && strncmp(src, "+INF", 4) == 0) { - expr = new(ctx) s_float(INFINITY); - } else { - // Check if the atom is a number. - char *float_end = NULL; - float f = _mesa_strtof(src, &float_end); - if (float_end != src) { - char *int_end = NULL; - int i = strtol(src, &int_end, 10); - // If strtof matched more characters, it must have a decimal part - if (float_end > int_end) - expr = new(ctx) s_float(f); - else - expr = new(ctx) s_int(i); - } else { - // Not a number; return a symbol. 
- symbol_buffer[n] = '\0'; - expr = new(ctx) s_symbol(symbol_buffer, n); - } - } - - src += n; - symbol_buffer += n; - - return expr; -} - -static s_expression * -__read_expression(void *ctx, const char *&src, char *&symbol_buffer) -{ - s_expression *atom = read_atom(ctx, src, symbol_buffer); - if (atom != NULL) - return atom; - - skip_whitespace(src, symbol_buffer); - if (src[0] == '(') { - ++src; - ++symbol_buffer; - - s_list *list = new(ctx) s_list; - s_expression *expr; - - while ((expr = __read_expression(ctx, src, symbol_buffer)) != NULL) { - list->subexpressions.push_tail(expr); - } - skip_whitespace(src, symbol_buffer); - if (src[0] != ')') { - printf("Unclosed expression (check your parentheses).\n"); - return NULL; - } - ++src; - ++symbol_buffer; - return list; - } - return NULL; -} - -s_expression * -s_expression::read_expression(void *ctx, const char *&src) -{ - assert(src != NULL); - - /* When we encounter a Symbol, we need to save a nul-terminated copy of - * the string. However, ralloc_strndup'ing every individual Symbol is - * extremely expensive. We could avoid this by simply overwriting the - * next character (guaranteed to be whitespace, parens, or semicolon) with - * a nul-byte. But overwriting non-whitespace would mess up parsing. - * - * So, just copy the whole buffer ahead of time. Walk both, leaving the - * original source string unmodified, and altering the copy to contain the - * necessary nul-bytes whenever we encounter a symbol. - */ - char *symbol_buffer = ralloc_strdup(ctx, src); - return __read_expression(ctx, src, symbol_buffer); -} - -void s_int::print() -{ - printf("%d", this->val); -} - -void s_float::print() -{ - printf("%f", this->val); -} - -void s_symbol::print() -{ - printf("%s", this->str); -} - -void s_list::print() -{ - printf("("); - foreach_in_list(s_expression, expr, &this->subexpressions) { - expr->print(); - if (!expr->next->is_tail_sentinel()) - printf(" "); - } - printf(")"); -} - -// -------------------------------------------------- - -bool -s_pattern::match(s_expression *expr) -{ - switch (type) - { - case EXPR: *p_expr = expr; break; - case LIST: if (expr->is_list()) *p_list = (s_list *) expr; break; - case SYMBOL: if (expr->is_symbol()) *p_symbol = (s_symbol *) expr; break; - case NUMBER: if (expr->is_number()) *p_number = (s_number *) expr; break; - case INT: if (expr->is_int()) *p_int = (s_int *) expr; break; - case STRING: - s_symbol *sym = SX_AS_SYMBOL(expr); - if (sym != NULL && strcmp(sym->value(), literal) == 0) - return true; - return false; - }; - - return *p_expr == expr; -} - -bool -s_match(s_expression *top, unsigned n, s_pattern *pattern, bool partial) -{ - s_list *list = SX_AS_LIST(top); - if (list == NULL) - return false; - - unsigned i = 0; - foreach_in_list(s_expression, expr, &list->subexpressions) { - if (i >= n) - return partial; /* More actual items than the pattern expected */ - - if (expr == NULL || !pattern[i].match(expr)) - return false; - - i++; - } - - if (i < n) - return false; /* Fewer actual items than the pattern expected */ - - return true; -} diff --git a/src/glsl/s_expression.h b/src/glsl/s_expression.h deleted file mode 100644 index f0dffb1b2f8..00000000000 --- a/src/glsl/s_expression.h +++ /dev/null @@ -1,180 +0,0 @@ -/* -*- c++ -*- */ -/* - * Copyright © 2010 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without
limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once -#ifndef S_EXPRESSION_H -#define S_EXPRESSION_H - -#include "main/core.h" /* for Elements */ -#include "util/strtod.h" -#include "list.h" - -/* Type-safe downcasting macros (also safe to pass NULL) */ -#define SX_AS_(t,x) ((x) && ((s_expression*) x)->is_##t()) ? ((s_##t*) (x)) \ - : NULL -#define SX_AS_LIST(x) SX_AS_(list, x) -#define SX_AS_SYMBOL(x) SX_AS_(symbol, x) -#define SX_AS_NUMBER(x) SX_AS_(number, x) -#define SX_AS_INT(x) SX_AS_(int, x) - -/* Pattern matching macros */ -#define MATCH(list, pat) s_match(list, ARRAY_SIZE(pat), pat, false) -#define PARTIAL_MATCH(list, pat) s_match(list, ARRAY_SIZE(pat), pat, true) - -/* For our purposes, S-Expressions are: - * - <int> - * - <float> - * - symbol - * - (expr1 expr2 ... exprN) where exprN is an S-Expression - * - * Unlike LISP/Scheme, we do not support (foo . bar) pairs. - */ -class s_expression : public exec_node -{ -public: - /** - * Read an S-Expression from the given string. - * Advances the supplied pointer to just after the expression read. - * - * Any allocation will be performed with 'ctx' as the ralloc owner. - */ - static s_expression *read_expression(void *ctx, const char *&src); - - /** - * Print out an S-Expression. Useful for debugging. - */ - virtual void print() = 0; - - virtual bool is_list() const { return false; } - virtual bool is_symbol() const { return false; } - virtual bool is_number() const { return false; } - virtual bool is_int() const { return false; } - -protected: - s_expression() { } -}; - -/* Atoms */ - -class s_number : public s_expression -{ -public: - bool is_number() const { return true; } - - virtual float fvalue() = 0; - -protected: - s_number() { } -}; - -class s_int : public s_number -{ -public: - s_int(int x) : val(x) { } - - bool is_int() const { return true; } - - float fvalue() { return float(this->val); } - int value() { return this->val; } - - void print(); - -private: - int val; -}; - -class s_float : public s_number -{ -public: - s_float(float x) : val(x) { } - - float fvalue() { return this->val; } - - void print(); - -private: - float val; -}; - -class s_symbol : public s_expression -{ -public: - s_symbol(const char *, size_t); - - bool is_symbol() const { return true; } - - const char *value() { return this->str; } - - void print(); - -private: - const char *str; -}; - -/* Lists of expressions: (expr1 ...
exprN) */ -class s_list : public s_expression -{ -public: - s_list(); - - virtual bool is_list() const { return true; } - - void print(); - - exec_list subexpressions; -}; - -// ------------------------------------------------------------ - -/** - * Part of a pattern to match - essentially a record holding a pointer to the - * storage for the component to match, along with the appropriate type. - */ -class s_pattern { -public: - s_pattern(s_expression *&s) : p_expr(&s), type(EXPR) { } - s_pattern(s_list *&s) : p_list(&s), type(LIST) { } - s_pattern(s_symbol *&s) : p_symbol(&s), type(SYMBOL) { } - s_pattern(s_number *&s) : p_number(&s), type(NUMBER) { } - s_pattern(s_int *&s) : p_int(&s), type(INT) { } - s_pattern(const char *str) : literal(str), type(STRING) { } - - bool match(s_expression *expr); - -private: - union { - s_expression **p_expr; - s_list **p_list; - s_symbol **p_symbol; - s_number **p_number; - s_int **p_int; - const char *literal; - }; - enum { EXPR, LIST, SYMBOL, NUMBER, INT, STRING } type; -}; - -bool -s_match(s_expression *top, unsigned n, s_pattern *pattern, bool partial); - -#endif /* S_EXPRESSION_H */ diff --git a/src/glsl/standalone_scaffolding.cpp b/src/glsl/standalone_scaffolding.cpp deleted file mode 100644 index d5d214b57cc..00000000000 --- a/src/glsl/standalone_scaffolding.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/* This file declares stripped-down versions of functions that - * normally exist outside of the glsl folder, so that they can be used - * when running the GLSL compiler standalone (for unit testing or - * compiling builtins). - */ - -#include "standalone_scaffolding.h" - -#include -#include -#include -#include "util/ralloc.h" -#include "util/strtod.h" - -void -_mesa_warning(struct gl_context *ctx, const char *fmt, ...) -{ - va_list vargs; - (void) ctx; - - va_start(vargs, fmt); - - /* This output is not thread-safe, but that's good enough for the - * standalone compiler. 
- */ - fprintf(stderr, "Mesa warning: "); - vfprintf(stderr, fmt, vargs); - fprintf(stderr, "\n"); - - va_end(vargs); -} - -void -_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, - struct gl_shader *sh) -{ - (void) ctx; - *ptr = sh; -} - -void -_mesa_shader_debug(struct gl_context *, GLenum, GLuint *, - const char *) -{ -} - -struct gl_shader * -_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type) -{ - struct gl_shader *shader; - - (void) ctx; - - assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER); - shader = rzalloc(NULL, struct gl_shader); - if (shader) { - shader->Type = type; - shader->Stage = _mesa_shader_enum_to_shader_stage(type); - shader->Name = name; - shader->RefCount = 1; - } - return shader; -} - -void -_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh) -{ - free((void *)sh->Source); - free(sh->Label); - ralloc_free(sh); -} - -void -_mesa_clear_shader_program_data(struct gl_shader_program *shProg) -{ - unsigned i; - - shProg->NumUniformStorage = 0; - shProg->UniformStorage = NULL; - shProg->NumUniformRemapTable = 0; - shProg->UniformRemapTable = NULL; - shProg->UniformHash = NULL; - - ralloc_free(shProg->InfoLog); - shProg->InfoLog = ralloc_strdup(shProg, ""); - - ralloc_free(shProg->BufferInterfaceBlocks); - shProg->BufferInterfaceBlocks = NULL; - shProg->NumBufferInterfaceBlocks = 0; - - ralloc_free(shProg->UniformBlocks); - shProg->UniformBlocks = NULL; - shProg->NumUniformBlocks = 0; - - ralloc_free(shProg->ShaderStorageBlocks); - shProg->ShaderStorageBlocks = NULL; - shProg->NumShaderStorageBlocks = 0; - - for (i = 0; i < MESA_SHADER_STAGES; i++) { - ralloc_free(shProg->InterfaceBlockStageIndex[i]); - shProg->InterfaceBlockStageIndex[i] = NULL; - } - - ralloc_free(shProg->UboInterfaceBlockIndex); - shProg->UboInterfaceBlockIndex = NULL; - ralloc_free(shProg->SsboInterfaceBlockIndex); - shProg->SsboInterfaceBlockIndex = NULL; - - ralloc_free(shProg->AtomicBuffers); - shProg->AtomicBuffers = NULL; - shProg->NumAtomicBuffers = 0; -} - -void initialize_context_to_defaults(struct gl_context *ctx, gl_api api) -{ - memset(ctx, 0, sizeof(*ctx)); - - ctx->API = api; - - ctx->Extensions.dummy_false = false; - ctx->Extensions.dummy_true = true; - ctx->Extensions.ARB_compute_shader = true; - ctx->Extensions.ARB_conservative_depth = true; - ctx->Extensions.ARB_draw_instanced = true; - ctx->Extensions.ARB_ES2_compatibility = true; - ctx->Extensions.ARB_ES3_compatibility = true; - ctx->Extensions.ARB_explicit_attrib_location = true; - ctx->Extensions.ARB_fragment_coord_conventions = true; - ctx->Extensions.ARB_fragment_layer_viewport = true; - ctx->Extensions.ARB_gpu_shader5 = true; - ctx->Extensions.ARB_gpu_shader_fp64 = true; - ctx->Extensions.ARB_sample_shading = true; - ctx->Extensions.ARB_shader_bit_encoding = true; - ctx->Extensions.ARB_shader_draw_parameters = true; - ctx->Extensions.ARB_shader_stencil_export = true; - ctx->Extensions.ARB_shader_subroutine = true; - ctx->Extensions.ARB_shader_texture_lod = true; - ctx->Extensions.ARB_shading_language_420pack = true; - ctx->Extensions.ARB_shading_language_packing = true; - ctx->Extensions.ARB_tessellation_shader = true; - ctx->Extensions.ARB_texture_cube_map_array = true; - ctx->Extensions.ARB_texture_gather = true; - ctx->Extensions.ARB_texture_multisample = true; - ctx->Extensions.ARB_texture_query_levels = true; - ctx->Extensions.ARB_texture_query_lod = true; - ctx->Extensions.ARB_uniform_buffer_object = true; - ctx->Extensions.ARB_viewport_array = true; - - 
ctx->Extensions.OES_EGL_image_external = true; - ctx->Extensions.OES_standard_derivatives = true; - - ctx->Extensions.EXT_shader_integer_mix = true; - ctx->Extensions.EXT_texture_array = true; - - ctx->Extensions.NV_texture_rectangle = true; - - ctx->Const.GLSLVersion = 120; - - /* 1.20 minimums. */ - ctx->Const.MaxLights = 8; - ctx->Const.MaxClipPlanes = 6; - ctx->Const.MaxTextureUnits = 2; - ctx->Const.MaxTextureCoordUnits = 2; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxAttribs = 16; - - ctx->Const.Program[MESA_SHADER_VERTEX].MaxUniformComponents = 512; - ctx->Const.Program[MESA_SHADER_VERTEX].MaxOutputComponents = 32; - ctx->Const.MaxVarying = 8; /* == gl_MaxVaryingFloats / 4 */ - ctx->Const.Program[MESA_SHADER_VERTEX].MaxTextureImageUnits = 0; - ctx->Const.MaxCombinedTextureImageUnits = 2; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits = 2; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxUniformComponents = 64; - ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxInputComponents = 32; - - ctx->Const.MaxDrawBuffers = 1; - ctx->Const.MaxComputeWorkGroupCount[0] = 65535; - ctx->Const.MaxComputeWorkGroupCount[1] = 65535; - ctx->Const.MaxComputeWorkGroupCount[2] = 65535; - ctx->Const.MaxComputeWorkGroupSize[0] = 1024; - ctx->Const.MaxComputeWorkGroupSize[1] = 1024; - ctx->Const.MaxComputeWorkGroupSize[2] = 64; - ctx->Const.MaxComputeWorkGroupInvocations = 1024; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits = 16; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxUniformComponents = 1024; - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxInputComponents = 0; /* not used */ - ctx->Const.Program[MESA_SHADER_COMPUTE].MaxOutputComponents = 0; /* not used */ - - /* Set up default shader compiler options. */ - struct gl_shader_compiler_options options; - memset(&options, 0, sizeof(options)); - options.MaxUnrollIterations = 32; - options.MaxIfDepth = UINT_MAX; - - for (int sh = 0; sh < MESA_SHADER_STAGES; ++sh) - memcpy(&ctx->Const.ShaderCompilerOptions[sh], &options, sizeof(options)); - - _mesa_locale_init(); -} diff --git a/src/glsl/standalone_scaffolding.h b/src/glsl/standalone_scaffolding.h deleted file mode 100644 index f853a187bf4..00000000000 --- a/src/glsl/standalone_scaffolding.h +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. 
- */ - -/* This file declares stripped-down versions of functions that - * normally exist outside of the glsl folder, so that they can be used - * when running the GLSL compiler standalone (for unit testing or - * compiling builtins). - */ - -#pragma once -#ifndef STANDALONE_SCAFFOLDING_H -#define STANDALONE_SCAFFOLDING_H - -#include -#include "main/mtypes.h" - -extern "C" void -_mesa_warning(struct gl_context *ctx, const char *fmtString, ... ); - -extern "C" void -_mesa_reference_shader(struct gl_context *ctx, struct gl_shader **ptr, - struct gl_shader *sh); - -extern "C" struct gl_shader * -_mesa_new_shader(struct gl_context *ctx, GLuint name, GLenum type); - -extern "C" void -_mesa_delete_shader(struct gl_context *ctx, struct gl_shader *sh); - -extern "C" void -_mesa_clear_shader_program_data(struct gl_shader_program *); - -extern "C" void -_mesa_shader_debug(struct gl_context *ctx, GLenum type, GLuint *id, - const char *msg); - -static inline gl_shader_stage -_mesa_shader_enum_to_shader_stage(GLenum v) -{ - switch (v) { - case GL_VERTEX_SHADER: - return MESA_SHADER_VERTEX; - case GL_FRAGMENT_SHADER: - return MESA_SHADER_FRAGMENT; - case GL_GEOMETRY_SHADER: - return MESA_SHADER_GEOMETRY; - case GL_TESS_CONTROL_SHADER: - return MESA_SHADER_TESS_CTRL; - case GL_TESS_EVALUATION_SHADER: - return MESA_SHADER_TESS_EVAL; - case GL_COMPUTE_SHADER: - return MESA_SHADER_COMPUTE; - default: - assert(!"bad value in _mesa_shader_enum_to_shader_stage()"); - return MESA_SHADER_VERTEX; - } -} - -/** - * Initialize the given gl_context structure to a reasonable set of - * defaults representing the minimum capabilities required by the - * OpenGL spec. - * - * This is used when compiling builtin functions and in testing, when - * we don't have a connection to an actual driver. - */ -void initialize_context_to_defaults(struct gl_context *ctx, gl_api api); - - -#endif /* STANDALONE_SCAFFOLDING_H */ diff --git a/src/glsl/test.cpp b/src/glsl/test.cpp deleted file mode 100644 index b1ff92ed1d4..00000000000 --- a/src/glsl/test.cpp +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file test.cpp - * - * Standalone tests for the GLSL compiler. - * - * This file provides a standalone executable which can be used to - * test components of the GLSL compiler. - * - * Each test is a function with the same signature as main().
The - main function interprets its first argument as the name of the test - * to run, strips out that argument, and then calls the test function. - */ - -#include -#include -#include - -#include "test_optpass.h" - -/** - * Print proper usage and exit with failure. - */ -static void -usage_fail(const char *name) -{ - printf("*** usage: %s \n", name); - printf("\n"); - printf("Possible commands are:\n"); - printf(" optpass: test an optimization pass in isolation\n"); - exit(EXIT_FAILURE); -} - -static const char *extract_command_from_argv(int *argc, char **argv) -{ - if (*argc < 2) { - usage_fail(argv[0]); - } - const char *command = argv[1]; - --*argc; - memmove(&argv[1], &argv[2], (*argc) * sizeof(argv[1])); - return command; -} - -int main(int argc, char **argv) -{ - const char *command = extract_command_from_argv(&argc, argv); - if (strcmp(command, "optpass") == 0) { - return test_optpass(argc, argv); - } else { - usage_fail(argv[0]); - } - - /* Execution should never reach here. */ - return EXIT_FAILURE; -} diff --git a/src/glsl/test_optpass.cpp b/src/glsl/test_optpass.cpp deleted file mode 100644 index fed1fabf301..00000000000 --- a/src/glsl/test_optpass.cpp +++ /dev/null @@ -1,276 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -/** - * \file test_optpass.cpp - * - * Standalone test for optimization passes. - * - * This file provides the "optpass" command for the standalone - * glsl_test app. It accepts either GLSL or high-level IR as input, - * and performs the optimization passes specified on the command line. - * It outputs the IR, both before and after optimizations.
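- * - * A hypothetical example invocation (the executable name comes from the - * glsl_test app mentioned above; the pass name is one accepted by - * do_optimization() below, and the shader file is a placeholder): - * - * glsl_test optpass --vertex-shader do_dead_code < shader.vert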
- */ - -#include -#include -#include -#include - -#include "ast.h" -#include "ir_optimization.h" -#include "program.h" -#include "ir_reader.h" -#include "standalone_scaffolding.h" - -using namespace std; - -static string read_stdin_to_eof() -{ - stringbuf sb; - cin.get(sb, '\0'); - return sb.str(); -} - -static GLboolean -do_optimization(struct exec_list *ir, const char *optimization, - const struct gl_shader_compiler_options *options) -{ - int int_0; - int int_1; - int int_2; - int int_3; - int int_4; - - if (sscanf(optimization, "do_common_optimization ( %d ) ", &int_0) == 1) { - return do_common_optimization(ir, int_0 != 0, false, options, true); - } else if (strcmp(optimization, "do_algebraic") == 0) { - return do_algebraic(ir, true, options); - } else if (strcmp(optimization, "do_constant_folding") == 0) { - return do_constant_folding(ir); - } else if (strcmp(optimization, "do_constant_variable") == 0) { - return do_constant_variable(ir); - } else if (strcmp(optimization, "do_constant_variable_unlinked") == 0) { - return do_constant_variable_unlinked(ir); - } else if (strcmp(optimization, "do_copy_propagation") == 0) { - return do_copy_propagation(ir); - } else if (strcmp(optimization, "do_copy_propagation_elements") == 0) { - return do_copy_propagation_elements(ir); - } else if (strcmp(optimization, "do_constant_propagation") == 0) { - return do_constant_propagation(ir); - } else if (strcmp(optimization, "do_dead_code") == 0) { - return do_dead_code(ir, false); - } else if (strcmp(optimization, "do_dead_code_local") == 0) { - return do_dead_code_local(ir); - } else if (strcmp(optimization, "do_dead_code_unlinked") == 0) { - return do_dead_code_unlinked(ir); - } else if (strcmp(optimization, "do_dead_functions") == 0) { - return do_dead_functions(ir); - } else if (strcmp(optimization, "do_function_inlining") == 0) { - return do_function_inlining(ir); - } else if (sscanf(optimization, - "do_lower_jumps ( %d , %d , %d , %d , %d ) ", - &int_0, &int_1, &int_2, &int_3, &int_4) == 5) { - return do_lower_jumps(ir, int_0 != 0, int_1 != 0, int_2 != 0, - int_3 != 0, int_4 != 0); - } else if (strcmp(optimization, "do_lower_texture_projection") == 0) { - return do_lower_texture_projection(ir); - } else if (strcmp(optimization, "do_if_simplification") == 0) { - return do_if_simplification(ir); - } else if (sscanf(optimization, "lower_if_to_cond_assign ( %d ) ", - &int_0) == 1) { - return lower_if_to_cond_assign(ir, int_0); - } else if (strcmp(optimization, "do_mat_op_to_vec") == 0) { - return do_mat_op_to_vec(ir); - } else if (strcmp(optimization, "do_noop_swizzle") == 0) { - return do_noop_swizzle(ir); - } else if (strcmp(optimization, "do_structure_splitting") == 0) { - return do_structure_splitting(ir); - } else if (strcmp(optimization, "do_swizzle_swizzle") == 0) { - return do_swizzle_swizzle(ir); - } else if (strcmp(optimization, "do_tree_grafting") == 0) { - return do_tree_grafting(ir); - } else if (strcmp(optimization, "do_vec_index_to_cond_assign") == 0) { - return do_vec_index_to_cond_assign(ir); - } else if (strcmp(optimization, "do_vec_index_to_swizzle") == 0) { - return do_vec_index_to_swizzle(ir); - } else if (strcmp(optimization, "lower_discard") == 0) { - return lower_discard(ir); - } else if (sscanf(optimization, "lower_instructions ( %d ) ", - &int_0) == 1) { - return lower_instructions(ir, int_0); - } else if (strcmp(optimization, "lower_noise") == 0) { - return lower_noise(ir); - } else if (sscanf(optimization, "lower_variable_index_to_cond_assign " - "( %d , %d , %d , %d ) ", 
&int_0, &int_1, &int_2, - &int_3) == 4) { - return lower_variable_index_to_cond_assign(MESA_SHADER_VERTEX, ir, - int_0 != 0, int_1 != 0, - int_2 != 0, int_3 != 0); - } else if (sscanf(optimization, "lower_quadop_vector ( %d ) ", - &int_0) == 1) { - return lower_quadop_vector(ir, int_0 != 0); - } else if (strcmp(optimization, "optimize_redundant_jumps") == 0) { - return optimize_redundant_jumps(ir); - } else { - printf("Unrecognized optimization %s\n", optimization); - exit(EXIT_FAILURE); - return false; - } -} - -static GLboolean -do_optimization_passes(struct exec_list *ir, char **optimizations, - int num_optimizations, bool quiet, - const struct gl_shader_compiler_options *options) -{ - GLboolean overall_progress = false; - - for (int i = 0; i < num_optimizations; ++i) { - const char *optimization = optimizations[i]; - if (!quiet) { - printf("*** Running optimization %s...", optimization); - } - GLboolean progress = do_optimization(ir, optimization, options); - if (!quiet) { - printf("%s\n", progress ? "progress" : "no progress"); - } - validate_ir_tree(ir); - - overall_progress = overall_progress || progress; - } - - return overall_progress; -} - -int test_optpass(int argc, char **argv) -{ - int input_format_ir = 0; /* 0=glsl, 1=ir */ - int loop = 0; - int shader_type = GL_VERTEX_SHADER; - int quiet = 0; - - const struct option optpass_opts[] = { - { "input-ir", no_argument, &input_format_ir, 1 }, - { "input-glsl", no_argument, &input_format_ir, 0 }, - { "loop", no_argument, &loop, 1 }, - { "vertex-shader", no_argument, &shader_type, GL_VERTEX_SHADER }, - { "fragment-shader", no_argument, &shader_type, GL_FRAGMENT_SHADER }, - { "quiet", no_argument, &quiet, 1 }, - { NULL, 0, NULL, 0 } - }; - - int idx = 0; - int c; - while ((c = getopt_long(argc, argv, "", optpass_opts, &idx)) != -1) { - if (c != 0) { - printf("*** usage: %s optpass \n", argv[0]); - printf("\n"); - printf("Possible options are:\n"); - printf(" --input-ir: input format is IR\n"); - printf(" --input-glsl: input format is GLSL (the default)\n"); - printf(" --loop: run optimizations repeatedly until no progress\n"); - printf(" --vertex-shader: test with a vertex shader (the default)\n"); - printf(" --fragment-shader: test with a fragment shader\n"); - exit(EXIT_FAILURE); - } - } - - struct gl_context local_ctx; - struct gl_context *ctx = &local_ctx; - initialize_context_to_defaults(ctx, API_OPENGL_COMPAT); - - ctx->Driver.NewShader = _mesa_new_shader; - ir_variable::temporaries_allocate_names = true; - - struct gl_shader *shader = rzalloc(NULL, struct gl_shader); - shader->Type = shader_type; - shader->Stage = _mesa_shader_enum_to_shader_stage(shader_type); - - string input = read_stdin_to_eof(); - - struct _mesa_glsl_parse_state *state - = new(shader) _mesa_glsl_parse_state(ctx, shader->Stage, shader); - - if (input_format_ir) { - shader->ir = new(shader) exec_list; - _mesa_glsl_initialize_types(state); - _mesa_glsl_read_ir(state, shader->ir, input.c_str(), true); - } else { - shader->Source = input.c_str(); - const char *source = shader->Source; - state->error = glcpp_preprocess(state, &source, &state->info_log, - state->extensions, ctx) != 0; - - if (!state->error) { - _mesa_glsl_lexer_ctor(state, source); - _mesa_glsl_parse(state); - _mesa_glsl_lexer_dtor(state); - } - - shader->ir = new(shader) exec_list; - if (!state->error && !state->translation_unit.is_empty()) - _mesa_ast_to_hir(shader->ir, state); - } - - /* Print out the initial IR */ - if (!state->error && !quiet) { - printf("*** pre-optimization IR:\n"); - 
_mesa_print_ir(stdout, shader->ir, state); - printf("\n--\n"); - } - - /* Optimization passes */ - if (!state->error) { - GLboolean progress; - const struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader_type)]; - do { - progress = do_optimization_passes(shader->ir, &argv[optind], - argc - optind, quiet != 0, options); - } while (loop && progress); - } - - /* Print out the resulting IR */ - if (!state->error) { - if (!quiet) { - printf("*** resulting IR:\n"); - } - _mesa_print_ir(stdout, shader->ir, state); - if (!quiet) { - printf("\n--\n"); - } - } - - if (state->error) { - printf("*** error(s) occurred:\n"); - printf("%s\n", state->info_log); - printf("--\n"); - } - - /* Read the error flag before freeing the state that owns it. */ - bool error = state->error; - - ralloc_free(state); - ralloc_free(shader); - - return error; -} diff --git a/src/glsl/test_optpass.h b/src/glsl/test_optpass.h deleted file mode 100644 index 923ccf3dece..00000000000 --- a/src/glsl/test_optpass.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright © 2011 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once -#ifndef TEST_OPTPASS_H -#define TEST_OPTPASS_H - -int test_optpass(int argc, char **argv); - -#endif /* TEST_OPTPASS_H */ diff --git a/src/glsl/tests/.gitignore b/src/glsl/tests/.gitignore deleted file mode 100644 index 13dcdc4ab73..00000000000 --- a/src/glsl/tests/.gitignore +++ /dev/null @@ -1,5 +0,0 @@ -blob-test -ralloc-test -uniform-initializer-test -sampler-types-test -general-ir-test diff --git a/src/glsl/tests/blob_test.c b/src/glsl/tests/blob_test.c deleted file mode 100644 index 4806029bca6..00000000000 --- a/src/glsl/tests/blob_test.c +++ /dev/null @@ -1,320 +0,0 @@ -/* - * Copyright © 2014 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software.
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/* A collection of unit tests for blob.c */ - -#include -#include -#include -#include - -#include "util/ralloc.h" -#include "blob.h" - -#define bytes_test_str "bytes_test" -#define reserve_test_str "reserve_test" - -/* This placeholder must be the same length as the next overwrite_test_str */ -#define placeholder_str "XXXXXXXXXXXXXX" -#define overwrite_test_str "overwrite_test" -#define uint32_test 0x12345678 -#define uint32_placeholder 0xDEADBEEF -#define uint32_overwrite 0xA1B2C3D4 -#define uint64_test 0x1234567890ABCDEF -#define string_test_str "string_test" - -bool error = false; - -static void -expect_equal(uint64_t expected, uint64_t actual, const char *test) -{ - if (actual != expected) { - fprintf (stderr, "Error: Test '%s' failed: Expected=%ld, Actual=%ld\n", - test, expected, actual); - error = true; - } -} - -static void -expect_unequal(uint64_t expected, uint64_t actual, const char *test) -{ - if (actual == expected) { - fprintf (stderr, "Error: Test '%s' failed: Result=%ld, but expected something different.\n", - test, actual); - error = true; - } -} - -static void -expect_equal_str(const char *expected, const char *actual, const char *test) -{ - if (strcmp(expected, actual)) { - fprintf (stderr, "Error: Test '%s' failed:\n\t" - "Expected=\"%s\", Actual=\"%s\"\n", - test, expected, actual); - error = true; - } -} - -static void -expect_equal_bytes(uint8_t *expected, uint8_t *actual, - size_t num_bytes, const char *test) -{ - size_t i; - - if (memcmp(expected, actual, num_bytes)) { - fprintf (stderr, "Error: Test '%s' failed:\n\t", test); - - fprintf (stderr, "Expected=["); - for (i = 0; i < num_bytes; i++) { - if (i != 0) - fprintf(stderr, ", "); - fprintf(stderr, "0x%02x", expected[i]); - } - fprintf (stderr, "]"); - - fprintf (stderr, "Actual=["); - for (i = 0; i < num_bytes; i++) { - if (i != 0) - fprintf(stderr, ", "); - fprintf(stderr, "0x%02x", actual[i]); - } - fprintf (stderr, "]\n"); - - error = true; - } -} - -/* Test at least one call of each blob_write_foo and blob_read_foo function, - * verifying that we read out everything we wrote, that every byte is - * consumed, and that the overrun bit is not set. - */ -static void -test_write_and_read_functions (void) -{ - void *ctx = ralloc_context(NULL); - struct blob *blob; - struct blob_reader reader; - uint8_t *reserved; - size_t str_offset, uint_offset; - uint8_t reserve_buf[sizeof(reserve_test_str)]; - - blob = blob_create(ctx); - - /*** Test blob by writing one of every possible kind of value.
*/ - - blob_write_bytes(blob, bytes_test_str, sizeof(bytes_test_str)); - - reserved = blob_reserve_bytes(blob, sizeof(reserve_test_str)); - memcpy(reserved, reserve_test_str, sizeof(reserve_test_str)); - - /* Write a placeholder, (to be replaced later via overwrite_bytes) */ - str_offset = blob->size; - blob_write_bytes(blob, placeholder_str, sizeof(placeholder_str)); - - blob_write_uint32(blob, uint32_test); - - /* Write a placeholder, (to be replaced later via overwrite_uint32) */ - uint_offset = blob->size; - blob_write_uint32(blob, uint32_placeholder); - - blob_write_uint64(blob, uint64_test); - - blob_write_intptr(blob, (intptr_t) blob); - - blob_write_string(blob, string_test_str); - - /* Finally, overwrite our placeholders. */ - blob_overwrite_bytes(blob, str_offset, overwrite_test_str, - sizeof(overwrite_test_str)); - blob_overwrite_uint32(blob, uint_offset, uint32_overwrite); - - /*** Now read each value and verify. */ - blob_reader_init(&reader, blob->data, blob->size); - - expect_equal_str(bytes_test_str, - blob_read_bytes(&reader, sizeof(bytes_test_str)), - "blob_write/read_bytes"); - - blob_copy_bytes(&reader, reserve_buf, sizeof(reserve_buf)); - expect_equal_str(reserve_test_str, (char *) reserve_buf, - "blob_reserve_bytes/blob_copy_bytes"); - - expect_equal_str(overwrite_test_str, - blob_read_bytes(&reader, sizeof(overwrite_test_str)), - "blob_overwrite_bytes"); - - expect_equal(uint32_test, blob_read_uint32(&reader), - "blob_write/read_uint32"); - expect_equal(uint32_overwrite, blob_read_uint32(&reader), - "blob_overwrite_uint32"); - expect_equal(uint64_test, blob_read_uint64(&reader), - "blob_write/read_uint64"); - expect_equal((intptr_t) blob, blob_read_intptr(&reader), - "blob_write/read_intptr"); - expect_equal_str(string_test_str, blob_read_string(&reader), - "blob_write/read_string"); - - expect_equal(reader.end - reader.data, reader.current - reader.data, - "read_consumes_all_bytes"); - expect_equal(false, reader.overrun, "read_does_not_overrun"); - - ralloc_free(ctx); -} - -/* Test that data values are written and read with proper alignment. */ -static void -test_alignment(void) -{ - void *ctx = ralloc_context(NULL); - struct blob *blob; - struct blob_reader reader; - uint8_t bytes[] = "ABCDEFGHIJKLMNOP"; - size_t delta, last, num_bytes; - - blob = blob_create(ctx); - - /* First, write an intptr value to the blob and capture that size. This is - * the expected offset between any pair of intptr values (if written with - * alignment). - */ - blob_write_intptr(blob, (intptr_t) blob); - - delta = blob->size; - last = blob->size; - - /* Then loop doing the following: - * - * 1. Write an unaligned number of bytes - * 2. Verify that write results in an unaligned size - * 3. Write an intptr_t value - * 4. Verify that that write results in an aligned size - */ - for (num_bytes = 1; num_bytes < sizeof(intptr_t); num_bytes++) { - blob_write_bytes(blob, bytes, num_bytes); - - expect_unequal(delta, blob->size - last, "unaligned write of bytes"); - - blob_write_intptr(blob, (intptr_t) blob); - - expect_equal(2 * delta, blob->size - last, "aligned write of intptr"); - - last = blob->size; - } - - /* Finally, test that reading also does proper alignment. Since we know - * that values were written with all the right alignment, all we have to do - * here is verify that correct values are read.
- */ - blob_reader_init(&reader, blob->data, blob->size); - - expect_equal((intptr_t) blob, blob_read_intptr(&reader), - "read of initial, aligned intptr_t"); - - for (num_bytes = 1; num_bytes < sizeof(intptr_t); num_bytes++) { - expect_equal_bytes(bytes, blob_read_bytes(&reader, num_bytes), - num_bytes, "unaligned read of bytes"); - expect_equal((intptr_t) blob, blob_read_intptr(&reader), - "aligned read of intptr_t"); - } - - ralloc_free(ctx); -} - -/* Test that we detect overrun. */ -static void -test_overrun(void) -{ - void *ctx = ralloc_context(NULL); - struct blob *blob; - struct blob_reader reader; - uint32_t value = 0xdeadbeef; - - blob = blob_create(ctx); - - blob_write_uint32(blob, value); - - blob_reader_init(&reader, blob->data, blob->size); - - expect_equal(value, blob_read_uint32(&reader), "read before overrun"); - expect_equal(false, reader.overrun, "overrun flag not set"); - expect_equal(0, blob_read_uint32(&reader), "read at overrun"); - expect_equal(true, reader.overrun, "overrun flag set"); - - ralloc_free(ctx); -} - -/* Test that we can read and write some large objects (exercising the code in - * the blob_write functions to realloc blob->data). - */ -static void -test_big_objects(void) -{ - void *ctx = ralloc_context(NULL); - struct blob *blob; - struct blob_reader reader; - int size = 1000; - int count = 1000; - size_t i; - char *buf; - - blob = blob_create(ctx); - - /* Initialize our buffer. */ - buf = ralloc_size(ctx, size); - for (i = 0; i < size; i++) { - buf[i] = i % 256; - } - - /* Write it many times. */ - for (i = 0; i < count; i++) { - blob_write_bytes(blob, buf, size); - } - - blob_reader_init(&reader, blob->data, blob->size); - - /* Read and verify it many times. */ - for (i = 0; i < count; i++) { - expect_equal_bytes((uint8_t *) buf, blob_read_bytes(&reader, size), size, - "read of large objects"); - } - - expect_equal(reader.end - reader.data, reader.current - reader.data, - "number of bytes read reading large objects"); - - expect_equal(false, reader.overrun, - "overrun flag not set reading large objects"); - - ralloc_free(ctx); -} - -int -main (void) -{ - test_write_and_read_functions (); - test_alignment (); - test_overrun (); - test_big_objects (); - - return error ? 1 : 0; -} diff --git a/src/glsl/tests/builtin_variable_test.cpp b/src/glsl/tests/builtin_variable_test.cpp deleted file mode 100644 index 11e384a0722..00000000000 --- a/src/glsl/tests/builtin_variable_test.cpp +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ -#include -#include "standalone_scaffolding.h" -#include "main/compiler.h" -#include "main/mtypes.h" -#include "main/macros.h" -#include "ir.h" -#include "glsl_parser_extras.h" -#include "glsl_symbol_table.h" - -class common_builtin : public ::testing::Test { -public: - common_builtin(GLenum shader_type) - : shader_type(shader_type) - { - /* empty */ - } - - virtual void SetUp(); - virtual void TearDown(); - - void string_starts_with_prefix(const char *str, const char *prefix); - void names_start_with_gl(); - void uniforms_and_system_values_dont_have_explicit_location(); - void constants_are_constant(); - void no_invalid_variable_modes(); - - GLenum shader_type; - struct _mesa_glsl_parse_state *state; - struct gl_shader *shader; - void *mem_ctx; - gl_context ctx; - exec_list ir; -}; - -void -common_builtin::SetUp() -{ - this->mem_ctx = ralloc_context(NULL); - this->ir.make_empty(); - - initialize_context_to_defaults(&this->ctx, API_OPENGL_COMPAT); - - this->shader = rzalloc(this->mem_ctx, gl_shader); - this->shader->Type = this->shader_type; - this->shader->Stage = _mesa_shader_enum_to_shader_stage(this->shader_type); - - this->state = - new(mem_ctx) _mesa_glsl_parse_state(&this->ctx, this->shader->Stage, - this->shader); - - _mesa_glsl_initialize_types(this->state); - _mesa_glsl_initialize_variables(&this->ir, this->state); -} - -void -common_builtin::TearDown() -{ - ralloc_free(this->mem_ctx); - this->mem_ctx = NULL; -} - -void -common_builtin::string_starts_with_prefix(const char *str, const char *prefix) -{ - const size_t len = strlen(prefix); - char *const name_prefix = new char[len + 1]; - - strncpy(name_prefix, str, len); - name_prefix[len] = '\0'; - EXPECT_STREQ(prefix, name_prefix) << "Bad name " << str; - - delete [] name_prefix; -} - -void -common_builtin::names_start_with_gl() -{ - foreach_in_list(ir_instruction, node, &this->ir) { - ir_variable *const var = node->as_variable(); - - string_starts_with_prefix(var->name, "gl_"); - } -} - -void -common_builtin::uniforms_and_system_values_dont_have_explicit_location() -{ - foreach_in_list(ir_instruction, node, &this->ir) { - ir_variable *const var = node->as_variable(); - - if (var->data.mode != ir_var_uniform && var->data.mode != ir_var_system_value) - continue; - - EXPECT_FALSE(var->data.explicit_location); - EXPECT_EQ(-1, var->data.location); - } -} - -void -common_builtin::constants_are_constant() -{ - foreach_in_list(ir_instruction, node, &this->ir) { - ir_variable *const var = node->as_variable(); - - if (var->data.mode != ir_var_auto) - continue; - - EXPECT_FALSE(var->data.explicit_location); - EXPECT_EQ(-1, var->data.location); - EXPECT_TRUE(var->data.read_only); - } -} - -void -common_builtin::no_invalid_variable_modes() -{ - foreach_in_list(ir_instruction, node, &this->ir) { - ir_variable *const var = node->as_variable(); - - switch (var->data.mode) { - case ir_var_auto: - case ir_var_uniform: - case ir_var_shader_in: - case ir_var_shader_out: - case ir_var_system_value: - break; - - default: - ADD_FAILURE() << "Built-in variable " << var->name - << " has an invalid mode " << int(var->data.mode); - break; - } - } -} - -/************************************************************/ - -class vertex_builtin : public common_builtin { -public: - 
vertex_builtin() - : common_builtin(GL_VERTEX_SHADER) - { - /* empty */ - } -}; - -TEST_F(vertex_builtin, names_start_with_gl) -{ - common_builtin::names_start_with_gl(); -} - -TEST_F(vertex_builtin, inputs_have_explicit_location) -{ - foreach_in_list(ir_instruction, node, &this->ir) { - ir_variable *const var = node->as_variable(); - - if (var->data.mode != ir_var_shader_in) - continue; - - EXPECT_TRUE(var->data.explicit_location); - EXPECT_NE(-1, var->data.location); - EXPECT_GT(VERT_ATTRIB_GENERIC0, var->data.location); - EXPECT_EQ(0u, var->data.location_frac); - } -} - -TEST_F(vertex_builtin, outputs_have_explicit_location) -{ - foreach_in_list(ir_instruction, node, &this->ir) { - ir_variable *const var = node->as_variable(); - - if (var->data.mode != ir_var_shader_out) - continue; - - EXPECT_TRUE(var->data.explicit_location); - EXPECT_NE(-1, var->data.location); - EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); - EXPECT_EQ(0u, var->data.location_frac); - - /* Several varyings only exist in the fragment shader. Be sure that no - * outputs with these locations exist. - */ - EXPECT_NE(VARYING_SLOT_PNTC, var->data.location); - EXPECT_NE(VARYING_SLOT_FACE, var->data.location); - EXPECT_NE(VARYING_SLOT_PRIMITIVE_ID, var->data.location); - } -} - -TEST_F(vertex_builtin, uniforms_and_system_values_dont_have_explicit_location) -{ - common_builtin::uniforms_and_system_values_dont_have_explicit_location(); -} - -TEST_F(vertex_builtin, constants_are_constant) -{ - common_builtin::constants_are_constant(); -} - -TEST_F(vertex_builtin, no_invalid_variable_modes) -{ - common_builtin::no_invalid_variable_modes(); -} - -/********************************************************************/ - -class fragment_builtin : public common_builtin { -public: - fragment_builtin() - : common_builtin(GL_FRAGMENT_SHADER) - { - /* empty */ - } -}; - -TEST_F(fragment_builtin, names_start_with_gl) -{ - common_builtin::names_start_with_gl(); -} - -TEST_F(fragment_builtin, inputs_have_explicit_location) -{ - foreach_in_list(ir_instruction, node, &this->ir) { - ir_variable *const var = node->as_variable(); - - if (var->data.mode != ir_var_shader_in) - continue; - - EXPECT_TRUE(var->data.explicit_location); - EXPECT_NE(-1, var->data.location); - EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); - EXPECT_EQ(0u, var->data.location_frac); - - /* Several varyings only exist in the vertex / geometry shader. Be sure - * that no inputs with these locations exist. - */ - EXPECT_TRUE(_mesa_varying_slot_in_fs((gl_varying_slot) var->data.location)); - } -} - -TEST_F(fragment_builtin, outputs_have_explicit_location) -{ - foreach_in_list(ir_instruction, node, &this->ir) { - ir_variable *const var = node->as_variable(); - - if (var->data.mode != ir_var_shader_out) - continue; - - EXPECT_TRUE(var->data.explicit_location); - EXPECT_NE(-1, var->data.location); - - /* gl_FragData[] has location FRAG_RESULT_DATA0. Locations beyond that - * are invalid. 
- */ - EXPECT_GE(FRAG_RESULT_DATA0, var->data.location); - - EXPECT_EQ(0u, var->data.location_frac); - } -} - -TEST_F(fragment_builtin, uniforms_and_system_values_dont_have_explicit_location) -{ - common_builtin::uniforms_and_system_values_dont_have_explicit_location(); -} - -TEST_F(fragment_builtin, constants_are_constant) -{ - common_builtin::constants_are_constant(); -} - -TEST_F(fragment_builtin, no_invalid_variable_modes) -{ - common_builtin::no_invalid_variable_modes(); -} - -/********************************************************************/ - -class geometry_builtin : public common_builtin { -public: - geometry_builtin() - : common_builtin(GL_GEOMETRY_SHADER) - { - /* empty */ - } -}; - -TEST_F(geometry_builtin, names_start_with_gl) -{ - common_builtin::names_start_with_gl(); -} - -TEST_F(geometry_builtin, inputs_have_explicit_location) -{ - foreach_in_list(ir_instruction, node, &this->ir) { - ir_variable *const var = node->as_variable(); - - if (var->data.mode != ir_var_shader_in) - continue; - - if (var->is_interface_instance()) { - EXPECT_STREQ("gl_in", var->name); - EXPECT_FALSE(var->data.explicit_location); - EXPECT_EQ(-1, var->data.location); - - ASSERT_TRUE(var->type->is_array()); - - const glsl_type *const instance_type = var->type->fields.array; - - for (unsigned i = 0; i < instance_type->length; i++) { - const glsl_struct_field *const input = - &instance_type->fields.structure[i]; - - string_starts_with_prefix(input->name, "gl_"); - EXPECT_NE(-1, input->location); - EXPECT_GT(VARYING_SLOT_VAR0, input->location); - - /* Several varyings only exist in the fragment shader. Be sure - * that no inputs with these locations exist. - */ - EXPECT_NE(VARYING_SLOT_PNTC, input->location); - EXPECT_NE(VARYING_SLOT_FACE, input->location); - } - } else { - EXPECT_TRUE(var->data.explicit_location); - EXPECT_NE(-1, var->data.location); - EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); - EXPECT_EQ(0u, var->data.location_frac); - } - - /* Several varyings only exist in the fragment shader. Be sure that no - * inputs with these locations exist. - */ - EXPECT_NE(VARYING_SLOT_PNTC, var->data.location); - EXPECT_NE(VARYING_SLOT_FACE, var->data.location); - } -} - -TEST_F(geometry_builtin, outputs_have_explicit_location) -{ - foreach_in_list(ir_instruction, node, &this->ir) { - ir_variable *const var = node->as_variable(); - - if (var->data.mode != ir_var_shader_out) - continue; - - EXPECT_TRUE(var->data.explicit_location); - EXPECT_NE(-1, var->data.location); - EXPECT_GT(VARYING_SLOT_VAR0, var->data.location); - EXPECT_EQ(0u, var->data.location_frac); - - /* Several varyings only exist in the fragment shader. Be sure that no - * outputs with these locations exist. 
- */ - EXPECT_NE(VARYING_SLOT_PNTC, var->data.location); - EXPECT_NE(VARYING_SLOT_FACE, var->data.location); - } -} - -TEST_F(geometry_builtin, uniforms_and_system_values_dont_have_explicit_location) -{ - common_builtin::uniforms_and_system_values_dont_have_explicit_location(); -} - -TEST_F(geometry_builtin, constants_are_constant) -{ - common_builtin::constants_are_constant(); -} - -TEST_F(geometry_builtin, no_invalid_variable_modes) -{ - common_builtin::no_invalid_variable_modes(); -} diff --git a/src/glsl/tests/compare_ir b/src/glsl/tests/compare_ir deleted file mode 100755 index a40fc810cf3..00000000000 --- a/src/glsl/tests/compare_ir +++ /dev/null @@ -1,59 +0,0 @@ -#!/usr/bin/env python -# coding=utf-8 -# -# Copyright © 2011 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# Compare two files containing IR code. Ignore formatting differences -# and declaration order. 
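-#
-# A hypothetical illustration of the equivalence being checked (the helper
-# names come from sexps.py, imported below): both of these inputs differ
-# only in layout and declaration order, so they parse and sort to the same
-# structure and the script would exit 0 for them:
-#
-#   a = sort_decls(parse_sexp('((declare (in) float x) (declare (out) float y))'))
-#   b = sort_decls(parse_sexp('((declare (out) float y)\n (declare (in) float x))'))
-#   assert a == b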
-
-import os
-import os.path
-import subprocess
-import sys
-import tempfile
-
-from sexps import *
-
-if len(sys.argv) != 3:
-    print 'Usage: compare_ir <file1> <file2>'
-    exit(1)
-
-with open(sys.argv[1]) as f:
-    ir1 = sort_decls(parse_sexp(f.read()))
-with open(sys.argv[2]) as f:
-    ir2 = sort_decls(parse_sexp(f.read()))
-
-if ir1 == ir2:
-    exit(0)
-else:
-    file1, path1 = tempfile.mkstemp(os.path.basename(sys.argv[1]))
-    file2, path2 = tempfile.mkstemp(os.path.basename(sys.argv[2]))
-    try:
-        os.write(file1, '{0}\n'.format(sexp_to_string(ir1)))
-        os.close(file1)
-        os.write(file2, '{0}\n'.format(sexp_to_string(ir2)))
-        os.close(file2)
-        subprocess.call(['diff', '-u', path1, path2])
-    finally:
-        os.remove(path1)
-        os.remove(path2)
-    exit(1)
diff --git a/src/glsl/tests/copy_constant_to_storage_tests.cpp b/src/glsl/tests/copy_constant_to_storage_tests.cpp
deleted file mode 100644
index cd48bc523c1..00000000000
--- a/src/glsl/tests/copy_constant_to_storage_tests.cpp
+++ /dev/null
@@ -1,300 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include <gtest/gtest.h>
-#include "main/compiler.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "util/ralloc.h"
-#include "uniform_initializer_utils.h"
-
-namespace linker {
-extern void
-copy_constant_to_storage(union gl_constant_value *storage,
-                         const ir_constant *val,
-                         const enum glsl_base_type base_type,
-                         const unsigned int elements,
-                         unsigned int boolean_true);
-}
-
-class copy_constant_to_storage : public ::testing::Test {
-public:
-   void int_test(unsigned rows);
-   void uint_test(unsigned rows);
-   void bool_test(unsigned rows);
-   void sampler_test();
-   void float_test(unsigned columns, unsigned rows);
-
-   virtual void SetUp();
-   virtual void TearDown();
-
-   gl_constant_value storage[17];
-   void *mem_ctx;
-};
-
-void
-copy_constant_to_storage::SetUp()
-{
-   this->mem_ctx = ralloc_context(NULL);
-}
-
-void
-copy_constant_to_storage::TearDown()
-{
-   ralloc_free(this->mem_ctx);
-   this->mem_ctx = NULL;
-}
-
-void
-copy_constant_to_storage::int_test(unsigned rows)
-{
-   ir_constant *val;
-   generate_data(mem_ctx, GLSL_TYPE_INT, 1, rows, val);
-
-   const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components();
-   fill_storage_array_with_sentinels(storage,
-                                     val->type->components(),
-                                     red_zone_size);
-
-   linker::copy_constant_to_storage(storage,
-                                    val,
-                                    val->type->base_type,
-                                    val->type->components(),
-                                    0xF00F);
-
-   verify_data(storage, 0, val, red_zone_size, 0xF00F);
-}
-
-void
-copy_constant_to_storage::uint_test(unsigned rows)
-{
-   ir_constant *val;
-   generate_data(mem_ctx, GLSL_TYPE_UINT, 1, rows, val);
-
-   const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components();
-   fill_storage_array_with_sentinels(storage,
-                                     val->type->components(),
-                                     red_zone_size);
-
-   linker::copy_constant_to_storage(storage,
-                                    val,
-                                    val->type->base_type,
-                                    val->type->components(),
-                                    0xF00F);
-
-   verify_data(storage, 0, val, red_zone_size, 0xF00F);
-}
-
-void
-copy_constant_to_storage::float_test(unsigned columns, unsigned rows)
-{
-   ir_constant *val;
-   generate_data(mem_ctx, GLSL_TYPE_FLOAT, columns, rows, val);
-
-   const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components();
-   fill_storage_array_with_sentinels(storage,
-                                     val->type->components(),
-                                     red_zone_size);
-
-   linker::copy_constant_to_storage(storage,
-                                    val,
-                                    val->type->base_type,
-                                    val->type->components(),
-                                    0xF00F);
-
-   verify_data(storage, 0, val, red_zone_size, 0xF00F);
-}
-
-void
-copy_constant_to_storage::bool_test(unsigned rows)
-{
-   ir_constant *val;
-   generate_data(mem_ctx, GLSL_TYPE_BOOL, 1, rows, val);
-
-   const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components();
-   fill_storage_array_with_sentinels(storage,
-                                     val->type->components(),
-                                     red_zone_size);
-
-   linker::copy_constant_to_storage(storage,
-                                    val,
-                                    val->type->base_type,
-                                    val->type->components(),
-                                    0xF00F);
-
-   verify_data(storage, 0, val, red_zone_size, 0xF00F);
-}
-
-/**
- * The only difference between this test and int_test is that the base type
- * passed to \c linker::copy_constant_to_storage is hard-coded to \c
- * GLSL_TYPE_SAMPLER instead of using the base type from the constant.
- */ -void -copy_constant_to_storage::sampler_test(void) -{ - ir_constant *val; - generate_data(mem_ctx, GLSL_TYPE_INT, 1, 1, val); - - const unsigned red_zone_size = ARRAY_SIZE(storage) - val->type->components(); - fill_storage_array_with_sentinels(storage, - val->type->components(), - red_zone_size); - - linker::copy_constant_to_storage(storage, - val, - GLSL_TYPE_SAMPLER, - val->type->components(), - 0xF00F); - - verify_data(storage, 0, val, red_zone_size, 0xF00F); -} - -TEST_F(copy_constant_to_storage, bool_uniform) -{ - bool_test(1); -} - -TEST_F(copy_constant_to_storage, bvec2_uniform) -{ - bool_test(2); -} - -TEST_F(copy_constant_to_storage, bvec3_uniform) -{ - bool_test(3); -} - -TEST_F(copy_constant_to_storage, bvec4_uniform) -{ - bool_test(4); -} - -TEST_F(copy_constant_to_storage, int_uniform) -{ - int_test(1); -} - -TEST_F(copy_constant_to_storage, ivec2_uniform) -{ - int_test(2); -} - -TEST_F(copy_constant_to_storage, ivec3_uniform) -{ - int_test(3); -} - -TEST_F(copy_constant_to_storage, ivec4_uniform) -{ - int_test(4); -} - -TEST_F(copy_constant_to_storage, uint_uniform) -{ - uint_test(1); -} - -TEST_F(copy_constant_to_storage, uvec2_uniform) -{ - uint_test(2); -} - -TEST_F(copy_constant_to_storage, uvec3_uniform) -{ - uint_test(3); -} - -TEST_F(copy_constant_to_storage, uvec4_uniform) -{ - uint_test(4); -} - -TEST_F(copy_constant_to_storage, float_uniform) -{ - float_test(1, 1); -} - -TEST_F(copy_constant_to_storage, vec2_uniform) -{ - float_test(1, 2); -} - -TEST_F(copy_constant_to_storage, vec3_uniform) -{ - float_test(1, 3); -} - -TEST_F(copy_constant_to_storage, vec4_uniform) -{ - float_test(1, 4); -} - -TEST_F(copy_constant_to_storage, mat2x2_uniform) -{ - float_test(2, 2); -} - -TEST_F(copy_constant_to_storage, mat2x3_uniform) -{ - float_test(2, 3); -} - -TEST_F(copy_constant_to_storage, mat2x4_uniform) -{ - float_test(2, 4); -} - -TEST_F(copy_constant_to_storage, mat3x2_uniform) -{ - float_test(3, 2); -} - -TEST_F(copy_constant_to_storage, mat3x3_uniform) -{ - float_test(3, 3); -} - -TEST_F(copy_constant_to_storage, mat3x4_uniform) -{ - float_test(3, 4); -} - -TEST_F(copy_constant_to_storage, mat4x2_uniform) -{ - float_test(4, 2); -} - -TEST_F(copy_constant_to_storage, mat4x3_uniform) -{ - float_test(4, 3); -} - -TEST_F(copy_constant_to_storage, mat4x4_uniform) -{ - float_test(4, 4); -} - -TEST_F(copy_constant_to_storage, sampler_uniform) -{ - sampler_test(); -} diff --git a/src/glsl/tests/general_ir_test.cpp b/src/glsl/tests/general_ir_test.cpp deleted file mode 100644 index 217305bf847..00000000000 --- a/src/glsl/tests/general_ir_test.cpp +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include <gtest/gtest.h>
-#include "main/compiler.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "ir.h"
-
-TEST(ir_variable_constructor, interface)
-{
-   void *mem_ctx = ralloc_context(NULL);
-
-   static const glsl_struct_field f[] = {
-      glsl_struct_field(glsl_type::vec(4), "v")
-   };
-
-   const glsl_type *const interface =
-      glsl_type::get_interface_instance(f,
-                                        ARRAY_SIZE(f),
-                                        GLSL_INTERFACE_PACKING_STD140,
-                                        "simple_interface");
-
-   static const char name[] = "named_instance";
-
-   ir_variable *const v =
-      new(mem_ctx) ir_variable(interface, name, ir_var_uniform);
-
-   EXPECT_STREQ(name, v->name);
-   EXPECT_NE(name, v->name);
-   EXPECT_EQ(interface, v->type);
-   EXPECT_EQ(interface, v->get_interface_type());
-}
-
-TEST(ir_variable_constructor, interface_array)
-{
-   void *mem_ctx = ralloc_context(NULL);
-
-   static const glsl_struct_field f[] = {
-      glsl_struct_field(glsl_type::vec(4), "v")
-   };
-
-   const glsl_type *const interface =
-      glsl_type::get_interface_instance(f,
-                                        ARRAY_SIZE(f),
-                                        GLSL_INTERFACE_PACKING_STD140,
-                                        "simple_interface");
-
-   const glsl_type *const interface_array =
-      glsl_type::get_array_instance(interface, 2);
-
-   static const char name[] = "array_instance";
-
-   ir_variable *const v =
-      new(mem_ctx) ir_variable(interface_array, name, ir_var_uniform);
-
-   EXPECT_STREQ(name, v->name);
-   EXPECT_NE(name, v->name);
-   EXPECT_EQ(interface_array, v->type);
-   EXPECT_EQ(interface, v->get_interface_type());
-}
diff --git a/src/glsl/tests/invalidate_locations_test.cpp b/src/glsl/tests/invalidate_locations_test.cpp
deleted file mode 100644
index ba94d7e3a21..00000000000
--- a/src/glsl/tests/invalidate_locations_test.cpp
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include <gtest/gtest.h>
-#include "main/compiler.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "util/ralloc.h"
-#include "ir.h"
-#include "linker.h"
-
-/**
- * \file invalidate_locations_test.cpp
- *
- * Test various aspects of linking shader stage inputs and outputs.
- */ - -class invalidate_locations : public ::testing::Test { -public: - virtual void SetUp(); - virtual void TearDown(); - - void *mem_ctx; - exec_list ir; -}; - -void -invalidate_locations::SetUp() -{ - this->mem_ctx = ralloc_context(NULL); - this->ir.make_empty(); -} - -void -invalidate_locations::TearDown() -{ - ralloc_free(this->mem_ctx); - this->mem_ctx = NULL; -} - -TEST_F(invalidate_locations, simple_vertex_in_generic) -{ - ir_variable *const var = - new(mem_ctx) ir_variable(glsl_type::vec(4), - "a", - ir_var_shader_in); - - EXPECT_FALSE(var->data.explicit_location); - EXPECT_EQ(-1, var->data.location); - - var->data.location = VERT_ATTRIB_GENERIC0; - var->data.location_frac = 2; - - ir.push_tail(var); - - link_invalidate_variable_locations(&ir); - - EXPECT_EQ(-1, var->data.location); - EXPECT_EQ(0u, var->data.location_frac); - EXPECT_FALSE(var->data.explicit_location); - EXPECT_TRUE(var->data.is_unmatched_generic_inout); -} - -TEST_F(invalidate_locations, explicit_location_vertex_in_generic) -{ - ir_variable *const var = - new(mem_ctx) ir_variable(glsl_type::vec(4), - "a", - ir_var_shader_in); - - EXPECT_FALSE(var->data.explicit_location); - EXPECT_EQ(-1, var->data.location); - - var->data.location = VERT_ATTRIB_GENERIC0; - var->data.explicit_location = true; - - ir.push_tail(var); - - link_invalidate_variable_locations(&ir); - - EXPECT_EQ(VERT_ATTRIB_GENERIC0, var->data.location); - EXPECT_EQ(0u, var->data.location_frac); - EXPECT_TRUE(var->data.explicit_location); - EXPECT_FALSE(var->data.is_unmatched_generic_inout); -} - -TEST_F(invalidate_locations, explicit_location_frac_vertex_in_generic) -{ - ir_variable *const var = - new(mem_ctx) ir_variable(glsl_type::vec(4), - "a", - ir_var_shader_in); - - EXPECT_FALSE(var->data.explicit_location); - EXPECT_EQ(-1, var->data.location); - - var->data.location = VERT_ATTRIB_GENERIC0; - var->data.location_frac = 2; - var->data.explicit_location = true; - - ir.push_tail(var); - - link_invalidate_variable_locations(&ir); - - EXPECT_EQ(VERT_ATTRIB_GENERIC0, var->data.location); - EXPECT_EQ(2u, var->data.location_frac); - EXPECT_TRUE(var->data.explicit_location); - EXPECT_FALSE(var->data.is_unmatched_generic_inout); -} - -TEST_F(invalidate_locations, vertex_in_builtin) -{ - ir_variable *const var = - new(mem_ctx) ir_variable(glsl_type::vec(4), - "gl_Vertex", - ir_var_shader_in); - - EXPECT_FALSE(var->data.explicit_location); - EXPECT_EQ(-1, var->data.location); - - var->data.location = VERT_ATTRIB_POS; - var->data.explicit_location = true; - - ir.push_tail(var); - - link_invalidate_variable_locations(&ir); - - EXPECT_EQ(VERT_ATTRIB_POS, var->data.location); - EXPECT_EQ(0u, var->data.location_frac); - EXPECT_TRUE(var->data.explicit_location); - EXPECT_FALSE(var->data.is_unmatched_generic_inout); -} - -TEST_F(invalidate_locations, simple_vertex_out_generic) -{ - ir_variable *const var = - new(mem_ctx) ir_variable(glsl_type::vec(4), - "a", - ir_var_shader_out); - - EXPECT_FALSE(var->data.explicit_location); - EXPECT_EQ(-1, var->data.location); - - var->data.location = VARYING_SLOT_VAR0; - - ir.push_tail(var); - - link_invalidate_variable_locations(&ir); - - EXPECT_EQ(-1, var->data.location); - EXPECT_EQ(0u, var->data.location_frac); - EXPECT_FALSE(var->data.explicit_location); - EXPECT_TRUE(var->data.is_unmatched_generic_inout); -} - -TEST_F(invalidate_locations, vertex_out_builtin) -{ - ir_variable *const var = - new(mem_ctx) ir_variable(glsl_type::vec(4), - "gl_FrontColor", - ir_var_shader_out); - - EXPECT_FALSE(var->data.explicit_location); - 
EXPECT_EQ(-1, var->data.location); - - var->data.location = VARYING_SLOT_COL0; - var->data.explicit_location = true; - - ir.push_tail(var); - - link_invalidate_variable_locations(&ir); - - EXPECT_EQ(VARYING_SLOT_COL0, var->data.location); - EXPECT_EQ(0u, var->data.location_frac); - EXPECT_TRUE(var->data.explicit_location); - EXPECT_FALSE(var->data.is_unmatched_generic_inout); -} diff --git a/src/glsl/tests/lower_jumps/.gitignore b/src/glsl/tests/lower_jumps/.gitignore deleted file mode 100644 index e98df627fd8..00000000000 --- a/src/glsl/tests/lower_jumps/.gitignore +++ /dev/null @@ -1,3 +0,0 @@ -*.opt_test -*.expected -*.out diff --git a/src/glsl/tests/lower_jumps/create_test_cases.py b/src/glsl/tests/lower_jumps/create_test_cases.py deleted file mode 100644 index 3be1079bc14..00000000000 --- a/src/glsl/tests/lower_jumps/create_test_cases.py +++ /dev/null @@ -1,643 +0,0 @@ -# coding=utf-8 -# -# Copyright © 2011 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -import os -import os.path -import re -import subprocess -import sys - -sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..')) # For access to sexps.py, which is in parent dir -from sexps import * - -def make_test_case(f_name, ret_type, body): - """Create a simple optimization test case consisting of a single - function with the given name, return type, and body. - - Global declarations are automatically created for any undeclared - variables that are referenced by the function. All undeclared - variables are assumed to be floats. - """ - check_sexp(body) - declarations = {} - def make_declarations(sexp, already_declared = ()): - if isinstance(sexp, list): - if len(sexp) == 2 and sexp[0] == 'var_ref': - if sexp[1] not in already_declared: - declarations[sexp[1]] = [ - 'declare', ['in'], 'float', sexp[1]] - elif len(sexp) == 4 and sexp[0] == 'assign': - assert sexp[2][0] == 'var_ref' - if sexp[2][1] not in already_declared: - declarations[sexp[2][1]] = [ - 'declare', ['out'], 'float', sexp[2][1]] - make_declarations(sexp[3], already_declared) - else: - already_declared = set(already_declared) - for s in sexp: - if isinstance(s, list) and len(s) >= 4 and \ - s[0] == 'declare': - already_declared.add(s[3]) - else: - make_declarations(s, already_declared) - make_declarations(body) - return declarations.values() + \ - [['function', f_name, ['signature', ret_type, ['parameters'], body]]] - - -# The following functions can be used to build expressions. 
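-# For instance (an illustrative note, not a generated test case):
-#
-#   gt_zero('x')
-#
-# evaluates to the nested-list sexp
-#
-#   ['expression', 'bool', '>', ['var_ref', 'x'],
-#    ['constant', 'float', ['0.000000']]]
-#
-# which sexp_to_string() renders as
-#
-#   (expression bool > (var_ref x) (constant float (0.000000)))
-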
-def const_float(value):
-    """Create an expression representing the given floating point value."""
-    return ['constant', 'float', ['{0:.6f}'.format(value)]]
-
-def const_bool(value):
-    """Create an expression representing the given boolean value.
-
-    If value is not a boolean, it is converted to a boolean.  So, for
-    instance, const_bool(1) is equivalent to const_bool(True).
-    """
-    return ['constant', 'bool', ['{0}'.format(1 if value else 0)]]
-
-def gt_zero(var_name):
-    """Create the expression var_name > 0"""
-    return ['expression', 'bool', '>', ['var_ref', var_name], const_float(0)]
-
-
-# The following functions can be used to build complex control flow
-# statements.  All of these functions return statement lists (even
-# those which only create a single statement), so that statements can
-# be sequenced together using the '+' operator.
-
-def return_(value = None):
-    """Create a return statement."""
-    if value is not None:
-        return [['return', value]]
-    else:
-        return [['return']]
-
-def break_():
-    """Create a break statement."""
-    return ['break']
-
-def continue_():
-    """Create a continue statement."""
-    return ['continue']
-
-def simple_if(var_name, then_statements, else_statements = None):
-    """Create a statement of the form
-
-    if (var_name > 0.0) {
-       <then_statements>
-    } else {
-       <else_statements>
-    }
-
-    else_statements may be omitted.
-    """
-    if else_statements is None:
-        else_statements = []
-    check_sexp(then_statements)
-    check_sexp(else_statements)
-    return [['if', gt_zero(var_name), then_statements, else_statements]]
-
-def loop(statements):
-    """Create a loop containing the given statements as its loop
-    body.
-    """
-    check_sexp(statements)
-    return [['loop', statements]]
-
-def declare_temp(var_type, var_name):
-    """Create a declaration of the form
-
-    (declare (temporary) <var_type> <var_name>)
-    """
-    return [['declare', ['temporary'], var_type, var_name]]
-
-def assign_x(var_name, value):
-    """Create an assignment to the variable <var_name>.  The
-    assignment uses the mask (x).
-    """
-    check_sexp(value)
-    return [['assign', ['x'], ['var_ref', var_name], value]]
-
-def complex_if(var_prefix, statements):
-    """Create a statement of the form
-
-    if (<var_prefix>a > 0.0) {
-       if (<var_prefix>b > 0.0) {
-          <statements>
-       }
-    }
-
-    This is useful in testing jump lowering, because if <statements>
-    ends in a jump, lower_jumps.cpp won't try to combine this
-    construct with the code that follows it, as it might do for a
-    simple if.
-
-    All variables used in the if statement are prefixed with
-    var_prefix.  This can be used to ensure uniqueness.
-    """
-    check_sexp(statements)
-    return simple_if(var_prefix + 'a', simple_if(var_prefix + 'b', statements))
-
-def declare_execute_flag():
-    """Create the statements that lower_jumps.cpp uses to declare and
-    initialize the temporary boolean execute_flag.
-    """
-    return declare_temp('bool', 'execute_flag') + \
-        assign_x('execute_flag', const_bool(True))
-
-def declare_return_flag():
-    """Create the statements that lower_jumps.cpp uses to declare and
-    initialize the temporary boolean return_flag.
-    """
-    return declare_temp('bool', 'return_flag') + \
-        assign_x('return_flag', const_bool(False))
-
-def declare_return_value():
-    """Create the statements that lower_jumps.cpp uses to declare and
-    initialize the temporary variable return_value.  Assume that
-    return_value is a float.
-    """
-    return declare_temp('float', 'return_value')
-
-def declare_break_flag():
-    """Create the statements that lower_jumps.cpp uses to declare and
-    initialize the temporary boolean break_flag.
- """ - return declare_temp('bool', 'break_flag') + \ - assign_x('break_flag', const_bool(False)) - -def lowered_return_simple(value = None): - """Create the statements that lower_jumps.cpp lowers a return - statement to, in situations where it does not need to clear the - execute flag. - """ - if value: - result = assign_x('return_value', value) - else: - result = [] - return result + assign_x('return_flag', const_bool(True)) - -def lowered_return(value = None): - """Create the statements that lower_jumps.cpp lowers a return - statement to, in situations where it needs to clear the execute - flag. - """ - return lowered_return_simple(value) + \ - assign_x('execute_flag', const_bool(False)) - -def lowered_continue(): - """Create the statement that lower_jumps.cpp lowers a continue - statement to. - """ - return assign_x('execute_flag', const_bool(False)) - -def lowered_break_simple(): - """Create the statement that lower_jumps.cpp lowers a break - statement to, in situations where it does not need to clear the - execute flag. - """ - return assign_x('break_flag', const_bool(True)) - -def lowered_break(): - """Create the statement that lower_jumps.cpp lowers a break - statement to, in situations where it needs to clear the execute - flag. - """ - return lowered_break_simple() + assign_x('execute_flag', const_bool(False)) - -def if_execute_flag(statements): - """Wrap statements in an if test so that they will only execute if - execute_flag is True. - """ - check_sexp(statements) - return [['if', ['var_ref', 'execute_flag'], statements, []]] - -def if_not_return_flag(statements): - """Wrap statements in an if test so that they will only execute if - return_flag is False. - """ - check_sexp(statements) - return [['if', ['var_ref', 'return_flag'], [], statements]] - -def final_return(): - """Create the return statement that lower_jumps.cpp places at the - end of a function when lowering returns. - """ - return [['return', ['var_ref', 'return_value']]] - -def final_break(): - """Create the conditional break statement that lower_jumps.cpp - places at the end of a function when lowering breaks. - """ - return [['if', ['var_ref', 'break_flag'], break_(), []]] - -def bash_quote(*args): - """Quote the arguments appropriately so that bash will understand - each argument as a single word. - """ - def quote_word(word): - for c in word: - if not (c.isalpha() or c.isdigit() or c in '@%_-+=:,./'): - break - else: - if not word: - return "''" - return word - return "'{0}'".format(word.replace("'", "'\"'\"'")) - return ' '.join(quote_word(word) for word in args) - -def create_test_case(doc_string, input_sexp, expected_sexp, test_name, - pull_out_jumps=False, lower_sub_return=False, - lower_main_return=False, lower_continue=False, - lower_break=False): - """Create a test case that verifies that do_lower_jumps transforms - the given code in the expected way. 
-    """
-    doc_lines = [line.strip() for line in doc_string.splitlines()]
-    doc_string = ''.join('# {0}\n'.format(line) for line in doc_lines if line != '')
-    check_sexp(input_sexp)
-    check_sexp(expected_sexp)
-    input_str = sexp_to_string(sort_decls(input_sexp))
-    expected_output = sexp_to_string(sort_decls(expected_sexp))
-
-    optimization = (
-        'do_lower_jumps({0:d}, {1:d}, {2:d}, {3:d}, {4:d})'.format(
-            pull_out_jumps, lower_sub_return, lower_main_return,
-            lower_continue, lower_break))
-    args = ['../../glsl_test', 'optpass', '--quiet', '--input-ir', optimization]
-    test_file = '{0}.opt_test'.format(test_name)
-    with open(test_file, 'w') as f:
-        f.write('#!/usr/bin/env bash\n#\n# This file was generated by create_test_cases.py.\n#\n')
-        f.write(doc_string)
-        f.write('{0} < "$test.out" 2>&1
-    total=$((total+1))
-    if $PYTHON2 $PYTHON_FLAGS $compare_ir "$test.expected" "$test.out" >/dev/null 2>&1; then
-        echo "PASS"
-        pass=$((pass+1))
-    else
-        echo "FAIL"
-        $PYTHON2 $PYTHON_FLAGS $compare_ir "$test.expected" "$test.out"
-    fi
-done
-
-echo ""
-echo "$pass/$total tests returned correct results"
-echo ""
-
-if [[ $pass == $total ]]; then
-    exit 0
-else
-    exit 1
-fi
diff --git a/src/glsl/tests/sampler_types_test.cpp b/src/glsl/tests/sampler_types_test.cpp
deleted file mode 100644
index 04dd65e6e8d..00000000000
--- a/src/glsl/tests/sampler_types_test.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright © 2013 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include <gtest/gtest.h>
-#include "main/compiler.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "ir.h"
-
-/**
- * \file sampler_types_test.cpp
- *
- * Test that built-in sampler types have the right properties.
- */ - -#define ARRAY EXPECT_TRUE(type->sampler_array); -#define NONARRAY EXPECT_FALSE(type->sampler_array); -#define SHADOW EXPECT_TRUE(type->sampler_shadow); -#define COLOR EXPECT_FALSE(type->sampler_shadow); - -#define T(TYPE, DIM, DATA_TYPE, ARR, SHAD, COMPS) \ -TEST(sampler_types, TYPE) \ -{ \ - const glsl_type *type = glsl_type::TYPE##_type; \ - EXPECT_EQ(GLSL_TYPE_SAMPLER, type->base_type); \ - EXPECT_EQ(DIM, type->sampler_dimensionality); \ - EXPECT_EQ(DATA_TYPE, type->sampler_type); \ - ARR; \ - SHAD; \ - EXPECT_EQ(COMPS, type->coordinate_components()); \ -} - -T( sampler1D, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 1) -T( sampler2D, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2) -T( sampler3D, GLSL_SAMPLER_DIM_3D, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 3) -T( samplerCube, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 3) -T( sampler1DArray, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, ARRAY, COLOR, 2) -T( sampler2DArray, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, ARRAY, COLOR, 3) -T( samplerCubeArray, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, ARRAY, COLOR, 4) -T( sampler2DRect, GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2) -T( samplerBuffer, GLSL_SAMPLER_DIM_BUF, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 1) -T( sampler2DMS, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2) -T( sampler2DMSArray, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_FLOAT, ARRAY, COLOR, 3) -T(isampler1D, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_INT, NONARRAY, COLOR, 1) -T(isampler2D, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_INT, NONARRAY, COLOR, 2) -T(isampler3D, GLSL_SAMPLER_DIM_3D, GLSL_TYPE_INT, NONARRAY, COLOR, 3) -T(isamplerCube, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_INT, NONARRAY, COLOR, 3) -T(isampler1DArray, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_INT, ARRAY, COLOR, 2) -T(isampler2DArray, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_INT, ARRAY, COLOR, 3) -T(isamplerCubeArray, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_INT, ARRAY, COLOR, 4) -T(isampler2DRect, GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_INT, NONARRAY, COLOR, 2) -T(isamplerBuffer, GLSL_SAMPLER_DIM_BUF, GLSL_TYPE_INT, NONARRAY, COLOR, 1) -T(isampler2DMS, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_INT, NONARRAY, COLOR, 2) -T(isampler2DMSArray, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_INT, ARRAY, COLOR, 3) -T(usampler1D, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_UINT, NONARRAY, COLOR, 1) -T(usampler2D, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_UINT, NONARRAY, COLOR, 2) -T(usampler3D, GLSL_SAMPLER_DIM_3D, GLSL_TYPE_UINT, NONARRAY, COLOR, 3) -T(usamplerCube, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_UINT, NONARRAY, COLOR, 3) -T(usampler1DArray, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_UINT, ARRAY, COLOR, 2) -T(usampler2DArray, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_UINT, ARRAY, COLOR, 3) -T(usamplerCubeArray, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_UINT, ARRAY, COLOR, 4) -T(usampler2DRect, GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_UINT, NONARRAY, COLOR, 2) -T(usamplerBuffer, GLSL_SAMPLER_DIM_BUF, GLSL_TYPE_UINT, NONARRAY, COLOR, 1) -T(usampler2DMS, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_UINT, NONARRAY, COLOR, 2) -T(usampler2DMSArray, GLSL_SAMPLER_DIM_MS, GLSL_TYPE_UINT, ARRAY, COLOR, 3) - -T(sampler1DShadow, GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 1) -T(sampler2DShadow, GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 2) -T(samplerCubeShadow, GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 3) - -T(sampler1DArrayShadow, - GLSL_SAMPLER_DIM_1D, GLSL_TYPE_FLOAT, ARRAY, SHADOW, 2) -T(sampler2DArrayShadow, - GLSL_SAMPLER_DIM_2D, GLSL_TYPE_FLOAT, ARRAY, SHADOW, 3) -T(samplerCubeArrayShadow, - GLSL_SAMPLER_DIM_CUBE, GLSL_TYPE_FLOAT, ARRAY, SHADOW, 4) 
-T(sampler2DRectShadow,
-  GLSL_SAMPLER_DIM_RECT, GLSL_TYPE_FLOAT, NONARRAY, SHADOW, 2)
-
-T(samplerExternalOES,
-  GLSL_SAMPLER_DIM_EXTERNAL, GLSL_TYPE_FLOAT, NONARRAY, COLOR, 2)
diff --git a/src/glsl/tests/set_uniform_initializer_tests.cpp b/src/glsl/tests/set_uniform_initializer_tests.cpp
deleted file mode 100644
index 0b1f66cb342..00000000000
--- a/src/glsl/tests/set_uniform_initializer_tests.cpp
+++ /dev/null
@@ -1,594 +0,0 @@
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include <gtest/gtest.h>
-#include "main/compiler.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "util/ralloc.h"
-#include "uniform_initializer_utils.h"
-
-namespace linker {
-extern void
-set_uniform_initializer(void *mem_ctx, gl_shader_program *prog,
-                        const char *name, const glsl_type *type,
-                        ir_constant *val, unsigned int boolean_true);
-}
-
-class set_uniform_initializer : public ::testing::Test {
-public:
-   virtual void SetUp();
-   virtual void TearDown();
-
-   /**
-    * Index of the uniform to be tested.
-    *
-    * All of the \c set_uniform_initializer tests create several slots for
-    * uniforms.  All but one of the slots is fake.  This field holds the index
-    * of the slot for the uniform being tested.
-    */
-   unsigned actual_index;
-
-   /**
-    * Name of the uniform to be tested.
-    */
-   const char *name;
-
-   /**
-    * Shader program used in the test.
-    */
-   struct gl_shader_program *prog;
-
-   /**
-    * Ralloc memory context used for all temporary allocations.
-    */
-   void *mem_ctx;
-};
-
-void
-set_uniform_initializer::SetUp()
-{
-   this->mem_ctx = ralloc_context(NULL);
-   this->prog = rzalloc(NULL, struct gl_shader_program);
-
-   /* Set default values used by the test cases.
-    */
-   this->actual_index = 1;
-   this->name = "i";
-}
-
-void
-set_uniform_initializer::TearDown()
-{
-   ralloc_free(this->mem_ctx);
-   this->mem_ctx = NULL;
-
-   ralloc_free(this->prog);
-   this->prog = NULL;
-}
-
-/**
- * Create some uniform storage for a program.
- *
- * \param prog          Program to get some storage
- * \param num_storage   Total number of storage slots
- * \param index_to_set  Storage slot that will actually get a value
- * \param name          Name for the actual storage slot
- * \param type          Type for the elements of the actual storage slot
- * \param array_size    Size for the array of the actual storage slot.  This
- *                      should be zero for non-arrays.
- */ -static unsigned -establish_uniform_storage(struct gl_shader_program *prog, unsigned num_storage, - unsigned index_to_set, const char *name, - const glsl_type *type, unsigned array_size) -{ - const unsigned elements = MAX2(1, array_size); - const unsigned data_components = elements * type->components(); - const unsigned total_components = MAX2(17, (data_components - + type->components())); - const unsigned red_zone_components = total_components - data_components; - - prog->UniformStorage = rzalloc_array(prog, struct gl_uniform_storage, - num_storage); - prog->NumUniformStorage = num_storage; - - prog->UniformStorage[index_to_set].name = (char *) name; - prog->UniformStorage[index_to_set].type = type; - prog->UniformStorage[index_to_set].array_elements = array_size; - prog->UniformStorage[index_to_set].initialized = false; - for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { - prog->UniformStorage[index_to_set].opaque[sh].index = ~0; - prog->UniformStorage[index_to_set].opaque[sh].active = false; - } - prog->UniformStorage[index_to_set].num_driver_storage = 0; - prog->UniformStorage[index_to_set].driver_storage = NULL; - prog->UniformStorage[index_to_set].storage = - rzalloc_array(prog, union gl_constant_value, total_components); - - fill_storage_array_with_sentinels(prog->UniformStorage[index_to_set].storage, - data_components, - red_zone_components); - - for (unsigned i = 0; i < num_storage; i++) { - if (i == index_to_set) - continue; - - prog->UniformStorage[i].name = (char *) "invalid slot"; - prog->UniformStorage[i].type = glsl_type::void_type; - prog->UniformStorage[i].array_elements = 0; - prog->UniformStorage[i].initialized = false; - for (int sh = 0; sh < MESA_SHADER_STAGES; sh++) { - prog->UniformStorage[i].opaque[sh].index = ~0; - prog->UniformStorage[i].opaque[sh].active = false; - } - prog->UniformStorage[i].num_driver_storage = 0; - prog->UniformStorage[i].driver_storage = NULL; - prog->UniformStorage[i].storage = NULL; - } - - return red_zone_components; -} - -/** - * Verify that the correct uniform is marked as having been initialized. 
- */
-static void
-verify_initialization(struct gl_shader_program *prog, unsigned actual_index)
-{
-   for (unsigned i = 0; i < prog->NumUniformStorage; i++) {
-      if (i == actual_index) {
-         EXPECT_TRUE(prog->UniformStorage[actual_index].initialized);
-      } else {
-         EXPECT_FALSE(prog->UniformStorage[i].initialized);
-      }
-   }
-}
-
-static void
-non_array_test(void *mem_ctx, struct gl_shader_program *prog,
-               unsigned actual_index, const char *name,
-               enum glsl_base_type base_type,
-               unsigned columns, unsigned rows)
-{
-   const glsl_type *const type =
-      glsl_type::get_instance(base_type, rows, columns);
-
-   unsigned red_zone_components =
-      establish_uniform_storage(prog, 3, actual_index, name, type, 0);
-
-   ir_constant *val;
-   generate_data(mem_ctx, base_type, columns, rows, val);
-
-   linker::set_uniform_initializer(mem_ctx, prog, name, type, val, 0xF00F);
-
-   verify_initialization(prog, actual_index);
-   verify_data(prog->UniformStorage[actual_index].storage, 0, val,
-               red_zone_components, 0xF00F);
-}
-
-TEST_F(set_uniform_initializer, int_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 1);
-}
-
-TEST_F(set_uniform_initializer, ivec2_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 2);
-}
-
-TEST_F(set_uniform_initializer, ivec3_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 3);
-}
-
-TEST_F(set_uniform_initializer, ivec4_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 4);
-}
-
-TEST_F(set_uniform_initializer, uint_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 1);
-}
-
-TEST_F(set_uniform_initializer, uvec2_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 2);
-}
-
-TEST_F(set_uniform_initializer, uvec3_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 3);
-}
-
-TEST_F(set_uniform_initializer, uvec4_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 4);
-}
-
-TEST_F(set_uniform_initializer, bool_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 1);
-}
-
-TEST_F(set_uniform_initializer, bvec2_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 2);
-}
-
-TEST_F(set_uniform_initializer, bvec3_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 3);
-}
-
-TEST_F(set_uniform_initializer, bvec4_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 4);
-}
-
-TEST_F(set_uniform_initializer, float_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 1);
-}
-
-TEST_F(set_uniform_initializer, vec2_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 2);
-}
-
-TEST_F(set_uniform_initializer, vec3_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 3);
-}
-
-TEST_F(set_uniform_initializer, vec4_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 4);
-}
-
-TEST_F(set_uniform_initializer, mat2x2_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 2);
-}
-
-TEST_F(set_uniform_initializer, mat2x3_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 3);
-}
-
-TEST_F(set_uniform_initializer, mat2x4_uniform)
-{
-   non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 4);
-}
-
-TEST_F(set_uniform_initializer, mat3x2_uniform)
-{
-   
non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 2); -} - -TEST_F(set_uniform_initializer, mat3x3_uniform) -{ - non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 3); -} - -TEST_F(set_uniform_initializer, mat3x4_uniform) -{ - non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 4); -} - -TEST_F(set_uniform_initializer, mat4x2_uniform) -{ - non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 2); -} - -TEST_F(set_uniform_initializer, mat4x3_uniform) -{ - non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 3); -} - -TEST_F(set_uniform_initializer, mat4x4_uniform) -{ - non_array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 4); -} - -static void -array_test(void *mem_ctx, struct gl_shader_program *prog, - unsigned actual_index, const char *name, - enum glsl_base_type base_type, - unsigned columns, unsigned rows, unsigned array_size, - unsigned excess_data_size) -{ - const glsl_type *const element_type = - glsl_type::get_instance(base_type, rows, columns); - - const unsigned red_zone_components = - establish_uniform_storage(prog, 3, actual_index, name, element_type, - array_size); - - /* The constant value generated may have more array elements than the - * uniform that it initializes. In the real compiler and linker this can - * happen when a uniform array is compacted because some of the tail - * elements are not used. In this case, the type of the uniform will be - * modified, but the initializer will not. - */ - ir_constant *val; - generate_array_data(mem_ctx, base_type, columns, rows, - array_size + excess_data_size, val); - - linker::set_uniform_initializer(mem_ctx, prog, name, element_type, val, - 0xF00F); - - verify_initialization(prog, actual_index); - verify_data(prog->UniformStorage[actual_index].storage, array_size, - val, red_zone_components, 0xF00F); -} - -TEST_F(set_uniform_initializer, int_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 1, 4, 0); -} - -TEST_F(set_uniform_initializer, ivec2_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 2, 4, 0); -} - -TEST_F(set_uniform_initializer, ivec3_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 3, 4, 0); -} - -TEST_F(set_uniform_initializer, ivec4_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 4, 4, 0); -} - -TEST_F(set_uniform_initializer, uint_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 1, 4, 0); -} - -TEST_F(set_uniform_initializer, uvec2_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 2, 4, 0); -} - -TEST_F(set_uniform_initializer, uvec3_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 3, 4, 0); -} - -TEST_F(set_uniform_initializer, uvec4_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 4, 4, 0); -} - -TEST_F(set_uniform_initializer, bool_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 1, 4, 0); -} - -TEST_F(set_uniform_initializer, bvec2_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 2, 4, 0); -} - -TEST_F(set_uniform_initializer, bvec3_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 3, 4, 0); -} - -TEST_F(set_uniform_initializer, bvec4_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 4, 4, 
0); -} - -TEST_F(set_uniform_initializer, float_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 1, 4, 0); -} - -TEST_F(set_uniform_initializer, vec2_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 2, 4, 0); -} - -TEST_F(set_uniform_initializer, vec3_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 3, 4, 0); -} - -TEST_F(set_uniform_initializer, vec4_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 4, 4, 0); -} - -TEST_F(set_uniform_initializer, mat2x2_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 2, 4, 0); -} - -TEST_F(set_uniform_initializer, mat2x3_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 3, 4, 0); -} - -TEST_F(set_uniform_initializer, mat2x4_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 4, 4, 0); -} - -TEST_F(set_uniform_initializer, mat3x2_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 2, 4, 0); -} - -TEST_F(set_uniform_initializer, mat3x3_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 3, 4, 0); -} - -TEST_F(set_uniform_initializer, mat3x4_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 4, 4, 0); -} - -TEST_F(set_uniform_initializer, mat4x2_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 2, 4, 0); -} - -TEST_F(set_uniform_initializer, mat4x3_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 3, 4, 0); -} - -TEST_F(set_uniform_initializer, mat4x4_array_uniform) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 4, 4, 0); -} - -TEST_F(set_uniform_initializer, int_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 1, 4, 5); -} - -TEST_F(set_uniform_initializer, ivec2_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 2, 4, 5); -} - -TEST_F(set_uniform_initializer, ivec3_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 3, 4, 5); -} - -TEST_F(set_uniform_initializer, ivec4_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_INT, 1, 4, 4, 5); -} - -TEST_F(set_uniform_initializer, uint_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 1, 4, 5); -} - -TEST_F(set_uniform_initializer, uvec2_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 2, 4, 5); -} - -TEST_F(set_uniform_initializer, uvec3_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 3, 4, 5); -} - -TEST_F(set_uniform_initializer, uvec4_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_UINT, 1, 4, 4, 5); -} - -TEST_F(set_uniform_initializer, bool_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 1, 4, 5); -} - -TEST_F(set_uniform_initializer, bvec2_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 2, 4, 5); -} - -TEST_F(set_uniform_initializer, bvec3_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 3, 
4, 5); -} - -TEST_F(set_uniform_initializer, bvec4_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_BOOL, 1, 4, 4, 5); -} - -TEST_F(set_uniform_initializer, float_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 1, 4, 5); -} - -TEST_F(set_uniform_initializer, vec2_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 2, 4, 5); -} - -TEST_F(set_uniform_initializer, vec3_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 3, 4, 5); -} - -TEST_F(set_uniform_initializer, vec4_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 1, 4, 4, 5); -} - -TEST_F(set_uniform_initializer, mat2x2_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 2, 4, 5); -} - -TEST_F(set_uniform_initializer, mat2x3_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 3, 4, 5); -} - -TEST_F(set_uniform_initializer, mat2x4_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 2, 4, 4, 5); -} - -TEST_F(set_uniform_initializer, mat3x2_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 2, 4, 5); -} - -TEST_F(set_uniform_initializer, mat3x3_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 3, 4, 5); -} - -TEST_F(set_uniform_initializer, mat3x4_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 3, 4, 4, 5); -} - -TEST_F(set_uniform_initializer, mat4x2_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 2, 4, 5); -} - -TEST_F(set_uniform_initializer, mat4x3_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 3, 4, 5); -} - -TEST_F(set_uniform_initializer, mat4x4_array_uniform_excess_initializer) -{ - array_test(mem_ctx, prog, actual_index, name, GLSL_TYPE_FLOAT, 4, 4, 4, 5); -} diff --git a/src/glsl/tests/sexps.py b/src/glsl/tests/sexps.py deleted file mode 100644 index a714af8d236..00000000000 --- a/src/glsl/tests/sexps.py +++ /dev/null @@ -1,103 +0,0 @@ -# coding=utf-8 -# -# Copyright © 2011 Intel Corporation -# -# Permission is hereby granted, free of charge, to any person obtaining a -# copy of this software and associated documentation files (the "Software"), -# to deal in the Software without restriction, including without limitation -# the rights to use, copy, modify, merge, publish, distribute, sublicense, -# and/or sell copies of the Software, and to permit persons to whom the -# Software is furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice (including the next -# paragraph) shall be included in all copies or substantial portions of the -# Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL -# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -# DEALINGS IN THE SOFTWARE. - -# This file contains helper functions for manipulating sexps in Python. -# -# We represent a sexp in Python using nested lists containing strings. -# So, for example, the sexp (constant float (1.000000)) is represented -# as ['constant', 'float', ['1.000000']]. - -import re - -def check_sexp(sexp): - """Verify that the argument is a proper sexp. - - That is, raise an exception if the argument is not a string or a - list, or if it contains anything that is not a string or a list at - any nesting level. - """ - if isinstance(sexp, list): - for s in sexp: - check_sexp(s) - elif not isinstance(sexp, basestring): - raise Exception('Not a sexp: {0!r}'.format(sexp)) - -def parse_sexp(sexp): - """Convert a string, of the form that would be output by mesa, - into a sexp represented as nested lists containing strings. - """ - sexp_token_regexp = re.compile( - '[a-zA-Z_]+(@[0-9]+)?|[0-9]+(\\.[0-9]+)?|[^ \n]') - stack = [[]] - for match in sexp_token_regexp.finditer(sexp): - token = match.group(0) - if token == '(': - stack.append([]) - elif token == ')': - if len(stack) == 1: - raise Exception('Unmatched )') - sexp = stack.pop() - stack[-1].append(sexp) - else: - stack[-1].append(token) - if len(stack) != 1: - raise Exception('Unmatched (') - if len(stack[0]) != 1: - raise Exception('Multiple sexps') - return stack[0][0] - -def sexp_to_string(sexp): - """Convert a sexp, represented as nested lists containing strings, - into a single string of the form parseable by mesa. - """ - if isinstance(sexp, basestring): - return sexp - assert isinstance(sexp, list) - result = '' - for s in sexp: - sub_result = sexp_to_string(s) - if result == '': - result = sub_result - elif '\n' not in result and '\n' not in sub_result and \ - len(result) + len(sub_result) + 1 <= 70: - result += ' ' + sub_result - else: - result += '\n' + sub_result - return '({0})'.format(result.replace('\n', '\n ')) - -def sort_decls(sexp): - """Sort all toplevel variable declarations in sexp. - - This is used to work around the fact that - ir_reader::read_instructions reorders declarations. - """ - assert isinstance(sexp, list) - decls = [] - other_code = [] - for s in sexp: - if isinstance(s, list) and len(s) >= 4 and s[0] == 'declare': - decls.append(s) - else: - other_code.append(s) - return sorted(decls) + other_code - diff --git a/src/glsl/tests/uniform_initializer_utils.cpp b/src/glsl/tests/uniform_initializer_utils.cpp deleted file mode 100644 index 5006387036f..00000000000 --- a/src/glsl/tests/uniform_initializer_utils.cpp +++ /dev/null @@ -1,255 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include <gtest/gtest.h>
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "util/ralloc.h"
-#include "uniform_initializer_utils.h"
-#include 
-
-void
-fill_storage_array_with_sentinels(gl_constant_value *storage,
-                                  unsigned data_size,
-                                  unsigned red_zone_size)
-{
-   for (unsigned i = 0; i < data_size; i++)
-      storage[i].u = 0xDEADBEEF;
-
-   for (unsigned i = 0; i < red_zone_size; i++)
-      storage[data_size + i].u = 0xBADDC0DE;
-}
-
-/**
- * Verify that markers past the end of the real uniform are unmodified
- */
-static ::testing::AssertionResult
-red_zone_is_intact(gl_constant_value *storage,
-                   unsigned data_size,
-                   unsigned red_zone_size)
-{
-   for (unsigned i = 0; i < red_zone_size; i++) {
-      const unsigned idx = data_size + i;
-
-      if (storage[idx].u != 0xBADDC0DE)
-         return ::testing::AssertionFailure()
-            << "storage[" << idx << "].u = " << storage[idx].u
-            << ", expected data values = " << data_size
-            << ", red-zone size = " << red_zone_size;
-   }
-
-   return ::testing::AssertionSuccess();
-}
-
-static const int values[] = {
-   2, 0, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53
-};
-
-/**
- * Generate a single data element.
- *
- * This is used by both \c generate_data and \c generate_array_data to create
- * the data.
- */
-static void
-generate_data_element(void *mem_ctx, const glsl_type *type,
-                      ir_constant *&val, unsigned data_index_base)
-{
-   /* Set the initial data values for the generated constant.
-    */
-   ir_constant_data data;
-   memset(&data, 0, sizeof(data));
-   for (unsigned i = 0; i < type->components(); i++) {
-      const unsigned idx = (i + data_index_base) % ARRAY_SIZE(values);
-      switch (type->base_type) {
-      case GLSL_TYPE_UINT:
-      case GLSL_TYPE_INT:
-      case GLSL_TYPE_SAMPLER:
-      case GLSL_TYPE_IMAGE:
-         data.i[i] = values[idx];
-         break;
-      case GLSL_TYPE_FLOAT:
-         data.f[i] = float(values[idx]);
-         break;
-      case GLSL_TYPE_BOOL:
-         data.b[i] = bool(values[idx]);
-         break;
-      case GLSL_TYPE_DOUBLE:
-         data.d[i] = double(values[idx]);
-         break;
-      case GLSL_TYPE_ATOMIC_UINT:
-      case GLSL_TYPE_STRUCT:
-      case GLSL_TYPE_ARRAY:
-      case GLSL_TYPE_VOID:
-      case GLSL_TYPE_ERROR:
-      case GLSL_TYPE_INTERFACE:
-      case GLSL_TYPE_SUBROUTINE:
-         ASSERT_TRUE(false);
-         break;
-      }
-   }
-
-   /* Generate and verify the constant.
-    */
-   val = new(mem_ctx) ir_constant(type, &data);
-
-   for (unsigned i = 0; i < type->components(); i++) {
-      switch (type->base_type) {
-      case GLSL_TYPE_UINT:
-      case GLSL_TYPE_INT:
-      case GLSL_TYPE_SAMPLER:
-      case GLSL_TYPE_IMAGE:
-         ASSERT_EQ(data.i[i], val->value.i[i]);
-         break;
-      case GLSL_TYPE_FLOAT:
-         ASSERT_EQ(data.f[i], val->value.f[i]);
-         break;
-      case GLSL_TYPE_BOOL:
-         ASSERT_EQ(data.b[i], val->value.b[i]);
-         break;
-      case GLSL_TYPE_DOUBLE:
-         ASSERT_EQ(data.d[i], val->value.d[i]);
-         break;
-      case GLSL_TYPE_ATOMIC_UINT:
-      case GLSL_TYPE_STRUCT:
-      case GLSL_TYPE_ARRAY:
-      case GLSL_TYPE_VOID:
-      case GLSL_TYPE_ERROR:
-      case GLSL_TYPE_INTERFACE:
-      case GLSL_TYPE_SUBROUTINE:
-         ASSERT_TRUE(false);
-         break;
-      }
-   }
-}
-
-void
-generate_data(void *mem_ctx, enum glsl_base_type base_type,
-              unsigned columns, unsigned rows,
-              ir_constant *&val)
-{
-   /* Determine what the type of the generated constant should be.
-    */
-   const glsl_type *const type =
-      glsl_type::get_instance(base_type, rows, columns);
-   ASSERT_FALSE(type->is_error());
-
-   generate_data_element(mem_ctx, type, val, 0);
-}
-
-void
-generate_array_data(void *mem_ctx, enum glsl_base_type base_type,
-                    unsigned columns, unsigned rows, unsigned array_size,
-                    ir_constant *&val)
-{
-   /* Determine what the type of the generated constant should be.
-    */
-   const glsl_type *const element_type =
-      glsl_type::get_instance(base_type, rows, columns);
-   ASSERT_FALSE(element_type->is_error());
-
-   const glsl_type *const array_type =
-      glsl_type::get_array_instance(element_type, array_size);
-   ASSERT_FALSE(array_type->is_error());
-
-   /* Set the initial data values for the generated constant.
-    */
-   exec_list values_for_array;
-   for (unsigned i = 0; i < array_size; i++) {
-      ir_constant *element;
-
-      generate_data_element(mem_ctx, element_type, element, i);
-      values_for_array.push_tail(element);
-   }
-
-   val = new(mem_ctx) ir_constant(array_type, &values_for_array);
-}
-
-/**
- * Verify that the data stored for the uniform matches the initializer
- *
- * \param storage             Backing storage for the uniform
- * \param storage_array_size  Array size of the backing storage. This must be
- *                            less than or equal to the array size of the type
- *                            of \c val. If \c val is not an array, this must
- *                            be zero.
- * \param val                 Value of the initializer for the uniform.
- * \param red_zone_size       Number of extra storage slots after the real
- *                            data that must still hold their sentinel values.
- * \param boolean_true        Value stored in \c storage for a boolean
- *                            \c true.
- */
-void
-verify_data(gl_constant_value *storage, unsigned storage_array_size,
-            ir_constant *val, unsigned red_zone_size,
-            unsigned int boolean_true)
-{
-   if (val->type->base_type == GLSL_TYPE_ARRAY) {
-      const glsl_type *const element_type = val->array_elements[0]->type;
-
-      for (unsigned i = 0; i < storage_array_size; i++) {
-         verify_data(storage + (i * element_type->components()), 0,
-                     val->array_elements[i], 0, boolean_true);
-      }
-
-      const unsigned components = element_type->components();
-
-      if (red_zone_size > 0) {
-         EXPECT_TRUE(red_zone_is_intact(storage,
-                                        storage_array_size * components,
-                                        red_zone_size));
-      }
-   } else {
-      ASSERT_EQ(0u, storage_array_size);
-      for (unsigned i = 0; i < val->type->components(); i++) {
-         switch (val->type->base_type) {
-         case GLSL_TYPE_UINT:
-         case GLSL_TYPE_INT:
-         case GLSL_TYPE_SAMPLER:
-         case GLSL_TYPE_IMAGE:
-            EXPECT_EQ(val->value.i[i], storage[i].i);
-            break;
-         case GLSL_TYPE_FLOAT:
-            EXPECT_EQ(val->value.f[i], storage[i].f);
-            break;
-         case GLSL_TYPE_BOOL:
-            EXPECT_EQ(val->value.b[i] ?
boolean_true : 0, storage[i].i); - break; - case GLSL_TYPE_DOUBLE: - EXPECT_EQ(val->value.d[i], *(double *)&storage[i*2].i); - break; - case GLSL_TYPE_ATOMIC_UINT: - case GLSL_TYPE_STRUCT: - case GLSL_TYPE_ARRAY: - case GLSL_TYPE_VOID: - case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: - case GLSL_TYPE_SUBROUTINE: - ASSERT_TRUE(false); - break; - } - } - - if (red_zone_size > 0) { - EXPECT_TRUE(red_zone_is_intact(storage, - val->type->components(), - red_zone_size)); - } - } -} diff --git a/src/glsl/tests/uniform_initializer_utils.h b/src/glsl/tests/uniform_initializer_utils.h deleted file mode 100644 index b4d0c10220f..00000000000 --- a/src/glsl/tests/uniform_initializer_utils.h +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER - * DEALINGS IN THE SOFTWARE. - */ - -#pragma once - -#include "program/prog_parameter.h" -#include "ir.h" -#include "ir_uniform.h" - -extern void -fill_storage_array_with_sentinels(gl_constant_value *storage, - unsigned data_size, - unsigned red_zone_size); - -extern void -generate_data(void *mem_ctx, enum glsl_base_type base_type, - unsigned columns, unsigned rows, - ir_constant *&val); - -extern void -generate_array_data(void *mem_ctx, enum glsl_base_type base_type, - unsigned columns, unsigned rows, unsigned array_size, - ir_constant *&val); - -extern void -verify_data(gl_constant_value *storage, unsigned storage_array_size, - ir_constant *val, unsigned red_zone_size, - unsigned int boolean_true); diff --git a/src/glsl/tests/varyings_test.cpp b/src/glsl/tests/varyings_test.cpp deleted file mode 100644 index 0c4e0a471b8..00000000000 --- a/src/glsl/tests/varyings_test.cpp +++ /dev/null @@ -1,349 +0,0 @@ -/* - * Copyright © 2013 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. 
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-#include <gtest/gtest.h>
-#include "main/compiler.h"
-#include "main/mtypes.h"
-#include "main/macros.h"
-#include "util/ralloc.h"
-#include "ir.h"
-#include "program/hash_table.h"
-
-/**
- * \file varyings_test.cpp
- *
- * Test various aspects of linking shader stage inputs and outputs.
- */
-
-namespace linker {
-bool
-populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
-                             hash_table *consumer_inputs,
-                             hash_table *consumer_interface_inputs,
-                             ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX]);
-
-ir_variable *
-get_matching_input(void *mem_ctx,
-                   const ir_variable *output_var,
-                   hash_table *consumer_inputs,
-                   hash_table *consumer_interface_inputs,
-                   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_MAX]);
-}
-
-class link_varyings : public ::testing::Test {
-public:
-   link_varyings();
-
-   virtual void SetUp();
-   virtual void TearDown();
-
-   char *interface_field_name(const glsl_type *iface, unsigned field = 0)
-   {
-      return ralloc_asprintf(mem_ctx,
-                             "%s.%s",
-                             iface->name,
-                             iface->fields.structure[field].name);
-   }
-
-   void *mem_ctx;
-   exec_list ir;
-   hash_table *consumer_inputs;
-   hash_table *consumer_interface_inputs;
-
-   const glsl_type *simple_interface;
-   ir_variable *junk[VARYING_SLOT_TESS_MAX];
-};
-
-link_varyings::link_varyings()
-{
-   static const glsl_struct_field f[] = {
-      glsl_struct_field(glsl_type::vec(4), "v")
-   };
-
-   this->simple_interface =
-      glsl_type::get_interface_instance(f,
-                                        ARRAY_SIZE(f),
-                                        GLSL_INTERFACE_PACKING_STD140,
-                                        "simple_interface");
-}
-
-void
-link_varyings::SetUp()
-{
-   this->mem_ctx = ralloc_context(NULL);
-   this->ir.make_empty();
-
-   this->consumer_inputs
-      = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
-
-   this->consumer_interface_inputs
-      = hash_table_ctor(0, hash_table_string_hash, hash_table_string_compare);
-}
-
-void
-link_varyings::TearDown()
-{
-   ralloc_free(this->mem_ctx);
-   this->mem_ctx = NULL;
-
-   hash_table_dtor(this->consumer_inputs);
-   this->consumer_inputs = NULL;
-   hash_table_dtor(this->consumer_interface_inputs);
-   this->consumer_interface_inputs = NULL;
-}
-
-/**
- * Hash table callback function that counts the elements in the table
- *
- * \sa num_elements
- */
-static void
-ht_count_callback(const void *, void *, void *closure)
-{
-   unsigned int *counter = (unsigned int *) closure;
-
-   (*counter)++;
-}
-
-/**
- * Helper function to count the number of elements in a hash table.
- */
-static unsigned
-num_elements(hash_table *ht)
-{
-   unsigned int counter = 0;
-
-   hash_table_call_foreach(ht, ht_count_callback, (void *) &counter);
-
-   return counter;
-}
-
-/**
- * Helper function to determine whether a hash table is empty.
- */
-static bool
-is_empty(hash_table *ht)
-{
-   return num_elements(ht) == 0;
-}
-
-TEST_F(link_varyings, single_simple_input)
-{
-   ir_variable *const v =
-      new(mem_ctx) ir_variable(glsl_type::vec(4),
-                               "a",
-                               ir_var_shader_in);
-
-
-   ir.push_tail(v);
-
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
-
-   EXPECT_EQ((void *) v, hash_table_find(consumer_inputs, "a"));
-   EXPECT_EQ(1u, num_elements(consumer_inputs));
-   EXPECT_TRUE(is_empty(consumer_interface_inputs));
-}
-
-TEST_F(link_varyings, gl_ClipDistance)
-{
-   const glsl_type *const array_8_of_float =
-      glsl_type::get_array_instance(glsl_type::vec(1), 8);
-
-   ir_variable *const clipdistance =
-      new(mem_ctx) ir_variable(array_8_of_float,
-                               "gl_ClipDistance",
-                               ir_var_shader_in);
-
-   clipdistance->data.explicit_location = true;
-   clipdistance->data.location = VARYING_SLOT_CLIP_DIST0;
-   clipdistance->data.explicit_index = 0;
-
-   ir.push_tail(clipdistance);
-
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
-
-   EXPECT_EQ(clipdistance, junk[VARYING_SLOT_CLIP_DIST0]);
-   EXPECT_TRUE(is_empty(consumer_inputs));
-   EXPECT_TRUE(is_empty(consumer_interface_inputs));
-}
-
-TEST_F(link_varyings, single_interface_input)
-{
-   ir_variable *const v =
-      new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type,
-                               simple_interface->fields.structure[0].name,
-                               ir_var_shader_in);
-
-   v->init_interface_type(simple_interface);
-
-   ir.push_tail(v);
-
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
-   char *const full_name = interface_field_name(simple_interface);
-
-   EXPECT_EQ((void *) v, hash_table_find(consumer_interface_inputs, full_name));
-   EXPECT_EQ(1u, num_elements(consumer_interface_inputs));
-   EXPECT_TRUE(is_empty(consumer_inputs));
-}
-
-TEST_F(link_varyings, one_interface_and_one_simple_input)
-{
-   ir_variable *const v =
-      new(mem_ctx) ir_variable(glsl_type::vec(4),
-                               "a",
-                               ir_var_shader_in);
-
-
-   ir.push_tail(v);
-
-   ir_variable *const iface =
-      new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type,
-                               simple_interface->fields.structure[0].name,
-                               ir_var_shader_in);
-
-   iface->init_interface_type(simple_interface);
-
-   ir.push_tail(iface);
-
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
-
-   char *const iface_field_name = interface_field_name(simple_interface);
-
-   EXPECT_EQ((void *) iface, hash_table_find(consumer_interface_inputs,
-                                             iface_field_name));
-   EXPECT_EQ(1u, num_elements(consumer_interface_inputs));
-
-   EXPECT_EQ((void *) v, hash_table_find(consumer_inputs, "a"));
-   EXPECT_EQ(1u, num_elements(consumer_inputs));
-}
-
-TEST_F(link_varyings, invalid_interface_input)
-{
-   ir_variable *const v =
-      new(mem_ctx) ir_variable(simple_interface,
-                               "named_interface",
-                               ir_var_shader_in);
-
-   ASSERT_EQ(simple_interface, v->get_interface_type());
-
-   ir.push_tail(v);
-
-   EXPECT_FALSE(linker::populate_consumer_input_sets(mem_ctx,
-                                                     &ir,
-                                                     consumer_inputs,
-                                                     consumer_interface_inputs,
-                                                     junk));
-}
-
-TEST_F(link_varyings, interface_field_doesnt_match_noninterface)
-{
-   char *const iface_field_name = interface_field_name(simple_interface);
-
-   /* The input shader has a single input variable named "a.v".
-    */
-   ir_variable *const in_v =
-      new(mem_ctx) ir_variable(glsl_type::vec(4),
-                               iface_field_name,
-                               ir_var_shader_in);
-
-
-   ir.push_tail(in_v);
-
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
-
-   /* Create an output variable, "v", that is part of an interface block named
-    * "a". They should not match.
-    */
-   ir_variable *const out_v =
-      new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type,
-                               simple_interface->fields.structure[0].name,
-                               ir_var_shader_in);
-
-   out_v->init_interface_type(simple_interface);
-
-   ir_variable *const match =
-      linker::get_matching_input(mem_ctx,
-                                 out_v,
-                                 consumer_inputs,
-                                 consumer_interface_inputs,
-                                 junk);
-
-   EXPECT_EQ(NULL, match);
-}
-
-TEST_F(link_varyings, interface_field_doesnt_match_noninterface_vice_versa)
-{
-   char *const iface_field_name = interface_field_name(simple_interface);
-
-   /* The input shader has a single variable, "v", that is part of an
-    * interface block named "a".
-    */
-   ir_variable *const in_v =
-      new(mem_ctx) ir_variable(simple_interface->fields.structure[0].type,
-                               simple_interface->fields.structure[0].name,
-                               ir_var_shader_in);
-
-   in_v->init_interface_type(simple_interface);
-
-   ir.push_tail(in_v);
-
-   ASSERT_TRUE(linker::populate_consumer_input_sets(mem_ctx,
-                                                    &ir,
-                                                    consumer_inputs,
-                                                    consumer_interface_inputs,
-                                                    junk));
-
-   /* Create an output variable "a.v". They should not match.
-    */
-   ir_variable *const out_v =
-      new(mem_ctx) ir_variable(glsl_type::vec(4),
-                               iface_field_name,
-                               ir_var_shader_out);
-
-   ir_variable *const match =
-      linker::get_matching_input(mem_ctx,
-                                 out_v,
-                                 consumer_inputs,
-                                 consumer_interface_inputs,
-                                 junk);
-
-   EXPECT_EQ(NULL, match);
-}
diff --git a/src/mesa/Makefile.am b/src/mesa/Makefile.am
index 69506f2a3fd..8dc44fda0f2 100644
--- a/src/mesa/Makefile.am
+++ b/src/mesa/Makefile.am
@@ -165,7 +165,7 @@ libmesa_la_SOURCES = \
 	$(MESA_ASM_FILES_FOR_ARCH)
 
 libmesa_la_LIBADD = \
-	$(top_builddir)/src/glsl/libglsl.la \
+	$(top_builddir)/src/compiler/glsl/libglsl.la \
 	$(ARCH_LIBS)
 
 libmesagallium_la_SOURCES = \
@@ -175,7 +175,7 @@ libmesagallium_la_SOURCES = \
 	$(MESA_ASM_FILES_FOR_ARCH)
 
 libmesagallium_la_LIBADD = \
-	$(top_builddir)/src/glsl/libglsl.la \
+	$(top_builddir)/src/compiler/glsl/libglsl.la \
 	$(ARCH_LIBS)
 
 libmesa_sse41_la_SOURCES = \
diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index 386d6aa3815..7af8becd607 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -623,10 +623,7 @@ MESA_GALLIUM_FILES = \
 INCLUDE_DIRS = \
 	-I$(top_srcdir)/include \
 	-I$(top_srcdir)/src \
-	-I$(top_srcdir)/src/glsl \
 	-I$(top_builddir)/src/compiler/nir \
-	-I$(top_builddir)/src/glsl \
-	-I$(top_srcdir)/src/glsl/glcpp \
 	-I$(top_srcdir)/src/mesa \
 	-I$(top_builddir)/src/mesa \
 	-I$(top_srcdir)/src/mesa/main \
diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c
index 9eadb7e00fa..0ab9ebdab3c 100644
--- a/src/mesa/drivers/dri/i965/brw_cs.c
+++ b/src/mesa/drivers/dri/i965/brw_cs.c
@@ -32,7 +32,7 @@
 #include "intel_batchbuffer.h"
 #include "brw_nir.h"
 #include "brw_program.h"
-#include "glsl/ir_uniform.h"
+#include "compiler/glsl/ir_uniform.h"
 
 static void
 assign_cs_binding_table_offsets(const struct brw_device_info *devinfo,
diff --git a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
index 3d6aa74d830..2ff9ec16fdb 100644
--- a/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
+++ b/src/mesa/drivers/dri/i965/brw_cubemap_normalize.cpp
@@ -31,7 +31,7 @@
  */
 
 #include "compiler/glsl_types.h"
-#include "glsl/ir.h"
+#include 
"compiler/glsl/ir.h" #include "program/prog_instruction.h" /* For WRITEMASK_* */ class brw_cubemap_normalize_visitor : public ir_hierarchical_visitor { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index b75ebfedb3d..4612a287bb7 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -30,7 +30,7 @@ #include "brw_shader.h" #include "brw_ir_fs.h" #include "brw_fs_builder.h" -#include "glsl/ir.h" +#include "compiler/glsl/ir.h" #include "compiler/nir/nir.h" struct bblock_t; diff --git a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp index 22877797522..689e0911b17 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp @@ -41,8 +41,8 @@ * we do retain the vector types in that case. */ -#include "glsl/ir.h" -#include "glsl/ir_expression_flattening.h" +#include "compiler/glsl/ir.h" +#include "compiler/glsl/ir_expression_flattening.h" #include "compiler/glsl_types.h" class ir_channel_expressions_visitor : public ir_hierarchical_visitor { diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index f9df2a4e3b6..68a86a9a85e 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -21,7 +21,7 @@ * IN THE SOFTWARE. */ -#include "glsl/ir.h" +#include "compiler/glsl/ir.h" #include "main/shaderimage.h" #include "brw_fs.h" #include "brw_fs_surface_builder.h" diff --git a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp index 0970959bbd6..5fe24debeec 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_vector_splitting.cpp @@ -38,8 +38,8 @@ */ #include "main/imports.h" -#include "glsl/ir.h" -#include "glsl/ir_rvalue_visitor.h" +#include "compiler/glsl/ir.h" +#include "compiler/glsl/ir_rvalue_visitor.h" #include "compiler/glsl_types.h" #include "util/hash_table.h" diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index edb52ac44b7..c888ef2bd67 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -34,7 +34,7 @@ #include "brw_ff_gs.h" #include "brw_nir.h" #include "brw_program.h" -#include "glsl/ir_uniform.h" +#include "compiler/glsl/ir_uniform.h" static void brw_gs_debug_recompile(struct brw_context *brw, diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp index 234afd554df..db4ba88cb1c 100644 --- a/src/mesa/drivers/dri/i965/brw_link.cpp +++ b/src/mesa/drivers/dri/i965/brw_link.cpp @@ -26,7 +26,7 @@ #include "brw_fs.h" #include "brw_nir.h" #include "brw_program.h" -#include "glsl/ir_optimization.h" +#include "compiler/glsl/ir_optimization.h" #include "program/program.h" #include "main/shaderapi.h" #include "main/uniforms.h" diff --git a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp index c83b2728b98..0848e1ec536 100644 --- a/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp +++ b/src/mesa/drivers/dri/i965/brw_lower_texture_gradients.cpp @@ -25,8 +25,8 @@ * \file brw_lower_texture_gradients.cpp */ -#include "glsl/ir.h" -#include "glsl/ir_builder.h" +#include "compiler/glsl/ir.h" +#include "compiler/glsl/ir_builder.h" #include "program/prog_instruction.h" #include "brw_context.h" diff --git 
a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp index eafbdf68ea0..f5d7baee6db 100644 --- a/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp +++ b/src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp @@ -32,8 +32,8 @@ */ #include "compiler/glsl_types.h" -#include "glsl/ir.h" -#include "glsl/ir_builder.h" +#include "compiler/glsl/ir.h" +#include "compiler/glsl/ir_builder.h" using namespace ir_builder; diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp index 0849ca40046..f3361d69786 100644 --- a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -23,7 +23,7 @@ #include "brw_shader.h" #include "brw_nir.h" -#include "glsl/ir_uniform.h" +#include "compiler/glsl/ir_uniform.h" static void brw_nir_setup_glsl_builtin_uniform(nir_variable *var, diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index 20d4e0d6c4a..368efeecb2d 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -37,7 +37,7 @@ #include "program/programopt.h" #include "tnl/tnl.h" #include "util/ralloc.h" -#include "glsl/ir.h" +#include "compiler/glsl/ir.h" #include "brw_program.h" #include "brw_context.h" diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 4e00fd0ec0d..1460f4599bf 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -31,7 +31,7 @@ #include "brw_ir_vec4.h" #endif -#include "glsl/ir.h" +#include "compiler/glsl/ir.h" #include "compiler/nir/nir.h" diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c index 6d6988c6a41..e07c05a92c1 100644 --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c @@ -31,7 +31,7 @@ #include "intel_batchbuffer.h" #include "brw_state.h" #include "program/prog_statevars.h" -#include "glsl/ir_uniform.h" +#include "compiler/glsl/ir_uniform.h" static unsigned get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data) diff --git a/src/mesa/drivers/dri/i965/intel_resolve_map.h b/src/mesa/drivers/dri/i965/intel_resolve_map.h index 9bb9fca0455..726b287722a 100644 --- a/src/mesa/drivers/dri/i965/intel_resolve_map.h +++ b/src/mesa/drivers/dri/i965/intel_resolve_map.h @@ -24,7 +24,7 @@ #pragma once #include -#include "glsl/list.h" +#include "compiler/glsl/list.h" #ifdef __cplusplus extern "C" { diff --git a/src/mesa/main/context.c b/src/mesa/main/context.c index f3fd01f395e..8b415ed6019 100644 --- a/src/mesa/main/context.c +++ b/src/mesa/main/context.c @@ -142,7 +142,7 @@ #include "sparc/sparc.h" #endif -#include "glsl_parser_extras.h" +#include "compiler/glsl/glsl_parser_extras.h" #include diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index b2b1ee9ba44..b0ce8c472ef 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -36,10 +36,10 @@ #include "main/texenvprogram.h" #include "main/texobj.h" #include "main/uniforms.h" -#include "glsl/ir_builder.h" -#include "glsl/ir_optimization.h" -#include "glsl/glsl_parser_extras.h" -#include "glsl/glsl_symbol_table.h" +#include "compiler/glsl/ir_builder.h" +#include "compiler/glsl/ir_optimization.h" +#include "compiler/glsl/glsl_parser_extras.h" +#include "compiler/glsl/glsl_symbol_table.h" #include 
"compiler/glsl_types.h" #include "program/ir_to_mesa.h" #include "program/program.h" diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index f2a872dc83a..0e4a0af0b0f 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -43,8 +43,8 @@ #include "main/shaderobj.h" #include "main/transformfeedback.h" #include "main/uniforms.h" -#include "glsl/glsl_parser_extras.h" -#include "glsl/ir_uniform.h" +#include "compiler/glsl/glsl_parser_extras.h" +#include "compiler/glsl/ir_uniform.h" #include "program/program.h" #include "program/prog_parameter.h" #include "util/ralloc.h" diff --git a/src/mesa/main/program_resource.c b/src/mesa/main/program_resource.c index 9a15cfe70b8..50659b0cd21 100644 --- a/src/mesa/main/program_resource.c +++ b/src/mesa/main/program_resource.c @@ -30,7 +30,7 @@ #include "main/shaderobj.h" #include "main/context.h" #include "program_resource.h" -#include "ir_uniform.h" +#include "compiler/glsl/ir_uniform.h" static bool supported_interface_enum(struct gl_context *ctx, GLenum iface) { diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index e902585924a..4967e4b1df1 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -34,9 +34,9 @@ #include "main/shaderapi.h" #include "main/shaderobj.h" #include "main/uniforms.h" -#include "glsl/glsl_symbol_table.h" -#include "glsl/ir.h" -#include "glsl/program.h" +#include "compiler/glsl/glsl_symbol_table.h" +#include "compiler/glsl/ir.h" +#include "compiler/glsl/program.h" #include "program/hash_table.h" #include "util/strndup.h" diff --git a/src/mesa/main/shaderapi.c b/src/mesa/main/shaderapi.c index a988f41697b..df92c0912af 100644 --- a/src/mesa/main/shaderapi.c +++ b/src/mesa/main/shaderapi.c @@ -49,10 +49,10 @@ #include "main/shaderobj.h" #include "main/transformfeedback.h" #include "main/uniforms.h" -#include "glsl/glsl_parser_extras.h" -#include "glsl/ir.h" -#include "glsl/ir_uniform.h" -#include "glsl/program.h" +#include "compiler/glsl/glsl_parser_extras.h" +#include "compiler/glsl/ir.h" +#include "compiler/glsl/ir_uniform.h" +#include "compiler/glsl/program.h" #include "program/program.h" #include "program/prog_print.h" #include "program/prog_parameter.h" diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 766a465cb11..2ced201ebca 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -31,10 +31,10 @@ #include "main/shaderapi.h" #include "main/shaderobj.h" #include "main/uniforms.h" -#include "glsl/ir.h" -#include "glsl/ir_uniform.h" -#include "glsl/glsl_parser_extras.h" -#include "glsl/program.h" +#include "compiler/glsl/ir.h" +#include "compiler/glsl/ir_uniform.h" +#include "compiler/glsl/glsl_parser_extras.h" +#include "compiler/glsl/program.h" #include "program/hash_table.h" diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c index 47010badbc6..b1968b3f795 100644 --- a/src/mesa/main/uniforms.c +++ b/src/mesa/main/uniforms.c @@ -43,7 +43,7 @@ #include "main/shaderobj.h" #include "main/uniforms.h" #include "main/enums.h" -#include "ir_uniform.h" +#include "compiler/glsl/ir_uniform.h" #include "compiler/glsl_types.h" #include "program/program.h" diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h index baf49a752b1..88fe9580167 100644 --- a/src/mesa/main/uniforms.h +++ b/src/mesa/main/uniforms.h @@ -28,7 +28,7 @@ #include "main/glheader.h" #include "compiler/glsl_types.h" -#include "glsl/ir_uniform.h" +#include "compiler/glsl/ir_uniform.h" 
#include "program/prog_parameter.h" #ifdef __cplusplus diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 70c477ec4fd..c9c30449734 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -35,16 +35,16 @@ #include "main/shaderapi.h" #include "main/shaderobj.h" #include "main/uniforms.h" -#include "glsl/ast.h" -#include "glsl/ir.h" -#include "glsl/ir_expression_flattening.h" -#include "glsl/ir_visitor.h" -#include "glsl/ir_optimization.h" -#include "glsl/ir_uniform.h" -#include "glsl/glsl_parser_extras.h" +#include "compiler/glsl/ast.h" +#include "compiler/glsl/ir.h" +#include "compiler/glsl/ir_expression_flattening.h" +#include "compiler/glsl/ir_visitor.h" +#include "compiler/glsl/ir_optimization.h" +#include "compiler/glsl/ir_uniform.h" +#include "compiler/glsl/glsl_parser_extras.h" #include "compiler/glsl_types.h" -#include "glsl/linker.h" -#include "glsl/program.h" +#include "compiler/glsl/linker.h" +#include "compiler/glsl/program.h" #include "program/hash_table.h" #include "program/prog_instruction.h" #include "program/prog_optimize.h" diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index a2ebb1ba77c..ebcc5288c2e 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -25,7 +25,7 @@ #include "compiler/nir/nir.h" #include "compiler/nir/nir_builder.h" -#include "glsl/list.h" +#include "compiler/glsl/list.h" #include "main/imports.h" #include "util/ralloc.h" diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index 9137d1858e9..f118552d64e 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -25,10 +25,10 @@ #include "main/mtypes.h" #include "compiler/glsl_types.h" -#include "glsl/ir.h" -#include "glsl/ir_uniform.h" -#include "glsl/ir_visitor.h" -#include "glsl/program.h" +#include "compiler/glsl/ir.h" +#include "compiler/glsl/ir_uniform.h" +#include "compiler/glsl/ir_visitor.h" +#include "compiler/glsl/program.h" #include "program/hash_table.h" #include "program/prog_parameter.h" #include "program/program.h" diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 03788f33468..10e294cd147 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -42,7 +42,7 @@ #include "main/macros.h" #include "main/varray.h" -#include "glsl/ir_uniform.h" +#include "compiler/glsl/ir_uniform.h" #include "vbo/vbo.h" diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index d424e3b335f..f5b8c33622e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -32,8 +32,8 @@ #include "st_glsl_to_tgsi.h" -#include "glsl_parser_extras.h" -#include "ir_optimization.h" +#include "compiler/glsl/glsl_parser_extras.h" +#include "compiler/glsl/ir_optimization.h" #include "main/errors.h" #include "main/shaderobj.h" -- cgit v1.2.3